igb_main.c revision 866cff06903ed63b7410c75ce8d4e0c86127a563
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2011 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/bitops.h>
32#include <linux/vmalloc.h>
33#include <linux/pagemap.h>
34#include <linux/netdevice.h>
35#include <linux/ipv6.h>
36#include <linux/slab.h>
37#include <net/checksum.h>
38#include <net/ip6_checksum.h>
39#include <linux/net_tstamp.h>
40#include <linux/mii.h>
41#include <linux/ethtool.h>
42#include <linux/if.h>
43#include <linux/if_vlan.h>
44#include <linux/pci.h>
45#include <linux/pci-aspm.h>
46#include <linux/delay.h>
47#include <linux/interrupt.h>
48#include <linux/ip.h>
49#include <linux/tcp.h>
50#include <linux/sctp.h>
51#include <linux/if_ether.h>
52#include <linux/aer.h>
53#include <linux/prefetch.h>
54#ifdef CONFIG_IGB_DCA
55#include <linux/dca.h>
56#endif
57#include "igb.h"
58
59#define MAJ 3
60#define MIN 0
61#define BUILD 6
62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63__stringify(BUILD) "-k"
64char igb_driver_name[] = "igb";
65char igb_driver_version[] = DRV_VERSION;
66static const char igb_driver_string[] =
67				"Intel(R) Gigabit Ethernet Network Driver";
68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70static const struct e1000_info *igb_info_tbl[] = {
71	[board_82575] = &e1000_82575_info,
72};
73
74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100	/* required last entry */
101	{0, }
102};
103
104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106void igb_reset(struct igb_adapter *);
107static int igb_setup_all_tx_resources(struct igb_adapter *);
108static int igb_setup_all_rx_resources(struct igb_adapter *);
109static void igb_free_all_tx_resources(struct igb_adapter *);
110static void igb_free_all_rx_resources(struct igb_adapter *);
111static void igb_setup_mrqc(struct igb_adapter *);
112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113static void __devexit igb_remove(struct pci_dev *pdev);
114static void igb_init_hw_timer(struct igb_adapter *adapter);
115static int igb_sw_init(struct igb_adapter *);
116static int igb_open(struct net_device *);
117static int igb_close(struct net_device *);
118static void igb_configure_tx(struct igb_adapter *);
119static void igb_configure_rx(struct igb_adapter *);
120static void igb_clean_all_tx_rings(struct igb_adapter *);
121static void igb_clean_all_rx_rings(struct igb_adapter *);
122static void igb_clean_tx_ring(struct igb_ring *);
123static void igb_clean_rx_ring(struct igb_ring *);
124static void igb_set_rx_mode(struct net_device *);
125static void igb_update_phy_info(unsigned long);
126static void igb_watchdog(unsigned long);
127static void igb_watchdog_task(struct work_struct *);
128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130						 struct rtnl_link_stats64 *stats);
131static int igb_change_mtu(struct net_device *, int);
132static int igb_set_mac(struct net_device *, void *);
133static void igb_set_uta(struct igb_adapter *adapter);
134static irqreturn_t igb_intr(int irq, void *);
135static irqreturn_t igb_intr_msi(int irq, void *);
136static irqreturn_t igb_msix_other(int irq, void *);
137static irqreturn_t igb_msix_ring(int irq, void *);
138#ifdef CONFIG_IGB_DCA
139static void igb_update_dca(struct igb_q_vector *);
140static void igb_setup_dca(struct igb_adapter *);
141#endif /* CONFIG_IGB_DCA */
142static int igb_poll(struct napi_struct *, int);
143static bool igb_clean_tx_irq(struct igb_q_vector *);
144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146static void igb_tx_timeout(struct net_device *);
147static void igb_reset_task(struct work_struct *);
148static void igb_vlan_mode(struct net_device *netdev, u32 features);
149static void igb_vlan_rx_add_vid(struct net_device *, u16);
150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151static void igb_restore_vlan(struct igb_adapter *);
152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
153static void igb_ping_all_vfs(struct igb_adapter *);
154static void igb_msg_task(struct igb_adapter *);
155static void igb_vmm_control(struct igb_adapter *);
156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160			       int vf, u16 vlan, u8 qos);
161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163				 struct ifla_vf_info *ivi);
164static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166#ifdef CONFIG_PM
167static int igb_suspend(struct pci_dev *, pm_message_t);
168static int igb_resume(struct pci_dev *);
169#endif
170static void igb_shutdown(struct pci_dev *);
171#ifdef CONFIG_IGB_DCA
172static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173static struct notifier_block dca_notifier = {
174	.notifier_call	= igb_notify_dca,
175	.next		= NULL,
176	.priority	= 0
177};
178#endif
179#ifdef CONFIG_NET_POLL_CONTROLLER
180/* for netdump / net console */
181static void igb_netpoll(struct net_device *);
182#endif
183#ifdef CONFIG_PCI_IOV
184static unsigned int max_vfs = 0;
185module_param(max_vfs, uint, 0);
186MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187                 "per physical function");
188#endif /* CONFIG_PCI_IOV */
189
190static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191		     pci_channel_state_t);
192static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193static void igb_io_resume(struct pci_dev *);
194
195static struct pci_error_handlers igb_err_handler = {
196	.error_detected = igb_io_error_detected,
197	.slot_reset = igb_io_slot_reset,
198	.resume = igb_io_resume,
199};
200
201
202static struct pci_driver igb_driver = {
203	.name     = igb_driver_name,
204	.id_table = igb_pci_tbl,
205	.probe    = igb_probe,
206	.remove   = __devexit_p(igb_remove),
207#ifdef CONFIG_PM
208	/* Power Management Hooks */
209	.suspend  = igb_suspend,
210	.resume   = igb_resume,
211#endif
212	.shutdown = igb_shutdown,
213	.err_handler = &igb_err_handler
214};
215
216MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218MODULE_LICENSE("GPL");
219MODULE_VERSION(DRV_VERSION);
220
221struct igb_reg_info {
222	u32 ofs;
223	char *name;
224};
225
226static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228	/* General Registers */
229	{E1000_CTRL, "CTRL"},
230	{E1000_STATUS, "STATUS"},
231	{E1000_CTRL_EXT, "CTRL_EXT"},
232
233	/* Interrupt Registers */
234	{E1000_ICR, "ICR"},
235
236	/* RX Registers */
237	{E1000_RCTL, "RCTL"},
238	{E1000_RDLEN(0), "RDLEN"},
239	{E1000_RDH(0), "RDH"},
240	{E1000_RDT(0), "RDT"},
241	{E1000_RXDCTL(0), "RXDCTL"},
242	{E1000_RDBAL(0), "RDBAL"},
243	{E1000_RDBAH(0), "RDBAH"},
244
245	/* TX Registers */
246	{E1000_TCTL, "TCTL"},
247	{E1000_TDBAL(0), "TDBAL"},
248	{E1000_TDBAH(0), "TDBAH"},
249	{E1000_TDLEN(0), "TDLEN"},
250	{E1000_TDH(0), "TDH"},
251	{E1000_TDT(0), "TDT"},
252	{E1000_TXDCTL(0), "TXDCTL"},
253	{E1000_TDFH, "TDFH"},
254	{E1000_TDFT, "TDFT"},
255	{E1000_TDFHS, "TDFHS"},
256	{E1000_TDFPC, "TDFPC"},
257
258	/* List Terminator */
259	{}
260};
261
262/*
263 * igb_regdump - register printout routine
264 */
265static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266{
267	int n = 0;
268	char rname[16];
269	u32 regs[8];
270
271	switch (reginfo->ofs) {
272	case E1000_RDLEN(0):
273		for (n = 0; n < 4; n++)
274			regs[n] = rd32(E1000_RDLEN(n));
275		break;
276	case E1000_RDH(0):
277		for (n = 0; n < 4; n++)
278			regs[n] = rd32(E1000_RDH(n));
279		break;
280	case E1000_RDT(0):
281		for (n = 0; n < 4; n++)
282			regs[n] = rd32(E1000_RDT(n));
283		break;
284	case E1000_RXDCTL(0):
285		for (n = 0; n < 4; n++)
286			regs[n] = rd32(E1000_RXDCTL(n));
287		break;
288	case E1000_RDBAL(0):
289		for (n = 0; n < 4; n++)
290			regs[n] = rd32(E1000_RDBAL(n));
291		break;
292	case E1000_RDBAH(0):
293		for (n = 0; n < 4; n++)
294			regs[n] = rd32(E1000_RDBAH(n));
295		break;
296	case E1000_TDBAL(0):
297		for (n = 0; n < 4; n++)
298			regs[n] = rd32(E1000_TDBAL(n));
299		break;
300	case E1000_TDBAH(0):
301		for (n = 0; n < 4; n++)
302			regs[n] = rd32(E1000_TDBAH(n));
303		break;
304	case E1000_TDLEN(0):
305		for (n = 0; n < 4; n++)
306			regs[n] = rd32(E1000_TDLEN(n));
307		break;
308	case E1000_TDH(0):
309		for (n = 0; n < 4; n++)
310			regs[n] = rd32(E1000_TDH(n));
311		break;
312	case E1000_TDT(0):
313		for (n = 0; n < 4; n++)
314			regs[n] = rd32(E1000_TDT(n));
315		break;
316	case E1000_TXDCTL(0):
317		for (n = 0; n < 4; n++)
318			regs[n] = rd32(E1000_TXDCTL(n));
319		break;
320	default:
321		printk(KERN_INFO "%-15s %08x\n",
322			reginfo->name, rd32(reginfo->ofs));
323		return;
324	}
325
326	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327	printk(KERN_INFO "%-15s ", rname);
328	for (n = 0; n < 4; n++)
329		printk(KERN_CONT "%08x ", regs[n]);
330	printk(KERN_CONT "\n");
331}
332
333/*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336static void igb_dump(struct igb_adapter *adapter)
337{
338	struct net_device *netdev = adapter->netdev;
339	struct e1000_hw *hw = &adapter->hw;
340	struct igb_reg_info *reginfo;
341	struct igb_ring *tx_ring;
342	union e1000_adv_tx_desc *tx_desc;
343	struct my_u0 { u64 a; u64 b; } *u0;
344	struct igb_ring *rx_ring;
345	union e1000_adv_rx_desc *rx_desc;
346	u32 staterr;
347	u16 i, n;
348
349	if (!netif_msg_hw(adapter))
350		return;
351
352	/* Print netdevice Info */
353	if (netdev) {
354		dev_info(&adapter->pdev->dev, "Net device Info\n");
355		printk(KERN_INFO "Device Name     state            "
356			"trans_start      last_rx\n");
357		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358		netdev->name,
359		netdev->state,
360		netdev->trans_start,
361		netdev->last_rx);
362	}
363
364	/* Print Registers */
365	dev_info(&adapter->pdev->dev, "Register Dump\n");
366	printk(KERN_INFO " Register Name   Value\n");
367	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368	     reginfo->name; reginfo++) {
369		igb_regdump(hw, reginfo);
370	}
371
372	/* Print TX Ring Summary */
373	if (!netdev || !netif_running(netdev))
374		goto exit;
375
376	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
378		" leng ntw timestamp\n");
379	for (n = 0; n < adapter->num_tx_queues; n++) {
380		struct igb_tx_buffer *buffer_info;
381		tx_ring = adapter->tx_ring[n];
382		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
385			   (u64)buffer_info->dma,
386			   buffer_info->length,
387			   buffer_info->next_to_watch,
388			   (u64)buffer_info->time_stamp);
389	}
390
391	/* Print TX Rings */
392	if (!netif_msg_tx_done(adapter))
393		goto rx_ring_summary;
394
395	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397	/* Transmit Descriptor Formats
398	 *
399	 * Advanced Transmit Descriptor
400	 *   +--------------------------------------------------------------+
401	 * 0 |         Buffer Address [63:0]                                |
402	 *   +--------------------------------------------------------------+
403	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
404	 *   +--------------------------------------------------------------+
405	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
406	 */
407
408	for (n = 0; n < adapter->num_tx_queues; n++) {
409		tx_ring = adapter->tx_ring[n];
410		printk(KERN_INFO "------------------------------------\n");
411		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412		printk(KERN_INFO "------------------------------------\n");
413		printk(KERN_INFO "T [desc]     [address 63:0  ] "
414			"[PlPOCIStDDM Ln] [bi->dma       ] "
415			"leng  ntw timestamp        bi->skb\n");
416
417		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418			struct igb_tx_buffer *buffer_info;
419			tx_desc = IGB_TX_DESC(tx_ring, i);
420			buffer_info = &tx_ring->tx_buffer_info[i];
421			u0 = (struct my_u0 *)tx_desc;
422			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
423				" %04X  %p %016llX %p", i,
424				le64_to_cpu(u0->a),
425				le64_to_cpu(u0->b),
426				(u64)buffer_info->dma,
427				buffer_info->length,
428				buffer_info->next_to_watch,
429				(u64)buffer_info->time_stamp,
430				buffer_info->skb);
431			if (i == tx_ring->next_to_use &&
432				i == tx_ring->next_to_clean)
433				printk(KERN_CONT " NTC/U\n");
434			else if (i == tx_ring->next_to_use)
435				printk(KERN_CONT " NTU\n");
436			else if (i == tx_ring->next_to_clean)
437				printk(KERN_CONT " NTC\n");
438			else
439				printk(KERN_CONT "\n");
440
441			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442				print_hex_dump(KERN_INFO, "",
443					DUMP_PREFIX_ADDRESS,
444					16, 1, phys_to_virt(buffer_info->dma),
445					buffer_info->length, true);
446		}
447	}
448
449	/* Print RX Rings Summary */
450rx_ring_summary:
451	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452	printk(KERN_INFO "Queue [NTU] [NTC]\n");
453	for (n = 0; n < adapter->num_rx_queues; n++) {
454		rx_ring = adapter->rx_ring[n];
455		printk(KERN_INFO " %5d %5X %5X\n", n,
456			   rx_ring->next_to_use, rx_ring->next_to_clean);
457	}
458
459	/* Print RX Rings */
460	if (!netif_msg_rx_status(adapter))
461		goto exit;
462
463	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465	/* Advanced Receive Descriptor (Read) Format
466	 *    63                                           1        0
467	 *    +-----------------------------------------------------+
468	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
469	 *    +----------------------------------------------+------+
470	 *  8 |       Header Buffer Address [63:1]           |  DD  |
471	 *    +-----------------------------------------------------+
472	 *
473	 *
474	 * Advanced Receive Descriptor (Write-Back) Format
475	 *
476	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
477	 *   +------------------------------------------------------+
478	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
479	 *   | Checksum   Ident  |   |           |    | Type | Type |
480	 *   +------------------------------------------------------+
481	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482	 *   +------------------------------------------------------+
483	 *   63       48 47    32 31            20 19               0
484	 */
485
486	for (n = 0; n < adapter->num_rx_queues; n++) {
487		rx_ring = adapter->rx_ring[n];
488		printk(KERN_INFO "------------------------------------\n");
489		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490		printk(KERN_INFO "------------------------------------\n");
491		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
492			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
493			"<-- Adv Rx Read format\n");
494		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
495			"[vl er S cks ln] ---------------- [bi->skb] "
496			"<-- Adv Rx Write-Back format\n");
497
498		for (i = 0; i < rx_ring->count; i++) {
499			struct igb_rx_buffer *buffer_info;
500			buffer_info = &rx_ring->rx_buffer_info[i];
501			rx_desc = IGB_RX_DESC(rx_ring, i);
502			u0 = (struct my_u0 *)rx_desc;
503			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504			if (staterr & E1000_RXD_STAT_DD) {
505				/* Descriptor Done */
506				printk(KERN_INFO "RWB[0x%03X]     %016llX "
507					"%016llX ---------------- %p", i,
508					le64_to_cpu(u0->a),
509					le64_to_cpu(u0->b),
510					buffer_info->skb);
511			} else {
512				printk(KERN_INFO "R  [0x%03X]     %016llX "
513					"%016llX %016llX %p", i,
514					le64_to_cpu(u0->a),
515					le64_to_cpu(u0->b),
516					(u64)buffer_info->dma,
517					buffer_info->skb);
518
519				if (netif_msg_pktdata(adapter)) {
520					print_hex_dump(KERN_INFO, "",
521						DUMP_PREFIX_ADDRESS,
522						16, 1,
523						phys_to_virt(buffer_info->dma),
524						IGB_RX_HDR_LEN, true);
525					print_hex_dump(KERN_INFO, "",
526					  DUMP_PREFIX_ADDRESS,
527					  16, 1,
528					  phys_to_virt(
529					    buffer_info->page_dma +
530					    buffer_info->page_offset),
531					  PAGE_SIZE/2, true);
532				}
533			}
534
535			if (i == rx_ring->next_to_use)
536				printk(KERN_CONT " NTU\n");
537			else if (i == rx_ring->next_to_clean)
538				printk(KERN_CONT " NTC\n");
539			else
540				printk(KERN_CONT "\n");
541
542		}
543	}
544
545exit:
546	return;
547}
548
549
550/**
551 * igb_read_clock - read raw cycle counter (to be used by time counter)
552 */
553static cycle_t igb_read_clock(const struct cyclecounter *tc)
554{
555	struct igb_adapter *adapter =
556		container_of(tc, struct igb_adapter, cycles);
557	struct e1000_hw *hw = &adapter->hw;
558	u64 stamp = 0;
559	int shift = 0;
560
561	/*
562	 * The timestamp latches on lowest register read. For the 82580
563	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
564	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
565	 */
566	if (hw->mac.type == e1000_82580) {
567		stamp = rd32(E1000_SYSTIMR) >> 8;
568		shift = IGB_82580_TSYNC_SHIFT;
569	}
570
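	/*
	 * Per the code above: on non-82580 parts shift stays 0, so the value
	 * below is simply SYSTIML | ((u64)SYSTIMH << 32); the 82580 additionally
	 * packs the sub-LSB SYSTIMR bits below SYSTIML via IGB_82580_TSYNC_SHIFT.
	 */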
571	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573	return stamp;
574}
575
576/**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581{
582	struct igb_adapter *adapter = hw->back;
583	return adapter->netdev;
584}
585
586/**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592static int __init igb_init_module(void)
593{
594	int ret;
595	printk(KERN_INFO "%s - version %s\n",
596	       igb_driver_string, igb_driver_version);
597
598	printk(KERN_INFO "%s\n", igb_copyright);
599
600#ifdef CONFIG_IGB_DCA
601	dca_register_notify(&dca_notifier);
602#endif
603	ret = pci_register_driver(&igb_driver);
604	return ret;
605}
606
607module_init(igb_init_module);
608
609/**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615static void __exit igb_exit_module(void)
616{
617#ifdef CONFIG_IGB_DCA
618	dca_unregister_notify(&dca_notifier);
619#endif
620	pci_unregister_driver(&igb_driver);
621}
622
623module_exit(igb_exit_module);
624
625#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
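/*
 * Q_IDX_82576(i) interleaves queue indices so that even queues map to
 * register indices 0-7 and odd queues to 8-15 (0->0, 1->8, 2->1, 3->9, ...),
 * matching the VF queue pairing described in igb_cache_ring_register() below.
 */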
626/**
627 * igb_cache_ring_register - Descriptor ring to register mapping
628 * @adapter: board private structure to initialize
629 *
630 * Once we know the feature-set enabled for the device, we'll cache
631 * the register offset the descriptor ring is assigned to.
632 **/
633static void igb_cache_ring_register(struct igb_adapter *adapter)
634{
635	int i = 0, j = 0;
636	u32 rbase_offset = adapter->vfs_allocated_count;
637
638	switch (adapter->hw.mac.type) {
639	case e1000_82576:
640		/* The queues are allocated for virtualization such that VF 0
641		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642		 * In order to avoid collision we start at the first free queue
643		 * and continue consuming queues in the same sequence
644		 */
645		if (adapter->vfs_allocated_count) {
646			for (; i < adapter->rss_queues; i++)
647				adapter->rx_ring[i]->reg_idx = rbase_offset +
648				                               Q_IDX_82576(i);
649		}
650	case e1000_82575:
651	case e1000_82580:
652	case e1000_i350:
653	default:
654		for (; i < adapter->num_rx_queues; i++)
655			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656		for (; j < adapter->num_tx_queues; j++)
657			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658		break;
659	}
660}
661
662static void igb_free_queues(struct igb_adapter *adapter)
663{
664	int i;
665
666	for (i = 0; i < adapter->num_tx_queues; i++) {
667		kfree(adapter->tx_ring[i]);
668		adapter->tx_ring[i] = NULL;
669	}
670	for (i = 0; i < adapter->num_rx_queues; i++) {
671		kfree(adapter->rx_ring[i]);
672		adapter->rx_ring[i] = NULL;
673	}
674	adapter->num_rx_queues = 0;
675	adapter->num_tx_queues = 0;
676}
677
678/**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685static int igb_alloc_queues(struct igb_adapter *adapter)
686{
687	struct igb_ring *ring;
688	int i;
689	int orig_node = adapter->node;
690
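	/*
	 * A node value of -1 means no NUMA node was pinned; in that case the
	 * loops below walk the online nodes round-robin so ring memory is
	 * spread across nodes, falling back to a plain kzalloc() if the
	 * node-local allocation fails.
	 */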
691	for (i = 0; i < adapter->num_tx_queues; i++) {
692		if (orig_node == -1) {
693			int cur_node = next_online_node(adapter->node);
694			if (cur_node == MAX_NUMNODES)
695				cur_node = first_online_node;
696			adapter->node = cur_node;
697		}
698		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699				    adapter->node);
700		if (!ring)
701			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702		if (!ring)
703			goto err;
704		ring->count = adapter->tx_ring_count;
705		ring->queue_index = i;
706		ring->dev = &adapter->pdev->dev;
707		ring->netdev = adapter->netdev;
708		ring->numa_node = adapter->node;
709		/* For 82575, context index must be unique per ring. */
710		if (adapter->hw.mac.type == e1000_82575)
711			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712		adapter->tx_ring[i] = ring;
713	}
714	/* Restore the adapter's original node */
715	adapter->node = orig_node;
716
717	for (i = 0; i < adapter->num_rx_queues; i++) {
718		if (orig_node == -1) {
719			int cur_node = next_online_node(adapter->node);
720			if (cur_node == MAX_NUMNODES)
721				cur_node = first_online_node;
722			adapter->node = cur_node;
723		}
724		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725				    adapter->node);
726		if (!ring)
727			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728		if (!ring)
729			goto err;
730		ring->count = adapter->rx_ring_count;
731		ring->queue_index = i;
732		ring->dev = &adapter->pdev->dev;
733		ring->netdev = adapter->netdev;
734		ring->numa_node = adapter->node;
735		/* enable rx checksum */
736		set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
737		/* set flag indicating ring supports SCTP checksum offload */
738		if (adapter->hw.mac.type >= e1000_82576)
739			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
740		adapter->rx_ring[i] = ring;
741	}
742	/* Restore the adapter's original node */
743	adapter->node = orig_node;
744
745	igb_cache_ring_register(adapter);
746
747	return 0;
748
749err:
750	/* Restore the adapter's original node */
751	adapter->node = orig_node;
752	igb_free_queues(adapter);
753
754	return -ENOMEM;
755}
756
757#define IGB_N0_QUEUE -1
758static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
759{
760	u32 msixbm = 0;
761	struct igb_adapter *adapter = q_vector->adapter;
762	struct e1000_hw *hw = &adapter->hw;
763	u32 ivar, index;
764	int rx_queue = IGB_N0_QUEUE;
765	int tx_queue = IGB_N0_QUEUE;
766
767	if (q_vector->rx_ring)
768		rx_queue = q_vector->rx_ring->reg_idx;
769	if (q_vector->tx_ring)
770		tx_queue = q_vector->tx_ring->reg_idx;
771
772	switch (hw->mac.type) {
773	case e1000_82575:
774		/* The 82575 assigns vectors using a bitmask, which matches the
775		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
776		   or more queues to a vector, we write the appropriate bits
777		   into the MSIXBM register for that vector. */
778		if (rx_queue > IGB_N0_QUEUE)
779			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
780		if (tx_queue > IGB_N0_QUEUE)
781			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
782		if (!adapter->msix_entries && msix_vector == 0)
783			msixbm |= E1000_EIMS_OTHER;
784		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
785		q_vector->eims_value = msixbm;
786		break;
787	case e1000_82576:
788		/* 82576 uses a table-based method for assigning vectors.
789		   Each queue has a single entry in the table to which we write
790		   a vector number along with a "valid" bit.  Sadly, the layout
791		   of the table is somewhat counterintuitive. */
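		/* Layout per the code below: IVAR0[queue & 0x7] holds the RX
		 * vector in byte 0 (queues 0-7) or byte 2 (queues 8-15), and
		 * the TX vector in byte 1 or byte 3, each OR'd with
		 * E1000_IVAR_VALID. */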
792		if (rx_queue > IGB_N0_QUEUE) {
793			index = (rx_queue & 0x7);
794			ivar = array_rd32(E1000_IVAR0, index);
795			if (rx_queue < 8) {
796				/* vector goes into low byte of register */
797				ivar = ivar & 0xFFFFFF00;
798				ivar |= msix_vector | E1000_IVAR_VALID;
799			} else {
800				/* vector goes into third byte of register */
801				ivar = ivar & 0xFF00FFFF;
802				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803			}
804			array_wr32(E1000_IVAR0, index, ivar);
805		}
806		if (tx_queue > IGB_N0_QUEUE) {
807			index = (tx_queue & 0x7);
808			ivar = array_rd32(E1000_IVAR0, index);
809			if (tx_queue < 8) {
810				/* vector goes into second byte of register */
811				ivar = ivar & 0xFFFF00FF;
812				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
813			} else {
814				/* vector goes into high byte of register */
815				ivar = ivar & 0x00FFFFFF;
816				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817			}
818			array_wr32(E1000_IVAR0, index, ivar);
819		}
820		q_vector->eims_value = 1 << msix_vector;
821		break;
822	case e1000_82580:
823	case e1000_i350:
824		/* 82580 uses the same table-based approach as 82576 but has fewer
825		   entries; as a result queue pairs share an entry (index = queue >> 1). */
826		if (rx_queue > IGB_N0_QUEUE) {
827			index = (rx_queue >> 1);
828			ivar = array_rd32(E1000_IVAR0, index);
829			if (rx_queue & 0x1) {
830				/* vector goes into third byte of register */
831				ivar = ivar & 0xFF00FFFF;
832				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
833			} else {
834				/* vector goes into low byte of register */
835				ivar = ivar & 0xFFFFFF00;
836				ivar |= msix_vector | E1000_IVAR_VALID;
837			}
838			array_wr32(E1000_IVAR0, index, ivar);
839		}
840		if (tx_queue > IGB_N0_QUEUE) {
841			index = (tx_queue >> 1);
842			ivar = array_rd32(E1000_IVAR0, index);
843			if (tx_queue & 0x1) {
844				/* vector goes into high byte of register */
845				ivar = ivar & 0x00FFFFFF;
846				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
847			} else {
848				/* vector goes into second byte of register */
849				ivar = ivar & 0xFFFF00FF;
850				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
851			}
852			array_wr32(E1000_IVAR0, index, ivar);
853		}
854		q_vector->eims_value = 1 << msix_vector;
855		break;
856	default:
857		BUG();
858		break;
859	}
860
861	/* add q_vector eims value to global eims_enable_mask */
862	adapter->eims_enable_mask |= q_vector->eims_value;
863
864	/* configure q_vector to set itr on first interrupt */
865	q_vector->set_itr = 1;
866}
867
868/**
869 * igb_configure_msix - Configure MSI-X hardware
870 *
871 * igb_configure_msix sets up the hardware to properly
872 * generate MSI-X interrupts.
873 **/
874static void igb_configure_msix(struct igb_adapter *adapter)
875{
876	u32 tmp;
877	int i, vector = 0;
878	struct e1000_hw *hw = &adapter->hw;
879
880	adapter->eims_enable_mask = 0;
881
882	/* set vector for other causes, i.e. link changes */
883	switch (hw->mac.type) {
884	case e1000_82575:
885		tmp = rd32(E1000_CTRL_EXT);
886		/* enable MSI-X PBA support*/
887		tmp |= E1000_CTRL_EXT_PBA_CLR;
888
889		/* Auto-Mask interrupts upon ICR read. */
890		tmp |= E1000_CTRL_EXT_EIAME;
891		tmp |= E1000_CTRL_EXT_IRCA;
892
893		wr32(E1000_CTRL_EXT, tmp);
894
895		/* enable msix_other interrupt */
896		array_wr32(E1000_MSIXBM(0), vector++,
897		                      E1000_EIMS_OTHER);
898		adapter->eims_other = E1000_EIMS_OTHER;
899
900		break;
901
902	case e1000_82576:
903	case e1000_82580:
904	case e1000_i350:
905		/* Turn on MSI-X capability first, or our settings
906		 * won't stick.  And it will take days to debug. */
907		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
909		                E1000_GPIE_NSICR);
910
911		/* enable msix_other interrupt */
912		adapter->eims_other = 1 << vector;
913		tmp = (vector++ | E1000_IVAR_VALID) << 8;
914
915		wr32(E1000_IVAR_MISC, tmp);
916		break;
917	default:
918		/* do nothing, since nothing else supports MSI-X */
919		break;
920	} /* switch (hw->mac.type) */
921
922	adapter->eims_enable_mask |= adapter->eims_other;
923
924	for (i = 0; i < adapter->num_q_vectors; i++)
925		igb_assign_vector(adapter->q_vector[i], vector++);
926
927	wrfl();
928}
929
930/**
931 * igb_request_msix - Initialize MSI-X interrupts
932 *
933 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934 * kernel.
935 **/
936static int igb_request_msix(struct igb_adapter *adapter)
937{
938	struct net_device *netdev = adapter->netdev;
939	struct e1000_hw *hw = &adapter->hw;
940	int i, err = 0, vector = 0;
941
942	err = request_irq(adapter->msix_entries[vector].vector,
943	                  igb_msix_other, 0, netdev->name, adapter);
944	if (err)
945		goto out;
946	vector++;
947
948	for (i = 0; i < adapter->num_q_vectors; i++) {
949		struct igb_q_vector *q_vector = adapter->q_vector[i];
950
951		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
952
953		if (q_vector->rx_ring && q_vector->tx_ring)
954			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955			        q_vector->rx_ring->queue_index);
956		else if (q_vector->tx_ring)
957			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958			        q_vector->tx_ring->queue_index);
959		else if (q_vector->rx_ring)
960			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961			        q_vector->rx_ring->queue_index);
962		else
963			sprintf(q_vector->name, "%s-unused", netdev->name);
964
965		err = request_irq(adapter->msix_entries[vector].vector,
966		                  igb_msix_ring, 0, q_vector->name,
967		                  q_vector);
968		if (err)
969			goto out;
970		vector++;
971	}
972
973	igb_configure_msix(adapter);
974	return 0;
975out:
976	return err;
977}
978
979static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
980{
981	if (adapter->msix_entries) {
982		pci_disable_msix(adapter->pdev);
983		kfree(adapter->msix_entries);
984		adapter->msix_entries = NULL;
985	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986		pci_disable_msi(adapter->pdev);
987	}
988}
989
990/**
991 * igb_free_q_vectors - Free memory allocated for interrupt vectors
992 * @adapter: board private structure to initialize
993 *
994 * This function frees the memory allocated to the q_vectors.  In addition if
995 * NAPI is enabled it will delete any references to the NAPI struct prior
996 * to freeing the q_vector.
997 **/
998static void igb_free_q_vectors(struct igb_adapter *adapter)
999{
1000	int v_idx;
1001
1002	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004		adapter->q_vector[v_idx] = NULL;
1005		if (!q_vector)
1006			continue;
1007		netif_napi_del(&q_vector->napi);
1008		kfree(q_vector);
1009	}
1010	adapter->num_q_vectors = 0;
1011}
1012
1013/**
1014 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1015 *
1016 * This function resets the device so that it has 0 rx queues, tx queues, and
1017 * MSI-X interrupts allocated.
1018 */
1019static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1020{
1021	igb_free_queues(adapter);
1022	igb_free_q_vectors(adapter);
1023	igb_reset_interrupt_capability(adapter);
1024}
1025
1026/**
1027 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1028 *
1029 * Attempt to configure interrupts using the best available
1030 * capabilities of the hardware and kernel.
1031 **/
1032static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1033{
1034	int err;
1035	int numvecs, i;
1036
1037	/* Number of supported queues. */
1038	adapter->num_rx_queues = adapter->rss_queues;
1039	if (adapter->vfs_allocated_count)
1040		adapter->num_tx_queues = 1;
1041	else
1042		adapter->num_tx_queues = adapter->rss_queues;
1043
1044	/* start with one vector for every rx queue */
1045	numvecs = adapter->num_rx_queues;
1046
1047	/* if tx handler is separate add 1 for every tx queue */
1048	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049		numvecs += adapter->num_tx_queues;
1050
1051	/* store the number of vectors reserved for queues */
1052	adapter->num_q_vectors = numvecs;
1053
1054	/* add 1 vector for link status interrupts */
1055	numvecs++;
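	/* e.g. with 4 RSS queues and paired Tx/Rx handlers this yields
	 * 4 queue vectors + 1 link/other vector = 5 MSI-X entries. */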
1056	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1057					GFP_KERNEL);
1058	if (!adapter->msix_entries)
1059		goto msi_only;
1060
1061	for (i = 0; i < numvecs; i++)
1062		adapter->msix_entries[i].entry = i;
1063
1064	err = pci_enable_msix(adapter->pdev,
1065			      adapter->msix_entries,
1066			      numvecs);
1067	if (err == 0)
1068		goto out;
1069
1070	igb_reset_interrupt_capability(adapter);
1071
1072	/* If we can't do MSI-X, try MSI */
1073msi_only:
1074#ifdef CONFIG_PCI_IOV
1075	/* disable SR-IOV for non MSI-X configurations */
1076	if (adapter->vf_data) {
1077		struct e1000_hw *hw = &adapter->hw;
1078		/* disable iov and allow time for transactions to clear */
1079		pci_disable_sriov(adapter->pdev);
1080		msleep(500);
1081
1082		kfree(adapter->vf_data);
1083		adapter->vf_data = NULL;
1084		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1085		wrfl();
1086		msleep(100);
1087		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1088	}
1089#endif
1090	adapter->vfs_allocated_count = 0;
1091	adapter->rss_queues = 1;
1092	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1093	adapter->num_rx_queues = 1;
1094	adapter->num_tx_queues = 1;
1095	adapter->num_q_vectors = 1;
1096	if (!pci_enable_msi(adapter->pdev))
1097		adapter->flags |= IGB_FLAG_HAS_MSI;
1098out:
1099	/* Notify the stack of the (possibly) reduced queue counts. */
1100	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1101	return netif_set_real_num_rx_queues(adapter->netdev,
1102					    adapter->num_rx_queues);
1103}
1104
1105/**
1106 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1107 * @adapter: board private structure to initialize
1108 *
1109 * We allocate one q_vector per queue interrupt.  If allocation fails we
1110 * return -ENOMEM.
1111 **/
1112static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1113{
1114	struct igb_q_vector *q_vector;
1115	struct e1000_hw *hw = &adapter->hw;
1116	int v_idx;
1117	int orig_node = adapter->node;
1118
1119	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1120		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1121						adapter->num_tx_queues)) &&
1122		    (adapter->num_rx_queues == v_idx))
1123			adapter->node = orig_node;
1124		if (orig_node == -1) {
1125			int cur_node = next_online_node(adapter->node);
1126			if (cur_node == MAX_NUMNODES)
1127				cur_node = first_online_node;
1128			adapter->node = cur_node;
1129		}
1130		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1131					adapter->node);
1132		if (!q_vector)
1133			q_vector = kzalloc(sizeof(struct igb_q_vector),
1134					   GFP_KERNEL);
1135		if (!q_vector)
1136			goto err_out;
1137		q_vector->adapter = adapter;
1138		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1139		q_vector->itr_val = IGB_START_ITR;
1140		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1141		adapter->q_vector[v_idx] = q_vector;
1142	}
1143	/* Restore the adapter's original node */
1144	adapter->node = orig_node;
1145
1146	return 0;
1147
1148err_out:
1149	/* Restore the adapter's original node */
1150	adapter->node = orig_node;
1151	igb_free_q_vectors(adapter);
1152	return -ENOMEM;
1153}
1154
1155static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1156                                      int ring_idx, int v_idx)
1157{
1158	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1159
1160	q_vector->rx_ring = adapter->rx_ring[ring_idx];
1161	q_vector->rx_ring->q_vector = q_vector;
1162	q_vector->itr_val = adapter->rx_itr_setting;
1163	if (q_vector->itr_val && q_vector->itr_val <= 3)
1164		q_vector->itr_val = IGB_START_ITR;
1165}
1166
1167static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1168                                      int ring_idx, int v_idx)
1169{
1170	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1171
1172	q_vector->tx_ring = adapter->tx_ring[ring_idx];
1173	q_vector->tx_ring->q_vector = q_vector;
1174	q_vector->itr_val = adapter->tx_itr_setting;
1175	q_vector->tx_work_limit = adapter->tx_work_limit;
1176	if (q_vector->itr_val && q_vector->itr_val <= 3)
1177		q_vector->itr_val = IGB_START_ITR;
1178}
1179
1180/**
1181 * igb_map_ring_to_vector - maps allocated queues to vectors
1182 *
1183 * This function maps the recently allocated queues to vectors.
1184 **/
1185static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1186{
1187	int i;
1188	int v_idx = 0;
1189
1190	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1191	    (adapter->num_q_vectors < adapter->num_tx_queues))
1192		return -ENOMEM;
1193
1194	if (adapter->num_q_vectors >=
1195	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1196		for (i = 0; i < adapter->num_rx_queues; i++)
1197			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1198		for (i = 0; i < adapter->num_tx_queues; i++)
1199			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1200	} else {
1201		for (i = 0; i < adapter->num_rx_queues; i++) {
1202			if (i < adapter->num_tx_queues)
1203				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1204			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1205		}
1206		for (; i < adapter->num_tx_queues; i++)
1207			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1208	}
1209	return 0;
1210}
1211
1212/**
1213 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1214 *
1215 * This function initializes the interrupts and allocates all of the queues.
1216 **/
1217static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1218{
1219	struct pci_dev *pdev = adapter->pdev;
1220	int err;
1221
1222	err = igb_set_interrupt_capability(adapter);
1223	if (err)
1224		return err;
1225
1226	err = igb_alloc_q_vectors(adapter);
1227	if (err) {
1228		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1229		goto err_alloc_q_vectors;
1230	}
1231
1232	err = igb_alloc_queues(adapter);
1233	if (err) {
1234		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1235		goto err_alloc_queues;
1236	}
1237
1238	err = igb_map_ring_to_vector(adapter);
1239	if (err) {
1240		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1241		goto err_map_queues;
1242	}
1243
1244
1245	return 0;
1246err_map_queues:
1247	igb_free_queues(adapter);
1248err_alloc_queues:
1249	igb_free_q_vectors(adapter);
1250err_alloc_q_vectors:
1251	igb_reset_interrupt_capability(adapter);
1252	return err;
1253}
1254
1255/**
1256 * igb_request_irq - initialize interrupts
1257 *
1258 * Attempts to configure interrupts using the best available
1259 * capabilities of the hardware and kernel.
1260 **/
1261static int igb_request_irq(struct igb_adapter *adapter)
1262{
1263	struct net_device *netdev = adapter->netdev;
1264	struct pci_dev *pdev = adapter->pdev;
1265	int err = 0;
1266
1267	if (adapter->msix_entries) {
1268		err = igb_request_msix(adapter);
1269		if (!err)
1270			goto request_done;
1271		/* fall back to MSI */
1272		igb_clear_interrupt_scheme(adapter);
1273		if (!pci_enable_msi(adapter->pdev))
1274			adapter->flags |= IGB_FLAG_HAS_MSI;
1275		igb_free_all_tx_resources(adapter);
1276		igb_free_all_rx_resources(adapter);
1277		adapter->num_tx_queues = 1;
1278		adapter->num_rx_queues = 1;
1279		adapter->num_q_vectors = 1;
1280		err = igb_alloc_q_vectors(adapter);
1281		if (err) {
1282			dev_err(&pdev->dev,
1283			        "Unable to allocate memory for vectors\n");
1284			goto request_done;
1285		}
1286		err = igb_alloc_queues(adapter);
1287		if (err) {
1288			dev_err(&pdev->dev,
1289			        "Unable to allocate memory for queues\n");
1290			igb_free_q_vectors(adapter);
1291			goto request_done;
1292		}
1293		igb_setup_all_tx_resources(adapter);
1294		igb_setup_all_rx_resources(adapter);
1295	} else {
1296		igb_assign_vector(adapter->q_vector[0], 0);
1297	}
1298
1299	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1300		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1301				  netdev->name, adapter);
1302		if (!err)
1303			goto request_done;
1304
1305		/* fall back to legacy interrupts */
1306		igb_reset_interrupt_capability(adapter);
1307		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1308	}
1309
1310	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1311			  netdev->name, adapter);
1312
1313	if (err)
1314		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1315			err);
1316
1317request_done:
1318	return err;
1319}
1320
1321static void igb_free_irq(struct igb_adapter *adapter)
1322{
1323	if (adapter->msix_entries) {
1324		int vector = 0, i;
1325
1326		free_irq(adapter->msix_entries[vector++].vector, adapter);
1327
1328		for (i = 0; i < adapter->num_q_vectors; i++) {
1329			struct igb_q_vector *q_vector = adapter->q_vector[i];
1330			free_irq(adapter->msix_entries[vector++].vector,
1331			         q_vector);
1332		}
1333	} else {
1334		free_irq(adapter->pdev->irq, adapter);
1335	}
1336}
1337
1338/**
1339 * igb_irq_disable - Mask off interrupt generation on the NIC
1340 * @adapter: board private structure
1341 **/
1342static void igb_irq_disable(struct igb_adapter *adapter)
1343{
1344	struct e1000_hw *hw = &adapter->hw;
1345
1346	/*
1347	 * we need to be careful when disabling interrupts.  The VFs are also
1348	 * mapped into these registers and so clearing the bits can cause
1349	 * issues on the VF drivers so we only need to clear what we set
1350	 */
1351	if (adapter->msix_entries) {
1352		u32 regval = rd32(E1000_EIAM);
1353		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1354		wr32(E1000_EIMC, adapter->eims_enable_mask);
1355		regval = rd32(E1000_EIAC);
1356		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1357	}
1358
1359	wr32(E1000_IAM, 0);
1360	wr32(E1000_IMC, ~0);
1361	wrfl();
1362	if (adapter->msix_entries) {
1363		int i;
1364		for (i = 0; i < adapter->num_q_vectors; i++)
1365			synchronize_irq(adapter->msix_entries[i].vector);
1366	} else {
1367		synchronize_irq(adapter->pdev->irq);
1368	}
1369}
1370
1371/**
1372 * igb_irq_enable - Enable default interrupt generation settings
1373 * @adapter: board private structure
1374 **/
1375static void igb_irq_enable(struct igb_adapter *adapter)
1376{
1377	struct e1000_hw *hw = &adapter->hw;
1378
1379	if (adapter->msix_entries) {
1380		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1381		u32 regval = rd32(E1000_EIAC);
1382		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1383		regval = rd32(E1000_EIAM);
1384		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1385		wr32(E1000_EIMS, adapter->eims_enable_mask);
1386		if (adapter->vfs_allocated_count) {
1387			wr32(E1000_MBVFIMR, 0xFF);
1388			ims |= E1000_IMS_VMMB;
1389		}
1390		if (adapter->hw.mac.type == e1000_82580)
1391			ims |= E1000_IMS_DRSTA;
1392
1393		wr32(E1000_IMS, ims);
1394	} else {
1395		wr32(E1000_IMS, IMS_ENABLE_MASK |
1396				E1000_IMS_DRSTA);
1397		wr32(E1000_IAM, IMS_ENABLE_MASK |
1398				E1000_IMS_DRSTA);
1399	}
1400}
1401
1402static void igb_update_mng_vlan(struct igb_adapter *adapter)
1403{
1404	struct e1000_hw *hw = &adapter->hw;
1405	u16 vid = adapter->hw.mng_cookie.vlan_id;
1406	u16 old_vid = adapter->mng_vlan_id;
1407
1408	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1409		/* add VID to filter table */
1410		igb_vfta_set(hw, vid, true);
1411		adapter->mng_vlan_id = vid;
1412	} else {
1413		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1414	}
1415
1416	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1417	    (vid != old_vid) &&
1418	    !test_bit(old_vid, adapter->active_vlans)) {
1419		/* remove VID from filter table */
1420		igb_vfta_set(hw, old_vid, false);
1421	}
1422}
1423
1424/**
1425 * igb_release_hw_control - release control of the h/w to f/w
1426 * @adapter: address of board private structure
1427 *
1428 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1429 * For ASF and Pass Through versions of f/w this means that the
1430 * driver is no longer loaded.
1431 *
1432 **/
1433static void igb_release_hw_control(struct igb_adapter *adapter)
1434{
1435	struct e1000_hw *hw = &adapter->hw;
1436	u32 ctrl_ext;
1437
1438	/* Let firmware take over control of h/w */
1439	ctrl_ext = rd32(E1000_CTRL_EXT);
1440	wr32(E1000_CTRL_EXT,
1441			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1442}
1443
1444/**
1445 * igb_get_hw_control - get control of the h/w from f/w
1446 * @adapter: address of board private structure
1447 *
1448 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1449 * For ASF and Pass Through versions of f/w this means that
1450 * the driver is loaded.
1451 *
1452 **/
1453static void igb_get_hw_control(struct igb_adapter *adapter)
1454{
1455	struct e1000_hw *hw = &adapter->hw;
1456	u32 ctrl_ext;
1457
1458	/* Let firmware know the driver has taken over */
1459	ctrl_ext = rd32(E1000_CTRL_EXT);
1460	wr32(E1000_CTRL_EXT,
1461			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1462}
1463
1464/**
1465 * igb_configure - configure the hardware for RX and TX
1466 * @adapter: private board structure
1467 **/
1468static void igb_configure(struct igb_adapter *adapter)
1469{
1470	struct net_device *netdev = adapter->netdev;
1471	int i;
1472
1473	igb_get_hw_control(adapter);
1474	igb_set_rx_mode(netdev);
1475
1476	igb_restore_vlan(adapter);
1477
1478	igb_setup_tctl(adapter);
1479	igb_setup_mrqc(adapter);
1480	igb_setup_rctl(adapter);
1481
1482	igb_configure_tx(adapter);
1483	igb_configure_rx(adapter);
1484
1485	igb_rx_fifo_flush_82575(&adapter->hw);
1486
1487	/* call igb_desc_unused which always leaves
1488	 * at least 1 descriptor unused to make sure
1489	 * next_to_use != next_to_clean */
1490	for (i = 0; i < adapter->num_rx_queues; i++) {
1491		struct igb_ring *ring = adapter->rx_ring[i];
1492		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1493	}
1494}
1495
1496/**
1497 * igb_power_up_link - Power up the phy/serdes link
1498 * @adapter: address of board private structure
1499 **/
1500void igb_power_up_link(struct igb_adapter *adapter)
1501{
1502	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1503		igb_power_up_phy_copper(&adapter->hw);
1504	else
1505		igb_power_up_serdes_link_82575(&adapter->hw);
1506}
1507
1508/**
1509 * igb_power_down_link - Power down the phy/serdes link
1510 * @adapter: address of board private structure
1511 */
1512static void igb_power_down_link(struct igb_adapter *adapter)
1513{
1514	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1515		igb_power_down_phy_copper_82575(&adapter->hw);
1516	else
1517		igb_shutdown_serdes_link_82575(&adapter->hw);
1518}
1519
1520/**
1521 * igb_up - Open the interface and prepare it to handle traffic
1522 * @adapter: board private structure
1523 **/
1524int igb_up(struct igb_adapter *adapter)
1525{
1526	struct e1000_hw *hw = &adapter->hw;
1527	int i;
1528
1529	/* hardware has been reset, we need to reload some things */
1530	igb_configure(adapter);
1531
1532	clear_bit(__IGB_DOWN, &adapter->state);
1533
1534	for (i = 0; i < adapter->num_q_vectors; i++) {
1535		struct igb_q_vector *q_vector = adapter->q_vector[i];
1536		napi_enable(&q_vector->napi);
1537	}
1538	if (adapter->msix_entries)
1539		igb_configure_msix(adapter);
1540	else
1541		igb_assign_vector(adapter->q_vector[0], 0);
1542
1543	/* Clear any pending interrupts. */
1544	rd32(E1000_ICR);
1545	igb_irq_enable(adapter);
1546
1547	/* notify VFs that reset has been completed */
1548	if (adapter->vfs_allocated_count) {
1549		u32 reg_data = rd32(E1000_CTRL_EXT);
1550		reg_data |= E1000_CTRL_EXT_PFRSTD;
1551		wr32(E1000_CTRL_EXT, reg_data);
1552	}
1553
1554	netif_tx_start_all_queues(adapter->netdev);
1555
1556	/* start the watchdog. */
1557	hw->mac.get_link_status = 1;
1558	schedule_work(&adapter->watchdog_task);
1559
1560	return 0;
1561}
1562
1563void igb_down(struct igb_adapter *adapter)
1564{
1565	struct net_device *netdev = adapter->netdev;
1566	struct e1000_hw *hw = &adapter->hw;
1567	u32 tctl, rctl;
1568	int i;
1569
1570	/* signal that we're down so the interrupt handler does not
1571	 * reschedule our watchdog timer */
1572	set_bit(__IGB_DOWN, &adapter->state);
1573
1574	/* disable receives in the hardware */
1575	rctl = rd32(E1000_RCTL);
1576	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1577	/* flush and sleep below */
1578
1579	netif_tx_stop_all_queues(netdev);
1580
1581	/* disable transmits in the hardware */
1582	tctl = rd32(E1000_TCTL);
1583	tctl &= ~E1000_TCTL_EN;
1584	wr32(E1000_TCTL, tctl);
1585	/* flush both disables and wait for them to finish */
1586	wrfl();
1587	msleep(10);
1588
1589	for (i = 0; i < adapter->num_q_vectors; i++) {
1590		struct igb_q_vector *q_vector = adapter->q_vector[i];
1591		napi_disable(&q_vector->napi);
1592	}
1593
1594	igb_irq_disable(adapter);
1595
1596	del_timer_sync(&adapter->watchdog_timer);
1597	del_timer_sync(&adapter->phy_info_timer);
1598
1599	netif_carrier_off(netdev);
1600
1601	/* record the stats before reset*/
1602	spin_lock(&adapter->stats64_lock);
1603	igb_update_stats(adapter, &adapter->stats64);
1604	spin_unlock(&adapter->stats64_lock);
1605
1606	adapter->link_speed = 0;
1607	adapter->link_duplex = 0;
1608
1609	if (!pci_channel_offline(adapter->pdev))
1610		igb_reset(adapter);
1611	igb_clean_all_tx_rings(adapter);
1612	igb_clean_all_rx_rings(adapter);
1613#ifdef CONFIG_IGB_DCA
1614
1615	/* since we reset the hardware DCA settings were cleared */
1616	igb_setup_dca(adapter);
1617#endif
1618}
1619
1620void igb_reinit_locked(struct igb_adapter *adapter)
1621{
1622	WARN_ON(in_interrupt());
1623	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1624		msleep(1);
1625	igb_down(adapter);
1626	igb_up(adapter);
1627	clear_bit(__IGB_RESETTING, &adapter->state);
1628}
1629
1630void igb_reset(struct igb_adapter *adapter)
1631{
1632	struct pci_dev *pdev = adapter->pdev;
1633	struct e1000_hw *hw = &adapter->hw;
1634	struct e1000_mac_info *mac = &hw->mac;
1635	struct e1000_fc_info *fc = &hw->fc;
1636	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1637	u16 hwm;
1638
1639	/* Repartition Pba for greater than 9k mtu
1640	 * To take effect CTRL.RST is required.
1641	 */
1642	switch (mac->type) {
1643	case e1000_i350:
1644	case e1000_82580:
1645		pba = rd32(E1000_RXPBS);
1646		pba = igb_rxpbs_adjust_82580(pba);
1647		break;
1648	case e1000_82576:
1649		pba = rd32(E1000_RXPBS);
1650		pba &= E1000_RXPBS_SIZE_MASK_82576;
1651		break;
1652	case e1000_82575:
1653	default:
1654		pba = E1000_PBA_34K;
1655		break;
1656	}
1657
1658	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1659	    (mac->type < e1000_82576)) {
1660		/* adjust PBA for jumbo frames */
1661		wr32(E1000_PBA, pba);
1662
1663		/* To maintain wire speed transmits, the Tx FIFO should be
1664		 * large enough to accommodate two full transmit packets,
1665		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1666		 * the Rx FIFO should be large enough to accommodate at least
1667		 * one full receive packet and is similarly rounded up and
1668		 * expressed in KB. */
1669		pba = rd32(E1000_PBA);
1670		/* upper 16 bits has Tx packet buffer allocation size in KB */
1671		tx_space = pba >> 16;
1672		/* lower 16 bits has Rx packet buffer allocation size in KB */
1673		pba &= 0xffff;
1674		/* the tx fifo also stores 16 bytes of information about the tx
1675		 * packet, but don't include the ethernet FCS because hardware appends it */
1676		min_tx_space = (adapter->max_frame_size +
1677				sizeof(union e1000_adv_tx_desc) -
1678				ETH_FCS_LEN) * 2;
1679		min_tx_space = ALIGN(min_tx_space, 1024);
1680		min_tx_space >>= 10;
1681		/* software strips receive CRC, so leave room for it */
1682		min_rx_space = adapter->max_frame_size;
1683		min_rx_space = ALIGN(min_rx_space, 1024);
1684		min_rx_space >>= 10;
1685
1686		/* If current Tx allocation is less than the min Tx FIFO size,
1687		 * and the min Tx FIFO size is less than the current Rx FIFO
1688		 * allocation, take space away from current Rx allocation */
1689		if (tx_space < min_tx_space &&
1690		    ((min_tx_space - tx_space) < pba)) {
1691			pba = pba - (min_tx_space - tx_space);
1692
1693			/* if short on rx space, rx wins and must trump tx
1694			 * adjustment */
1695			if (pba < min_rx_space)
1696				pba = min_rx_space;
1697		}
1698		wr32(E1000_PBA, pba);
1699	}
1700
1701	/* flow control settings */
1702	/* The high water mark must be low enough to fit one full frame
1703	 * (or the size used for early receive) above it in the Rx FIFO.
1704	 * Set it to the lower of:
1705	 * - 90% of the Rx FIFO size, or
1706	 * - the full Rx FIFO size minus one full frame */
1707	hwm = min(((pba << 10) * 9 / 10),
1708			((pba << 10) - 2 * adapter->max_frame_size));
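	/* hwm is in bytes here (pba is in KB, hence the << 10); it is then
	 * rounded down to 16-byte granularity for high_water below, with
	 * low_water trailing it by 16 bytes. */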
1709
1710	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1711	fc->low_water = fc->high_water - 16;
1712	fc->pause_time = 0xFFFF;
1713	fc->send_xon = 1;
1714	fc->current_mode = fc->requested_mode;
1715
1716	/* disable receive for all VFs and wait one second */
1717	if (adapter->vfs_allocated_count) {
1718		int i;
1719		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1720			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1721
1722		/* ping all the active vfs to let them know we are going down */
1723		igb_ping_all_vfs(adapter);
1724
1725		/* disable transmits and receives */
1726		wr32(E1000_VFRE, 0);
1727		wr32(E1000_VFTE, 0);
1728	}
1729
1730	/* Allow time for pending master requests to run */
1731	hw->mac.ops.reset_hw(hw);
1732	wr32(E1000_WUC, 0);
1733
1734	if (hw->mac.ops.init_hw(hw))
1735		dev_err(&pdev->dev, "Hardware Error\n");
1736	if (hw->mac.type > e1000_82580) {
1737		if (adapter->flags & IGB_FLAG_DMAC) {
1738			u32 reg;
1739
1740			/*
1741			 * The DMA Coalescing high water mark needs to be higher
1742			 * than the Rx threshold.  The Rx threshold is
1743			 * currently pba - 6, so we should use a high water
1744			 * mark of pba - 4. */
1745			hwm = (pba - 4) << 10;
1746
1747			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1748			       & E1000_DMACR_DMACTHR_MASK);
1749
1750			/* transition to L0s or L1 if available */
1751			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1752
1753			/* watchdog timer: ~1000 usec, in 32-usec intervals (1000 >> 5) */
1754			reg |= (1000 >> 5);
1755			wr32(E1000_DMACR, reg);
1756
1757			/* no lower threshold to disable coalescing (smart fifo)
1758			 * - UTRESH = 0 */
1759			wr32(E1000_DMCRTRH, 0);
1760
1761			/* write the DMA coalescing high water mark computed above */
1762			wr32(E1000_FCRTC, hwm);
1763
1764			/*
1765			 * This sets the time to wait before requesting a
1766			 * transition to a low power state to the number of usecs
1767			 * needed to receive one 512-byte frame at gigabit line rate
1768			 */
1769			reg = rd32(E1000_DMCTLX);
1770			reg |= IGB_DMCTLX_DCFLUSH_DIS;
1771
1772			/* Delay 255 usec before entering Lx state. */
1773			reg |= 0xFF;
1774			wr32(E1000_DMCTLX, reg);
1775
1776			/* free space in Tx packet buffer to wake from DMAC */
1777			wr32(E1000_DMCTXTH,
1778			     (IGB_MIN_TXPBSIZE -
1779			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
1780			     >> 6);
1781
1782			/* make low power state decision controlled by DMAC */
1783			reg = rd32(E1000_PCIEMISC);
1784			reg |= E1000_PCIEMISC_LX_DECISION;
1785			wr32(E1000_PCIEMISC, reg);
1786		} /* end if IGB_FLAG_DMAC set */
1787	}
1788	if (hw->mac.type == e1000_82580) {
1789		u32 reg = rd32(E1000_PCIEMISC);
1790		wr32(E1000_PCIEMISC,
1791		                reg & ~E1000_PCIEMISC_LX_DECISION);
1792	}
1793	if (!netif_running(adapter->netdev))
1794		igb_power_down_link(adapter);
1795
1796	igb_update_mng_vlan(adapter);
1797
1798	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1799	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1800
1801	igb_get_phy_info(hw);
1802}
1803
1804static u32 igb_fix_features(struct net_device *netdev, u32 features)
1805{
1806	/*
1807	 * Since there is no support for separate rx/tx vlan accel
1808	 * enable/disable make sure tx flag is always in same state as rx.
1809	 */
1810	if (features & NETIF_F_HW_VLAN_RX)
1811		features |= NETIF_F_HW_VLAN_TX;
1812	else
1813		features &= ~NETIF_F_HW_VLAN_TX;
1814
1815	return features;
1816}
1817
1818static int igb_set_features(struct net_device *netdev, u32 features)
1819{
1820	struct igb_adapter *adapter = netdev_priv(netdev);
1821	int i;
1822	u32 changed = netdev->features ^ features;
1823
1824	for (i = 0; i < adapter->num_rx_queues; i++) {
1825		if (features & NETIF_F_RXCSUM)
1826			set_bit(IGB_RING_FLAG_RX_CSUM,
1827				&adapter->rx_ring[i]->flags);
1828		else
1829			clear_bit(IGB_RING_FLAG_RX_CSUM,
1830				  &adapter->rx_ring[i]->flags);
1831	}
1832
1833	if (changed & NETIF_F_HW_VLAN_RX)
1834		igb_vlan_mode(netdev, features);
1835
1836	return 0;
1837}
1838
1839static const struct net_device_ops igb_netdev_ops = {
1840	.ndo_open		= igb_open,
1841	.ndo_stop		= igb_close,
1842	.ndo_start_xmit		= igb_xmit_frame,
1843	.ndo_get_stats64	= igb_get_stats64,
1844	.ndo_set_rx_mode	= igb_set_rx_mode,
1845	.ndo_set_mac_address	= igb_set_mac,
1846	.ndo_change_mtu		= igb_change_mtu,
1847	.ndo_do_ioctl		= igb_ioctl,
1848	.ndo_tx_timeout		= igb_tx_timeout,
1849	.ndo_validate_addr	= eth_validate_addr,
1850	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1851	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1852	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1853	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1854	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1855	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1856#ifdef CONFIG_NET_POLL_CONTROLLER
1857	.ndo_poll_controller	= igb_netpoll,
1858#endif
1859	.ndo_fix_features	= igb_fix_features,
1860	.ndo_set_features	= igb_set_features,
1861};
1862
1863/**
1864 * igb_probe - Device Initialization Routine
1865 * @pdev: PCI device information struct
1866 * @ent: entry in igb_pci_tbl
1867 *
1868 * Returns 0 on success, negative on failure
1869 *
1870 * igb_probe initializes an adapter identified by a pci_dev structure.
1871 * The OS initialization, configuring of the adapter private structure,
1872 * and a hardware reset occur.
1873 **/
1874static int __devinit igb_probe(struct pci_dev *pdev,
1875			       const struct pci_device_id *ent)
1876{
1877	struct net_device *netdev;
1878	struct igb_adapter *adapter;
1879	struct e1000_hw *hw;
1880	u16 eeprom_data = 0;
1881	s32 ret_val;
1882	static int global_quad_port_a; /* global quad port a indication */
1883	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1884	unsigned long mmio_start, mmio_len;
1885	int err, pci_using_dac;
1886	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1887	u8 part_str[E1000_PBANUM_LENGTH];
1888
1889	/* Catch broken hardware that put the wrong VF device ID in
1890	 * the PCIe SR-IOV capability.
1891	 */
1892	if (pdev->is_virtfn) {
1893		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1894		     pci_name(pdev), pdev->vendor, pdev->device);
1895		return -EINVAL;
1896	}
1897
1898	err = pci_enable_device_mem(pdev);
1899	if (err)
1900		return err;
1901
1902	pci_using_dac = 0;
1903	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1904	if (!err) {
1905		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1906		if (!err)
1907			pci_using_dac = 1;
1908	} else {
1909		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1910		if (err) {
1911			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1912			if (err) {
1913				dev_err(&pdev->dev, "No usable DMA "
1914					"configuration, aborting\n");
1915				goto err_dma;
1916			}
1917		}
1918	}
1919
1920	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1921	                                   IORESOURCE_MEM),
1922	                                   igb_driver_name);
1923	if (err)
1924		goto err_pci_reg;
1925
1926	pci_enable_pcie_error_reporting(pdev);
1927
1928	pci_set_master(pdev);
1929	pci_save_state(pdev);
1930
1931	err = -ENOMEM;
1932	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1933				   IGB_MAX_TX_QUEUES);
1934	if (!netdev)
1935		goto err_alloc_etherdev;
1936
1937	SET_NETDEV_DEV(netdev, &pdev->dev);
1938
1939	pci_set_drvdata(pdev, netdev);
1940	adapter = netdev_priv(netdev);
1941	adapter->netdev = netdev;
1942	adapter->pdev = pdev;
1943	hw = &adapter->hw;
1944	hw->back = adapter;
1945	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1946
1947	mmio_start = pci_resource_start(pdev, 0);
1948	mmio_len = pci_resource_len(pdev, 0);
1949
1950	err = -EIO;
1951	hw->hw_addr = ioremap(mmio_start, mmio_len);
1952	if (!hw->hw_addr)
1953		goto err_ioremap;
1954
1955	netdev->netdev_ops = &igb_netdev_ops;
1956	igb_set_ethtool_ops(netdev);
1957	netdev->watchdog_timeo = 5 * HZ;
1958
1959	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1960
1961	netdev->mem_start = mmio_start;
1962	netdev->mem_end = mmio_start + mmio_len;
1963
1964	/* PCI config space info */
1965	hw->vendor_id = pdev->vendor;
1966	hw->device_id = pdev->device;
1967	hw->revision_id = pdev->revision;
1968	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1969	hw->subsystem_device_id = pdev->subsystem_device;
1970
1971	/* Copy the default MAC, PHY and NVM function pointers */
1972	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1973	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1974	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1975	/* Initialize skew-specific constants */
1976	err = ei->get_invariants(hw);
1977	if (err)
1978		goto err_sw_init;
1979
1980	/* setup the private structure */
1981	err = igb_sw_init(adapter);
1982	if (err)
1983		goto err_sw_init;
1984
1985	igb_get_bus_info_pcie(hw);
1986
1987	hw->phy.autoneg_wait_to_complete = false;
1988
1989	/* Copper options */
1990	if (hw->phy.media_type == e1000_media_type_copper) {
1991		hw->phy.mdix = AUTO_ALL_MODES;
1992		hw->phy.disable_polarity_correction = false;
1993		hw->phy.ms_type = e1000_ms_hw_default;
1994	}
1995
1996	if (igb_check_reset_block(hw))
1997		dev_info(&pdev->dev,
1998			"PHY reset is blocked due to SOL/IDER session.\n");
1999
2000	netdev->hw_features = NETIF_F_SG |
2001			   NETIF_F_IP_CSUM |
2002			   NETIF_F_IPV6_CSUM |
2003			   NETIF_F_TSO |
2004			   NETIF_F_TSO6 |
2005			   NETIF_F_RXCSUM |
2006			   NETIF_F_HW_VLAN_RX;
2007
2008	netdev->features = netdev->hw_features |
2009			   NETIF_F_HW_VLAN_TX |
2010			   NETIF_F_HW_VLAN_FILTER;
2011
2012	netdev->vlan_features |= NETIF_F_TSO;
2013	netdev->vlan_features |= NETIF_F_TSO6;
2014	netdev->vlan_features |= NETIF_F_IP_CSUM;
2015	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
2016	netdev->vlan_features |= NETIF_F_SG;
2017
2018	if (pci_using_dac) {
2019		netdev->features |= NETIF_F_HIGHDMA;
2020		netdev->vlan_features |= NETIF_F_HIGHDMA;
2021	}
2022
2023	if (hw->mac.type >= e1000_82576) {
2024		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2025		netdev->features |= NETIF_F_SCTP_CSUM;
2026	}
2027
2028	netdev->priv_flags |= IFF_UNICAST_FLT;
2029
2030	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2031
2032	/* before reading the NVM, reset the controller to put the device in a
2033	 * known good starting state */
2034	hw->mac.ops.reset_hw(hw);
2035
2036	/* make sure the NVM is good */
2037	if (hw->nvm.ops.validate(hw) < 0) {
2038		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2039		err = -EIO;
2040		goto err_eeprom;
2041	}
2042
2043	/* copy the MAC address out of the NVM */
2044	if (hw->mac.ops.read_mac_addr(hw))
2045		dev_err(&pdev->dev, "NVM Read Error\n");
2046
2047	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2048	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2049
2050	if (!is_valid_ether_addr(netdev->perm_addr)) {
2051		dev_err(&pdev->dev, "Invalid MAC Address\n");
2052		err = -EIO;
2053		goto err_eeprom;
2054	}
2055
2056	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2057	            (unsigned long) adapter);
2058	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2059	            (unsigned long) adapter);
2060
2061	INIT_WORK(&adapter->reset_task, igb_reset_task);
2062	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2063
2064	/* Initialize link properties that are user-changeable */
2065	adapter->fc_autoneg = true;
2066	hw->mac.autoneg = true;
2067	hw->phy.autoneg_advertised = 0x2f;	/* 10/100 half+full, 1000 full */
2068
2069	hw->fc.requested_mode = e1000_fc_default;
2070	hw->fc.current_mode = e1000_fc_default;
2071
2072	igb_validate_mdi_setting(hw);
2073
2074	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2075	 * enable the ACPI Magic Packet filter
2076	 */
2077
2078	if (hw->bus.func == 0)
2079		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2080	else if (hw->mac.type >= e1000_82580)
2081		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2082		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2083		                 &eeprom_data);
2084	else if (hw->bus.func == 1)
2085		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2086
2087	if (eeprom_data & eeprom_apme_mask)
2088		adapter->eeprom_wol |= E1000_WUFC_MAG;
2089
2090	/* now that we have the eeprom settings, apply the special cases where
2091	 * the eeprom may be wrong or the board simply won't support wake on
2092	 * lan on a particular port */
2093	switch (pdev->device) {
2094	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2095		adapter->eeprom_wol = 0;
2096		break;
2097	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2098	case E1000_DEV_ID_82576_FIBER:
2099	case E1000_DEV_ID_82576_SERDES:
2100		/* Wake events only supported on port A for dual fiber
2101		 * regardless of eeprom setting */
2102		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2103			adapter->eeprom_wol = 0;
2104		break;
2105	case E1000_DEV_ID_82576_QUAD_COPPER:
2106	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2107		/* if quad port adapter, disable WoL on all but port A */
2108		if (global_quad_port_a != 0)
2109			adapter->eeprom_wol = 0;
2110		else
2111			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2112		/* Reset for multiple quad port adapters */
2113		if (++global_quad_port_a == 4)
2114			global_quad_port_a = 0;
2115		break;
2116	}
2117
2118	/* initialize the wol settings based on the eeprom settings */
2119	adapter->wol = adapter->eeprom_wol;
2120	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2121
2122	/* reset the hardware with the new settings */
2123	igb_reset(adapter);
2124
2125	/* let the f/w know that the h/w is now under the control of the
2126	 * driver. */
2127	igb_get_hw_control(adapter);
2128
2129	strcpy(netdev->name, "eth%d");
2130	err = register_netdev(netdev);
2131	if (err)
2132		goto err_register;
2133
2134	igb_vlan_mode(netdev, netdev->features);
2135
2136	/* carrier off reporting is important to ethtool even BEFORE open */
2137	netif_carrier_off(netdev);
2138
2139#ifdef CONFIG_IGB_DCA
2140	if (dca_add_requester(&pdev->dev) == 0) {
2141		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2142		dev_info(&pdev->dev, "DCA enabled\n");
2143		igb_setup_dca(adapter);
2144	}
2145
2146#endif
2147	/* do hw tstamp init after resetting */
2148	igb_init_hw_timer(adapter);
2149
2150	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2151	/* print bus type/speed/width info */
2152	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2153		 netdev->name,
2154		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2155		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2156		                                            "unknown"),
2157		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2158		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2159		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2160		   "unknown"),
2161		 netdev->dev_addr);
2162
2163	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2164	if (ret_val)
2165		strcpy(part_str, "Unknown");
2166	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2167	dev_info(&pdev->dev,
2168		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2169		adapter->msix_entries ? "MSI-X" :
2170		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2171		adapter->num_rx_queues, adapter->num_tx_queues);
2172	switch (hw->mac.type) {
2173	case e1000_i350:
2174		igb_set_eee_i350(hw);
2175		break;
2176	default:
2177		break;
2178	}
2179	return 0;
2180
2181err_register:
2182	igb_release_hw_control(adapter);
2183err_eeprom:
2184	if (!igb_check_reset_block(hw))
2185		igb_reset_phy(hw);
2186
2187	if (hw->flash_address)
2188		iounmap(hw->flash_address);
2189err_sw_init:
2190	igb_clear_interrupt_scheme(adapter);
2191	iounmap(hw->hw_addr);
2192err_ioremap:
2193	free_netdev(netdev);
2194err_alloc_etherdev:
2195	pci_release_selected_regions(pdev,
2196	                             pci_select_bars(pdev, IORESOURCE_MEM));
2197err_pci_reg:
2198err_dma:
2199	pci_disable_device(pdev);
2200	return err;
2201}
2202
2203/**
2204 * igb_remove - Device Removal Routine
2205 * @pdev: PCI device information struct
2206 *
2207 * igb_remove is called by the PCI subsystem to alert the driver
2208 * that it should release a PCI device.  This could be caused by a
2209 * Hot-Plug event, or because the driver is going to be removed from
2210 * memory.
2211 **/
2212static void __devexit igb_remove(struct pci_dev *pdev)
2213{
2214	struct net_device *netdev = pci_get_drvdata(pdev);
2215	struct igb_adapter *adapter = netdev_priv(netdev);
2216	struct e1000_hw *hw = &adapter->hw;
2217
2218	/*
2219	 * The watchdog timer may be rescheduled, so explicitly
2220	 * disable watchdog from being rescheduled.
2221	 */
2222	set_bit(__IGB_DOWN, &adapter->state);
2223	del_timer_sync(&adapter->watchdog_timer);
2224	del_timer_sync(&adapter->phy_info_timer);
2225
2226	cancel_work_sync(&adapter->reset_task);
2227	cancel_work_sync(&adapter->watchdog_task);
2228
2229#ifdef CONFIG_IGB_DCA
2230	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2231		dev_info(&pdev->dev, "DCA disabled\n");
2232		dca_remove_requester(&pdev->dev);
2233		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2234		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2235	}
2236#endif
2237
2238	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2239	 * would have already happened in close and is redundant. */
2240	igb_release_hw_control(adapter);
2241
2242	unregister_netdev(netdev);
2243
2244	igb_clear_interrupt_scheme(adapter);
2245
2246#ifdef CONFIG_PCI_IOV
2247	/* reclaim resources allocated to VFs */
2248	if (adapter->vf_data) {
2249		/* disable iov and allow time for transactions to clear */
2250		pci_disable_sriov(pdev);
2251		msleep(500);
2252
2253		kfree(adapter->vf_data);
2254		adapter->vf_data = NULL;
2255		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2256		wrfl();
2257		msleep(100);
2258		dev_info(&pdev->dev, "IOV Disabled\n");
2259	}
2260#endif
2261
2262	iounmap(hw->hw_addr);
2263	if (hw->flash_address)
2264		iounmap(hw->flash_address);
2265	pci_release_selected_regions(pdev,
2266	                             pci_select_bars(pdev, IORESOURCE_MEM));
2267
2268	free_netdev(netdev);
2269
2270	pci_disable_pcie_error_reporting(pdev);
2271
2272	pci_disable_device(pdev);
2273}
2274
2275/**
2276 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2277 * @adapter: board private structure to initialize
2278 *
2279 * This function initializes the vf specific data storage and then attempts to
2280 * allocate the VFs.  The reason for ordering it this way is because it is much
2281 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2282 * the memory for the VFs.
2283 **/
2284static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2285{
2286#ifdef CONFIG_PCI_IOV
2287	struct pci_dev *pdev = adapter->pdev;
2288
2289	if (adapter->vfs_allocated_count) {
2290		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2291		                           sizeof(struct vf_data_storage),
2292		                           GFP_KERNEL);
2293		/* if allocation failed then we do not support SR-IOV */
2294		if (!adapter->vf_data) {
2295			adapter->vfs_allocated_count = 0;
2296			dev_err(&pdev->dev, "Unable to allocate memory for VF "
2297			        "Data Storage\n");
2298		}
2299	}
2300
2301	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2302		kfree(adapter->vf_data);
2303		adapter->vf_data = NULL;
2304#endif /* CONFIG_PCI_IOV */
2305		adapter->vfs_allocated_count = 0;
2306#ifdef CONFIG_PCI_IOV
2307	} else {
2308		unsigned char mac_addr[ETH_ALEN];
2309		int i;
2310		dev_info(&pdev->dev, "%d vfs allocated\n",
2311		         adapter->vfs_allocated_count);
2312		for (i = 0; i < adapter->vfs_allocated_count; i++) {
2313			random_ether_addr(mac_addr);
2314			igb_set_vf_mac(adapter, i, mac_addr);
2315		}
2316		/* DMA Coalescing is not supported in IOV mode. */
2317		if (adapter->flags & IGB_FLAG_DMAC)
2318			adapter->flags &= ~IGB_FLAG_DMAC;
2319	}
2320#endif /* CONFIG_PCI_IOV */
2321}
2322
2323
2324/**
2325 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2326 * @adapter: board private structure to initialize
2327 *
2328 * igb_init_hw_timer initializes the function pointer and values for the hw
2329 * timer found in hardware.
2330 **/
2331static void igb_init_hw_timer(struct igb_adapter *adapter)
2332{
2333	struct e1000_hw *hw = &adapter->hw;
2334
2335	switch (hw->mac.type) {
2336	case e1000_i350:
2337	case e1000_82580:
2338		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2339		adapter->cycles.read = igb_read_clock;
2340		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2341		adapter->cycles.mult = 1;
2342		/*
2343		 * The 82580 timesync updates the system timer every 8ns by 8ns
2344		 * and the value cannot be shifted.  Instead we need to shift
2345		 * the registers to generate a 64bit timer value.  As a result
2346		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2347		 * 24 in order to generate a larger value for synchronization.
2348		 */
2349		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2350		/* disable system timer temporarily by setting bit 31 */
2351		wr32(E1000_TSAUXC, 0x80000000);
2352		wrfl();
2353
2354		/* Set registers so that rollover occurs soon to test this. */
2355		wr32(E1000_SYSTIMR, 0x00000000);
2356		wr32(E1000_SYSTIML, 0x80000000);
2357		wr32(E1000_SYSTIMH, 0x000000FF);
2358		wrfl();
2359
2360		/* enable system timer by clearing bit 31 */
2361		wr32(E1000_TSAUXC, 0x0);
2362		wrfl();
2363
2364		timecounter_init(&adapter->clock,
2365				 &adapter->cycles,
2366				 ktime_to_ns(ktime_get_real()));
2367		/*
2368		 * Synchronize our NIC clock against system wall clock. NIC
2369		 * time stamp reading requires ~3us per sample, each sample
2370		 * was pretty stable even under load => only require 10
2371		 * samples for each offset comparison.
2372		 */
2373		memset(&adapter->compare, 0, sizeof(adapter->compare));
2374		adapter->compare.source = &adapter->clock;
2375		adapter->compare.target = ktime_get_real;
2376		adapter->compare.num_samples = 10;
2377		timecompare_update(&adapter->compare, 0);
2378		break;
2379	case e1000_82576:
2380		/*
2381		 * Initialize hardware timer: we keep it running just in case
2382		 * that some program needs it later on.
2383		 */
2384		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2385		adapter->cycles.read = igb_read_clock;
2386		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2387		adapter->cycles.mult = 1;
2388		/*
2389		 * Scale the NIC clock cycle by a large factor so that
2390		 * relatively small clock corrections can be added or
2391		 * subtracted at each clock tick. The drawbacks of a large
2392		 * factor are a) that the clock register overflows more quickly
2393		 * (not such a big deal) and b) that the increment per tick has
2394		 * to fit into 24 bits.  As a result we need to use a shift of
2395		 * 19 so we can fit a value of 16 into the TIMINCA register.
2396		 */
2397		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2398		wr32(E1000_TIMINCA,
2399		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2400		                (16 << IGB_82576_TSYNC_SHIFT));
2401
2402		/* Set registers so that rollover occurs soon to test this. */
2403		wr32(E1000_SYSTIML, 0x00000000);
2404		wr32(E1000_SYSTIMH, 0xFF800000);
2405		wrfl();
2406
2407		timecounter_init(&adapter->clock,
2408				 &adapter->cycles,
2409				 ktime_to_ns(ktime_get_real()));
2410		/*
2411		 * Synchronize our NIC clock against system wall clock. NIC
2412		 * time stamp reading requires ~3us per sample, each sample
2413		 * was pretty stable even under load => only require 10
2414		 * samples for each offset comparison.
2415		 */
2416		memset(&adapter->compare, 0, sizeof(adapter->compare));
2417		adapter->compare.source = &adapter->clock;
2418		adapter->compare.target = ktime_get_real;
2419		adapter->compare.num_samples = 10;
2420		timecompare_update(&adapter->compare, 0);
2421		break;
2422	case e1000_82575:
2423		/* 82575 does not support timesync */
2424	default:
2425		break;
2426	}
2427
2428}
2429
2430/**
2431 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2432 * @adapter: board private structure to initialize
2433 *
2434 * igb_sw_init initializes the Adapter private data structure.
2435 * Fields are initialized based on PCI device information and
2436 * OS network device settings (MTU size).
2437 **/
2438static int __devinit igb_sw_init(struct igb_adapter *adapter)
2439{
2440	struct e1000_hw *hw = &adapter->hw;
2441	struct net_device *netdev = adapter->netdev;
2442	struct pci_dev *pdev = adapter->pdev;
2443
2444	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2445
2446	/* set default ring sizes */
2447	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2448	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2449
2450	/* set default ITR values */
2451	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2452	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2453
2454	/* set default work limits */
2455	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2456
2457	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2458				  VLAN_HLEN;
2459	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
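	/* e.g. with the default 1500-byte MTU: max_frame_size =
	 * 1500 + 14 + 4 + 4 = 1522 bytes and min_frame_size = 60 + 4 = 64 */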
2460
2461	adapter->node = -1;
2462
2463	spin_lock_init(&adapter->stats64_lock);
2464#ifdef CONFIG_PCI_IOV
2465	switch (hw->mac.type) {
2466	case e1000_82576:
2467	case e1000_i350:
2468		if (max_vfs > 7) {
2469			dev_warn(&pdev->dev,
2470				 "Maximum of 7 VFs per PF, using max\n");
2471			adapter->vfs_allocated_count = 7;
2472		} else
2473			adapter->vfs_allocated_count = max_vfs;
2474		break;
2475	default:
2476		break;
2477	}
2478#endif /* CONFIG_PCI_IOV */
2479	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2480	/* i350 cannot do RSS and SR-IOV at the same time */
2481	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2482		adapter->rss_queues = 1;
2483
2484	/*
2485	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2486	 * then we should combine the queues into a queue pair in order to
2487	 * conserve interrupts due to limited supply
2488	 */
2489	if ((adapter->rss_queues > 4) ||
2490	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2491		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2492
2493	/* This call may decrease the number of queues */
2494	if (igb_init_interrupt_scheme(adapter)) {
2495		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2496		return -ENOMEM;
2497	}
2498
2499	igb_probe_vfs(adapter);
2500
2501	/* Explicitly disable IRQ since the NIC can be in any state. */
2502	igb_irq_disable(adapter);
2503
2504	if (hw->mac.type == e1000_i350)
2505		adapter->flags &= ~IGB_FLAG_DMAC;
2506
2507	set_bit(__IGB_DOWN, &adapter->state);
2508	return 0;
2509}
2510
2511/**
2512 * igb_open - Called when a network interface is made active
2513 * @netdev: network interface device structure
2514 *
2515 * Returns 0 on success, negative value on failure
2516 *
2517 * The open entry point is called when a network interface is made
2518 * active by the system (IFF_UP).  At this point all resources needed
2519 * for transmit and receive operations are allocated, the interrupt
2520 * handler is registered with the OS, the watchdog timer is started,
2521 * and the stack is notified that the interface is ready.
2522 **/
2523static int igb_open(struct net_device *netdev)
2524{
2525	struct igb_adapter *adapter = netdev_priv(netdev);
2526	struct e1000_hw *hw = &adapter->hw;
2527	int err;
2528	int i;
2529
2530	/* disallow open during test */
2531	if (test_bit(__IGB_TESTING, &adapter->state))
2532		return -EBUSY;
2533
2534	netif_carrier_off(netdev);
2535
2536	/* allocate transmit descriptors */
2537	err = igb_setup_all_tx_resources(adapter);
2538	if (err)
2539		goto err_setup_tx;
2540
2541	/* allocate receive descriptors */
2542	err = igb_setup_all_rx_resources(adapter);
2543	if (err)
2544		goto err_setup_rx;
2545
2546	igb_power_up_link(adapter);
2547
2548	/* before we allocate an interrupt, we must be ready to handle it.
2549	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2550	 * as soon as we call pci_request_irq, so we have to setup our
2551	 * clean_rx handler before we do so.  */
2552	igb_configure(adapter);
2553
2554	err = igb_request_irq(adapter);
2555	if (err)
2556		goto err_req_irq;
2557
2558	/* From here on the code is the same as igb_up() */
2559	clear_bit(__IGB_DOWN, &adapter->state);
2560
2561	for (i = 0; i < adapter->num_q_vectors; i++) {
2562		struct igb_q_vector *q_vector = adapter->q_vector[i];
2563		napi_enable(&q_vector->napi);
2564	}
2565
2566	/* Clear any pending interrupts. */
2567	rd32(E1000_ICR);
2568
2569	igb_irq_enable(adapter);
2570
2571	/* notify VFs that reset has been completed */
2572	if (adapter->vfs_allocated_count) {
2573		u32 reg_data = rd32(E1000_CTRL_EXT);
2574		reg_data |= E1000_CTRL_EXT_PFRSTD;
2575		wr32(E1000_CTRL_EXT, reg_data);
2576	}
2577
2578	netif_tx_start_all_queues(netdev);
2579
2580	/* start the watchdog. */
2581	hw->mac.get_link_status = 1;
2582	schedule_work(&adapter->watchdog_task);
2583
2584	return 0;
2585
2586err_req_irq:
2587	igb_release_hw_control(adapter);
2588	igb_power_down_link(adapter);
2589	igb_free_all_rx_resources(adapter);
2590err_setup_rx:
2591	igb_free_all_tx_resources(adapter);
2592err_setup_tx:
2593	igb_reset(adapter);
2594
2595	return err;
2596}
2597
2598/**
2599 * igb_close - Disables a network interface
2600 * @netdev: network interface device structure
2601 *
2602 * Returns 0, this is not allowed to fail
2603 *
2604 * The close entry point is called when an interface is de-activated
2605 * by the OS.  The hardware is still under the driver's control, but
2606 * needs to be disabled.  A global MAC reset is issued to stop the
2607 * hardware, and all transmit and receive resources are freed.
2608 **/
2609static int igb_close(struct net_device *netdev)
2610{
2611	struct igb_adapter *adapter = netdev_priv(netdev);
2612
2613	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2614	igb_down(adapter);
2615
2616	igb_free_irq(adapter);
2617
2618	igb_free_all_tx_resources(adapter);
2619	igb_free_all_rx_resources(adapter);
2620
2621	return 0;
2622}
2623
2624/**
2625 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2626 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2627 *
2628 * Return 0 on success, negative on failure
2629 **/
2630int igb_setup_tx_resources(struct igb_ring *tx_ring)
2631{
2632	struct device *dev = tx_ring->dev;
2633	int orig_node = dev_to_node(dev);
2634	int size;
2635
2636	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2637	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2638	if (!tx_ring->tx_buffer_info)
2639		tx_ring->tx_buffer_info = vzalloc(size);
2640	if (!tx_ring->tx_buffer_info)
2641		goto err;
2642
2643	/* round up to nearest 4K */
2644	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2645	tx_ring->size = ALIGN(tx_ring->size, 4096);
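	/* e.g. assuming the default of 256 descriptors: 256 * 16 bytes = 4096,
	 * which is already 4 KB aligned */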
2646
2647	set_dev_node(dev, tx_ring->numa_node);
2648	tx_ring->desc = dma_alloc_coherent(dev,
2649					   tx_ring->size,
2650					   &tx_ring->dma,
2651					   GFP_KERNEL);
2652	set_dev_node(dev, orig_node);
2653	if (!tx_ring->desc)
2654		tx_ring->desc = dma_alloc_coherent(dev,
2655						   tx_ring->size,
2656						   &tx_ring->dma,
2657						   GFP_KERNEL);
2658
2659	if (!tx_ring->desc)
2660		goto err;
2661
2662	tx_ring->next_to_use = 0;
2663	tx_ring->next_to_clean = 0;
2664
2665	return 0;
2666
2667err:
2668	vfree(tx_ring->tx_buffer_info);
2669	dev_err(dev,
2670		"Unable to allocate memory for the transmit descriptor ring\n");
2671	return -ENOMEM;
2672}
2673
2674/**
2675 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2676 *				  (Descriptors) for all queues
2677 * @adapter: board private structure
2678 *
2679 * Return 0 on success, negative on failure
2680 **/
2681static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2682{
2683	struct pci_dev *pdev = adapter->pdev;
2684	int i, err = 0;
2685
2686	for (i = 0; i < adapter->num_tx_queues; i++) {
2687		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2688		if (err) {
2689			dev_err(&pdev->dev,
2690				"Allocation for Tx Queue %u failed\n", i);
2691			for (i--; i >= 0; i--)
2692				igb_free_tx_resources(adapter->tx_ring[i]);
2693			break;
2694		}
2695	}
2696
2697	return err;
2698}
2699
2700/**
2701 * igb_setup_tctl - configure the transmit control registers
2702 * @adapter: Board private structure
2703 **/
2704void igb_setup_tctl(struct igb_adapter *adapter)
2705{
2706	struct e1000_hw *hw = &adapter->hw;
2707	u32 tctl;
2708
2709	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2710	wr32(E1000_TXDCTL(0), 0);
2711
2712	/* Program the Transmit Control Register */
2713	tctl = rd32(E1000_TCTL);
2714	tctl &= ~E1000_TCTL_CT;
2715	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2716		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2717
2718	igb_config_collision_dist(hw);
2719
2720	/* Enable transmits */
2721	tctl |= E1000_TCTL_EN;
2722
2723	wr32(E1000_TCTL, tctl);
2724}
2725
2726/**
2727 * igb_configure_tx_ring - Configure transmit ring after Reset
2728 * @adapter: board private structure
2729 * @ring: tx ring to configure
2730 *
2731 * Configure a transmit ring after a reset.
2732 **/
2733void igb_configure_tx_ring(struct igb_adapter *adapter,
2734                           struct igb_ring *ring)
2735{
2736	struct e1000_hw *hw = &adapter->hw;
2737	u32 txdctl = 0;
2738	u64 tdba = ring->dma;
2739	int reg_idx = ring->reg_idx;
2740
2741	/* disable the queue */
2742	wr32(E1000_TXDCTL(reg_idx), 0);
2743	wrfl();
2744	mdelay(10);
2745
2746	wr32(E1000_TDLEN(reg_idx),
2747	                ring->count * sizeof(union e1000_adv_tx_desc));
2748	wr32(E1000_TDBAL(reg_idx),
2749	                tdba & 0x00000000ffffffffULL);
2750	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2751
2752	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2753	wr32(E1000_TDH(reg_idx), 0);
2754	writel(0, ring->tail);
2755
2756	txdctl |= IGB_TX_PTHRESH;
2757	txdctl |= IGB_TX_HTHRESH << 8;
2758	txdctl |= IGB_TX_WTHRESH << 16;
2759
2760	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2761	wr32(E1000_TXDCTL(reg_idx), txdctl);
2762}
2763
2764/**
2765 * igb_configure_tx - Configure transmit Unit after Reset
2766 * @adapter: board private structure
2767 *
2768 * Configure the Tx unit of the MAC after a reset.
2769 **/
2770static void igb_configure_tx(struct igb_adapter *adapter)
2771{
2772	int i;
2773
2774	for (i = 0; i < adapter->num_tx_queues; i++)
2775		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2776}
2777
2778/**
2779 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2780 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2781 *
2782 * Returns 0 on success, negative on failure
2783 **/
2784int igb_setup_rx_resources(struct igb_ring *rx_ring)
2785{
2786	struct device *dev = rx_ring->dev;
2787	int orig_node = dev_to_node(dev);
2788	int size, desc_len;
2789
2790	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2791	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2792	if (!rx_ring->rx_buffer_info)
2793		rx_ring->rx_buffer_info = vzalloc(size);
2794	if (!rx_ring->rx_buffer_info)
2795		goto err;
2796
2797	desc_len = sizeof(union e1000_adv_rx_desc);
2798
2799	/* Round up to nearest 4K */
2800	rx_ring->size = rx_ring->count * desc_len;
2801	rx_ring->size = ALIGN(rx_ring->size, 4096);
2802
2803	set_dev_node(dev, rx_ring->numa_node);
2804	rx_ring->desc = dma_alloc_coherent(dev,
2805					   rx_ring->size,
2806					   &rx_ring->dma,
2807					   GFP_KERNEL);
2808	set_dev_node(dev, orig_node);
2809	if (!rx_ring->desc)
2810		rx_ring->desc = dma_alloc_coherent(dev,
2811						   rx_ring->size,
2812						   &rx_ring->dma,
2813						   GFP_KERNEL);
2814
2815	if (!rx_ring->desc)
2816		goto err;
2817
2818	rx_ring->next_to_clean = 0;
2819	rx_ring->next_to_use = 0;
2820
2821	return 0;
2822
2823err:
2824	vfree(rx_ring->rx_buffer_info);
2825	rx_ring->rx_buffer_info = NULL;
2826	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2827		" ring\n");
2828	return -ENOMEM;
2829}
2830
2831/**
2832 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2833 *				  (Descriptors) for all queues
2834 * @adapter: board private structure
2835 *
2836 * Return 0 on success, negative on failure
2837 **/
2838static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2839{
2840	struct pci_dev *pdev = adapter->pdev;
2841	int i, err = 0;
2842
2843	for (i = 0; i < adapter->num_rx_queues; i++) {
2844		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2845		if (err) {
2846			dev_err(&pdev->dev,
2847				"Allocation for Rx Queue %u failed\n", i);
2848			for (i--; i >= 0; i--)
2849				igb_free_rx_resources(adapter->rx_ring[i]);
2850			break;
2851		}
2852	}
2853
2854	return err;
2855}
2856
2857/**
2858 * igb_setup_mrqc - configure the multiple receive queue control registers
2859 * @adapter: Board private structure
2860 **/
2861static void igb_setup_mrqc(struct igb_adapter *adapter)
2862{
2863	struct e1000_hw *hw = &adapter->hw;
2864	u32 mrqc, rxcsum;
2865	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2866	union e1000_reta {
2867		u32 dword;
2868		u8  bytes[4];
2869	} reta;
2870	static const u8 rsshash[40] = {
2871		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2872		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2873		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2874		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2875
2876	/* Fill out hash function seeds */
2877	for (j = 0; j < 10; j++) {
2878		u32 rsskey = rsshash[(j * 4)];
2879		rsskey |= rsshash[(j * 4) + 1] << 8;
2880		rsskey |= rsshash[(j * 4) + 2] << 16;
2881		rsskey |= rsshash[(j * 4) + 3] << 24;
2882		array_wr32(E1000_RSSRK(0), j, rsskey);
2883	}
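	/* e.g. for j = 0 the key bytes 0x6d, 0x5a, 0x56, 0xda end up in
	 * RSSRK(0) as 0xda565a6d (byte 0 in the least significant position) */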
2884
2885	num_rx_queues = adapter->rss_queues;
2886
2887	if (adapter->vfs_allocated_count) {
2888		/* 82575 and 82576 support 2 RSS queues for VMDq */
2889		switch (hw->mac.type) {
2890		case e1000_i350:
2891		case e1000_82580:
2892			num_rx_queues = 1;
2893			shift = 0;
2894			break;
2895		case e1000_82576:
2896			shift = 3;
2897			num_rx_queues = 2;
2898			break;
2899		case e1000_82575:
2900			shift = 2;
2901			shift2 = 6;
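			/* fall through */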
2902		default:
2903			break;
2904		}
2905	} else {
2906		if (hw->mac.type == e1000_82575)
2907			shift = 6;
2908	}
2909
2910	for (j = 0; j < (32 * 4); j++) {
2911		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2912		if (shift2)
2913			reta.bytes[j & 3] |= num_rx_queues << shift2;
2914		if ((j & 3) == 3)
2915			wr32(E1000_RETA(j >> 2), reta.dword);
2916	}
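	/* Illustration: with 4 RSS queues and shift == 0 the 128 RETA entries
	 * simply cycle through queue indices 0, 1, 2, 3 */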
2917
2918	/*
2919	 * Disable raw packet checksumming so that RSS hash is placed in
2920	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2921	 * offloads as they are enabled by default
2922	 */
2923	rxcsum = rd32(E1000_RXCSUM);
2924	rxcsum |= E1000_RXCSUM_PCSD;
2925
2926	if (adapter->hw.mac.type >= e1000_82576)
2927		/* Enable Receive Checksum Offload for SCTP */
2928		rxcsum |= E1000_RXCSUM_CRCOFL;
2929
2930	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2931	wr32(E1000_RXCSUM, rxcsum);
2932
2933	/* If VMDq is enabled then we set the appropriate mode for that, else
2934	 * we default to RSS so that an RSS hash is calculated per packet even
2935	 * if we are only using one queue */
2936	if (adapter->vfs_allocated_count) {
2937		if (hw->mac.type > e1000_82575) {
2938			/* Set the default pool for the PF's first queue */
2939			u32 vtctl = rd32(E1000_VT_CTL);
2940			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2941				   E1000_VT_CTL_DISABLE_DEF_POOL);
2942			vtctl |= adapter->vfs_allocated_count <<
2943				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2944			wr32(E1000_VT_CTL, vtctl);
2945		}
2946		if (adapter->rss_queues > 1)
2947			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2948		else
2949			mrqc = E1000_MRQC_ENABLE_VMDQ;
2950	} else {
2951		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2952	}
2953	igb_vmm_control(adapter);
2954
2955	/*
2956	 * Generate RSS hash based on TCP port numbers and/or
2957	 * IPv4/v6 src and dst addresses since UDP cannot be
2958	 * hashed reliably due to IP fragmentation
2959	 */
2960	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2961		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2962		E1000_MRQC_RSS_FIELD_IPV6 |
2963		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2964		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2965
2966	wr32(E1000_MRQC, mrqc);
2967}
2968
2969/**
2970 * igb_setup_rctl - configure the receive control registers
2971 * @adapter: Board private structure
2972 **/
2973void igb_setup_rctl(struct igb_adapter *adapter)
2974{
2975	struct e1000_hw *hw = &adapter->hw;
2976	u32 rctl;
2977
2978	rctl = rd32(E1000_RCTL);
2979
2980	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2981	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2982
2983	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2984		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2985
2986	/*
2987	 * enable stripping of CRC. It's unlikely this will break BMC
2988	 * redirection as it did with e1000. Newer features require
2989	 * that the HW strips the CRC.
2990	 */
2991	rctl |= E1000_RCTL_SECRC;
2992
2993	/* disable store bad packets and clear size bits. */
2994	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2995
2996	/* enable LPE to prevent packets larger than max_frame_size */
2997	rctl |= E1000_RCTL_LPE;
2998
2999	/* disable queue 0 to prevent tail write w/o re-config */
3000	wr32(E1000_RXDCTL(0), 0);
3001
3002	/* Attention!!!  For SR-IOV PF driver operations you must enable
3003	 * queue drop for all VF and PF queues to prevent head of line blocking
3004	 * if an un-trusted VF does not provide descriptors to hardware.
3005	 */
3006	if (adapter->vfs_allocated_count) {
3007		/* set all queue drop enable bits */
3008		wr32(E1000_QDE, ALL_QUEUES);
3009	}
3010
3011	wr32(E1000_RCTL, rctl);
3012}
3013
3014static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3015                                   int vfn)
3016{
3017	struct e1000_hw *hw = &adapter->hw;
3018	u32 vmolr;
3019
3020	/* if it isn't the PF, check whether the VF has VLANs enabled and
3021	 * increase the size to allow for vlan tags */
3022	if (vfn < adapter->vfs_allocated_count &&
3023	    adapter->vf_data[vfn].vlans_enabled)
3024		size += VLAN_TAG_SIZE;
3025
3026	vmolr = rd32(E1000_VMOLR(vfn));
3027	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3028	vmolr |= size | E1000_VMOLR_LPE;
3029	wr32(E1000_VMOLR(vfn), vmolr);
3030
3031	return 0;
3032}
3033
3034/**
3035 * igb_rlpml_set - set maximum receive packet size
3036 * @adapter: board private structure
3037 *
3038 * Configure maximum receivable packet size.
3039 **/
3040static void igb_rlpml_set(struct igb_adapter *adapter)
3041{
3042	u32 max_frame_size = adapter->max_frame_size;
3043	struct e1000_hw *hw = &adapter->hw;
3044	u16 pf_id = adapter->vfs_allocated_count;
3045
3046	if (pf_id) {
3047		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3048		/*
3049		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3050		 * to our max jumbo frame size, in case we need to enable
3051		 * jumbo frames on one of the rings later.
3052		 * This will not pass over-length frames into the default
3053		 * queue because it's gated by the VMOLR.RLPML.
3054		 */
3055		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3056	}
3057
3058	wr32(E1000_RLPML, max_frame_size);
3059}
3060
3061static inline void igb_set_vmolr(struct igb_adapter *adapter,
3062				 int vfn, bool aupe)
3063{
3064	struct e1000_hw *hw = &adapter->hw;
3065	u32 vmolr;
3066
3067	/*
3068	 * This register exists only on 82576 and newer so if we are older then
3069	 * we should exit and do nothing
3070	 */
3071	if (hw->mac.type < e1000_82576)
3072		return;
3073
3074	vmolr = rd32(E1000_VMOLR(vfn));
3075	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3076	if (aupe)
3077		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3078	else
3079		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3080
3081	/* clear all bits that might not be set */
3082	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3083
3084	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3085		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3086	/*
3087	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3088	 * multicast packets
3089	 */
3090	if (vfn <= adapter->vfs_allocated_count)
3091		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3092
3093	wr32(E1000_VMOLR(vfn), vmolr);
3094}
3095
3096/**
3097 * igb_configure_rx_ring - Configure a receive ring after Reset
3098 * @adapter: board private structure
3099 * @ring: receive ring to be configured
3100 *
3101 * Configure the Rx unit of the MAC after a reset.
3102 **/
3103void igb_configure_rx_ring(struct igb_adapter *adapter,
3104                           struct igb_ring *ring)
3105{
3106	struct e1000_hw *hw = &adapter->hw;
3107	u64 rdba = ring->dma;
3108	int reg_idx = ring->reg_idx;
3109	u32 srrctl = 0, rxdctl = 0;
3110
3111	/* disable the queue */
3112	wr32(E1000_RXDCTL(reg_idx), 0);
3113
3114	/* Set DMA base address registers */
3115	wr32(E1000_RDBAL(reg_idx),
3116	     rdba & 0x00000000ffffffffULL);
3117	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3118	wr32(E1000_RDLEN(reg_idx),
3119	               ring->count * sizeof(union e1000_adv_rx_desc));
3120
3121	/* initialize head and tail */
3122	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3123	wr32(E1000_RDH(reg_idx), 0);
3124	writel(0, ring->tail);
3125
3126	/* set descriptor configuration */
3127	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3128#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3129	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3130#else
3131	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3132#endif
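	/* on 4 KB-page systems this requests a 2 KB packet buffer per
	 * descriptor, assuming BSIZEPKT is expressed in 1 KB units */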
3133	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3134	if (hw->mac.type == e1000_82580)
3135		srrctl |= E1000_SRRCTL_TIMESTAMP;
3136	/* Only set Drop Enable if we are supporting multiple queues */
3137	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3138		srrctl |= E1000_SRRCTL_DROP_EN;
3139
3140	wr32(E1000_SRRCTL(reg_idx), srrctl);
3141
3142	/* set filtering for VMDQ pools */
3143	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3144
3145	rxdctl |= IGB_RX_PTHRESH;
3146	rxdctl |= IGB_RX_HTHRESH << 8;
3147	rxdctl |= IGB_RX_WTHRESH << 16;
3148
3149	/* enable receive descriptor fetching */
3150	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3151	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3152}
3153
3154/**
3155 * igb_configure_rx - Configure receive Unit after Reset
3156 * @adapter: board private structure
3157 *
3158 * Configure the Rx unit of the MAC after a reset.
3159 **/
3160static void igb_configure_rx(struct igb_adapter *adapter)
3161{
3162	int i;
3163
3164	/* set UTA to appropriate mode */
3165	igb_set_uta(adapter);
3166
3167	/* set the correct pool for the PF default MAC address in entry 0 */
3168	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3169	                 adapter->vfs_allocated_count);
3170
3171	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3172	 * the Base and Length of the Rx Descriptor Ring */
3173	for (i = 0; i < adapter->num_rx_queues; i++)
3174		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3175}
3176
3177/**
3178 * igb_free_tx_resources - Free Tx Resources per Queue
3179 * @tx_ring: Tx descriptor ring for a specific queue
3180 *
3181 * Free all transmit software resources
3182 **/
3183void igb_free_tx_resources(struct igb_ring *tx_ring)
3184{
3185	igb_clean_tx_ring(tx_ring);
3186
3187	vfree(tx_ring->tx_buffer_info);
3188	tx_ring->tx_buffer_info = NULL;
3189
3190	/* if not set, then don't free */
3191	if (!tx_ring->desc)
3192		return;
3193
3194	dma_free_coherent(tx_ring->dev, tx_ring->size,
3195			  tx_ring->desc, tx_ring->dma);
3196
3197	tx_ring->desc = NULL;
3198}
3199
3200/**
3201 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3202 * @adapter: board private structure
3203 *
3204 * Free all transmit software resources
3205 **/
3206static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3207{
3208	int i;
3209
3210	for (i = 0; i < adapter->num_tx_queues; i++)
3211		igb_free_tx_resources(adapter->tx_ring[i]);
3212}
3213
3214void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3215				    struct igb_tx_buffer *tx_buffer)
3216{
3217	if (tx_buffer->skb) {
3218		dev_kfree_skb_any(tx_buffer->skb);
3219		if (tx_buffer->dma)
3220			dma_unmap_single(ring->dev,
3221					 tx_buffer->dma,
3222					 tx_buffer->length,
3223					 DMA_TO_DEVICE);
3224	} else if (tx_buffer->dma) {
3225		dma_unmap_page(ring->dev,
3226			       tx_buffer->dma,
3227			       tx_buffer->length,
3228			       DMA_TO_DEVICE);
3229	}
3230	tx_buffer->next_to_watch = NULL;
3231	tx_buffer->skb = NULL;
3232	tx_buffer->dma = 0;
3233	/* buffer_info must be completely set up in the transmit path */
3234}
3235
3236/**
3237 * igb_clean_tx_ring - Free Tx Buffers
3238 * @tx_ring: ring to be cleaned
3239 **/
3240static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3241{
3242	struct igb_tx_buffer *buffer_info;
3243	unsigned long size;
3244	u16 i;
3245
3246	if (!tx_ring->tx_buffer_info)
3247		return;
3248	/* Free all the Tx ring sk_buffs */
3249
3250	for (i = 0; i < tx_ring->count; i++) {
3251		buffer_info = &tx_ring->tx_buffer_info[i];
3252		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3253	}
3254
3255	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3256	memset(tx_ring->tx_buffer_info, 0, size);
3257
3258	/* Zero out the descriptor ring */
3259	memset(tx_ring->desc, 0, tx_ring->size);
3260
3261	tx_ring->next_to_use = 0;
3262	tx_ring->next_to_clean = 0;
3263}
3264
3265/**
3266 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3267 * @adapter: board private structure
3268 **/
3269static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3270{
3271	int i;
3272
3273	for (i = 0; i < adapter->num_tx_queues; i++)
3274		igb_clean_tx_ring(adapter->tx_ring[i]);
3275}
3276
3277/**
3278 * igb_free_rx_resources - Free Rx Resources
3279 * @rx_ring: ring to clean the resources from
3280 *
3281 * Free all receive software resources
3282 **/
3283void igb_free_rx_resources(struct igb_ring *rx_ring)
3284{
3285	igb_clean_rx_ring(rx_ring);
3286
3287	vfree(rx_ring->rx_buffer_info);
3288	rx_ring->rx_buffer_info = NULL;
3289
3290	/* if not set, then don't free */
3291	if (!rx_ring->desc)
3292		return;
3293
3294	dma_free_coherent(rx_ring->dev, rx_ring->size,
3295			  rx_ring->desc, rx_ring->dma);
3296
3297	rx_ring->desc = NULL;
3298}
3299
3300/**
3301 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3302 * @adapter: board private structure
3303 *
3304 * Free all receive software resources
3305 **/
3306static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3307{
3308	int i;
3309
3310	for (i = 0; i < adapter->num_rx_queues; i++)
3311		igb_free_rx_resources(adapter->rx_ring[i]);
3312}
3313
3314/**
3315 * igb_clean_rx_ring - Free Rx Buffers per Queue
3316 * @rx_ring: ring to free buffers from
3317 **/
3318static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3319{
3320	unsigned long size;
3321	u16 i;
3322
3323	if (!rx_ring->rx_buffer_info)
3324		return;
3325
3326	/* Free all the Rx ring sk_buffs */
3327	for (i = 0; i < rx_ring->count; i++) {
3328		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3329		if (buffer_info->dma) {
3330			dma_unmap_single(rx_ring->dev,
3331			                 buffer_info->dma,
3332					 IGB_RX_HDR_LEN,
3333					 DMA_FROM_DEVICE);
3334			buffer_info->dma = 0;
3335		}
3336
3337		if (buffer_info->skb) {
3338			dev_kfree_skb(buffer_info->skb);
3339			buffer_info->skb = NULL;
3340		}
3341		if (buffer_info->page_dma) {
3342			dma_unmap_page(rx_ring->dev,
3343			               buffer_info->page_dma,
3344				       PAGE_SIZE / 2,
3345				       DMA_FROM_DEVICE);
3346			buffer_info->page_dma = 0;
3347		}
3348		if (buffer_info->page) {
3349			put_page(buffer_info->page);
3350			buffer_info->page = NULL;
3351			buffer_info->page_offset = 0;
3352		}
3353	}
3354
3355	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3356	memset(rx_ring->rx_buffer_info, 0, size);
3357
3358	/* Zero out the descriptor ring */
3359	memset(rx_ring->desc, 0, rx_ring->size);
3360
3361	rx_ring->next_to_clean = 0;
3362	rx_ring->next_to_use = 0;
3363}
3364
3365/**
3366 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3367 * @adapter: board private structure
3368 **/
3369static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3370{
3371	int i;
3372
3373	for (i = 0; i < adapter->num_rx_queues; i++)
3374		igb_clean_rx_ring(adapter->rx_ring[i]);
3375}
3376
3377/**
3378 * igb_set_mac - Change the Ethernet Address of the NIC
3379 * @netdev: network interface device structure
3380 * @p: pointer to an address structure
3381 *
3382 * Returns 0 on success, negative on failure
3383 **/
3384static int igb_set_mac(struct net_device *netdev, void *p)
3385{
3386	struct igb_adapter *adapter = netdev_priv(netdev);
3387	struct e1000_hw *hw = &adapter->hw;
3388	struct sockaddr *addr = p;
3389
3390	if (!is_valid_ether_addr(addr->sa_data))
3391		return -EADDRNOTAVAIL;
3392
3393	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3394	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3395
3396	/* set the correct pool for the new PF MAC address in entry 0 */
3397	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3398	                 adapter->vfs_allocated_count);
3399
3400	return 0;
3401}
3402
3403/**
3404 * igb_write_mc_addr_list - write multicast addresses to MTA
3405 * @netdev: network interface device structure
3406 *
3407 * Writes multicast address list to the MTA hash table.
3408 * Returns: -ENOMEM on failure
3409 *                0 on no addresses written
3410 *                X on writing X addresses to MTA
3411 **/
3412static int igb_write_mc_addr_list(struct net_device *netdev)
3413{
3414	struct igb_adapter *adapter = netdev_priv(netdev);
3415	struct e1000_hw *hw = &adapter->hw;
3416	struct netdev_hw_addr *ha;
3417	u8  *mta_list;
3418	int i;
3419
3420	if (netdev_mc_empty(netdev)) {
3421		/* nothing to program, so clear mc list */
3422		igb_update_mc_addr_list(hw, NULL, 0);
3423		igb_restore_vf_multicasts(adapter);
3424		return 0;
3425	}
3426
3427	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3428	if (!mta_list)
3429		return -ENOMEM;
3430
3431	/* The shared function expects a packed array of only addresses. */
3432	i = 0;
3433	netdev_for_each_mc_addr(ha, netdev)
3434		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3435
3436	igb_update_mc_addr_list(hw, mta_list, i);
3437	kfree(mta_list);
3438
3439	return netdev_mc_count(netdev);
3440}
3441
3442/**
3443 * igb_write_uc_addr_list - write unicast addresses to RAR table
3444 * @netdev: network interface device structure
3445 *
3446 * Writes unicast address list to the RAR table.
3447 * Returns: -ENOMEM on failure/insufficient address space
3448 *                0 on no addresses written
3449 *                X on writing X addresses to the RAR table
3450 **/
3451static int igb_write_uc_addr_list(struct net_device *netdev)
3452{
3453	struct igb_adapter *adapter = netdev_priv(netdev);
3454	struct e1000_hw *hw = &adapter->hw;
3455	unsigned int vfn = adapter->vfs_allocated_count;
3456	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3457	int count = 0;
3458
3459	/* return ENOMEM indicating insufficient memory for addresses */
3460	if (netdev_uc_count(netdev) > rar_entries)
3461		return -ENOMEM;
3462
3463	if (!netdev_uc_empty(netdev) && rar_entries) {
3464		struct netdev_hw_addr *ha;
3465
3466		netdev_for_each_uc_addr(ha, netdev) {
3467			if (!rar_entries)
3468				break;
3469			igb_rar_set_qsel(adapter, ha->addr,
3470			                 rar_entries--,
3471			                 vfn);
3472			count++;
3473		}
3474	}
3475	/* write the addresses in reverse order to avoid write combining */
3476	for (; rar_entries > 0 ; rar_entries--) {
3477		wr32(E1000_RAH(rar_entries), 0);
3478		wr32(E1000_RAL(rar_entries), 0);
3479	}
3480	wrfl();
3481
3482	return count;
3483}
3484
3485/**
3486 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3487 * @netdev: network interface device structure
3488 *
3489 * The set_rx_mode entry point is called whenever the unicast or multicast
3490 * address lists or the network interface flags are updated.  This routine is
3491 * responsible for configuring the hardware for proper unicast, multicast,
3492 * promiscuous mode, and all-multi behavior.
3493 **/
3494static void igb_set_rx_mode(struct net_device *netdev)
3495{
3496	struct igb_adapter *adapter = netdev_priv(netdev);
3497	struct e1000_hw *hw = &adapter->hw;
3498	unsigned int vfn = adapter->vfs_allocated_count;
3499	u32 rctl, vmolr = 0;
3500	int count;
3501
3502	/* Check for Promiscuous and All Multicast modes */
3503	rctl = rd32(E1000_RCTL);
3504
3505	/* clear the effected bits */
3506	/* clear the affected bits */
3507
3508	if (netdev->flags & IFF_PROMISC) {
3509		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3510		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3511	} else {
3512		if (netdev->flags & IFF_ALLMULTI) {
3513			rctl |= E1000_RCTL_MPE;
3514			vmolr |= E1000_VMOLR_MPME;
3515		} else {
3516			/*
3517			 * Write addresses to the MTA, if the attempt fails
3518			 * then we should just turn on promiscuous mode so
3519			 * that we can at least receive multicast traffic
3520			 */
3521			count = igb_write_mc_addr_list(netdev);
3522			if (count < 0) {
3523				rctl |= E1000_RCTL_MPE;
3524				vmolr |= E1000_VMOLR_MPME;
3525			} else if (count) {
3526				vmolr |= E1000_VMOLR_ROMPE;
3527			}
3528		}
3529		/*
3530		 * Write addresses to available RAR registers, if there is not
3531		 * sufficient space to store all the addresses then enable
3532		 * unicast promiscuous mode
3533		 */
3534		count = igb_write_uc_addr_list(netdev);
3535		if (count < 0) {
3536			rctl |= E1000_RCTL_UPE;
3537			vmolr |= E1000_VMOLR_ROPE;
3538		}
3539		rctl |= E1000_RCTL_VFE;
3540	}
3541	wr32(E1000_RCTL, rctl);
3542
3543	/*
3544	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3545	 * the VMOLR to enable the appropriate modes.  Without this workaround
3546	 * we will have issues with VLAN tag stripping not being done for frames
3547	 * that are only arriving because we are the default pool
3548	 */
3549	if (hw->mac.type < e1000_82576)
3550		return;
3551
3552	vmolr |= rd32(E1000_VMOLR(vfn)) &
3553	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3554	wr32(E1000_VMOLR(vfn), vmolr);
3555	igb_restore_vf_multicasts(adapter);
3556}
3557
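/* igb_check_wvbr - latch any Wrong VM Behavior (anti-spoof) events from the
 * WVBR register so igb_spoof_check() can report them from the watchdog
 */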
3558static void igb_check_wvbr(struct igb_adapter *adapter)
3559{
3560	struct e1000_hw *hw = &adapter->hw;
3561	u32 wvbr = 0;
3562
3563	switch (hw->mac.type) {
3564	case e1000_82576:
3565	case e1000_i350:
3566		if (!(wvbr = rd32(E1000_WVBR)))
3567			return;
3568		break;
3569	default:
3570		break;
3571	}
3572
3573	adapter->wvbr |= wvbr;
3574}
3575
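/* a VF's spoof events are reported in two WVBR bits, the second set
 * starting IGB_STAGGERED_QUEUE_OFFSET bit positions higher
 */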
3576#define IGB_STAGGERED_QUEUE_OFFSET 8
3577
3578static void igb_spoof_check(struct igb_adapter *adapter)
3579{
3580	int j;
3581
3582	if (!adapter->wvbr)
3583		return;
3584
3585	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3586		if (adapter->wvbr & (1 << j) ||
3587		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3588			dev_warn(&adapter->pdev->dev,
3589				"Spoof event(s) detected on VF %d\n", j);
3590			adapter->wvbr &=
3591				~((1 << j) |
3592				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3593		}
3594	}
3595}
3596
3597/* Need to wait a few seconds after link up to get diagnostic information from
3598 * the phy */
3599static void igb_update_phy_info(unsigned long data)
3600{
3601	struct igb_adapter *adapter = (struct igb_adapter *) data;
3602	igb_get_phy_info(&adapter->hw);
3603}
3604
3605/**
3606 * igb_has_link - check shared code for link and determine up/down
3607 * @adapter: pointer to driver private info
3608 **/
3609bool igb_has_link(struct igb_adapter *adapter)
3610{
3611	struct e1000_hw *hw = &adapter->hw;
3612	bool link_active = false;
3613	s32 ret_val = 0;
3614
3615	/* get_link_status is set on LSC (link status) interrupt or
3616	 * rx sequence error interrupt.  get_link_status will stay
3617	 * true until e1000_check_for_link establishes link
3618	 * for copper adapters ONLY
3619	 */
3620	switch (hw->phy.media_type) {
3621	case e1000_media_type_copper:
3622		if (hw->mac.get_link_status) {
3623			ret_val = hw->mac.ops.check_for_link(hw);
3624			link_active = !hw->mac.get_link_status;
3625		} else {
3626			link_active = true;
3627		}
3628		break;
3629	case e1000_media_type_internal_serdes:
3630		ret_val = hw->mac.ops.check_for_link(hw);
3631		link_active = hw->mac.serdes_has_link;
3632		break;
3633	default:
3634	case e1000_media_type_unknown:
3635		break;
3636	}
3637
3638	return link_active;
3639}
3640
3641static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3642{
3643	bool ret = false;
3644	u32 ctrl_ext, thstat;
3645
3646	/* check for thermal sensor event on i350, copper only */
3647	if (hw->mac.type == e1000_i350) {
3648		thstat = rd32(E1000_THSTAT);
3649		ctrl_ext = rd32(E1000_CTRL_EXT);
3650
3651		if ((hw->phy.media_type == e1000_media_type_copper) &&
3652		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3653			ret = !!(thstat & event);
3654		}
3655	}
3656
3657	return ret;
3658}
3659
3660/**
3661 * igb_watchdog - Timer Call-back
3662 * @data: pointer to adapter cast into an unsigned long
3663 **/
3664static void igb_watchdog(unsigned long data)
3665{
3666	struct igb_adapter *adapter = (struct igb_adapter *)data;
3667	/* Do the rest outside of interrupt context */
3668	schedule_work(&adapter->watchdog_task);
3669}
3670
3671static void igb_watchdog_task(struct work_struct *work)
3672{
3673	struct igb_adapter *adapter = container_of(work,
3674	                                           struct igb_adapter,
3675                                                   watchdog_task);
3676	struct e1000_hw *hw = &adapter->hw;
3677	struct net_device *netdev = adapter->netdev;
3678	u32 link;
3679	int i;
3680
3681	link = igb_has_link(adapter);
3682	if (link) {
3683		if (!netif_carrier_ok(netdev)) {
3684			u32 ctrl;
3685			hw->mac.ops.get_speed_and_duplex(hw,
3686			                                 &adapter->link_speed,
3687			                                 &adapter->link_duplex);
3688
3689			ctrl = rd32(E1000_CTRL);
3690			/* Link status message must follow this format */
3691			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3692				 "Flow Control: %s\n",
3693			       netdev->name,
3694			       adapter->link_speed,
3695			       adapter->link_duplex == FULL_DUPLEX ?
3696				 "Full Duplex" : "Half Duplex",
3697			       ((ctrl & E1000_CTRL_TFCE) &&
3698			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3699			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3700			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3701
3702			/* check for thermal sensor event */
3703			if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3704				printk(KERN_INFO "igb: %s The network adapter "
3705						 "link speed was downshifted "
3706						 "because it overheated.\n",
3707						 netdev->name);
3708			}
3709
3710			/* adjust timeout factor according to speed/duplex */
3711			adapter->tx_timeout_factor = 1;
3712			switch (adapter->link_speed) {
3713			case SPEED_10:
3714				adapter->tx_timeout_factor = 14;
3715				break;
3716			case SPEED_100:
3717				/* maybe add some timeout factor ? */
3718				break;
3719			}
3720
3721			netif_carrier_on(netdev);
3722
3723			igb_ping_all_vfs(adapter);
3724			igb_check_vf_rate_limit(adapter);
3725
3726			/* link state has changed, schedule phy info update */
3727			if (!test_bit(__IGB_DOWN, &adapter->state))
3728				mod_timer(&adapter->phy_info_timer,
3729					  round_jiffies(jiffies + 2 * HZ));
3730		}
3731	} else {
3732		if (netif_carrier_ok(netdev)) {
3733			adapter->link_speed = 0;
3734			adapter->link_duplex = 0;
3735
3736			/* check for thermal sensor event */
3737			if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3738				printk(KERN_ERR "igb: %s The network adapter "
3739						"was stopped because it "
3740						"overheated.\n",
3741						netdev->name);
3742			}
3743
3744			/* Link status message must follow this format */
3745			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3746			       netdev->name);
3747			netif_carrier_off(netdev);
3748
3749			igb_ping_all_vfs(adapter);
3750
3751			/* link state has changed, schedule phy info update */
3752			if (!test_bit(__IGB_DOWN, &adapter->state))
3753				mod_timer(&adapter->phy_info_timer,
3754					  round_jiffies(jiffies + 2 * HZ));
3755		}
3756	}
3757
3758	spin_lock(&adapter->stats64_lock);
3759	igb_update_stats(adapter, &adapter->stats64);
3760	spin_unlock(&adapter->stats64_lock);
3761
3762	for (i = 0; i < adapter->num_tx_queues; i++) {
3763		struct igb_ring *tx_ring = adapter->tx_ring[i];
3764		if (!netif_carrier_ok(netdev)) {
3765			/* We've lost link, so the controller stops DMA,
3766			 * but we've got queued Tx work that's never going
3767			 * to get done, so reset controller to flush Tx.
3768			 * (Do the reset outside of interrupt context). */
3769			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3770				adapter->tx_timeout_count++;
3771				schedule_work(&adapter->reset_task);
3772				/* return immediately since reset is imminent */
3773				return;
3774			}
3775		}
3776
3777		/* Force detection of hung controller every watchdog period */
3778		tx_ring->detect_tx_hung = true;
3779	}
3780
3781	/* Cause software interrupt to ensure rx ring is cleaned */
3782	if (adapter->msix_entries) {
3783		u32 eics = 0;
3784		for (i = 0; i < adapter->num_q_vectors; i++) {
3785			struct igb_q_vector *q_vector = adapter->q_vector[i];
3786			eics |= q_vector->eims_value;
3787		}
3788		wr32(E1000_EICS, eics);
3789	} else {
3790		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3791	}
3792
3793	igb_spoof_check(adapter);
3794
3795	/* Reset the timer */
3796	if (!test_bit(__IGB_DOWN, &adapter->state))
3797		mod_timer(&adapter->watchdog_timer,
3798			  round_jiffies(jiffies + 2 * HZ));
3799}
3800
3801enum latency_range {
3802	lowest_latency = 0,
3803	low_latency = 1,
3804	bulk_latency = 2,
3805	latency_invalid = 255
3806};
3807
3808/**
3809 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3810 *
3811 *      Stores a new ITR value based strictly on packet size.  This
3812 *      algorithm is less sophisticated than that used in igb_update_itr,
3813 *      due to the difficulty of synchronizing statistics across multiple
3814 *      receive rings.  The divisors and thresholds used by this function
3815 *      were determined based on theoretical maximum wire speed and testing
3816 *      data, in order to minimize response time while increasing bulk
3817 *      throughput.
3818 *      This functionality is controlled by the InterruptThrottleRate module
3819 *      parameter (see igb_param.c)
3820 *      NOTE:  This function is called only when operating in a multiqueue
3821 *             receive environment.
3822 * @q_vector: pointer to q_vector
3823 **/
3824static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3825{
3826	int new_val = q_vector->itr_val;
3827	int avg_wire_size = 0;
3828	struct igb_adapter *adapter = q_vector->adapter;
3829	struct igb_ring *ring;
3830	unsigned int packets;
3831
3832	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3833	 * ints/sec (an ITR value of 976).
3834	 */
3835	if (adapter->link_speed != SPEED_1000) {
3836		new_val = 976;
3837		goto set_itr_val;
3838	}
3839
3840	ring = q_vector->rx_ring;
3841	if (ring) {
3842		packets = ACCESS_ONCE(ring->total_packets);
3843
3844		if (packets)
3845			avg_wire_size = ring->total_bytes / packets;
3846	}
3847
3848	ring = q_vector->tx_ring;
3849	if (ring) {
3850		packets = ACCESS_ONCE(ring->total_packets);
3851
3852		if (packets)
3853			avg_wire_size = max_t(u32, avg_wire_size,
3854			                      ring->total_bytes / packets);
3855	}
3856
3857	/* if avg_wire_size isn't set no work was done */
3858	if (!avg_wire_size)
3859		goto clear_counts;
3860
3861	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3862	avg_wire_size += 24;
3863
3864	/* Don't starve jumbo frames */
3865	avg_wire_size = min(avg_wire_size, 3000);
3866
3867	/* Give a little boost to mid-size frames */
3868	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3869		new_val = avg_wire_size / 3;
3870	else
3871		new_val = avg_wire_size / 2;
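	/* e.g. an avg_wire_size of 900 yields new_val 300, 1500 yields 750 */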
3872
3873	/* when in itr mode 3 do not exceed 20K ints/sec */
3874	if (adapter->rx_itr_setting == 3 && new_val < 196)
3875		new_val = 196;
3876
3877set_itr_val:
3878	if (new_val != q_vector->itr_val) {
3879		q_vector->itr_val = new_val;
3880		q_vector->set_itr = 1;
3881	}
3882clear_counts:
3883	if (q_vector->rx_ring) {
3884		q_vector->rx_ring->total_bytes = 0;
3885		q_vector->rx_ring->total_packets = 0;
3886	}
3887	if (q_vector->tx_ring) {
3888		q_vector->tx_ring->total_bytes = 0;
3889		q_vector->tx_ring->total_packets = 0;
3890	}
3891}
3892
3893/**
3894 * igb_update_itr - update the dynamic ITR value based on statistics
3895 *      Stores a new ITR value based on packets and byte
3896 *      counts during the last interrupt.  The advantage of per interrupt
3897 *      computation is faster updates and more accurate ITR for the current
3898 *      traffic pattern.  Constants in this function were computed
3899 *      based on theoretical maximum wire speed and thresholds were set based
3900 *      on testing data as well as attempting to minimize response time
3901 *      while increasing bulk throughput.
3902 *      This functionality is controlled by the InterruptThrottleRate module
3903 *      parameter (see igb_param.c)
3904 *      NOTE:  These calculations are only valid when operating in a single-
3905 *             queue environment.
3906 * @adapter: pointer to adapter
3907 * @itr_setting: current q_vector->itr_val
3908 * @packets: the number of packets during this measurement interval
3909 * @bytes: the number of bytes during this measurement interval
3910 **/
3911static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3912				   int packets, int bytes)
3913{
3914	unsigned int retval = itr_setting;
3915
3916	if (packets == 0)
3917		goto update_itr_done;
3918
3919	switch (itr_setting) {
3920	case lowest_latency:
3921		/* handle TSO and jumbo frames */
3922		if (bytes/packets > 8000)
3923			retval = bulk_latency;
3924		else if ((packets < 5) && (bytes > 512))
3925			retval = low_latency;
3926		break;
3927	case low_latency:  /* 50 usec aka 20000 ints/s */
3928		if (bytes > 10000) {
3929			/* this if handles the TSO accounting */
3930			if (bytes/packets > 8000) {
3931				retval = bulk_latency;
3932			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3933				retval = bulk_latency;
3934			} else if (packets > 35) {
3935				retval = lowest_latency;
3936			}
3937		} else if (bytes/packets > 2000) {
3938			retval = bulk_latency;
3939		} else if (packets <= 2 && bytes < 512) {
3940			retval = lowest_latency;
3941		}
3942		break;
3943	case bulk_latency: /* 250 usec aka 4000 ints/s */
3944		if (bytes > 25000) {
3945			if (packets > 35)
3946				retval = low_latency;
3947		} else if (bytes < 1500) {
3948			retval = low_latency;
3949		}
3950		break;
3951	}
3952
3953update_itr_done:
3954	return retval;
3955}
3956
3957static void igb_set_itr(struct igb_adapter *adapter)
3958{
3959	struct igb_q_vector *q_vector = adapter->q_vector[0];
3960	u16 current_itr;
3961	u32 new_itr = q_vector->itr_val;
3962
3963	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3964	if (adapter->link_speed != SPEED_1000) {
3965		current_itr = 0;
3966		new_itr = 4000;
3967		goto set_itr_now;
3968	}
3969
3970	adapter->rx_itr = igb_update_itr(adapter,
3971				    adapter->rx_itr,
3972				    q_vector->rx_ring->total_packets,
3973				    q_vector->rx_ring->total_bytes);
3974
3975	adapter->tx_itr = igb_update_itr(adapter,
3976				    adapter->tx_itr,
3977				    q_vector->tx_ring->total_packets,
3978				    q_vector->tx_ring->total_bytes);
3979	current_itr = max(adapter->rx_itr, adapter->tx_itr);
3980
3981	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3982	if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3983		current_itr = low_latency;
3984
3985	switch (current_itr) {
3986	/* counts and packets in update_itr are dependent on these numbers */
3987	case lowest_latency:
3988		new_itr = 56;  /* aka 70,000 ints/sec */
3989		break;
3990	case low_latency:
3991		new_itr = 196; /* aka 20,000 ints/sec */
3992		break;
3993	case bulk_latency:
3994		new_itr = 980; /* aka 4,000 ints/sec */
3995		break;
3996	default:
3997		break;
3998	}
3999
4000set_itr_now:
4001	q_vector->rx_ring->total_bytes = 0;
4002	q_vector->rx_ring->total_packets = 0;
4003	q_vector->tx_ring->total_bytes = 0;
4004	q_vector->tx_ring->total_packets = 0;
4005
4006	if (new_itr != q_vector->itr_val) {
4007		/* this attempts to bias the interrupt rate towards Bulk
4008		 * by adding intermediate steps when interrupt rate is
4009		 * increasing */
4010		new_itr = new_itr > q_vector->itr_val ?
4011		             max((new_itr * q_vector->itr_val) /
4012		                 (new_itr + (q_vector->itr_val >> 2)),
4013		                 new_itr) :
4014			     new_itr;
4015		/* Don't write the value here; it resets the adapter's
4016		 * internal timer, and causes us to delay far longer than
4017		 * we should between interrupts.  Instead, we write the ITR
4018		 * value at the beginning of the next interrupt so the timing
4019		 * ends up being correct.
4020		 */
4021		q_vector->itr_val = new_itr;
4022		q_vector->set_itr = 1;
4023	}
4024}
4025
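/* igb_tx_ctxtdesc - write an advanced Tx context descriptor carrying the
 * offload parameters (VLAN/MAC/IP lengths, L4 type, MSS) at the ring's
 * next_to_use position
 */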
4026void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4027		     u32 type_tucmd, u32 mss_l4len_idx)
4028{
4029	struct e1000_adv_tx_context_desc *context_desc;
4030	u16 i = tx_ring->next_to_use;
4031
4032	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4033
4034	i++;
4035	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4036
4037	/* set bits to identify this as an advanced context descriptor */
4038	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4039
4040	/* For 82575, context index must be unique per ring. */
4041	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4042		mss_l4len_idx |= tx_ring->reg_idx << 4;
4043
4044	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4045	context_desc->seqnum_seed	= 0;
4046	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4047	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4048}
4049
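/* igb_tso - set up a TSO context descriptor for the packet in @first.
 * Returns 1 when a context descriptor was written, 0 if the skb is not
 * GSO, or a negative errno on failure.
 */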
4050static int igb_tso(struct igb_ring *tx_ring,
4051		   struct igb_tx_buffer *first,
4052		   u8 *hdr_len)
4053{
4054	struct sk_buff *skb = first->skb;
4055	u32 vlan_macip_lens, type_tucmd;
4056	u32 mss_l4len_idx, l4len;
4057
4058	if (!skb_is_gso(skb))
4059		return 0;
4060
4061	if (skb_header_cloned(skb)) {
4062		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4063		if (err)
4064			return err;
4065	}
4066
4067	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4068	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4069
4070	if (first->protocol == __constant_htons(ETH_P_IP)) {
4071		struct iphdr *iph = ip_hdr(skb);
4072		iph->tot_len = 0;
4073		iph->check = 0;
4074		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4075							 iph->daddr, 0,
4076							 IPPROTO_TCP,
4077							 0);
4078		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4079		first->tx_flags |= IGB_TX_FLAGS_TSO |
4080				   IGB_TX_FLAGS_CSUM |
4081				   IGB_TX_FLAGS_IPV4;
4082	} else if (skb_is_gso_v6(skb)) {
4083		ipv6_hdr(skb)->payload_len = 0;
4084		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4085						       &ipv6_hdr(skb)->daddr,
4086						       0, IPPROTO_TCP, 0);
4087		first->tx_flags |= IGB_TX_FLAGS_TSO |
4088				   IGB_TX_FLAGS_CSUM;
4089	}
4090
4091	/* compute header lengths */
4092	l4len = tcp_hdrlen(skb);
4093	*hdr_len = skb_transport_offset(skb) + l4len;
4094
4095	/* update gso size and bytecount with header size */
4096	first->gso_segs = skb_shinfo(skb)->gso_segs;
4097	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4098
4099	/* MSS L4LEN IDX */
4100	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4101	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4102
4103	/* VLAN MACLEN IPLEN */
4104	vlan_macip_lens = skb_network_header_len(skb);
4105	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4106	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4107
4108	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4109
4110	return 1;
4111}
4112
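/* igb_tx_csum - set up a checksum offload context descriptor; no
 * descriptor is written when neither checksum insertion nor a VLAN tag
 * is needed
 */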
4113static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4114{
4115	struct sk_buff *skb = first->skb;
4116	u32 vlan_macip_lens = 0;
4117	u32 mss_l4len_idx = 0;
4118	u32 type_tucmd = 0;
4119
4120	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4121		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4122			return;
4123	} else {
4124		u8 l4_hdr = 0;
4125		switch (first->protocol) {
4126		case __constant_htons(ETH_P_IP):
4127			vlan_macip_lens |= skb_network_header_len(skb);
4128			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4129			l4_hdr = ip_hdr(skb)->protocol;
4130			break;
4131		case __constant_htons(ETH_P_IPV6):
4132			vlan_macip_lens |= skb_network_header_len(skb);
4133			l4_hdr = ipv6_hdr(skb)->nexthdr;
4134			break;
4135		default:
4136			if (unlikely(net_ratelimit())) {
4137				dev_warn(tx_ring->dev,
4138				 "partial checksum but proto=%x!\n",
4139				 first->protocol);
4140			}
4141			break;
4142		}
4143
4144		switch (l4_hdr) {
4145		case IPPROTO_TCP:
4146			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4147			mss_l4len_idx = tcp_hdrlen(skb) <<
4148					E1000_ADVTXD_L4LEN_SHIFT;
4149			break;
4150		case IPPROTO_SCTP:
4151			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4152			mss_l4len_idx = sizeof(struct sctphdr) <<
4153					E1000_ADVTXD_L4LEN_SHIFT;
4154			break;
4155		case IPPROTO_UDP:
4156			mss_l4len_idx = sizeof(struct udphdr) <<
4157					E1000_ADVTXD_L4LEN_SHIFT;
4158			break;
4159		default:
4160			if (unlikely(net_ratelimit())) {
4161				dev_warn(tx_ring->dev,
4162				 "partial checksum but l4 proto=%x!\n",
4163				 l4_hdr);
4164			}
4165			break;
4166		}
4167
4168		/* update TX checksum flag */
4169		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4170	}
4171
4172	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4173	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4174
4175	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4176}
4177
4178static __le32 igb_tx_cmd_type(u32 tx_flags)
4179{
4180	/* set type for advanced descriptor with frame checksum insertion */
4181	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4182				      E1000_ADVTXD_DCMD_IFCS |
4183				      E1000_ADVTXD_DCMD_DEXT);
4184
4185	/* set HW vlan bit if vlan is present */
4186	if (tx_flags & IGB_TX_FLAGS_VLAN)
4187		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4188
4189	/* set timestamp bit if present */
4190	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4191		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4192
4193	/* set segmentation bits for TSO */
4194	if (tx_flags & IGB_TX_FLAGS_TSO)
4195		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4196
4197	return cmd_type;
4198}
4199
4200static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4201				 union e1000_adv_tx_desc *tx_desc,
4202				 u32 tx_flags, unsigned int paylen)
4203{
4204	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4205
4206	/* 82575 requires a unique index per ring if any offload is enabled */
4207	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4208	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4209		olinfo_status |= tx_ring->reg_idx << 4;
4210
4211	/* insert L4 checksum */
4212	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4213		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4214
4215		/* insert IPv4 checksum */
4216		if (tx_flags & IGB_TX_FLAGS_IPV4)
4217			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4218	}
4219
4220	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4221}
4222
4223/*
4224 * The largest size we can write to the descriptor is 65535.  In order to
4225 * maintain a power of two alignment we have to limit ourselves to 32K.
4226 */
4227#define IGB_MAX_TXD_PWR	15
4228#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4229
4230static void igb_tx_map(struct igb_ring *tx_ring,
4231		       struct igb_tx_buffer *first,
4232		       const u8 hdr_len)
4233{
4234	struct sk_buff *skb = first->skb;
4235	struct igb_tx_buffer *tx_buffer_info;
4236	union e1000_adv_tx_desc *tx_desc;
4237	dma_addr_t dma;
4238	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4239	unsigned int data_len = skb->data_len;
4240	unsigned int size = skb_headlen(skb);
4241	unsigned int paylen = skb->len - hdr_len;
4242	__le32 cmd_type;
4243	u32 tx_flags = first->tx_flags;
4244	u16 i = tx_ring->next_to_use;
4245
4246	tx_desc = IGB_TX_DESC(tx_ring, i);
4247
4248	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4249	cmd_type = igb_tx_cmd_type(tx_flags);
4250
4251	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4252	if (dma_mapping_error(tx_ring->dev, dma))
4253		goto dma_error;
4254
4255	/* record length, and DMA address */
4256	first->length = size;
4257	first->dma = dma;
4258	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4259
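	/* write descriptors for the linear data mapped above, splitting any
	 * buffer larger than IGB_MAX_DATA_PER_TXD, then map and describe
	 * each page fragment
	 */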
4260	for (;;) {
4261		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4262			tx_desc->read.cmd_type_len =
4263				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4264
4265			i++;
4266			tx_desc++;
4267			if (i == tx_ring->count) {
4268				tx_desc = IGB_TX_DESC(tx_ring, 0);
4269				i = 0;
4270			}
4271
4272			dma += IGB_MAX_DATA_PER_TXD;
4273			size -= IGB_MAX_DATA_PER_TXD;
4274
4275			tx_desc->read.olinfo_status = 0;
4276			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4277		}
4278
4279		if (likely(!data_len))
4280			break;
4281
4282		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4283
4284		i++;
4285		tx_desc++;
4286		if (i == tx_ring->count) {
4287			tx_desc = IGB_TX_DESC(tx_ring, 0);
4288			i = 0;
4289		}
4290
4291		size = frag->size;
4292		data_len -= size;
4293
4294		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4295				   size, DMA_TO_DEVICE);
4296		if (dma_mapping_error(tx_ring->dev, dma))
4297			goto dma_error;
4298
4299		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4300		tx_buffer_info->length = size;
4301		tx_buffer_info->dma = dma;
4302
4303		tx_desc->read.olinfo_status = 0;
4304		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4305
4306		frag++;
4307	}
4308
4309	/* write last descriptor with RS and EOP bits */
4310	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4311	tx_desc->read.cmd_type_len = cmd_type;
4312
4313	/* set the timestamp */
4314	first->time_stamp = jiffies;
4315
4316	/*
4317	 * Force memory writes to complete before letting h/w know there
4318	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4319	 * memory model archs, such as IA-64).
4320	 *
4321	 * We also need this memory barrier to make certain all of the
4322	 * status bits have been updated before next_to_watch is written.
4323	 */
4324	wmb();
4325
4326	/* set next_to_watch value indicating a packet is present */
4327	first->next_to_watch = tx_desc;
4328
4329	i++;
4330	if (i == tx_ring->count)
4331		i = 0;
4332
4333	tx_ring->next_to_use = i;
4334
4335	writel(i, tx_ring->tail);
4336
4337	/* we need this if more than one processor can write to our tail
4338	 * at a time; it synchronizes IO on IA64/Altix systems */
4339	mmiowb();
4340
4341	return;
4342
4343dma_error:
4344	dev_err(tx_ring->dev, "TX DMA map failed\n");
4345
4346	/* clear dma mappings for failed tx_buffer_info map */
4347	for (;;) {
4348		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4349		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4350		if (tx_buffer_info == first)
4351			break;
4352		if (i == 0)
4353			i = tx_ring->count;
4354		i--;
4355	}
4356
4357	tx_ring->next_to_use = i;
4358}
4359
4360static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4361{
4362	struct net_device *netdev = tx_ring->netdev;
4363
4364	netif_stop_subqueue(netdev, tx_ring->queue_index);
4365
4366	/* Herbert's original patch had:
4367	 *  smp_mb__after_netif_stop_queue();
4368	 * but since that doesn't exist yet, just open code it. */
4369	smp_mb();
4370
4371	/* We need to check again in case another CPU has just
4372	 * made room available. */
4373	if (igb_desc_unused(tx_ring) < size)
4374		return -EBUSY;
4375
4376	/* A reprieve! */
4377	netif_wake_subqueue(netdev, tx_ring->queue_index);
4378
4379	u64_stats_update_begin(&tx_ring->tx_syncp2);
4380	tx_ring->tx_stats.restart_queue2++;
4381	u64_stats_update_end(&tx_ring->tx_syncp2);
4382
4383	return 0;
4384}
4385
4386static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4387{
4388	if (igb_desc_unused(tx_ring) >= size)
4389		return 0;
4390	return __igb_maybe_stop_tx(tx_ring, size);
4391}
4392
4393netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4394				struct igb_ring *tx_ring)
4395{
4396	struct igb_tx_buffer *first;
4397	int tso;
4398	u32 tx_flags = 0;
4399	__be16 protocol = vlan_get_protocol(skb);
4400	u8 hdr_len = 0;
4401
4402	/* need: 1 descriptor per page,
4403	 *       + 2 desc gap to keep tail from touching head,
4404	 *       + 1 desc for skb->data,
4405	 *       + 1 desc for context descriptor,
4406	 * otherwise try next time */
4407	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4408		/* this is a hard error */
4409		return NETDEV_TX_BUSY;
4410	}
4411
4412	/* record the location of the first descriptor for this packet */
4413	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4414	first->skb = skb;
4415	first->bytecount = skb->len;
4416	first->gso_segs = 1;
4417
4418	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4419		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4420		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4421	}
4422
4423	if (vlan_tx_tag_present(skb)) {
4424		tx_flags |= IGB_TX_FLAGS_VLAN;
4425		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4426	}
4427
4428	/* record initial flags and protocol */
4429	first->tx_flags = tx_flags;
4430	first->protocol = protocol;
4431
4432	tso = igb_tso(tx_ring, first, &hdr_len);
4433	if (tso < 0)
4434		goto out_drop;
4435	else if (!tso)
4436		igb_tx_csum(tx_ring, first);
4437
4438	igb_tx_map(tx_ring, first, hdr_len);
4439
4440	/* Make sure there is space in the ring for the next send. */
4441	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4442
4443	return NETDEV_TX_OK;
4444
4445out_drop:
4446	igb_unmap_and_free_tx_resource(tx_ring, first);
4447
4448	return NETDEV_TX_OK;
4449}
4450
4451static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4452						    struct sk_buff *skb)
4453{
4454	unsigned int r_idx = skb->queue_mapping;
4455
4456	if (r_idx >= adapter->num_tx_queues)
4457		r_idx = r_idx % adapter->num_tx_queues;
4458
4459	return adapter->tx_ring[r_idx];
4460}
4461
4462static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4463				  struct net_device *netdev)
4464{
4465	struct igb_adapter *adapter = netdev_priv(netdev);
4466
4467	if (test_bit(__IGB_DOWN, &adapter->state)) {
4468		dev_kfree_skb_any(skb);
4469		return NETDEV_TX_OK;
4470	}
4471
4472	if (skb->len <= 0) {
4473		dev_kfree_skb_any(skb);
4474		return NETDEV_TX_OK;
4475	}
4476
4477	/*
4478	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4479	 * in order to meet this minimum size requirement.
4480	 */
4481	if (skb->len < 17) {
4482		if (skb_padto(skb, 17))
4483			return NETDEV_TX_OK;
4484		skb->len = 17;
4485	}
4486
4487	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4488}
4489
4490/**
4491 * igb_tx_timeout - Respond to a Tx Hang
4492 * @netdev: network interface device structure
4493 **/
4494static void igb_tx_timeout(struct net_device *netdev)
4495{
4496	struct igb_adapter *adapter = netdev_priv(netdev);
4497	struct e1000_hw *hw = &adapter->hw;
4498
4499	/* Do the reset outside of interrupt context */
4500	adapter->tx_timeout_count++;
4501
4502	if (hw->mac.type == e1000_82580)
4503		hw->dev_spec._82575.global_device_reset = true;
4504
4505	schedule_work(&adapter->reset_task);
4506	wr32(E1000_EICS,
4507	     (adapter->eims_enable_mask & ~adapter->eims_other));
4508}
4509
4510static void igb_reset_task(struct work_struct *work)
4511{
4512	struct igb_adapter *adapter;
4513	adapter = container_of(work, struct igb_adapter, reset_task);
4514
4515	igb_dump(adapter);
4516	netdev_err(adapter->netdev, "Reset adapter\n");
4517	igb_reinit_locked(adapter);
4518}
4519
4520/**
4521 * igb_get_stats64 - Get System Network Statistics
4522 * @netdev: network interface device structure
4523 * @stats: rtnl_link_stats64 pointer
4524 *
4525 **/
4526static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4527						 struct rtnl_link_stats64 *stats)
4528{
4529	struct igb_adapter *adapter = netdev_priv(netdev);
4530
4531	spin_lock(&adapter->stats64_lock);
4532	igb_update_stats(adapter, &adapter->stats64);
4533	memcpy(stats, &adapter->stats64, sizeof(*stats));
4534	spin_unlock(&adapter->stats64_lock);
4535
4536	return stats;
4537}
4538
4539/**
4540 * igb_change_mtu - Change the Maximum Transfer Unit
4541 * @netdev: network interface device structure
4542 * @new_mtu: new value for maximum frame size
4543 *
4544 * Returns 0 on success, negative on failure
4545 **/
4546static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4547{
4548	struct igb_adapter *adapter = netdev_priv(netdev);
4549	struct pci_dev *pdev = adapter->pdev;
4550	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4551
4552	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4553		dev_err(&pdev->dev, "Invalid MTU setting\n");
4554		return -EINVAL;
4555	}
4556
4557#define MAX_STD_JUMBO_FRAME_SIZE 9238
4558	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4559		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4560		return -EINVAL;
4561	}
4562
4563	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4564		msleep(1);
4565
4566	/* igb_down has a dependency on max_frame_size */
4567	adapter->max_frame_size = max_frame;
4568
4569	if (netif_running(netdev))
4570		igb_down(adapter);
4571
4572	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4573		 netdev->mtu, new_mtu);
4574	netdev->mtu = new_mtu;
4575
4576	if (netif_running(netdev))
4577		igb_up(adapter);
4578	else
4579		igb_reset(adapter);
4580
4581	clear_bit(__IGB_RESETTING, &adapter->state);
4582
4583	return 0;
4584}
4585
4586/**
4587 * igb_update_stats - Update the board statistics counters
4588 * @adapter: board private structure
4589 **/
4590
4591void igb_update_stats(struct igb_adapter *adapter,
4592		      struct rtnl_link_stats64 *net_stats)
4593{
4594	struct e1000_hw *hw = &adapter->hw;
4595	struct pci_dev *pdev = adapter->pdev;
4596	u32 reg, mpc;
4597	u16 phy_tmp;
4598	int i;
4599	u64 bytes, packets;
4600	unsigned int start;
4601	u64 _bytes, _packets;
4602
4603#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4604
4605	/*
4606	 * Prevent stats update while adapter is being reset, or if the pci
4607	 * connection is down.
4608	 */
4609	if (adapter->link_speed == 0)
4610		return;
4611	if (pci_channel_offline(pdev))
4612		return;
4613
4614	bytes = 0;
4615	packets = 0;
4616	for (i = 0; i < adapter->num_rx_queues; i++) {
4617		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4618		struct igb_ring *ring = adapter->rx_ring[i];
4619
4620		ring->rx_stats.drops += rqdpc_tmp;
4621		net_stats->rx_fifo_errors += rqdpc_tmp;
4622
4623		do {
4624			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4625			_bytes = ring->rx_stats.bytes;
4626			_packets = ring->rx_stats.packets;
4627		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4628		bytes += _bytes;
4629		packets += _packets;
4630	}
4631
4632	net_stats->rx_bytes = bytes;
4633	net_stats->rx_packets = packets;
4634
4635	bytes = 0;
4636	packets = 0;
4637	for (i = 0; i < adapter->num_tx_queues; i++) {
4638		struct igb_ring *ring = adapter->tx_ring[i];
4639		do {
4640			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4641			_bytes = ring->tx_stats.bytes;
4642			_packets = ring->tx_stats.packets;
4643		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4644		bytes += _bytes;
4645		packets += _packets;
4646	}
4647	net_stats->tx_bytes = bytes;
4648	net_stats->tx_packets = packets;
4649
4650	/* read stats registers */
4651	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4652	adapter->stats.gprc += rd32(E1000_GPRC);
4653	adapter->stats.gorc += rd32(E1000_GORCL);
4654	rd32(E1000_GORCH); /* clear GORCL */
4655	adapter->stats.bprc += rd32(E1000_BPRC);
4656	adapter->stats.mprc += rd32(E1000_MPRC);
4657	adapter->stats.roc += rd32(E1000_ROC);
4658
4659	adapter->stats.prc64 += rd32(E1000_PRC64);
4660	adapter->stats.prc127 += rd32(E1000_PRC127);
4661	adapter->stats.prc255 += rd32(E1000_PRC255);
4662	adapter->stats.prc511 += rd32(E1000_PRC511);
4663	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4664	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4665	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4666	adapter->stats.sec += rd32(E1000_SEC);
4667
4668	mpc = rd32(E1000_MPC);
4669	adapter->stats.mpc += mpc;
4670	net_stats->rx_fifo_errors += mpc;
4671	adapter->stats.scc += rd32(E1000_SCC);
4672	adapter->stats.ecol += rd32(E1000_ECOL);
4673	adapter->stats.mcc += rd32(E1000_MCC);
4674	adapter->stats.latecol += rd32(E1000_LATECOL);
4675	adapter->stats.dc += rd32(E1000_DC);
4676	adapter->stats.rlec += rd32(E1000_RLEC);
4677	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4678	adapter->stats.xontxc += rd32(E1000_XONTXC);
4679	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4680	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4681	adapter->stats.fcruc += rd32(E1000_FCRUC);
4682	adapter->stats.gptc += rd32(E1000_GPTC);
4683	adapter->stats.gotc += rd32(E1000_GOTCL);
4684	rd32(E1000_GOTCH); /* clear GOTCL */
4685	adapter->stats.rnbc += rd32(E1000_RNBC);
4686	adapter->stats.ruc += rd32(E1000_RUC);
4687	adapter->stats.rfc += rd32(E1000_RFC);
4688	adapter->stats.rjc += rd32(E1000_RJC);
4689	adapter->stats.tor += rd32(E1000_TORH);
4690	adapter->stats.tot += rd32(E1000_TOTH);
4691	adapter->stats.tpr += rd32(E1000_TPR);
4692
4693	adapter->stats.ptc64 += rd32(E1000_PTC64);
4694	adapter->stats.ptc127 += rd32(E1000_PTC127);
4695	adapter->stats.ptc255 += rd32(E1000_PTC255);
4696	adapter->stats.ptc511 += rd32(E1000_PTC511);
4697	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4698	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4699
4700	adapter->stats.mptc += rd32(E1000_MPTC);
4701	adapter->stats.bptc += rd32(E1000_BPTC);
4702
4703	adapter->stats.tpt += rd32(E1000_TPT);
4704	adapter->stats.colc += rd32(E1000_COLC);
4705
4706	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4707	/* read internal phy specific stats */
4708	reg = rd32(E1000_CTRL_EXT);
4709	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4710		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4711		adapter->stats.tncrs += rd32(E1000_TNCRS);
4712	}
4713
4714	adapter->stats.tsctc += rd32(E1000_TSCTC);
4715	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4716
4717	adapter->stats.iac += rd32(E1000_IAC);
4718	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4719	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4720	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4721	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4722	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4723	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4724	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4725	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4726
4727	/* Fill out the OS statistics structure */
4728	net_stats->multicast = adapter->stats.mprc;
4729	net_stats->collisions = adapter->stats.colc;
4730
4731	/* Rx Errors */
4732
4733	/* RLEC on some newer hardware can be incorrect so build
4734	 * our own version based on RUC and ROC */
4735	net_stats->rx_errors = adapter->stats.rxerrc +
4736		adapter->stats.crcerrs + adapter->stats.algnerrc +
4737		adapter->stats.ruc + adapter->stats.roc +
4738		adapter->stats.cexterr;
4739	net_stats->rx_length_errors = adapter->stats.ruc +
4740				      adapter->stats.roc;
4741	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4742	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4743	net_stats->rx_missed_errors = adapter->stats.mpc;
4744
4745	/* Tx Errors */
4746	net_stats->tx_errors = adapter->stats.ecol +
4747			       adapter->stats.latecol;
4748	net_stats->tx_aborted_errors = adapter->stats.ecol;
4749	net_stats->tx_window_errors = adapter->stats.latecol;
4750	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4751
4752	/* Tx Dropped needs to be maintained elsewhere */
4753
4754	/* Phy Stats */
4755	if (hw->phy.media_type == e1000_media_type_copper) {
4756		if ((adapter->link_speed == SPEED_1000) &&
4757		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4758			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4759			adapter->phy_stats.idle_errors += phy_tmp;
4760		}
4761	}
4762
4763	/* Management Stats */
4764	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4765	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4766	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4767
4768	/* OS2BMC Stats */
4769	reg = rd32(E1000_MANC);
4770	if (reg & E1000_MANC_EN_BMC2OS) {
4771		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4772		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4773		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4774		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4775	}
4776}
4777
4778static irqreturn_t igb_msix_other(int irq, void *data)
4779{
4780	struct igb_adapter *adapter = data;
4781	struct e1000_hw *hw = &adapter->hw;
4782	u32 icr = rd32(E1000_ICR);
4783	/* reading ICR causes bit 31 of EICR to be cleared */
4784
4785	if (icr & E1000_ICR_DRSTA)
4786		schedule_work(&adapter->reset_task);
4787
4788	if (icr & E1000_ICR_DOUTSYNC) {
4789		/* HW is reporting DMA is out of sync */
4790		adapter->stats.doosync++;
4791		/* The DMA Out of Sync is also an indication of a spoof event
4792		 * in IOV mode. Check the Wrong VM Behavior register to
4793		 * see if it is really a spoof event. */
4794		igb_check_wvbr(adapter);
4795	}
4796
4797	/* Check for a mailbox event */
4798	if (icr & E1000_ICR_VMMB)
4799		igb_msg_task(adapter);
4800
4801	if (icr & E1000_ICR_LSC) {
4802		hw->mac.get_link_status = 1;
4803		/* guard against interrupt when we're going down */
4804		if (!test_bit(__IGB_DOWN, &adapter->state))
4805			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4806	}
4807
4808	if (adapter->vfs_allocated_count)
4809		wr32(E1000_IMS, E1000_IMS_LSC |
4810				E1000_IMS_VMMB |
4811				E1000_IMS_DOUTSYNC);
4812	else
4813		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4814	wr32(E1000_EIMS, adapter->eims_other);
4815
4816	return IRQ_HANDLED;
4817}
4818
4819static void igb_write_itr(struct igb_q_vector *q_vector)
4820{
4821	struct igb_adapter *adapter = q_vector->adapter;
4822	u32 itr_val = q_vector->itr_val & 0x7FFC;
4823
4824	if (!q_vector->set_itr)
4825		return;
4826
4827	if (!itr_val)
4828		itr_val = 0x4;
4829
4830	if (adapter->hw.mac.type == e1000_82575)
4831		itr_val |= itr_val << 16;
4832	else
4833		itr_val |= 0x8000000;
4834
4835	writel(itr_val, q_vector->itr_register);
4836	q_vector->set_itr = 0;
4837}
4838
4839static irqreturn_t igb_msix_ring(int irq, void *data)
4840{
4841	struct igb_q_vector *q_vector = data;
4842
4843	/* Write the ITR value calculated from the previous interrupt. */
4844	igb_write_itr(q_vector);
4845
4846	napi_schedule(&q_vector->napi);
4847
4848	return IRQ_HANDLED;
4849}
4850
4851#ifdef CONFIG_IGB_DCA
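/* igb_update_dca - retarget the Tx/Rx DCA tags at the CPU currently
 * running this q_vector so descriptor (and Rx header/payload) writes are
 * steered toward that CPU's cache
 */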
4852static void igb_update_dca(struct igb_q_vector *q_vector)
4853{
4854	struct igb_adapter *adapter = q_vector->adapter;
4855	struct e1000_hw *hw = &adapter->hw;
4856	int cpu = get_cpu();
4857
4858	if (q_vector->cpu == cpu)
4859		goto out_no_update;
4860
4861	if (q_vector->tx_ring) {
4862		int q = q_vector->tx_ring->reg_idx;
4863		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4864		if (hw->mac.type == e1000_82575) {
4865			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4866			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4867		} else {
4868			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4869			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4870			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4871		}
4872		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4873		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4874	}
4875	if (q_vector->rx_ring) {
4876		int q = q_vector->rx_ring->reg_idx;
4877		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4878		if (hw->mac.type == e1000_82575) {
4879			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4880			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4881		} else {
4882			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4883			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4884			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4885		}
4886		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4887		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4888		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4889		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4890	}
4891	q_vector->cpu = cpu;
4892out_no_update:
4893	put_cpu();
4894}
4895
4896static void igb_setup_dca(struct igb_adapter *adapter)
4897{
4898	struct e1000_hw *hw = &adapter->hw;
4899	int i;
4900
4901	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4902		return;
4903
4904	/* Always use CB2 mode, difference is masked in the CB driver. */
4905	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4906
4907	for (i = 0; i < adapter->num_q_vectors; i++) {
4908		adapter->q_vector[i]->cpu = -1;
4909		igb_update_dca(adapter->q_vector[i]);
4910	}
4911}
4912
4913static int __igb_notify_dca(struct device *dev, void *data)
4914{
4915	struct net_device *netdev = dev_get_drvdata(dev);
4916	struct igb_adapter *adapter = netdev_priv(netdev);
4917	struct pci_dev *pdev = adapter->pdev;
4918	struct e1000_hw *hw = &adapter->hw;
4919	unsigned long event = *(unsigned long *)data;
4920
4921	switch (event) {
4922	case DCA_PROVIDER_ADD:
4923		/* if already enabled, don't do it again */
4924		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4925			break;
4926		if (dca_add_requester(dev) == 0) {
4927			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4928			dev_info(&pdev->dev, "DCA enabled\n");
4929			igb_setup_dca(adapter);
4930			break;
4931		}
4932		/* Fall Through since DCA is disabled. */
4933	case DCA_PROVIDER_REMOVE:
4934		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4935			/* without this a class_device is left
4936			 * hanging around in the sysfs model */
4937			dca_remove_requester(dev);
4938			dev_info(&pdev->dev, "DCA disabled\n");
4939			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4940			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4941		}
4942		break;
4943	}
4944
4945	return 0;
4946}
4947
4948static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4949                          void *p)
4950{
4951	int ret_val;
4952
4953	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4954	                                 __igb_notify_dca);
4955
4956	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4957}
4958#endif /* CONFIG_IGB_DCA */
4959
4960static void igb_ping_all_vfs(struct igb_adapter *adapter)
4961{
4962	struct e1000_hw *hw = &adapter->hw;
4963	u32 ping;
4964	int i;
4965
4966	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4967		ping = E1000_PF_CONTROL_MSG;
4968		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4969			ping |= E1000_VT_MSGTYPE_CTS;
4970		igb_write_mbx(hw, &ping, 1, i);
4971	}
4972}
4973
4974static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4975{
4976	struct e1000_hw *hw = &adapter->hw;
4977	u32 vmolr = rd32(E1000_VMOLR(vf));
4978	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4979
4980	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4981	                    IGB_VF_FLAG_MULTI_PROMISC);
4982	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4983
4984	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4985		vmolr |= E1000_VMOLR_MPME;
4986		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4987		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4988	} else {
4989		/*
4990		 * if we have hashes and we are clearing a multicast promisc
4991		 * flag we need to write the hashes to the MTA as this step
4992		 * was previously skipped
4993		 */
4994		if (vf_data->num_vf_mc_hashes > 30) {
4995			vmolr |= E1000_VMOLR_MPME;
4996		} else if (vf_data->num_vf_mc_hashes) {
4997			int j;
4998			vmolr |= E1000_VMOLR_ROMPE;
4999			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5000				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5001		}
5002	}
5003
5004	wr32(E1000_VMOLR(vf), vmolr);
5005
5006	/* there are flags left unprocessed, likely not supported */
5007	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5008		return -EINVAL;
5009
5010	return 0;
5011
5012}
5013
5014static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5015				  u32 *msgbuf, u32 vf)
5016{
5017	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5018	u16 *hash_list = (u16 *)&msgbuf[1];
5019	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5020	int i;
5021
5022	/* salt away the number of multicast addresses assigned
5023	 * to this VF for later use to restore when the PF multicast
5024	 * list changes
5025	 */
5026	vf_data->num_vf_mc_hashes = n;
5027
5028	/* only up to 30 hash values supported */
5029	if (n > 30)
5030		n = 30;
5031
5032	/* store the hashes for later use */
5033	for (i = 0; i < n; i++)
5034		vf_data->vf_mc_hashes[i] = hash_list[i];
5035
5036	/* Flush and reset the mta with the new values */
5037	igb_set_rx_mode(adapter->netdev);
5038
5039	return 0;
5040}
5041
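/* igb_restore_vf_multicasts - replay each VF's saved multicast hashes (or
 * multicast promiscuous setting) into VMOLR/MTA after the PF rewrites the
 * shared filter tables
 */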
5042static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5043{
5044	struct e1000_hw *hw = &adapter->hw;
5045	struct vf_data_storage *vf_data;
5046	int i, j;
5047
5048	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5049		u32 vmolr = rd32(E1000_VMOLR(i));
5050		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5051
5052		vf_data = &adapter->vf_data[i];
5053
5054		if ((vf_data->num_vf_mc_hashes > 30) ||
5055		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5056			vmolr |= E1000_VMOLR_MPME;
5057		} else if (vf_data->num_vf_mc_hashes) {
5058			vmolr |= E1000_VMOLR_ROMPE;
5059			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5060				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5061		}
5062		wr32(E1000_VMOLR(i), vmolr);
5063	}
5064}
5065
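/* igb_clear_vf_vfta - remove the VF from every VLVF pool and free any
 * VLAN filter entries left with no pool members
 */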
5066static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5067{
5068	struct e1000_hw *hw = &adapter->hw;
5069	u32 pool_mask, reg, vid;
5070	int i;
5071
5072	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5073
5074	/* Find the vlan filter for this id */
5075	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5076		reg = rd32(E1000_VLVF(i));
5077
5078		/* remove the vf from the pool */
5079		reg &= ~pool_mask;
5080
5081		/* if pool is empty then remove entry from vfta */
5082		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5083		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5084			vid = reg & E1000_VLVF_VLANID_MASK;
5085			reg = 0;
5086			igb_vfta_set(hw, vid, false);
5087		}
5088
5089		wr32(E1000_VLVF(i), reg);
5090	}
5091
5092	adapter->vf_data[vf].vlans_enabled = 0;
5093}
5094
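/* igb_vlvf_set - add or remove a VF pool from the shared VLVF entry for
 * @vid, creating or freeing the entry as needed and adjusting the VF's
 * RLPML to account for the VLAN tag
 */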
5095static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5096{
5097	struct e1000_hw *hw = &adapter->hw;
5098	u32 reg, i;
5099
5100	/* The vlvf table only exists on 82576 hardware and newer */
5101	if (hw->mac.type < e1000_82576)
5102		return -1;
5103
5104	/* we only need to do this if VMDq is enabled */
5105	if (!adapter->vfs_allocated_count)
5106		return -1;
5107
5108	/* Find the vlan filter for this id */
5109	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5110		reg = rd32(E1000_VLVF(i));
5111		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5112		    vid == (reg & E1000_VLVF_VLANID_MASK))
5113			break;
5114	}
5115
5116	if (add) {
5117		if (i == E1000_VLVF_ARRAY_SIZE) {
5118			/* Did not find a matching VLAN ID entry that was
5119			 * enabled.  Search for a free filter entry, i.e.
5120			 * one without the enable bit set
5121			 */
5122			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5123				reg = rd32(E1000_VLVF(i));
5124				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5125					break;
5126			}
5127		}
5128		if (i < E1000_VLVF_ARRAY_SIZE) {
5129			/* Found an enabled/available entry */
5130			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5131
5132			/* if !enabled we need to set this up in vfta */
5133			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5134				/* add VID to filter table */
5135				igb_vfta_set(hw, vid, true);
5136				reg |= E1000_VLVF_VLANID_ENABLE;
5137			}
5138			reg &= ~E1000_VLVF_VLANID_MASK;
5139			reg |= vid;
5140			wr32(E1000_VLVF(i), reg);
5141
5142			/* do not modify RLPML for PF devices */
5143			if (vf >= adapter->vfs_allocated_count)
5144				return 0;
5145
5146			if (!adapter->vf_data[vf].vlans_enabled) {
5147				u32 size;
5148				reg = rd32(E1000_VMOLR(vf));
5149				size = reg & E1000_VMOLR_RLPML_MASK;
5150				size += 4;
5151				reg &= ~E1000_VMOLR_RLPML_MASK;
5152				reg |= size;
5153				wr32(E1000_VMOLR(vf), reg);
5154			}
5155
5156			adapter->vf_data[vf].vlans_enabled++;
5157			return 0;
5158		}
5159	} else {
5160		if (i < E1000_VLVF_ARRAY_SIZE) {
5161			/* remove vf from the pool */
5162			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5163			/* if pool is empty then remove entry from vfta */
5164			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5165				reg = 0;
5166				igb_vfta_set(hw, vid, false);
5167			}
5168			wr32(E1000_VLVF(i), reg);
5169
5170			/* do not modify RLPML for PF devices */
5171			if (vf >= adapter->vfs_allocated_count)
5172				return 0;
5173
5174			adapter->vf_data[vf].vlans_enabled--;
5175			if (!adapter->vf_data[vf].vlans_enabled) {
5176				u32 size;
5177				reg = rd32(E1000_VMOLR(vf));
5178				size = reg & E1000_VMOLR_RLPML_MASK;
5179				size -= 4;
5180				reg &= ~E1000_VMOLR_RLPML_MASK;
5181				reg |= size;
5182				wr32(E1000_VMOLR(vf), reg);
5183			}
5184		}
5185	}
5186	return 0;
5187}
5188
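/* igb_set_vmvir - set the default VLAN that hardware inserts for the VF;
 * a vid of 0 clears it
 */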
5189static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5190{
5191	struct e1000_hw *hw = &adapter->hw;
5192
5193	if (vid)
5194		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5195	else
5196		wr32(E1000_VMVIR(vf), 0);
5197}
5198
5199static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5200			       int vf, u16 vlan, u8 qos)
5201{
5202	int err = 0;
5203	struct igb_adapter *adapter = netdev_priv(netdev);
5204
5205	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5206		return -EINVAL;
5207	if (vlan || qos) {
5208		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5209		if (err)
5210			goto out;
5211		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5212		igb_set_vmolr(adapter, vf, !vlan);
5213		adapter->vf_data[vf].pf_vlan = vlan;
5214		adapter->vf_data[vf].pf_qos = qos;
5215		dev_info(&adapter->pdev->dev,
5216			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5217		if (test_bit(__IGB_DOWN, &adapter->state)) {
5218			dev_warn(&adapter->pdev->dev,
5219				 "The VF VLAN has been set,"
5220				 " but the PF device is not up.\n");
5221			dev_warn(&adapter->pdev->dev,
5222				 "Bring the PF device up before"
5223				 " attempting to use the VF device.\n");
5224		}
5225	} else {
5226		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5227				   false, vf);
5228		igb_set_vmvir(adapter, vlan, vf);
5229		igb_set_vmolr(adapter, vf, true);
5230		adapter->vf_data[vf].pf_vlan = 0;
5231		adapter->vf_data[vf].pf_qos = 0;
5232	}
5233out:
5234	return err;
5235}
5236
5237static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5238{
5239	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5240	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5241
5242	return igb_vlvf_set(adapter, vid, add, vf);
5243}
5244
5245static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5246{
5247	/* clear flags - except flag that indicates PF has set the MAC */
5248	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5249	adapter->vf_data[vf].last_nack = jiffies;
5250
5251	/* reset offloads to defaults */
5252	igb_set_vmolr(adapter, vf, true);
5253
5254	/* reset vlans for device */
5255	igb_clear_vf_vfta(adapter, vf);
5256	if (adapter->vf_data[vf].pf_vlan)
5257		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5258				    adapter->vf_data[vf].pf_vlan,
5259				    adapter->vf_data[vf].pf_qos);
5260	else
5261		igb_clear_vf_vfta(adapter, vf);
5262
5263	/* reset multicast table array for vf */
5264	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5265
5266	/* Flush and reset the mta with the new values */
5267	igb_set_rx_mode(adapter->netdev);
5268}
5269
5270static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5271{
5272	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5273
5274	/* generate a new mac address as we were hotplug removed/added */
5275	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5276		random_ether_addr(vf_mac);
5277
5278	/* process remaining reset events */
5279	igb_vf_reset(adapter, vf);
5280}
5281
5282static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5283{
5284	struct e1000_hw *hw = &adapter->hw;
5285	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5286	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5287	u32 reg, msgbuf[3];
5288	u8 *addr = (u8 *)(&msgbuf[1]);
5289
5290	/* process all the same items cleared in a function level reset */
5291	igb_vf_reset(adapter, vf);
5292
5293	/* set vf mac address */
5294	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5295
5296	/* enable transmit and receive for vf */
5297	reg = rd32(E1000_VFTE);
5298	wr32(E1000_VFTE, reg | (1 << vf));
5299	reg = rd32(E1000_VFRE);
5300	wr32(E1000_VFRE, reg | (1 << vf));
5301
5302	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5303
5304	/* reply to reset with ack and vf mac address */
5305	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5306	memcpy(addr, vf_mac, 6);
5307	igb_write_mbx(hw, msgbuf, 3, vf);
5308}
5309
5310static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5311{
5312	/*
5313	 * The VF MAC Address is stored in a packed array of bytes
5314	 * starting at the second 32 bit word of the msg array
5315	 */
5316	unsigned char *addr = (unsigned char *)&msg[1];
5317	int err = -1;
5318
5319	if (is_valid_ether_addr(addr))
5320		err = igb_set_vf_mac(adapter, vf, addr);
5321
5322	return err;
5323}
5324
5325static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5326{
5327	struct e1000_hw *hw = &adapter->hw;
5328	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5329	u32 msg = E1000_VT_MSGTYPE_NACK;
5330
5331	/* if device isn't clear to send it shouldn't be reading either */
5332	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5333	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5334		igb_write_mbx(hw, &msg, 1, vf);
5335		vf_data->last_nack = jiffies;
5336	}
5337}
5338
5339static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5340{
5341	struct pci_dev *pdev = adapter->pdev;
5342	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5343	struct e1000_hw *hw = &adapter->hw;
5344	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5345	s32 retval;
5346
5347	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5348
5349	if (retval) {
5350		/* if receive failed revoke VF CTS stats and restart init */
5351		dev_err(&pdev->dev, "Error receiving message from VF\n");
5352		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5353		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5354			return;
5355		goto out;
5356	}
5357
5358	/* this is a message we already processed, do nothing */
5359	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5360		return;
5361
5362	/*
5363	 * until the vf completes a reset it should not be
5364	 * allowed to start any configuration.
5365	 */
5366
5367	if (msgbuf[0] == E1000_VF_RESET) {
5368		igb_vf_reset_msg(adapter, vf);
5369		return;
5370	}
5371
5372	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5373		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5374			return;
5375		retval = -1;
5376		goto out;
5377	}
5378
5379	switch ((msgbuf[0] & 0xFFFF)) {
5380	case E1000_VF_SET_MAC_ADDR:
5381		retval = -EINVAL;
5382		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5383			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5384		else
5385			dev_warn(&pdev->dev,
5386				 "VF %d attempted to override administratively "
5387				 "set MAC address\nReload the VF driver to "
5388				 "resume operations\n", vf);
5389		break;
5390	case E1000_VF_SET_PROMISC:
5391		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5392		break;
5393	case E1000_VF_SET_MULTICAST:
5394		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5395		break;
5396	case E1000_VF_SET_LPE:
5397		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5398		break;
5399	case E1000_VF_SET_VLAN:
5400		retval = -1;
5401		if (vf_data->pf_vlan)
5402			dev_warn(&pdev->dev,
5403				 "VF %d attempted to override administratively "
5404				 "set VLAN tag\nReload the VF driver to "
5405				 "resume operations\n", vf);
5406		else
5407			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5408		break;
5409	default:
5410		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5411		retval = -1;
5412		break;
5413	}
5414
5415	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5416out:
5417	/* notify the VF of the results of what it sent us */
5418	if (retval)
5419		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5420	else
5421		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5422
5423	igb_write_mbx(hw, msgbuf, 1, vf);
5424}
5425
5426static void igb_msg_task(struct igb_adapter *adapter)
5427{
5428	struct e1000_hw *hw = &adapter->hw;
5429	u32 vf;
5430
5431	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5432		/* process any reset requests */
5433		if (!igb_check_for_rst(hw, vf))
5434			igb_vf_reset_event(adapter, vf);
5435
5436		/* process any messages pending */
5437		if (!igb_check_for_msg(hw, vf))
5438			igb_rcv_msg_from_vf(adapter, vf);
5439
5440		/* process any acks */
5441		if (!igb_check_for_ack(hw, vf))
5442			igb_rcv_ack_from_vf(adapter, vf);
5443	}
5444}
5445
5446/**
5447 *  igb_set_uta - Set unicast filter table address
5448 *  @adapter: board private structure
5449 *
5450 *  The unicast table address is a register array of 32-bit registers.
5451 *  The table is meant to be used in a way similar to how the MTA is used;
5452 *  however, due to certain limitations in the hardware it is necessary to
5453 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5454 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5455 **/
5456static void igb_set_uta(struct igb_adapter *adapter)
5457{
5458	struct e1000_hw *hw = &adapter->hw;
5459	int i;
5460
5461	/* The UTA table only exists on 82576 hardware and newer */
5462	if (hw->mac.type < e1000_82576)
5463		return;
5464
5465	/* we only need to do this if VMDq is enabled */
5466	if (!adapter->vfs_allocated_count)
5467		return;
5468
5469	for (i = 0; i < hw->mac.uta_reg_count; i++)
5470		array_wr32(E1000_UTA, i, ~0);
5471}
5472
5473/**
5474 * igb_intr_msi - Interrupt Handler
5475 * @irq: interrupt number
5476 * @data: pointer to our adapter private structure
5477 **/
5478static irqreturn_t igb_intr_msi(int irq, void *data)
5479{
5480	struct igb_adapter *adapter = data;
5481	struct igb_q_vector *q_vector = adapter->q_vector[0];
5482	struct e1000_hw *hw = &adapter->hw;
5483	/* read ICR disables interrupts using IAM */
5484	u32 icr = rd32(E1000_ICR);
5485
5486	igb_write_itr(q_vector);
5487
5488	if (icr & E1000_ICR_DRSTA)
5489		schedule_work(&adapter->reset_task);
5490
5491	if (icr & E1000_ICR_DOUTSYNC) {
5492		/* HW is reporting DMA is out of sync */
5493		adapter->stats.doosync++;
5494	}
5495
5496	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5497		hw->mac.get_link_status = 1;
5498		if (!test_bit(__IGB_DOWN, &adapter->state))
5499			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5500	}
5501
5502	napi_schedule(&q_vector->napi);
5503
5504	return IRQ_HANDLED;
5505}
5506
5507/**
5508 * igb_intr - Legacy Interrupt Handler
5509 * @irq: interrupt number
5510 * @data: pointer to our adapter private structure
5511 **/
5512static irqreturn_t igb_intr(int irq, void *data)
5513{
5514	struct igb_adapter *adapter = data;
5515	struct igb_q_vector *q_vector = adapter->q_vector[0];
5516	struct e1000_hw *hw = &adapter->hw;
5517	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5518	 * need for the IMC write */
5519	u32 icr = rd32(E1000_ICR);
5520	if (!icr)
5521		return IRQ_NONE;  /* Not our interrupt */
5522
5523	igb_write_itr(q_vector);
5524
5525	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5526	 * not set, then the adapter didn't send an interrupt */
5527	if (!(icr & E1000_ICR_INT_ASSERTED))
5528		return IRQ_NONE;
5529
5530	if (icr & E1000_ICR_DRSTA)
5531		schedule_work(&adapter->reset_task);
5532
5533	if (icr & E1000_ICR_DOUTSYNC) {
5534		/* HW is reporting DMA is out of sync */
5535		adapter->stats.doosync++;
5536	}
5537
5538	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5539		hw->mac.get_link_status = 1;
5540		/* guard against interrupt when we're going down */
5541		if (!test_bit(__IGB_DOWN, &adapter->state))
5542			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5543	}
5544
5545	napi_schedule(&q_vector->napi);
5546
5547	return IRQ_HANDLED;
5548}
5549
5550static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5551{
5552	struct igb_adapter *adapter = q_vector->adapter;
5553	struct e1000_hw *hw = &adapter->hw;
5554
5555	if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5556	    (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5557		if (!adapter->msix_entries)
5558			igb_set_itr(adapter);
5559		else
5560			igb_update_ring_itr(q_vector);
5561	}
5562
5563	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5564		if (adapter->msix_entries)
5565			wr32(E1000_EIMS, q_vector->eims_value);
5566		else
5567			igb_irq_enable(adapter);
5568	}
5569}
5570
5571/**
5572 * igb_poll - NAPI Rx polling callback
5573 * @napi: napi polling structure
5574 * @budget: count of how many packets we should handle
5575 **/
5576static int igb_poll(struct napi_struct *napi, int budget)
5577{
5578	struct igb_q_vector *q_vector = container_of(napi,
5579	                                             struct igb_q_vector,
5580	                                             napi);
5581	bool clean_complete = true;
5582
5583#ifdef CONFIG_IGB_DCA
5584	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5585		igb_update_dca(q_vector);
5586#endif
5587	if (q_vector->tx_ring)
5588		clean_complete = igb_clean_tx_irq(q_vector);
5589
5590	if (q_vector->rx_ring)
5591		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5592
5593	/* If all work not completed, return budget and keep polling */
5594	if (!clean_complete)
5595		return budget;
5596
5597	/* If not enough Rx work done, exit the polling mode */
5598	napi_complete(napi);
5599	igb_ring_irq_enable(q_vector);
5600
5601	return 0;
5602}
5603
5604/**
5605 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5606 * @adapter: board private structure
5607 * @shhwtstamps: timestamp structure to update
5608 * @regval: unsigned 64bit system time value.
5609 *
5610 * We need to convert the system time value stored in the RX/TXSTMP registers
5611 * into a hwtstamp which can be used by the upper level timestamping functions
5612 */
5613static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5614                                   struct skb_shared_hwtstamps *shhwtstamps,
5615                                   u64 regval)
5616{
5617	u64 ns;
5618
5619	/*
5620	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5621	 * 24 to match clock shift we setup earlier.
5622	 */
5623	if (adapter->hw.mac.type == e1000_82580)
5624		regval <<= IGB_82580_TSYNC_SHIFT;
5625
5626	ns = timecounter_cyc2time(&adapter->clock, regval);
5627	timecompare_update(&adapter->compare, ns);
5628	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5629	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5630	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5631}
5632
5633/**
5634 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5635 * @q_vector: pointer to q_vector containing needed info
5636 * @buffer_info: pointer to igb_tx_buffer structure
5637 *
5638 * If we were asked to do hardware stamping and such a time stamp is
5639 * available, then it must have been for this skb here because we only
5640 * allow only one such packet into the queue.
5641 */
5642static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5643			    struct igb_tx_buffer *buffer_info)
5644{
5645	struct igb_adapter *adapter = q_vector->adapter;
5646	struct e1000_hw *hw = &adapter->hw;
5647	struct skb_shared_hwtstamps shhwtstamps;
5648	u64 regval;
5649
5650	/* if skb does not support hw timestamp or TX stamp not valid exit */
5651	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5652	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5653		return;
5654
5655	regval = rd32(E1000_TXSTMPL);
5656	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5657
5658	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5659	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5660}
5661
5662/**
5663 * igb_clean_tx_irq - Reclaim resources after transmit completes
5664 * @q_vector: pointer to q_vector containing needed info
5665 * returns true if ring is completely cleaned
5666 **/
5667static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5668{
5669	struct igb_adapter *adapter = q_vector->adapter;
5670	struct igb_ring *tx_ring = q_vector->tx_ring;
5671	struct igb_tx_buffer *tx_buffer;
5672	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5673	unsigned int total_bytes = 0, total_packets = 0;
5674	unsigned int budget = q_vector->tx_work_limit;
5675	unsigned int i = tx_ring->next_to_clean;
5676
5677	if (test_bit(__IGB_DOWN, &adapter->state))
5678		return true;
5679
5680	tx_buffer = &tx_ring->tx_buffer_info[i];
5681	tx_desc = IGB_TX_DESC(tx_ring, i);
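	/* keep i as a negative offset from the end of the ring so the wrap check below is a simple test for zero */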
5682	i -= tx_ring->count;
5683
5684	for (; budget; budget--) {
5685		eop_desc = tx_buffer->next_to_watch;
5686
5687		/* prevent any other reads prior to eop_desc */
5688		rmb();
5689
5690		/* if next_to_watch is not set then there is no work pending */
5691		if (!eop_desc)
5692			break;
5693
5694		/* if DD is not set pending work has not been completed */
5695		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5696			break;
5697
5698		/* clear next_to_watch to prevent false hangs */
5699		tx_buffer->next_to_watch = NULL;
5700
5701		/* update the statistics for this packet */
5702		total_bytes += tx_buffer->bytecount;
5703		total_packets += tx_buffer->gso_segs;
5704
5705		/* retrieve hardware timestamp */
5706		igb_tx_hwtstamp(q_vector, tx_buffer);
5707
5708		/* free the skb */
5709		dev_kfree_skb_any(tx_buffer->skb);
5710		tx_buffer->skb = NULL;
5711
5712		/* unmap skb header data */
5713		dma_unmap_single(tx_ring->dev,
5714				 tx_buffer->dma,
5715				 tx_buffer->length,
5716				 DMA_TO_DEVICE);
5717
5718		/* clear last DMA location and unmap remaining buffers */
5719		while (tx_desc != eop_desc) {
5720			tx_buffer->dma = 0;
5721
5722			tx_buffer++;
5723			tx_desc++;
5724			i++;
5725			if (unlikely(!i)) {
5726				i -= tx_ring->count;
5727				tx_buffer = tx_ring->tx_buffer_info;
5728				tx_desc = IGB_TX_DESC(tx_ring, 0);
5729			}
5730
5731			/* unmap any remaining paged data */
5732			if (tx_buffer->dma) {
5733				dma_unmap_page(tx_ring->dev,
5734					       tx_buffer->dma,
5735					       tx_buffer->length,
5736					       DMA_TO_DEVICE);
5737			}
5738		}
5739
5740		/* clear last DMA location */
5741		tx_buffer->dma = 0;
5742
5743		/* move us one more past the eop_desc for start of next pkt */
5744		tx_buffer++;
5745		tx_desc++;
5746		i++;
5747		if (unlikely(!i)) {
5748			i -= tx_ring->count;
5749			tx_buffer = tx_ring->tx_buffer_info;
5750			tx_desc = IGB_TX_DESC(tx_ring, 0);
5751		}
5752	}
5753
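	/* convert i back from a negative offset to a real ring index */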
5754	i += tx_ring->count;
5755	tx_ring->next_to_clean = i;
5756	u64_stats_update_begin(&tx_ring->tx_syncp);
5757	tx_ring->tx_stats.bytes += total_bytes;
5758	tx_ring->tx_stats.packets += total_packets;
5759	u64_stats_update_end(&tx_ring->tx_syncp);
5760	tx_ring->total_bytes += total_bytes;
5761	tx_ring->total_packets += total_packets;
5762
5763	if (tx_ring->detect_tx_hung) {
5764		struct e1000_hw *hw = &adapter->hw;
5765
5766		eop_desc = tx_buffer->next_to_watch;
5767
5768		/* Detect a transmit hang in hardware, this serializes the
5769		 * check with the clearing of time_stamp and movement of i */
5770		tx_ring->detect_tx_hung = false;
5771		if (eop_desc &&
5772		    time_after(jiffies, tx_buffer->time_stamp +
5773			       (adapter->tx_timeout_factor * HZ)) &&
5774		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5775
5776			/* detected Tx unit hang */
5777			dev_err(tx_ring->dev,
5778				"Detected Tx Unit Hang\n"
5779				"  Tx Queue             <%d>\n"
5780				"  TDH                  <%x>\n"
5781				"  TDT                  <%x>\n"
5782				"  next_to_use          <%x>\n"
5783				"  next_to_clean        <%x>\n"
5784				"buffer_info[next_to_clean]\n"
5785				"  time_stamp           <%lx>\n"
5786				"  next_to_watch        <%p>\n"
5787				"  jiffies              <%lx>\n"
5788				"  desc.status          <%x>\n",
5789				tx_ring->queue_index,
5790				rd32(E1000_TDH(tx_ring->reg_idx)),
5791				readl(tx_ring->tail),
5792				tx_ring->next_to_use,
5793				tx_ring->next_to_clean,
5794				tx_buffer->time_stamp,
5795				eop_desc,
5796				jiffies,
5797				eop_desc->wb.status);
5798			netif_stop_subqueue(tx_ring->netdev,
5799					    tx_ring->queue_index);
5800
5801			/* we are about to reset, no point in enabling stuff */
5802			return true;
5803		}
5804	}
5805
5806	if (unlikely(total_packets &&
5807		     netif_carrier_ok(tx_ring->netdev) &&
5808		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5809		/* Make sure that anybody stopping the queue after this
5810		 * sees the new next_to_clean.
5811		 */
5812		smp_mb();
5813		if (__netif_subqueue_stopped(tx_ring->netdev,
5814					     tx_ring->queue_index) &&
5815		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5816			netif_wake_subqueue(tx_ring->netdev,
5817					    tx_ring->queue_index);
5818
5819			u64_stats_update_begin(&tx_ring->tx_syncp);
5820			tx_ring->tx_stats.restart_queue++;
5821			u64_stats_update_end(&tx_ring->tx_syncp);
5822		}
5823	}
5824
5825	return !!budget;
5826}
5827
5828static inline void igb_rx_checksum(struct igb_ring *ring,
5829				   u32 status_err, struct sk_buff *skb)
5830{
5831	skb_checksum_none_assert(skb);
5832
5833	/* bail if the Ignore Checksum bit is set or Rx checksum is disabled through ethtool */
5834	if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags) ||
5835	     (status_err & E1000_RXD_STAT_IXSM))
5836		return;
5837
5838	/* TCP/UDP checksum error bit is set */
5839	if (status_err &
5840	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5841		/*
5842		 * work around errata with sctp packets where the TCPE aka
5843		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5844		 * packets, (aka let the stack check the crc32c)
5845		 */
5846		if (!((skb->len == 60) &&
5847		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5848			u64_stats_update_begin(&ring->rx_syncp);
5849			ring->rx_stats.csum_err++;
5850			u64_stats_update_end(&ring->rx_syncp);
5851		}
5852		/* let the stack verify checksum errors */
5853		return;
5854	}
5855	/* It must be a TCP or UDP packet with a valid checksum */
5856	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5857		skb->ip_summed = CHECKSUM_UNNECESSARY;
5858
5859	dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5860}
5861
5862static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5863                                   struct sk_buff *skb)
5864{
5865	struct igb_adapter *adapter = q_vector->adapter;
5866	struct e1000_hw *hw = &adapter->hw;
5867	u64 regval;
5868
5869	/*
5870	 * If this bit is set, then the RX registers contain the time stamp. No
5871	 * other packet will be time stamped until we read these registers, so
5872	 * read the registers to make them available again. Because only one
5873	 * packet can be time stamped at a time, we know that the register
5874	 * values must belong to this one here and therefore we don't need to
5875	 * compare any of the additional attributes stored for it.
5876	 *
5877	 * If nothing went wrong, then it should have a shared tx_flags that we
5878	 * can turn into a skb_shared_hwtstamps.
5879	 */
5880	if (staterr & E1000_RXDADV_STAT_TSIP) {
5881		u32 *stamp = (u32 *)skb->data;
5882		regval = le32_to_cpu(*(stamp + 2));
5883		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5884		skb_pull(skb, IGB_TS_HDR_LEN);
5885	} else {
5886		if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5887			return;
5888
5889		regval = rd32(E1000_RXSTMPL);
5890		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5891	}
5892
5893	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5894}
5895static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5896{
5897	/* HW will not DMA in data larger than the given buffer, even if it
5898	 * parses the (NFS, of course) header to be larger.  In that case, it
5899	 * fills the header buffer and spills the rest into the page.
5900	 */
5901	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5902	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5903	if (hlen > IGB_RX_HDR_LEN)
5904		hlen = IGB_RX_HDR_LEN;
5905	return hlen;
5906}
5907
5908static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5909{
5910	struct igb_ring *rx_ring = q_vector->rx_ring;
5911	union e1000_adv_rx_desc *rx_desc;
5912	const int current_node = numa_node_id();
5913	unsigned int total_bytes = 0, total_packets = 0;
5914	u32 staterr;
5915	u16 cleaned_count = igb_desc_unused(rx_ring);
5916	u16 i = rx_ring->next_to_clean;
5917
5918	rx_desc = IGB_RX_DESC(rx_ring, i);
5919	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5920
5921	while (staterr & E1000_RXD_STAT_DD) {
5922		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5923		struct sk_buff *skb = buffer_info->skb;
5924		union e1000_adv_rx_desc *next_rxd;
5925
5926		buffer_info->skb = NULL;
5927		prefetch(skb->data);
5928
5929		i++;
5930		if (i == rx_ring->count)
5931			i = 0;
5932
5933		next_rxd = IGB_RX_DESC(rx_ring, i);
5934		prefetch(next_rxd);
5935
5936		/*
5937		 * This memory barrier is needed to keep us from reading
5938		 * any other fields out of the rx_desc until we know the
5939		 * RXD_STAT_DD bit is set
5940		 */
5941		rmb();
5942
5943		if (!skb_is_nonlinear(skb)) {
5944			__skb_put(skb, igb_get_hlen(rx_desc));
5945			dma_unmap_single(rx_ring->dev, buffer_info->dma,
5946					 IGB_RX_HDR_LEN,
5947					 DMA_FROM_DEVICE);
5948			buffer_info->dma = 0;
5949		}
5950
5951		if (rx_desc->wb.upper.length) {
5952			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5953
5954			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5955						buffer_info->page,
5956						buffer_info->page_offset,
5957						length);
5958
5959			skb->len += length;
5960			skb->data_len += length;
5961			skb->truesize += length;
5962
5963			if ((page_count(buffer_info->page) != 1) ||
5964			    (page_to_nid(buffer_info->page) != current_node))
5965				buffer_info->page = NULL;
5966			else
5967				get_page(buffer_info->page);
5968
5969			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5970				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5971			buffer_info->page_dma = 0;
5972		}
5973
5974		if (!(staterr & E1000_RXD_STAT_EOP)) {
5975			struct igb_rx_buffer *next_buffer;
5976			next_buffer = &rx_ring->rx_buffer_info[i];
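			/* hand the in-progress skb over to the next buffer and park its spare skb/dma here */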
5977			buffer_info->skb = next_buffer->skb;
5978			buffer_info->dma = next_buffer->dma;
5979			next_buffer->skb = skb;
5980			next_buffer->dma = 0;
5981			goto next_desc;
5982		}
5983
5984		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5985			dev_kfree_skb_any(skb);
5986			goto next_desc;
5987		}
5988
5989		if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5990			igb_rx_hwtstamp(q_vector, staterr, skb);
5991		total_bytes += skb->len;
5992		total_packets++;
5993
5994		igb_rx_checksum(rx_ring, staterr, skb);
5995
5996		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5997
5998		if (staterr & E1000_RXD_STAT_VP) {
5999			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6000
6001			__vlan_hwaccel_put_tag(skb, vid);
6002		}
6003		napi_gro_receive(&q_vector->napi, skb);
6004
6005		budget--;
6006next_desc:
6007		if (!budget)
6008			break;
6009
6010		cleaned_count++;
6011		/* return some buffers to hardware, one at a time is too slow */
6012		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6013			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6014			cleaned_count = 0;
6015		}
6016
6017		/* use prefetched values */
6018		rx_desc = next_rxd;
6019		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
6020	}
6021
6022	rx_ring->next_to_clean = i;
6023	u64_stats_update_begin(&rx_ring->rx_syncp);
6024	rx_ring->rx_stats.packets += total_packets;
6025	rx_ring->rx_stats.bytes += total_bytes;
6026	u64_stats_update_end(&rx_ring->rx_syncp);
6027	rx_ring->total_packets += total_packets;
6028	rx_ring->total_bytes += total_bytes;
6029
6030	if (cleaned_count)
6031		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6032
6033	return !!budget;
6034}
6035
6036static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6037				 struct igb_rx_buffer *bi)
6038{
6039	struct sk_buff *skb = bi->skb;
6040	dma_addr_t dma = bi->dma;
6041
6042	if (dma)
6043		return true;
6044
6045	if (likely(!skb)) {
6046		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6047						IGB_RX_HDR_LEN);
6048		bi->skb = skb;
6049		if (!skb) {
6050			rx_ring->rx_stats.alloc_failed++;
6051			return false;
6052		}
6053
6054		/* initialize skb for ring */
6055		skb_record_rx_queue(skb, rx_ring->queue_index);
6056	}
6057
6058	dma = dma_map_single(rx_ring->dev, skb->data,
6059			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6060
6061	if (dma_mapping_error(rx_ring->dev, dma)) {
6062		rx_ring->rx_stats.alloc_failed++;
6063		return false;
6064	}
6065
6066	bi->dma = dma;
6067	return true;
6068}
6069
6070static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6071				  struct igb_rx_buffer *bi)
6072{
6073	struct page *page = bi->page;
6074	dma_addr_t page_dma = bi->page_dma;
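	/* XOR with half the page size so successive allocations alternate between the two halves of the page */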
6075	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6076
6077	if (page_dma)
6078		return true;
6079
6080	if (!page) {
6081		page = netdev_alloc_page(rx_ring->netdev);
6082		bi->page = page;
6083		if (unlikely(!page)) {
6084			rx_ring->rx_stats.alloc_failed++;
6085			return false;
6086		}
6087	}
6088
6089	page_dma = dma_map_page(rx_ring->dev, page,
6090				page_offset, PAGE_SIZE / 2,
6091				DMA_FROM_DEVICE);
6092
6093	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6094		rx_ring->rx_stats.alloc_failed++;
6095		return false;
6096	}
6097
6098	bi->page_dma = page_dma;
6099	bi->page_offset = page_offset;
6100	return true;
6101}
6102
6103/**
6104 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6105 * @rx_ring: pointer to the rx descriptor ring to refill
6106 **/
6107void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6108{
6109	union e1000_adv_rx_desc *rx_desc;
6110	struct igb_rx_buffer *bi;
6111	u16 i = rx_ring->next_to_use;
6112
6113	rx_desc = IGB_RX_DESC(rx_ring, i);
6114	bi = &rx_ring->rx_buffer_info[i];
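	/* track i as a negative offset from the end of the ring so wrap-around is just a test for zero */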
6115	i -= rx_ring->count;
6116
6117	while (cleaned_count--) {
6118		if (!igb_alloc_mapped_skb(rx_ring, bi))
6119			break;
6120
6121		/* Refresh the desc even if buffer_addrs didn't change
6122		 * because each write-back erases this info. */
6123		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6124
6125		if (!igb_alloc_mapped_page(rx_ring, bi))
6126			break;
6127
6128		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6129
6130		rx_desc++;
6131		bi++;
6132		i++;
6133		if (unlikely(!i)) {
6134			rx_desc = IGB_RX_DESC(rx_ring, 0);
6135			bi = rx_ring->rx_buffer_info;
6136			i -= rx_ring->count;
6137		}
6138
6139		/* clear the hdr_addr for the next_to_use descriptor */
6140		rx_desc->read.hdr_addr = 0;
6141	}
6142
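	/* convert i back from a negative offset to a real ring index */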
6143	i += rx_ring->count;
6144
6145	if (rx_ring->next_to_use != i) {
6146		rx_ring->next_to_use = i;
6147
6148		/* Force memory writes to complete before letting h/w
6149		 * know there are new descriptors to fetch.  (Only
6150		 * applicable for weak-ordered memory model archs,
6151		 * such as IA-64). */
6152		wmb();
6153		writel(i, rx_ring->tail);
6154	}
6155}
6156
6157/**
6158 * igb_mii_ioctl - handle MII ioctls for PHY access
6159 * @netdev: network interface device structure
6160 * @ifr: pointer to the ioctl request
6161 * @cmd: ioctl command
6162 **/
6163static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6164{
6165	struct igb_adapter *adapter = netdev_priv(netdev);
6166	struct mii_ioctl_data *data = if_mii(ifr);
6167
6168	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6169		return -EOPNOTSUPP;
6170
6171	switch (cmd) {
6172	case SIOCGMIIPHY:
6173		data->phy_id = adapter->hw.phy.addr;
6174		break;
6175	case SIOCGMIIREG:
6176		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6177		                     &data->val_out))
6178			return -EIO;
6179		break;
6180	case SIOCSMIIREG:
6181	default:
6182		return -EOPNOTSUPP;
6183	}
6184	return 0;
6185}
6186
6187/**
6188 * igb_hwtstamp_ioctl - control hardware time stamping
6189 * @netdev: network interface device structure
6190 * @ifr: pointer to the ioctl request
6191 * @cmd: ioctl command
6192 *
6193 * Outgoing time stamping can be enabled and disabled. Play nice and
6194 * disable it when requested, although it shouldn't cause any overhead
6195 * when no packet needs it. At most one packet in the queue may be
6196 * marked for time stamping, otherwise it would be impossible to tell
6197 * for sure to which packet the hardware time stamp belongs.
6198 *
6199 * Incoming time stamping has to be configured via the hardware
6200 * filters. Not all combinations are supported, in particular event
6201 * type has to be specified. Matching the kind of event packet is
6202 * not supported, with the exception of "all V2 events regardless of
6203 * level 2 or 4".
6204 *
6205 **/
6206static int igb_hwtstamp_ioctl(struct net_device *netdev,
6207			      struct ifreq *ifr, int cmd)
6208{
6209	struct igb_adapter *adapter = netdev_priv(netdev);
6210	struct e1000_hw *hw = &adapter->hw;
6211	struct hwtstamp_config config;
6212	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6213	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6214	u32 tsync_rx_cfg = 0;
6215	bool is_l4 = false;
6216	bool is_l2 = false;
6217	u32 regval;
6218
6219	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6220		return -EFAULT;
6221
6222	/* reserved for future extensions */
6223	if (config.flags)
6224		return -EINVAL;
6225
6226	switch (config.tx_type) {
6227	case HWTSTAMP_TX_OFF:
6228		tsync_tx_ctl = 0;
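		/* fall through */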
6229	case HWTSTAMP_TX_ON:
6230		break;
6231	default:
6232		return -ERANGE;
6233	}
6234
6235	switch (config.rx_filter) {
6236	case HWTSTAMP_FILTER_NONE:
6237		tsync_rx_ctl = 0;
6238		break;
6239	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6240	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6241	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6242	case HWTSTAMP_FILTER_ALL:
6243		/*
6244		 * register TSYNCRXCFG must be set, therefore it is not
6245		 * possible to time stamp both Sync and Delay_Req messages
6246		 * => fall back to time stamping all packets
6247		 */
6248		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6249		config.rx_filter = HWTSTAMP_FILTER_ALL;
6250		break;
6251	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6252		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6253		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6254		is_l4 = true;
6255		break;
6256	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6257		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6258		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6259		is_l4 = true;
6260		break;
6261	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6262	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6263		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6264		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6265		is_l2 = true;
6266		is_l4 = true;
6267		config.rx_filter = HWTSTAMP_FILTER_SOME;
6268		break;
6269	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6270	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6271		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6272		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6273		is_l2 = true;
6274		is_l4 = true;
6275		config.rx_filter = HWTSTAMP_FILTER_SOME;
6276		break;
6277	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6278	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6279	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6280		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6281		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6282		is_l2 = true;
6283		break;
6284	default:
6285		return -ERANGE;
6286	}
6287
6288	if (hw->mac.type == e1000_82575) {
6289		if (tsync_rx_ctl | tsync_tx_ctl)
6290			return -EINVAL;
6291		return 0;
6292	}
6293
6294	/*
6295	 * Per-packet timestamping only works if all packets are
6296	 * timestamped, so enable timestamping in all packets as
6297	 * long as one rx filter was configured.
6298	 */
6299	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6300		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6301		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6302	}
6303
6304	/* enable/disable TX */
6305	regval = rd32(E1000_TSYNCTXCTL);
6306	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6307	regval |= tsync_tx_ctl;
6308	wr32(E1000_TSYNCTXCTL, regval);
6309
6310	/* enable/disable RX */
6311	regval = rd32(E1000_TSYNCRXCTL);
6312	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6313	regval |= tsync_rx_ctl;
6314	wr32(E1000_TSYNCRXCTL, regval);
6315
6316	/* define which PTP packets are time stamped */
6317	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6318
6319	/* define ethertype filter for timestamped packets */
6320	if (is_l2)
6321		wr32(E1000_ETQF(3),
6322		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6323		                 E1000_ETQF_1588 | /* enable timestamping */
6324		                 ETH_P_1588));     /* 1588 eth protocol type */
6325	else
6326		wr32(E1000_ETQF(3), 0);
6327
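/* UDP port 319 is reserved for PTP (IEEE 1588) event messages */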
6328#define PTP_PORT 319
6329	/* L4 Queue Filter[3]: filter by destination port and protocol */
6330	if (is_l4) {
6331		u32 ftqf = (IPPROTO_UDP /* UDP */
6332			| E1000_FTQF_VF_BP /* VF not compared */
6333			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6334			| E1000_FTQF_MASK); /* mask all inputs */
6335		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6336
6337		wr32(E1000_IMIR(3), htons(PTP_PORT));
6338		wr32(E1000_IMIREXT(3),
6339		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6340		if (hw->mac.type == e1000_82576) {
6341			/* enable source port check */
6342			wr32(E1000_SPQF(3), htons(PTP_PORT));
6343			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6344		}
6345		wr32(E1000_FTQF(3), ftqf);
6346	} else {
6347		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6348	}
6349	wrfl();
6350
6351	adapter->hwtstamp_config = config;
6352
6353	/* clear TX/RX time stamp registers, just to be sure */
6354	regval = rd32(E1000_TXSTMPH);
6355	regval = rd32(E1000_RXSTMPH);
6356
6357	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6358		-EFAULT : 0;
6359}
6360
6361/**
6362 * igb_ioctl - dispatch device-specific ioctls
6363 * @netdev: network interface device structure
6364 * @ifr: pointer to the ioctl request
6365 * @cmd: ioctl command
6366 **/
6367static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6368{
6369	switch (cmd) {
6370	case SIOCGMIIPHY:
6371	case SIOCGMIIREG:
6372	case SIOCSMIIREG:
6373		return igb_mii_ioctl(netdev, ifr, cmd);
6374	case SIOCSHWTSTAMP:
6375		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6376	default:
6377		return -EOPNOTSUPP;
6378	}
6379}
6380
6381s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6382{
6383	struct igb_adapter *adapter = hw->back;
6384	u16 cap_offset;
6385
6386	cap_offset = adapter->pdev->pcie_cap;
6387	if (!cap_offset)
6388		return -E1000_ERR_CONFIG;
6389
6390	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6391
6392	return 0;
6393}
6394
6395s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6396{
6397	struct igb_adapter *adapter = hw->back;
6398	u16 cap_offset;
6399
6400	cap_offset = adapter->pdev->pcie_cap;
6401	if (!cap_offset)
6402		return -E1000_ERR_CONFIG;
6403
6404	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6405
6406	return 0;
6407}
6408
6409static void igb_vlan_mode(struct net_device *netdev, u32 features)
6410{
6411	struct igb_adapter *adapter = netdev_priv(netdev);
6412	struct e1000_hw *hw = &adapter->hw;
6413	u32 ctrl, rctl;
6414
6415	igb_irq_disable(adapter);
6416
6417	if (features & NETIF_F_HW_VLAN_RX) {
6418		/* enable VLAN tag insert/strip */
6419		ctrl = rd32(E1000_CTRL);
6420		ctrl |= E1000_CTRL_VME;
6421		wr32(E1000_CTRL, ctrl);
6422
6423		/* Disable CFI check */
6424		rctl = rd32(E1000_RCTL);
6425		rctl &= ~E1000_RCTL_CFIEN;
6426		wr32(E1000_RCTL, rctl);
6427	} else {
6428		/* disable VLAN tag insert/strip */
6429		ctrl = rd32(E1000_CTRL);
6430		ctrl &= ~E1000_CTRL_VME;
6431		wr32(E1000_CTRL, ctrl);
6432	}
6433
6434	igb_rlpml_set(adapter);
6435
6436	if (!test_bit(__IGB_DOWN, &adapter->state))
6437		igb_irq_enable(adapter);
6438}
6439
6440static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6441{
6442	struct igb_adapter *adapter = netdev_priv(netdev);
6443	struct e1000_hw *hw = &adapter->hw;
6444	int pf_id = adapter->vfs_allocated_count;
6445
6446	/* attempt to add filter to vlvf array */
6447	igb_vlvf_set(adapter, vid, true, pf_id);
6448
6449	/* add the filter since PF can receive vlans w/o entry in vlvf */
6450	igb_vfta_set(hw, vid, true);
6451
6452	set_bit(vid, adapter->active_vlans);
6453}
6454
6455static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6456{
6457	struct igb_adapter *adapter = netdev_priv(netdev);
6458	struct e1000_hw *hw = &adapter->hw;
6459	int pf_id = adapter->vfs_allocated_count;
6460	s32 err;
6461
6462	igb_irq_disable(adapter);
6463
6464	if (!test_bit(__IGB_DOWN, &adapter->state))
6465		igb_irq_enable(adapter);
6466
6467	/* remove vlan from VLVF table array */
6468	err = igb_vlvf_set(adapter, vid, false, pf_id);
6469
6470	/* if vid was not present in VLVF just remove it from table */
6471	if (err)
6472		igb_vfta_set(hw, vid, false);
6473
6474	clear_bit(vid, adapter->active_vlans);
6475}
6476
6477static void igb_restore_vlan(struct igb_adapter *adapter)
6478{
6479	u16 vid;
6480
6481	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6482		igb_vlan_rx_add_vid(adapter->netdev, vid);
6483}
6484
6485int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6486{
6487	struct pci_dev *pdev = adapter->pdev;
6488	struct e1000_mac_info *mac = &adapter->hw.mac;
6489
6490	mac->autoneg = 0;
6491
6492	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6493	 * for the switch() below to work */
6494	if ((spd & 1) || (dplx & ~1))
6495		goto err_inval;
6496
6497	/* Fiber NICs only allow 1000 Mbps full duplex */
6498	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6499	    (spd != SPEED_1000 ||
6500	     dplx != DUPLEX_FULL))
6501		goto err_inval;
6502
6503	switch (spd + dplx) {
6504	case SPEED_10 + DUPLEX_HALF:
6505		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6506		break;
6507	case SPEED_10 + DUPLEX_FULL:
6508		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6509		break;
6510	case SPEED_100 + DUPLEX_HALF:
6511		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6512		break;
6513	case SPEED_100 + DUPLEX_FULL:
6514		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6515		break;
6516	case SPEED_1000 + DUPLEX_FULL:
6517		mac->autoneg = 1;
6518		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6519		break;
6520	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6521	default:
6522		goto err_inval;
6523	}
6524	return 0;
6525
6526err_inval:
6527	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6528	return -EINVAL;
6529}
6530
6531static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6532{
6533	struct net_device *netdev = pci_get_drvdata(pdev);
6534	struct igb_adapter *adapter = netdev_priv(netdev);
6535	struct e1000_hw *hw = &adapter->hw;
6536	u32 ctrl, rctl, status;
6537	u32 wufc = adapter->wol;
6538#ifdef CONFIG_PM
6539	int retval = 0;
6540#endif
6541
6542	netif_device_detach(netdev);
6543
6544	if (netif_running(netdev))
6545		igb_close(netdev);
6546
6547	igb_clear_interrupt_scheme(adapter);
6548
6549#ifdef CONFIG_PM
6550	retval = pci_save_state(pdev);
6551	if (retval)
6552		return retval;
6553#endif
6554
6555	status = rd32(E1000_STATUS);
6556	if (status & E1000_STATUS_LU)
6557		wufc &= ~E1000_WUFC_LNKC;
6558
6559	if (wufc) {
6560		igb_setup_rctl(adapter);
6561		igb_set_rx_mode(netdev);
6562
6563		/* turn on all-multi mode if wake on multicast is enabled */
6564		if (wufc & E1000_WUFC_MC) {
6565			rctl = rd32(E1000_RCTL);
6566			rctl |= E1000_RCTL_MPE;
6567			wr32(E1000_RCTL, rctl);
6568		}
6569
6570		ctrl = rd32(E1000_CTRL);
6571		/* advertise wake from D3Cold */
6572		#define E1000_CTRL_ADVD3WUC 0x00100000
6573		/* phy power management enable */
6574		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6575		ctrl |= E1000_CTRL_ADVD3WUC;
6576		wr32(E1000_CTRL, ctrl);
6577
6578		/* Allow time for pending master requests to run */
6579		igb_disable_pcie_master(hw);
6580
6581		wr32(E1000_WUC, E1000_WUC_PME_EN);
6582		wr32(E1000_WUFC, wufc);
6583	} else {
6584		wr32(E1000_WUC, 0);
6585		wr32(E1000_WUFC, 0);
6586	}
6587
6588	*enable_wake = wufc || adapter->en_mng_pt;
6589	if (!*enable_wake)
6590		igb_power_down_link(adapter);
6591	else
6592		igb_power_up_link(adapter);
6593
6594	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6595	 * would have already happened in close and is redundant. */
6596	igb_release_hw_control(adapter);
6597
6598	pci_disable_device(pdev);
6599
6600	return 0;
6601}
6602
6603#ifdef CONFIG_PM
6604static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6605{
6606	int retval;
6607	bool wake;
6608
6609	retval = __igb_shutdown(pdev, &wake);
6610	if (retval)
6611		return retval;
6612
6613	if (wake) {
6614		pci_prepare_to_sleep(pdev);
6615	} else {
6616		pci_wake_from_d3(pdev, false);
6617		pci_set_power_state(pdev, PCI_D3hot);
6618	}
6619
6620	return 0;
6621}
6622
6623static int igb_resume(struct pci_dev *pdev)
6624{
6625	struct net_device *netdev = pci_get_drvdata(pdev);
6626	struct igb_adapter *adapter = netdev_priv(netdev);
6627	struct e1000_hw *hw = &adapter->hw;
6628	u32 err;
6629
6630	pci_set_power_state(pdev, PCI_D0);
6631	pci_restore_state(pdev);
6632	pci_save_state(pdev);
6633
6634	err = pci_enable_device_mem(pdev);
6635	if (err) {
6636		dev_err(&pdev->dev,
6637			"igb: Cannot enable PCI device from suspend\n");
6638		return err;
6639	}
6640	pci_set_master(pdev);
6641
6642	pci_enable_wake(pdev, PCI_D3hot, 0);
6643	pci_enable_wake(pdev, PCI_D3cold, 0);
6644
6645	if (igb_init_interrupt_scheme(adapter)) {
6646		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6647		return -ENOMEM;
6648	}
6649
6650	igb_reset(adapter);
6651
6652	/* let the f/w know that the h/w is now under the control of the
6653	 * driver. */
6654	igb_get_hw_control(adapter);
6655
6656	wr32(E1000_WUS, ~0);
6657
6658	if (netif_running(netdev)) {
6659		err = igb_open(netdev);
6660		if (err)
6661			return err;
6662	}
6663
6664	netif_device_attach(netdev);
6665
6666	return 0;
6667}
6668#endif
6669
6670static void igb_shutdown(struct pci_dev *pdev)
6671{
6672	bool wake;
6673
6674	__igb_shutdown(pdev, &wake);
6675
6676	if (system_state == SYSTEM_POWER_OFF) {
6677		pci_wake_from_d3(pdev, wake);
6678		pci_set_power_state(pdev, PCI_D3hot);
6679	}
6680}
6681
6682#ifdef CONFIG_NET_POLL_CONTROLLER
6683/*
6684 * Polling 'interrupt' - used by things like netconsole to send skbs
6685 * without having to re-enable interrupts. It's not called while
6686 * the interrupt routine is executing.
6687 */
6688static void igb_netpoll(struct net_device *netdev)
6689{
6690	struct igb_adapter *adapter = netdev_priv(netdev);
6691	struct e1000_hw *hw = &adapter->hw;
6692	int i;
6693
6694	if (!adapter->msix_entries) {
6695		struct igb_q_vector *q_vector = adapter->q_vector[0];
6696		igb_irq_disable(adapter);
6697		napi_schedule(&q_vector->napi);
6698		return;
6699	}
6700
6701	for (i = 0; i < adapter->num_q_vectors; i++) {
6702		struct igb_q_vector *q_vector = adapter->q_vector[i];
6703		wr32(E1000_EIMC, q_vector->eims_value);
6704		napi_schedule(&q_vector->napi);
6705	}
6706}
6707#endif /* CONFIG_NET_POLL_CONTROLLER */
6708
6709/**
6710 * igb_io_error_detected - called when PCI error is detected
6711 * @pdev: Pointer to PCI device
6712 * @state: The current pci connection state
6713 *
6714 * This function is called after a PCI bus error affecting
6715 * this device has been detected.
6716 */
6717static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6718					      pci_channel_state_t state)
6719{
6720	struct net_device *netdev = pci_get_drvdata(pdev);
6721	struct igb_adapter *adapter = netdev_priv(netdev);
6722
6723	netif_device_detach(netdev);
6724
6725	if (state == pci_channel_io_perm_failure)
6726		return PCI_ERS_RESULT_DISCONNECT;
6727
6728	if (netif_running(netdev))
6729		igb_down(adapter);
6730	pci_disable_device(pdev);
6731
6732	/* Request a slot reset. */
6733	return PCI_ERS_RESULT_NEED_RESET;
6734}
6735
6736/**
6737 * igb_io_slot_reset - called after the pci bus has been reset.
6738 * @pdev: Pointer to PCI device
6739 *
6740 * Restart the card from scratch, as if from a cold-boot. Implementation
6741 * resembles the first-half of the igb_resume routine.
6742 */
6743static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6744{
6745	struct net_device *netdev = pci_get_drvdata(pdev);
6746	struct igb_adapter *adapter = netdev_priv(netdev);
6747	struct e1000_hw *hw = &adapter->hw;
6748	pci_ers_result_t result;
6749	int err;
6750
6751	if (pci_enable_device_mem(pdev)) {
6752		dev_err(&pdev->dev,
6753			"Cannot re-enable PCI device after reset.\n");
6754		result = PCI_ERS_RESULT_DISCONNECT;
6755	} else {
6756		pci_set_master(pdev);
6757		pci_restore_state(pdev);
6758		pci_save_state(pdev);
6759
6760		pci_enable_wake(pdev, PCI_D3hot, 0);
6761		pci_enable_wake(pdev, PCI_D3cold, 0);
6762
6763		igb_reset(adapter);
6764		wr32(E1000_WUS, ~0);
6765		result = PCI_ERS_RESULT_RECOVERED;
6766	}
6767
6768	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6769	if (err) {
6770		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6771		        "failed 0x%0x\n", err);
6772		/* non-fatal, continue */
6773	}
6774
6775	return result;
6776}
6777
6778/**
6779 * igb_io_resume - called when traffic can start flowing again.
6780 * @pdev: Pointer to PCI device
6781 *
6782 * This callback is called when the error recovery driver tells us that
6783 * its OK to resume normal operation. Implementation resembles the
6784 * second-half of the igb_resume routine.
6785 */
6786static void igb_io_resume(struct pci_dev *pdev)
6787{
6788	struct net_device *netdev = pci_get_drvdata(pdev);
6789	struct igb_adapter *adapter = netdev_priv(netdev);
6790
6791	if (netif_running(netdev)) {
6792		if (igb_up(adapter)) {
6793			dev_err(&pdev->dev, "igb_up failed after reset\n");
6794			return;
6795		}
6796	}
6797
6798	netif_device_attach(netdev);
6799
6800	/* let the f/w know that the h/w is now under the control of the
6801	 * driver. */
6802	igb_get_hw_control(adapter);
6803}
6804
6805static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6806                             u8 qsel)
6807{
6808	u32 rar_low, rar_high;
6809	struct e1000_hw *hw = &adapter->hw;
6810
6811	/* HW expects these in little endian so we reverse the byte order
6812	 * from network order (big endian) to little endian
6813	 */
6814	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6815	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6816	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6817
6818	/* Indicate to hardware the Address is Valid. */
6819	rar_high |= E1000_RAH_AV;
6820
6821	if (hw->mac.type == e1000_82575)
6822		rar_high |= E1000_RAH_POOL_1 * qsel;
6823	else
6824		rar_high |= E1000_RAH_POOL_1 << qsel;
6825
6826	wr32(E1000_RAL(index), rar_low);
6827	wrfl();
6828	wr32(E1000_RAH(index), rar_high);
6829	wrfl();
6830}
6831
6832static int igb_set_vf_mac(struct igb_adapter *adapter,
6833                          int vf, unsigned char *mac_addr)
6834{
6835	struct e1000_hw *hw = &adapter->hw;
6836	/* VF MAC addresses start at the end of the receive addresses and move
6837	 * towards the first; as a result a collision should not be possible */
6838	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6839
6840	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6841
6842	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6843
6844	return 0;
6845}
6846
6847static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6848{
6849	struct igb_adapter *adapter = netdev_priv(netdev);
6850	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6851		return -EINVAL;
6852	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6853	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6854	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6855				      " change effective.");
6856	if (test_bit(__IGB_DOWN, &adapter->state)) {
6857		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6858			 " but the PF device is not up.\n");
6859		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6860			 " attempting to use the VF device.\n");
6861	}
6862	return igb_set_vf_mac(adapter, vf, mac);
6863}
6864
6865static int igb_link_mbps(int internal_link_speed)
6866{
6867	switch (internal_link_speed) {
6868	case SPEED_100:
6869		return 100;
6870	case SPEED_1000:
6871		return 1000;
6872	default:
6873		return 0;
6874	}
6875}
6876
6877static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6878				  int link_speed)
6879{
6880	int rf_dec, rf_int;
6881	u32 bcnrc_val;
6882
6883	if (tx_rate != 0) {
6884		/* Calculate the rate factor values to set */
6885		rf_int = link_speed / tx_rate;
6886		rf_dec = (link_speed - (rf_int * tx_rate));
6887		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
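		/* rf_int is the integer part and rf_dec the fractional part (scaled by 2^RF_INT_SHIFT) of link_speed / tx_rate */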
6888
6889		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6890		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6891		               E1000_RTTBCNRC_RF_INT_MASK);
6892		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6893	} else {
6894		bcnrc_val = 0;
6895	}
6896
6897	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6898	wr32(E1000_RTTBCNRC, bcnrc_val);
6899}
6900
6901static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6902{
6903	int actual_link_speed, i;
6904	bool reset_rate = false;
6905
6906	/* VF TX rate limit was not set or not supported */
6907	if ((adapter->vf_rate_link_speed == 0) ||
6908	    (adapter->hw.mac.type != e1000_82576))
6909		return;
6910
6911	actual_link_speed = igb_link_mbps(adapter->link_speed);
6912	if (actual_link_speed != adapter->vf_rate_link_speed) {
6913		reset_rate = true;
6914		adapter->vf_rate_link_speed = 0;
6915		dev_info(&adapter->pdev->dev,
6916		         "Link speed has been changed. VF Transmit "
6917		         "rate is disabled\n");
6918	}
6919
6920	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6921		if (reset_rate)
6922			adapter->vf_data[i].tx_rate = 0;
6923
6924		igb_set_vf_rate_limit(&adapter->hw, i,
6925		                      adapter->vf_data[i].tx_rate,
6926		                      actual_link_speed);
6927	}
6928}
6929
6930static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6931{
6932	struct igb_adapter *adapter = netdev_priv(netdev);
6933	struct e1000_hw *hw = &adapter->hw;
6934	int actual_link_speed;
6935
6936	if (hw->mac.type != e1000_82576)
6937		return -EOPNOTSUPP;
6938
6939	actual_link_speed = igb_link_mbps(adapter->link_speed);
6940	if ((vf >= adapter->vfs_allocated_count) ||
6941	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6942	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6943		return -EINVAL;
6944
6945	adapter->vf_rate_link_speed = actual_link_speed;
6946	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6947	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6948
6949	return 0;
6950}
6951
6952static int igb_ndo_get_vf_config(struct net_device *netdev,
6953				 int vf, struct ifla_vf_info *ivi)
6954{
6955	struct igb_adapter *adapter = netdev_priv(netdev);
6956	if (vf >= adapter->vfs_allocated_count)
6957		return -EINVAL;
6958	ivi->vf = vf;
6959	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6960	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6961	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6962	ivi->qos = adapter->vf_data[vf].pf_qos;
6963	return 0;
6964}
6965
6966static void igb_vmm_control(struct igb_adapter *adapter)
6967{
6968	struct e1000_hw *hw = &adapter->hw;
6969	u32 reg;
6970
6971	switch (hw->mac.type) {
6972	case e1000_82575:
6973	default:
6974		/* replication is not supported for 82575 */
6975		return;
6976	case e1000_82576:
6977		/* notify HW that the MAC is adding vlan tags */
6978		reg = rd32(E1000_DTXCTL);
6979		reg |= E1000_DTXCTL_VLAN_ADDED;
6980		wr32(E1000_DTXCTL, reg);
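		/* fall through */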
6981	case e1000_82580:
6982		/* enable replication vlan tag stripping */
6983		reg = rd32(E1000_RPLOLR);
6984		reg |= E1000_RPLOLR_STRVLAN;
6985		wr32(E1000_RPLOLR, reg);
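		/* fall through */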
6986	case e1000_i350:
6987		/* none of the above registers are supported by i350 */
6988		break;
6989	}
6990
6991	if (adapter->vfs_allocated_count) {
6992		igb_vmdq_set_loopback_pf(hw, true);
6993		igb_vmdq_set_replication_pf(hw, true);
6994		igb_vmdq_set_anti_spoofing_pf(hw, true,
6995						adapter->vfs_allocated_count);
6996	} else {
6997		igb_vmdq_set_loopback_pf(hw, false);
6998		igb_vmdq_set_replication_pf(hw, false);
6999	}
7000}
7001
7002/* igb_main.c */
7003