igb_main.c revision cfb8c3aa59302636c69890be10b2ef23a7ca83b2
1/*******************************************************************************
2
3  Intel(R) Gigabit Ethernet Linux driver
4  Copyright(c) 2007-2012 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <linux/module.h>
31#include <linux/types.h>
32#include <linux/init.h>
33#include <linux/bitops.h>
34#include <linux/vmalloc.h>
35#include <linux/pagemap.h>
36#include <linux/netdevice.h>
37#include <linux/ipv6.h>
38#include <linux/slab.h>
39#include <net/checksum.h>
40#include <net/ip6_checksum.h>
41#include <linux/net_tstamp.h>
42#include <linux/mii.h>
43#include <linux/ethtool.h>
44#include <linux/if.h>
45#include <linux/if_vlan.h>
46#include <linux/pci.h>
47#include <linux/pci-aspm.h>
48#include <linux/delay.h>
49#include <linux/interrupt.h>
50#include <linux/ip.h>
51#include <linux/tcp.h>
52#include <linux/sctp.h>
53#include <linux/if_ether.h>
54#include <linux/aer.h>
55#include <linux/prefetch.h>
56#include <linux/pm_runtime.h>
57#ifdef CONFIG_IGB_DCA
58#include <linux/dca.h>
59#endif
60#include "igb.h"
61
62#define MAJ 3
63#define MIN 2
64#define BUILD 10
65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66__stringify(BUILD) "-k"
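/* For reference: __stringify() pastes the tokens above into string literals,
 * so DRV_VERSION expands to "3.2.10-k", which is what igb_driver_version
 * below reports.
 */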
67char igb_driver_name[] = "igb";
68char igb_driver_version[] = DRV_VERSION;
69static const char igb_driver_string[] =
70				"Intel(R) Gigabit Ethernet Network Driver";
71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73static const struct e1000_info *igb_info_tbl[] = {
74	[board_82575] = &e1000_82575_info,
75};
76
77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103	/* required last entry */
104	{0, }
105};
106
107MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109void igb_reset(struct igb_adapter *);
110static int igb_setup_all_tx_resources(struct igb_adapter *);
111static int igb_setup_all_rx_resources(struct igb_adapter *);
112static void igb_free_all_tx_resources(struct igb_adapter *);
113static void igb_free_all_rx_resources(struct igb_adapter *);
114static void igb_setup_mrqc(struct igb_adapter *);
115static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116static void __devexit igb_remove(struct pci_dev *pdev);
117static void igb_init_hw_timer(struct igb_adapter *adapter);
118static int igb_sw_init(struct igb_adapter *);
119static int igb_open(struct net_device *);
120static int igb_close(struct net_device *);
121static void igb_configure_tx(struct igb_adapter *);
122static void igb_configure_rx(struct igb_adapter *);
123static void igb_clean_all_tx_rings(struct igb_adapter *);
124static void igb_clean_all_rx_rings(struct igb_adapter *);
125static void igb_clean_tx_ring(struct igb_ring *);
126static void igb_clean_rx_ring(struct igb_ring *);
127static void igb_set_rx_mode(struct net_device *);
128static void igb_update_phy_info(unsigned long);
129static void igb_watchdog(unsigned long);
130static void igb_watchdog_task(struct work_struct *);
131static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133						 struct rtnl_link_stats64 *stats);
134static int igb_change_mtu(struct net_device *, int);
135static int igb_set_mac(struct net_device *, void *);
136static void igb_set_uta(struct igb_adapter *adapter);
137static irqreturn_t igb_intr(int irq, void *);
138static irqreturn_t igb_intr_msi(int irq, void *);
139static irqreturn_t igb_msix_other(int irq, void *);
140static irqreturn_t igb_msix_ring(int irq, void *);
141#ifdef CONFIG_IGB_DCA
142static void igb_update_dca(struct igb_q_vector *);
143static void igb_setup_dca(struct igb_adapter *);
144#endif /* CONFIG_IGB_DCA */
145static int igb_poll(struct napi_struct *, int);
146static bool igb_clean_tx_irq(struct igb_q_vector *);
147static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149static void igb_tx_timeout(struct net_device *);
150static void igb_reset_task(struct work_struct *);
151static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152static int igb_vlan_rx_add_vid(struct net_device *, u16);
153static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154static void igb_restore_vlan(struct igb_adapter *);
155static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156static void igb_ping_all_vfs(struct igb_adapter *);
157static void igb_msg_task(struct igb_adapter *);
158static void igb_vmm_control(struct igb_adapter *);
159static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163			       int vf, u16 vlan, u8 qos);
164static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166				 struct ifla_vf_info *ivi);
167static void igb_check_vf_rate_limit(struct igb_adapter *);
168
169#ifdef CONFIG_PCI_IOV
170static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172static int igb_check_vf_assignment(struct igb_adapter *adapter);
173#endif
174
175#ifdef CONFIG_PM
176#ifdef CONFIG_PM_SLEEP
177static int igb_suspend(struct device *);
178#endif
179static int igb_resume(struct device *);
180#ifdef CONFIG_PM_RUNTIME
181static int igb_runtime_suspend(struct device *dev);
182static int igb_runtime_resume(struct device *dev);
183static int igb_runtime_idle(struct device *dev);
184#endif
185static const struct dev_pm_ops igb_pm_ops = {
186	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188			igb_runtime_idle)
189};
190#endif
191static void igb_shutdown(struct pci_dev *);
192#ifdef CONFIG_IGB_DCA
193static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194static struct notifier_block dca_notifier = {
195	.notifier_call	= igb_notify_dca,
196	.next		= NULL,
197	.priority	= 0
198};
199#endif
200#ifdef CONFIG_NET_POLL_CONTROLLER
201/* for netdump / net console */
202static void igb_netpoll(struct net_device *);
203#endif
204#ifdef CONFIG_PCI_IOV
205static unsigned int max_vfs = 0;
206module_param(max_vfs, uint, 0);
207MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208                 "per physical function");
209#endif /* CONFIG_PCI_IOV */
210
211static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212		     pci_channel_state_t);
213static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214static void igb_io_resume(struct pci_dev *);
215
216static struct pci_error_handlers igb_err_handler = {
217	.error_detected = igb_io_error_detected,
218	.slot_reset = igb_io_slot_reset,
219	.resume = igb_io_resume,
220};
221
222static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223
224static struct pci_driver igb_driver = {
225	.name     = igb_driver_name,
226	.id_table = igb_pci_tbl,
227	.probe    = igb_probe,
228	.remove   = __devexit_p(igb_remove),
229#ifdef CONFIG_PM
230	.driver.pm = &igb_pm_ops,
231#endif
232	.shutdown = igb_shutdown,
233	.err_handler = &igb_err_handler
234};
235
236MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238MODULE_LICENSE("GPL");
239MODULE_VERSION(DRV_VERSION);
240
241#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
242static int debug = -1;
243module_param(debug, int, 0);
244MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
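/* Illustration (assuming the probe routine feeds this through
 * netif_msg_init(debug, DEFAULT_MSG_ENABLE), as the related Intel Ethernet
 * drivers do): debug = -1 selects DEFAULT_MSG_ENABLE above, while a value n
 * in 0..16 enables the n lowest NETIF_MSG_* classes, i.e. (1 << n) - 1.
 */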
245
246struct igb_reg_info {
247	u32 ofs;
248	char *name;
249};
250
251static const struct igb_reg_info igb_reg_info_tbl[] = {
252
253	/* General Registers */
254	{E1000_CTRL, "CTRL"},
255	{E1000_STATUS, "STATUS"},
256	{E1000_CTRL_EXT, "CTRL_EXT"},
257
258	/* Interrupt Registers */
259	{E1000_ICR, "ICR"},
260
261	/* RX Registers */
262	{E1000_RCTL, "RCTL"},
263	{E1000_RDLEN(0), "RDLEN"},
264	{E1000_RDH(0), "RDH"},
265	{E1000_RDT(0), "RDT"},
266	{E1000_RXDCTL(0), "RXDCTL"},
267	{E1000_RDBAL(0), "RDBAL"},
268	{E1000_RDBAH(0), "RDBAH"},
269
270	/* TX Registers */
271	{E1000_TCTL, "TCTL"},
272	{E1000_TDBAL(0), "TDBAL"},
273	{E1000_TDBAH(0), "TDBAH"},
274	{E1000_TDLEN(0), "TDLEN"},
275	{E1000_TDH(0), "TDH"},
276	{E1000_TDT(0), "TDT"},
277	{E1000_TXDCTL(0), "TXDCTL"},
278	{E1000_TDFH, "TDFH"},
279	{E1000_TDFT, "TDFT"},
280	{E1000_TDFHS, "TDFHS"},
281	{E1000_TDFPC, "TDFPC"},
282
283	/* List Terminator */
284	{}
285};
286
287/*
288 * igb_regdump - register printout routine
289 */
290static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
291{
292	int n = 0;
293	char rname[16];
294	u32 regs[8];
295
296	switch (reginfo->ofs) {
297	case E1000_RDLEN(0):
298		for (n = 0; n < 4; n++)
299			regs[n] = rd32(E1000_RDLEN(n));
300		break;
301	case E1000_RDH(0):
302		for (n = 0; n < 4; n++)
303			regs[n] = rd32(E1000_RDH(n));
304		break;
305	case E1000_RDT(0):
306		for (n = 0; n < 4; n++)
307			regs[n] = rd32(E1000_RDT(n));
308		break;
309	case E1000_RXDCTL(0):
310		for (n = 0; n < 4; n++)
311			regs[n] = rd32(E1000_RXDCTL(n));
312		break;
313	case E1000_RDBAL(0):
314		for (n = 0; n < 4; n++)
315			regs[n] = rd32(E1000_RDBAL(n));
316		break;
317	case E1000_RDBAH(0):
318		for (n = 0; n < 4; n++)
319			regs[n] = rd32(E1000_RDBAH(n));
320		break;
321	case E1000_TDBAL(0):
322		for (n = 0; n < 4; n++)
323			regs[n] = rd32(E1000_TDBAL(n));
324		break;
325	case E1000_TDBAH(0):
326		for (n = 0; n < 4; n++)
327			regs[n] = rd32(E1000_TDBAH(n));
328		break;
329	case E1000_TDLEN(0):
330		for (n = 0; n < 4; n++)
331			regs[n] = rd32(E1000_TDLEN(n));
332		break;
333	case E1000_TDH(0):
334		for (n = 0; n < 4; n++)
335			regs[n] = rd32(E1000_TDH(n));
336		break;
337	case E1000_TDT(0):
338		for (n = 0; n < 4; n++)
339			regs[n] = rd32(E1000_TDT(n));
340		break;
341	case E1000_TXDCTL(0):
342		for (n = 0; n < 4; n++)
343			regs[n] = rd32(E1000_TXDCTL(n));
344		break;
345	default:
346		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
347		return;
348	}
349
350	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
351	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
352		regs[2], regs[3]);
353}
354
355/*
356 * igb_dump - Print registers, tx-rings and rx-rings
357 */
358static void igb_dump(struct igb_adapter *adapter)
359{
360	struct net_device *netdev = adapter->netdev;
361	struct e1000_hw *hw = &adapter->hw;
362	struct igb_reg_info *reginfo;
363	struct igb_ring *tx_ring;
364	union e1000_adv_tx_desc *tx_desc;
365	struct my_u0 { u64 a; u64 b; } *u0;
366	struct igb_ring *rx_ring;
367	union e1000_adv_rx_desc *rx_desc;
368	u32 staterr;
369	u16 i, n;
370
371	if (!netif_msg_hw(adapter))
372		return;
373
374	/* Print netdevice Info */
375	if (netdev) {
376		dev_info(&adapter->pdev->dev, "Net device Info\n");
377		pr_info("Device Name     state            trans_start      "
378			"last_rx\n");
379		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
380			netdev->state, netdev->trans_start, netdev->last_rx);
381	}
382
383	/* Print Registers */
384	dev_info(&adapter->pdev->dev, "Register Dump\n");
385	pr_info(" Register Name   Value\n");
386	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
387	     reginfo->name; reginfo++) {
388		igb_regdump(hw, reginfo);
389	}
390
391	/* Print TX Ring Summary */
392	if (!netdev || !netif_running(netdev))
393		goto exit;
394
395	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
396	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
397	for (n = 0; n < adapter->num_tx_queues; n++) {
398		struct igb_tx_buffer *buffer_info;
399		tx_ring = adapter->tx_ring[n];
400		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
401		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
402			n, tx_ring->next_to_use, tx_ring->next_to_clean,
403			(u64)buffer_info->dma,
404			buffer_info->length,
405			buffer_info->next_to_watch,
406			(u64)buffer_info->time_stamp);
407	}
408
409	/* Print TX Rings */
410	if (!netif_msg_tx_done(adapter))
411		goto rx_ring_summary;
412
413	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
414
415	/* Transmit Descriptor Formats
416	 *
417	 * Advanced Transmit Descriptor
418	 *   +--------------------------------------------------------------+
419	 * 0 |         Buffer Address [63:0]                                |
420	 *   +--------------------------------------------------------------+
421	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
422	 *   +--------------------------------------------------------------+
423	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
424	 */
425
426	for (n = 0; n < adapter->num_tx_queues; n++) {
427		tx_ring = adapter->tx_ring[n];
428		pr_info("------------------------------------\n");
429		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
430		pr_info("------------------------------------\n");
431		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
432			"[bi->dma       ] leng  ntw timestamp        "
433			"bi->skb\n");
434
435		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
436			const char *next_desc;
437			struct igb_tx_buffer *buffer_info;
438			tx_desc = IGB_TX_DESC(tx_ring, i);
439			buffer_info = &tx_ring->tx_buffer_info[i];
440			u0 = (struct my_u0 *)tx_desc;
441			if (i == tx_ring->next_to_use &&
442			    i == tx_ring->next_to_clean)
443				next_desc = " NTC/U";
444			else if (i == tx_ring->next_to_use)
445				next_desc = " NTU";
446			else if (i == tx_ring->next_to_clean)
447				next_desc = " NTC";
448			else
449				next_desc = "";
450
451			pr_info("T [0x%03X]    %016llX %016llX %016llX"
452				" %04X  %p %016llX %p%s\n", i,
453				le64_to_cpu(u0->a),
454				le64_to_cpu(u0->b),
455				(u64)buffer_info->dma,
456				buffer_info->length,
457				buffer_info->next_to_watch,
458				(u64)buffer_info->time_stamp,
459				buffer_info->skb, next_desc);
460
461			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
462				print_hex_dump(KERN_INFO, "",
463					DUMP_PREFIX_ADDRESS,
464					16, 1, phys_to_virt(buffer_info->dma),
465					buffer_info->length, true);
466		}
467	}
468
469	/* Print RX Rings Summary */
470rx_ring_summary:
471	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
472	pr_info("Queue [NTU] [NTC]\n");
473	for (n = 0; n < adapter->num_rx_queues; n++) {
474		rx_ring = adapter->rx_ring[n];
475		pr_info(" %5d %5X %5X\n",
476			n, rx_ring->next_to_use, rx_ring->next_to_clean);
477	}
478
479	/* Print RX Rings */
480	if (!netif_msg_rx_status(adapter))
481		goto exit;
482
483	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
484
485	/* Advanced Receive Descriptor (Read) Format
486	 *    63                                           1        0
487	 *    +-----------------------------------------------------+
488	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
489	 *    +----------------------------------------------+------+
490	 *  8 |       Header Buffer Address [63:1]           |  DD  |
491	 *    +-----------------------------------------------------+
492	 *
493	 *
494	 * Advanced Receive Descriptor (Write-Back) Format
495	 *
496	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
497	 *   +------------------------------------------------------+
498	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
499	 *   | Checksum   Ident  |   |           |    | Type | Type |
500	 *   +------------------------------------------------------+
501	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
502	 *   +------------------------------------------------------+
503	 *   63       48 47    32 31            20 19               0
504	 */
505
506	for (n = 0; n < adapter->num_rx_queues; n++) {
507		rx_ring = adapter->rx_ring[n];
508		pr_info("------------------------------------\n");
509		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
510		pr_info("------------------------------------\n");
511		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
512			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
513		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
514			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
515
516		for (i = 0; i < rx_ring->count; i++) {
517			const char *next_desc;
518			struct igb_rx_buffer *buffer_info;
519			buffer_info = &rx_ring->rx_buffer_info[i];
520			rx_desc = IGB_RX_DESC(rx_ring, i);
521			u0 = (struct my_u0 *)rx_desc;
522			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
523
524			if (i == rx_ring->next_to_use)
525				next_desc = " NTU";
526			else if (i == rx_ring->next_to_clean)
527				next_desc = " NTC";
528			else
529				next_desc = "";
530
531			if (staterr & E1000_RXD_STAT_DD) {
532				/* Descriptor Done */
533				pr_info("%s[0x%03X]     %016llX %016llX -------"
534					"--------- %p%s\n", "RWB", i,
535					le64_to_cpu(u0->a),
536					le64_to_cpu(u0->b),
537					buffer_info->skb, next_desc);
538			} else {
539				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
540					" %p%s\n", "R  ", i,
541					le64_to_cpu(u0->a),
542					le64_to_cpu(u0->b),
543					(u64)buffer_info->dma,
544					buffer_info->skb, next_desc);
545
546				if (netif_msg_pktdata(adapter)) {
547					print_hex_dump(KERN_INFO, "",
548						DUMP_PREFIX_ADDRESS,
549						16, 1,
550						phys_to_virt(buffer_info->dma),
551						IGB_RX_HDR_LEN, true);
552					print_hex_dump(KERN_INFO, "",
553					  DUMP_PREFIX_ADDRESS,
554					  16, 1,
555					  phys_to_virt(
556					    buffer_info->page_dma +
557					    buffer_info->page_offset),
558					  PAGE_SIZE/2, true);
559				}
560			}
561		}
562	}
563
564exit:
565	return;
566}
567
568
569/**
570 * igb_read_clock - read raw cycle counter (to be used by time counter)
571 */
572static cycle_t igb_read_clock(const struct cyclecounter *tc)
573{
574	struct igb_adapter *adapter =
575		container_of(tc, struct igb_adapter, cycles);
576	struct e1000_hw *hw = &adapter->hw;
577	u64 stamp = 0;
578	int shift = 0;
579
580	/*
581	 * The timestamp latches on lowest register read. For the 82580
582	 * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
583	 * adjust TIMINCA, so SYSTIMR just reads as all 0s and can be ignored.
584	 */
585	if (hw->mac.type >= e1000_82580) {
586		stamp = rd32(E1000_SYSTIMR) >> 8;
587		shift = IGB_82580_TSYNC_SHIFT;
588	}
589
590	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
591	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
592	return stamp;
593}
594
595/**
596 * igb_get_hw_dev - return device
597 * used by hardware layer to print debugging information
598 **/
599struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
600{
601	struct igb_adapter *adapter = hw->back;
602	return adapter->netdev;
603}
604
605/**
606 * igb_init_module - Driver Registration Routine
607 *
608 * igb_init_module is the first routine called when the driver is
609 * loaded. All it does is register with the PCI subsystem.
610 **/
611static int __init igb_init_module(void)
612{
613	int ret;
614	pr_info("%s - version %s\n",
615	       igb_driver_string, igb_driver_version);
616
617	pr_info("%s\n", igb_copyright);
618
619#ifdef CONFIG_IGB_DCA
620	dca_register_notify(&dca_notifier);
621#endif
622	ret = pci_register_driver(&igb_driver);
623	return ret;
624}
625
626module_init(igb_init_module);
627
628/**
629 * igb_exit_module - Driver Exit Cleanup Routine
630 *
631 * igb_exit_module is called just before the driver is removed
632 * from memory.
633 **/
634static void __exit igb_exit_module(void)
635{
636#ifdef CONFIG_IGB_DCA
637	dca_unregister_notify(&dca_notifier);
638#endif
639	pci_unregister_driver(&igb_driver);
640}
641
642module_exit(igb_exit_module);
643
644#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
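/* Worked example of the macro above: Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8,
 * Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ...  Even logical queues land on
 * register offsets 0..7 and odd ones on 8..15, matching the
 * "VF 0 gets queues 0 and 8, VF 1 gets queues 1 and 9" layout described in
 * igb_cache_ring_register() below.
 */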
645/**
646 * igb_cache_ring_register - Descriptor ring to register mapping
647 * @adapter: board private structure to initialize
648 *
649 * Once we know the feature-set enabled for the device, we'll cache
650 * the register offset the descriptor ring is assigned to.
651 **/
652static void igb_cache_ring_register(struct igb_adapter *adapter)
653{
654	int i = 0, j = 0;
655	u32 rbase_offset = adapter->vfs_allocated_count;
656
657	switch (adapter->hw.mac.type) {
658	case e1000_82576:
659		/* The queues are allocated for virtualization such that VF 0
660		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
661		 * In order to avoid collision we start at the first free queue
662		 * and continue consuming queues in the same sequence
663		 */
664		if (adapter->vfs_allocated_count) {
665			for (; i < adapter->rss_queues; i++)
666				adapter->rx_ring[i]->reg_idx = rbase_offset +
667				                               Q_IDX_82576(i);
668		}
669	case e1000_82575:
670	case e1000_82580:
671	case e1000_i350:
672	default:
673		for (; i < adapter->num_rx_queues; i++)
674			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
675		for (; j < adapter->num_tx_queues; j++)
676			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
677		break;
678	}
679}
680
681static void igb_free_queues(struct igb_adapter *adapter)
682{
683	int i;
684
685	for (i = 0; i < adapter->num_tx_queues; i++) {
686		kfree(adapter->tx_ring[i]);
687		adapter->tx_ring[i] = NULL;
688	}
689	for (i = 0; i < adapter->num_rx_queues; i++) {
690		kfree(adapter->rx_ring[i]);
691		adapter->rx_ring[i] = NULL;
692	}
693	adapter->num_rx_queues = 0;
694	adapter->num_tx_queues = 0;
695}
696
697/**
698 * igb_alloc_queues - Allocate memory for all rings
699 * @adapter: board private structure to initialize
700 *
701 * We allocate one ring per queue at run-time since we don't know the
702 * number of queues at compile-time.
703 **/
704static int igb_alloc_queues(struct igb_adapter *adapter)
705{
706	struct igb_ring *ring;
707	int i;
708	int orig_node = adapter->node;
709
710	for (i = 0; i < adapter->num_tx_queues; i++) {
711		if (orig_node == -1) {
712			int cur_node = next_online_node(adapter->node);
713			if (cur_node == MAX_NUMNODES)
714				cur_node = first_online_node;
715			adapter->node = cur_node;
716		}
717		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
718				    adapter->node);
719		if (!ring)
720			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
721		if (!ring)
722			goto err;
723		ring->count = adapter->tx_ring_count;
724		ring->queue_index = i;
725		ring->dev = &adapter->pdev->dev;
726		ring->netdev = adapter->netdev;
727		ring->numa_node = adapter->node;
728		/* For 82575, context index must be unique per ring. */
729		if (adapter->hw.mac.type == e1000_82575)
730			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
731		adapter->tx_ring[i] = ring;
732	}
733	/* Restore the adapter's original node */
734	adapter->node = orig_node;
735
736	for (i = 0; i < adapter->num_rx_queues; i++) {
737		if (orig_node == -1) {
738			int cur_node = next_online_node(adapter->node);
739			if (cur_node == MAX_NUMNODES)
740				cur_node = first_online_node;
741			adapter->node = cur_node;
742		}
743		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
744				    adapter->node);
745		if (!ring)
746			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
747		if (!ring)
748			goto err;
749		ring->count = adapter->rx_ring_count;
750		ring->queue_index = i;
751		ring->dev = &adapter->pdev->dev;
752		ring->netdev = adapter->netdev;
753		ring->numa_node = adapter->node;
754		/* set flag indicating ring supports SCTP checksum offload */
755		if (adapter->hw.mac.type >= e1000_82576)
756			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
757
758		/* On i350, loopback VLAN packets have the tag byte-swapped. */
759		if (adapter->hw.mac.type == e1000_i350)
760			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
761
762		adapter->rx_ring[i] = ring;
763	}
764	/* Restore the adapter's original node */
765	adapter->node = orig_node;
766
767	igb_cache_ring_register(adapter);
768
769	return 0;
770
771err:
772	/* Restore the adapter's original node */
773	adapter->node = orig_node;
774	igb_free_queues(adapter);
775
776	return -ENOMEM;
777}
778
779/**
780 *  igb_write_ivar - configure ivar for given MSI-X vector
781 *  @hw: pointer to the HW structure
782 *  @msix_vector: vector number we are allocating to a given ring
783 *  @index: row index of IVAR register to write within IVAR table
784 *  @offset: column offset within the IVAR register; should be a multiple of 8
785 *
786 *  This function is intended to handle the writing of the IVAR register
787 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
788 *  each containing a cause allocation for an Rx and a Tx ring, and a
789 *  variable number of rows depending on the number of queues supported.
790 **/
791static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
792			   int index, int offset)
793{
794	u32 ivar = array_rd32(E1000_IVAR0, index);
795
796	/* clear any bits that are currently set */
797	ivar &= ~((u32)0xFF << offset);
798
799	/* write vector and valid bit */
800	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
801
802	array_wr32(E1000_IVAR0, index, ivar);
803}
804
805#define IGB_N0_QUEUE -1
806static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
807{
808	struct igb_adapter *adapter = q_vector->adapter;
809	struct e1000_hw *hw = &adapter->hw;
810	int rx_queue = IGB_N0_QUEUE;
811	int tx_queue = IGB_N0_QUEUE;
812	u32 msixbm = 0;
813
814	if (q_vector->rx.ring)
815		rx_queue = q_vector->rx.ring->reg_idx;
816	if (q_vector->tx.ring)
817		tx_queue = q_vector->tx.ring->reg_idx;
818
819	switch (hw->mac.type) {
820	case e1000_82575:
821		/* The 82575 assigns vectors using a bitmask, which matches the
822		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
823		   or more queues to a vector, we write the appropriate bits
824		   into the MSIXBM register for that vector. */
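		/* Example: a vector servicing Rx queue 0 and Tx queue 0 ends up
		 * with msixbm = E1000_EICR_RX_QUEUE0 | E1000_EICR_TX_QUEUE0;
		 * the same bitmask is stored in eims_value and later written to
		 * EIMS/EIMC to enable or disable the vector.
		 */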
825		if (rx_queue > IGB_N0_QUEUE)
826			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
827		if (tx_queue > IGB_N0_QUEUE)
828			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
829		if (!adapter->msix_entries && msix_vector == 0)
830			msixbm |= E1000_EIMS_OTHER;
831		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
832		q_vector->eims_value = msixbm;
833		break;
834	case e1000_82576:
835		/*
836		 * 82576 uses a table that essentially consists of 2 columns
837		 * with 8 rows.  The ordering is column-major so we use the
838		 * lower 3 bits as the row index, and the 4th bit as the
839		 * column offset.
840		 */
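		/* Example of the scheme above: Rx queue 9 is written to IVAR
		 * row 9 & 0x7 = 1 at bit offset (9 & 0x8) << 1 = 16, and the
		 * Tx entry for queue 9 lands in the same row at offset
		 * 16 + 8 = 24.
		 */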
841		if (rx_queue > IGB_N0_QUEUE)
842			igb_write_ivar(hw, msix_vector,
843				       rx_queue & 0x7,
844				       (rx_queue & 0x8) << 1);
845		if (tx_queue > IGB_N0_QUEUE)
846			igb_write_ivar(hw, msix_vector,
847				       tx_queue & 0x7,
848				       ((tx_queue & 0x8) << 1) + 8);
849		q_vector->eims_value = 1 << msix_vector;
850		break;
851	case e1000_82580:
852	case e1000_i350:
853		/*
854		 * On 82580 and newer adapters the scheme is similar to 82576
855		 * however instead of ordering column-major we have things
856		 * ordered row-major.  So we traverse the table by using
857		 * bit 0 as the column offset, and the remaining bits as the
858		 * row index.
859		 */
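		/* Example of the row-major scheme: Rx queue 5 goes to IVAR
		 * row 5 >> 1 = 2 at bit offset (5 & 0x1) << 4 = 16, and Tx
		 * queue 5 to the same row at offset 16 + 8 = 24.
		 */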
860		if (rx_queue > IGB_N0_QUEUE)
861			igb_write_ivar(hw, msix_vector,
862				       rx_queue >> 1,
863				       (rx_queue & 0x1) << 4);
864		if (tx_queue > IGB_N0_QUEUE)
865			igb_write_ivar(hw, msix_vector,
866				       tx_queue >> 1,
867				       ((tx_queue & 0x1) << 4) + 8);
868		q_vector->eims_value = 1 << msix_vector;
869		break;
870	default:
871		BUG();
872		break;
873	}
874
875	/* add q_vector eims value to global eims_enable_mask */
876	adapter->eims_enable_mask |= q_vector->eims_value;
877
878	/* configure q_vector to set itr on first interrupt */
879	q_vector->set_itr = 1;
880}
881
882/**
883 * igb_configure_msix - Configure MSI-X hardware
884 *
885 * igb_configure_msix sets up the hardware to properly
886 * generate MSI-X interrupts.
887 **/
888static void igb_configure_msix(struct igb_adapter *adapter)
889{
890	u32 tmp;
891	int i, vector = 0;
892	struct e1000_hw *hw = &adapter->hw;
893
894	adapter->eims_enable_mask = 0;
895
896	/* set vector for other causes, i.e. link changes */
897	switch (hw->mac.type) {
898	case e1000_82575:
899		tmp = rd32(E1000_CTRL_EXT);
900		/* enable MSI-X PBA support */
901		tmp |= E1000_CTRL_EXT_PBA_CLR;
902
903		/* Auto-Mask interrupts upon ICR read. */
904		tmp |= E1000_CTRL_EXT_EIAME;
905		tmp |= E1000_CTRL_EXT_IRCA;
906
907		wr32(E1000_CTRL_EXT, tmp);
908
909		/* enable msix_other interrupt */
910		array_wr32(E1000_MSIXBM(0), vector++,
911		                      E1000_EIMS_OTHER);
912		adapter->eims_other = E1000_EIMS_OTHER;
913
914		break;
915
916	case e1000_82576:
917	case e1000_82580:
918	case e1000_i350:
919		/* Turn on MSI-X capability first, or our settings
920		 * won't stick.  And it will take days to debug. */
921		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
922		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
923		                E1000_GPIE_NSICR);
924
925		/* enable msix_other interrupt */
926		adapter->eims_other = 1 << vector;
927		tmp = (vector++ | E1000_IVAR_VALID) << 8;
928
929		wr32(E1000_IVAR_MISC, tmp);
930		break;
931	default:
932		/* do nothing, since nothing else supports MSI-X */
933		break;
934	} /* switch (hw->mac.type) */
935
936	adapter->eims_enable_mask |= adapter->eims_other;
937
938	for (i = 0; i < adapter->num_q_vectors; i++)
939		igb_assign_vector(adapter->q_vector[i], vector++);
940
941	wrfl();
942}
943
944/**
945 * igb_request_msix - Initialize MSI-X interrupts
946 *
947 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
948 * kernel.
949 **/
950static int igb_request_msix(struct igb_adapter *adapter)
951{
952	struct net_device *netdev = adapter->netdev;
953	struct e1000_hw *hw = &adapter->hw;
954	int i, err = 0, vector = 0;
955
956	err = request_irq(adapter->msix_entries[vector].vector,
957	                  igb_msix_other, 0, netdev->name, adapter);
958	if (err)
959		goto out;
960	vector++;
961
962	for (i = 0; i < adapter->num_q_vectors; i++) {
963		struct igb_q_vector *q_vector = adapter->q_vector[i];
964
965		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
966
967		if (q_vector->rx.ring && q_vector->tx.ring)
968			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
969				q_vector->rx.ring->queue_index);
970		else if (q_vector->tx.ring)
971			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
972				q_vector->tx.ring->queue_index);
973		else if (q_vector->rx.ring)
974			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
975				q_vector->rx.ring->queue_index);
976		else
977			sprintf(q_vector->name, "%s-unused", netdev->name);
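		/* e.g. for a netdev named "eth0" with paired queues the vectors
		 * above show up as "eth0-TxRx-0", "eth0-TxRx-1", ... in
		 * /proc/interrupts.
		 */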
978
979		err = request_irq(adapter->msix_entries[vector].vector,
980		                  igb_msix_ring, 0, q_vector->name,
981		                  q_vector);
982		if (err)
983			goto out;
984		vector++;
985	}
986
987	igb_configure_msix(adapter);
988	return 0;
989out:
990	return err;
991}
992
993static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
994{
995	if (adapter->msix_entries) {
996		pci_disable_msix(adapter->pdev);
997		kfree(adapter->msix_entries);
998		adapter->msix_entries = NULL;
999	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
1000		pci_disable_msi(adapter->pdev);
1001	}
1002}
1003
1004/**
1005 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1006 * @adapter: board private structure to initialize
1007 *
1008 * This function frees the memory allocated to the q_vectors.  In addition if
1009 * NAPI is enabled it will delete any references to the NAPI struct prior
1010 * to freeing the q_vector.
1011 **/
1012static void igb_free_q_vectors(struct igb_adapter *adapter)
1013{
1014	int v_idx;
1015
1016	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1017		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1018		adapter->q_vector[v_idx] = NULL;
1019		if (!q_vector)
1020			continue;
1021		netif_napi_del(&q_vector->napi);
1022		kfree(q_vector);
1023	}
1024	adapter->num_q_vectors = 0;
1025}
1026
1027/**
1028 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1029 *
1030 * This function resets the device so that it has 0 rx queues, tx queues, and
1031 * MSI-X interrupts allocated.
1032 */
1033static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1034{
1035	igb_free_queues(adapter);
1036	igb_free_q_vectors(adapter);
1037	igb_reset_interrupt_capability(adapter);
1038}
1039
1040/**
1041 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1042 *
1043 * Attempt to configure interrupts using the best available
1044 * capabilities of the hardware and kernel.
1045 **/
1046static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1047{
1048	int err;
1049	int numvecs, i;
1050
1051	/* Number of supported queues. */
1052	adapter->num_rx_queues = adapter->rss_queues;
1053	if (adapter->vfs_allocated_count)
1054		adapter->num_tx_queues = 1;
1055	else
1056		adapter->num_tx_queues = adapter->rss_queues;
1057
1058	/* start with one vector for every rx queue */
1059	numvecs = adapter->num_rx_queues;
1060
1061	/* if tx handler is separate add 1 for every tx queue */
1062	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1063		numvecs += adapter->num_tx_queues;
1064
1065	/* store the number of vectors reserved for queues */
1066	adapter->num_q_vectors = numvecs;
1067
1068	/* add 1 vector for link status interrupts */
1069	numvecs++;
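	/* Example: with rss_queues = 4, no VFs and queue pairing disabled this
	 * requests 4 Rx + 4 Tx + 1 link/other = 9 MSI-X vectors; with queue
	 * pairing enabled it is 4 + 1 = 5.
	 */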
1070	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1071					GFP_KERNEL);
1072	if (!adapter->msix_entries)
1073		goto msi_only;
1074
1075	for (i = 0; i < numvecs; i++)
1076		adapter->msix_entries[i].entry = i;
1077
1078	err = pci_enable_msix(adapter->pdev,
1079			      adapter->msix_entries,
1080			      numvecs);
1081	if (err == 0)
1082		goto out;
1083
1084	igb_reset_interrupt_capability(adapter);
1085
1086	/* If we can't do MSI-X, try MSI */
1087msi_only:
1088#ifdef CONFIG_PCI_IOV
1089	/* disable SR-IOV for non MSI-X configurations */
1090	if (adapter->vf_data) {
1091		struct e1000_hw *hw = &adapter->hw;
1092		/* disable iov and allow time for transactions to clear */
1093		pci_disable_sriov(adapter->pdev);
1094		msleep(500);
1095
1096		kfree(adapter->vf_data);
1097		adapter->vf_data = NULL;
1098		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1099		wrfl();
1100		msleep(100);
1101		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1102	}
1103#endif
1104	adapter->vfs_allocated_count = 0;
1105	adapter->rss_queues = 1;
1106	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1107	adapter->num_rx_queues = 1;
1108	adapter->num_tx_queues = 1;
1109	adapter->num_q_vectors = 1;
1110	if (!pci_enable_msi(adapter->pdev))
1111		adapter->flags |= IGB_FLAG_HAS_MSI;
1112out:
1113	/* Notify the stack of the (possibly) reduced queue counts. */
1114	rtnl_lock();
1115	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1116	err = netif_set_real_num_rx_queues(adapter->netdev,
1117		adapter->num_rx_queues);
1118	rtnl_unlock();
1119	return err;
1120}
1121
1122/**
1123 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1124 * @adapter: board private structure to initialize
1125 *
1126 * We allocate one q_vector per queue interrupt.  If allocation fails we
1127 * return -ENOMEM.
1128 **/
1129static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1130{
1131	struct igb_q_vector *q_vector;
1132	struct e1000_hw *hw = &adapter->hw;
1133	int v_idx;
1134	int orig_node = adapter->node;
1135
1136	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1137		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1138						adapter->num_tx_queues)) &&
1139		    (adapter->num_rx_queues == v_idx))
1140			adapter->node = orig_node;
1141		if (orig_node == -1) {
1142			int cur_node = next_online_node(adapter->node);
1143			if (cur_node == MAX_NUMNODES)
1144				cur_node = first_online_node;
1145			adapter->node = cur_node;
1146		}
1147		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1148					adapter->node);
1149		if (!q_vector)
1150			q_vector = kzalloc(sizeof(struct igb_q_vector),
1151					   GFP_KERNEL);
1152		if (!q_vector)
1153			goto err_out;
1154		q_vector->adapter = adapter;
1155		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1156		q_vector->itr_val = IGB_START_ITR;
1157		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1158		adapter->q_vector[v_idx] = q_vector;
1159	}
1160	/* Restore the adapter's original node */
1161	adapter->node = orig_node;
1162
1163	return 0;
1164
1165err_out:
1166	/* Restore the adapter's original node */
1167	adapter->node = orig_node;
1168	igb_free_q_vectors(adapter);
1169	return -ENOMEM;
1170}
1171
1172static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1173                                      int ring_idx, int v_idx)
1174{
1175	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1176
1177	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1178	q_vector->rx.ring->q_vector = q_vector;
1179	q_vector->rx.count++;
1180	q_vector->itr_val = adapter->rx_itr_setting;
1181	if (q_vector->itr_val && q_vector->itr_val <= 3)
1182		q_vector->itr_val = IGB_START_ITR;
1183}
1184
1185static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1186                                      int ring_idx, int v_idx)
1187{
1188	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1189
1190	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1191	q_vector->tx.ring->q_vector = q_vector;
1192	q_vector->tx.count++;
1193	q_vector->itr_val = adapter->tx_itr_setting;
1194	q_vector->tx.work_limit = adapter->tx_work_limit;
1195	if (q_vector->itr_val && q_vector->itr_val <= 3)
1196		q_vector->itr_val = IGB_START_ITR;
1197}
1198
1199/**
1200 * igb_map_ring_to_vector - maps allocated queues to vectors
1201 *
1202 * This function maps the recently allocated queues to vectors.
1203 **/
1204static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1205{
1206	int i;
1207	int v_idx = 0;
1208
1209	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1210	    (adapter->num_q_vectors < adapter->num_tx_queues))
1211		return -ENOMEM;
1212
1213	if (adapter->num_q_vectors >=
1214	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1215		for (i = 0; i < adapter->num_rx_queues; i++)
1216			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1217		for (i = 0; i < adapter->num_tx_queues; i++)
1218			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1219	} else {
1220		for (i = 0; i < adapter->num_rx_queues; i++) {
1221			if (i < adapter->num_tx_queues)
1222				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1223			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1224		}
1225		for (; i < adapter->num_tx_queues; i++)
1226			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1227	}
1228	return 0;
1229}
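/* Illustration of the mapping above: with 4 Rx and 4 Tx queues and 8 q_vectors
 * every ring gets its own vector (Rx on vectors 0-3, Tx on 4-7); with only
 * 4 q_vectors the rings are paired so that vector i services both rx_ring[i]
 * and tx_ring[i].
 */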
1230
1231/**
1232 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1233 *
1234 * This function initializes the interrupts and allocates all of the queues.
1235 **/
1236static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1237{
1238	struct pci_dev *pdev = adapter->pdev;
1239	int err;
1240
1241	err = igb_set_interrupt_capability(adapter);
1242	if (err)
1243		return err;
1244
1245	err = igb_alloc_q_vectors(adapter);
1246	if (err) {
1247		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1248		goto err_alloc_q_vectors;
1249	}
1250
1251	err = igb_alloc_queues(adapter);
1252	if (err) {
1253		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1254		goto err_alloc_queues;
1255	}
1256
1257	err = igb_map_ring_to_vector(adapter);
1258	if (err) {
1259		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1260		goto err_map_queues;
1261	}
1262
1263
1264	return 0;
1265err_map_queues:
1266	igb_free_queues(adapter);
1267err_alloc_queues:
1268	igb_free_q_vectors(adapter);
1269err_alloc_q_vectors:
1270	igb_reset_interrupt_capability(adapter);
1271	return err;
1272}
1273
1274/**
1275 * igb_request_irq - initialize interrupts
1276 *
1277 * Attempts to configure interrupts using the best available
1278 * capabilities of the hardware and kernel.
1279 **/
1280static int igb_request_irq(struct igb_adapter *adapter)
1281{
1282	struct net_device *netdev = adapter->netdev;
1283	struct pci_dev *pdev = adapter->pdev;
1284	int err = 0;
1285
1286	if (adapter->msix_entries) {
1287		err = igb_request_msix(adapter);
1288		if (!err)
1289			goto request_done;
1290		/* fall back to MSI */
1291		igb_clear_interrupt_scheme(adapter);
1292		if (!pci_enable_msi(pdev))
1293			adapter->flags |= IGB_FLAG_HAS_MSI;
1294		igb_free_all_tx_resources(adapter);
1295		igb_free_all_rx_resources(adapter);
1296		adapter->num_tx_queues = 1;
1297		adapter->num_rx_queues = 1;
1298		adapter->num_q_vectors = 1;
1299		err = igb_alloc_q_vectors(adapter);
1300		if (err) {
1301			dev_err(&pdev->dev,
1302			        "Unable to allocate memory for vectors\n");
1303			goto request_done;
1304		}
1305		err = igb_alloc_queues(adapter);
1306		if (err) {
1307			dev_err(&pdev->dev,
1308			        "Unable to allocate memory for queues\n");
1309			igb_free_q_vectors(adapter);
1310			goto request_done;
1311		}
1312		igb_setup_all_tx_resources(adapter);
1313		igb_setup_all_rx_resources(adapter);
1314	}
1315
1316	igb_assign_vector(adapter->q_vector[0], 0);
1317
1318	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1319		err = request_irq(pdev->irq, igb_intr_msi, 0,
1320				  netdev->name, adapter);
1321		if (!err)
1322			goto request_done;
1323
1324		/* fall back to legacy interrupts */
1325		igb_reset_interrupt_capability(adapter);
1326		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1327	}
1328
1329	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1330			  netdev->name, adapter);
1331
1332	if (err)
1333		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1334			err);
1335
1336request_done:
1337	return err;
1338}
1339
1340static void igb_free_irq(struct igb_adapter *adapter)
1341{
1342	if (adapter->msix_entries) {
1343		int vector = 0, i;
1344
1345		free_irq(adapter->msix_entries[vector++].vector, adapter);
1346
1347		for (i = 0; i < adapter->num_q_vectors; i++)
1348			free_irq(adapter->msix_entries[vector++].vector,
1349				 adapter->q_vector[i]);
1350	} else {
1351		free_irq(adapter->pdev->irq, adapter);
1352	}
1353}
1354
1355/**
1356 * igb_irq_disable - Mask off interrupt generation on the NIC
1357 * @adapter: board private structure
1358 **/
1359static void igb_irq_disable(struct igb_adapter *adapter)
1360{
1361	struct e1000_hw *hw = &adapter->hw;
1362
1363	/*
1364	 * We need to be careful when disabling interrupts.  The VFs are also
1365	 * mapped into these registers, so clearing bits they rely on can cause
1366	 * issues for the VF drivers; only clear the bits we set ourselves.
1367	 */
1368	if (adapter->msix_entries) {
1369		u32 regval = rd32(E1000_EIAM);
1370		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1371		wr32(E1000_EIMC, adapter->eims_enable_mask);
1372		regval = rd32(E1000_EIAC);
1373		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1374	}
1375
1376	wr32(E1000_IAM, 0);
1377	wr32(E1000_IMC, ~0);
1378	wrfl();
1379	if (adapter->msix_entries) {
1380		int i;
1381		for (i = 0; i < adapter->num_q_vectors; i++)
1382			synchronize_irq(adapter->msix_entries[i].vector);
1383	} else {
1384		synchronize_irq(adapter->pdev->irq);
1385	}
1386}
1387
1388/**
1389 * igb_irq_enable - Enable default interrupt generation settings
1390 * @adapter: board private structure
1391 **/
1392static void igb_irq_enable(struct igb_adapter *adapter)
1393{
1394	struct e1000_hw *hw = &adapter->hw;
1395
1396	if (adapter->msix_entries) {
1397		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1398		u32 regval = rd32(E1000_EIAC);
1399		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1400		regval = rd32(E1000_EIAM);
1401		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1402		wr32(E1000_EIMS, adapter->eims_enable_mask);
1403		if (adapter->vfs_allocated_count) {
1404			wr32(E1000_MBVFIMR, 0xFF);
1405			ims |= E1000_IMS_VMMB;
1406		}
1407		wr32(E1000_IMS, ims);
1408	} else {
1409		wr32(E1000_IMS, IMS_ENABLE_MASK |
1410				E1000_IMS_DRSTA);
1411		wr32(E1000_IAM, IMS_ENABLE_MASK |
1412				E1000_IMS_DRSTA);
1413	}
1414}
1415
1416static void igb_update_mng_vlan(struct igb_adapter *adapter)
1417{
1418	struct e1000_hw *hw = &adapter->hw;
1419	u16 vid = adapter->hw.mng_cookie.vlan_id;
1420	u16 old_vid = adapter->mng_vlan_id;
1421
1422	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1423		/* add VID to filter table */
1424		igb_vfta_set(hw, vid, true);
1425		adapter->mng_vlan_id = vid;
1426	} else {
1427		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1428	}
1429
1430	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1431	    (vid != old_vid) &&
1432	    !test_bit(old_vid, adapter->active_vlans)) {
1433		/* remove VID from filter table */
1434		igb_vfta_set(hw, old_vid, false);
1435	}
1436}
1437
1438/**
1439 * igb_release_hw_control - release control of the h/w to f/w
1440 * @adapter: address of board private structure
1441 *
1442 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1443 * For ASF and Pass Through versions of f/w this means that the
1444 * driver is no longer loaded.
1445 *
1446 **/
1447static void igb_release_hw_control(struct igb_adapter *adapter)
1448{
1449	struct e1000_hw *hw = &adapter->hw;
1450	u32 ctrl_ext;
1451
1452	/* Let firmware take over control of h/w */
1453	ctrl_ext = rd32(E1000_CTRL_EXT);
1454	wr32(E1000_CTRL_EXT,
1455			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1456}
1457
1458/**
1459 * igb_get_hw_control - get control of the h/w from f/w
1460 * @adapter: address of board private structure
1461 *
1462 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1463 * For ASF and Pass Through versions of f/w this means that
1464 * the driver is loaded.
1465 *
1466 **/
1467static void igb_get_hw_control(struct igb_adapter *adapter)
1468{
1469	struct e1000_hw *hw = &adapter->hw;
1470	u32 ctrl_ext;
1471
1472	/* Let firmware know the driver has taken over */
1473	ctrl_ext = rd32(E1000_CTRL_EXT);
1474	wr32(E1000_CTRL_EXT,
1475			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1476}
1477
1478/**
1479 * igb_configure - configure the hardware for RX and TX
1480 * @adapter: private board structure
1481 **/
1482static void igb_configure(struct igb_adapter *adapter)
1483{
1484	struct net_device *netdev = adapter->netdev;
1485	int i;
1486
1487	igb_get_hw_control(adapter);
1488	igb_set_rx_mode(netdev);
1489
1490	igb_restore_vlan(adapter);
1491
1492	igb_setup_tctl(adapter);
1493	igb_setup_mrqc(adapter);
1494	igb_setup_rctl(adapter);
1495
1496	igb_configure_tx(adapter);
1497	igb_configure_rx(adapter);
1498
1499	igb_rx_fifo_flush_82575(&adapter->hw);
1500
1501	/* call igb_desc_unused which always leaves
1502	 * at least 1 descriptor unused to make sure
1503	 * next_to_use != next_to_clean */
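	/* Sketch, assuming the usual igb_desc_unused() helper in igb.h: on a
	 * freshly initialized 256-descriptor ring with next_to_use ==
	 * next_to_clean == 0 it returns 255, so at most count - 1 buffers are
	 * ever posted.
	 */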
1504	for (i = 0; i < adapter->num_rx_queues; i++) {
1505		struct igb_ring *ring = adapter->rx_ring[i];
1506		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1507	}
1508}
1509
1510/**
1511 * igb_power_up_link - Power up the phy/serdes link
1512 * @adapter: address of board private structure
1513 **/
1514void igb_power_up_link(struct igb_adapter *adapter)
1515{
1516	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517		igb_power_up_phy_copper(&adapter->hw);
1518	else
1519		igb_power_up_serdes_link_82575(&adapter->hw);
1520	igb_reset_phy(&adapter->hw);
1521}
1522
1523/**
1524 * igb_power_down_link - Power down the phy/serdes link
1525 * @adapter: address of board private structure
1526 */
1527static void igb_power_down_link(struct igb_adapter *adapter)
1528{
1529	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1530		igb_power_down_phy_copper_82575(&adapter->hw);
1531	else
1532		igb_shutdown_serdes_link_82575(&adapter->hw);
1533}
1534
1535/**
1536 * igb_up - Open the interface and prepare it to handle traffic
1537 * @adapter: board private structure
1538 **/
1539int igb_up(struct igb_adapter *adapter)
1540{
1541	struct e1000_hw *hw = &adapter->hw;
1542	int i;
1543
1544	/* hardware has been reset, we need to reload some things */
1545	igb_configure(adapter);
1546
1547	clear_bit(__IGB_DOWN, &adapter->state);
1548
1549	for (i = 0; i < adapter->num_q_vectors; i++)
1550		napi_enable(&(adapter->q_vector[i]->napi));
1551
1552	if (adapter->msix_entries)
1553		igb_configure_msix(adapter);
1554	else
1555		igb_assign_vector(adapter->q_vector[0], 0);
1556
1557	/* Clear any pending interrupts. */
1558	rd32(E1000_ICR);
1559	igb_irq_enable(adapter);
1560
1561	/* notify VFs that reset has been completed */
1562	if (adapter->vfs_allocated_count) {
1563		u32 reg_data = rd32(E1000_CTRL_EXT);
1564		reg_data |= E1000_CTRL_EXT_PFRSTD;
1565		wr32(E1000_CTRL_EXT, reg_data);
1566	}
1567
1568	netif_tx_start_all_queues(adapter->netdev);
1569
1570	/* start the watchdog. */
1571	hw->mac.get_link_status = 1;
1572	schedule_work(&adapter->watchdog_task);
1573
1574	return 0;
1575}
1576
1577void igb_down(struct igb_adapter *adapter)
1578{
1579	struct net_device *netdev = adapter->netdev;
1580	struct e1000_hw *hw = &adapter->hw;
1581	u32 tctl, rctl;
1582	int i;
1583
1584	/* signal that we're down so the interrupt handler does not
1585	 * reschedule our watchdog timer */
1586	set_bit(__IGB_DOWN, &adapter->state);
1587
1588	/* disable receives in the hardware */
1589	rctl = rd32(E1000_RCTL);
1590	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1591	/* flush and sleep below */
1592
1593	netif_tx_stop_all_queues(netdev);
1594
1595	/* disable transmits in the hardware */
1596	tctl = rd32(E1000_TCTL);
1597	tctl &= ~E1000_TCTL_EN;
1598	wr32(E1000_TCTL, tctl);
1599	/* flush both disables and wait for them to finish */
1600	wrfl();
1601	msleep(10);
1602
1603	for (i = 0; i < adapter->num_q_vectors; i++)
1604		napi_disable(&(adapter->q_vector[i]->napi));
1605
1606	igb_irq_disable(adapter);
1607
1608	del_timer_sync(&adapter->watchdog_timer);
1609	del_timer_sync(&adapter->phy_info_timer);
1610
1611	netif_carrier_off(netdev);
1612
1613	/* record the stats before reset */
1614	spin_lock(&adapter->stats64_lock);
1615	igb_update_stats(adapter, &adapter->stats64);
1616	spin_unlock(&adapter->stats64_lock);
1617
1618	adapter->link_speed = 0;
1619	adapter->link_duplex = 0;
1620
1621	if (!pci_channel_offline(adapter->pdev))
1622		igb_reset(adapter);
1623	igb_clean_all_tx_rings(adapter);
1624	igb_clean_all_rx_rings(adapter);
1625#ifdef CONFIG_IGB_DCA
1626
1627	/* since we reset the hardware DCA settings were cleared */
1628	igb_setup_dca(adapter);
1629#endif
1630}
1631
1632void igb_reinit_locked(struct igb_adapter *adapter)
1633{
1634	WARN_ON(in_interrupt());
1635	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1636		msleep(1);
1637	igb_down(adapter);
1638	igb_up(adapter);
1639	clear_bit(__IGB_RESETTING, &adapter->state);
1640}
1641
1642void igb_reset(struct igb_adapter *adapter)
1643{
1644	struct pci_dev *pdev = adapter->pdev;
1645	struct e1000_hw *hw = &adapter->hw;
1646	struct e1000_mac_info *mac = &hw->mac;
1647	struct e1000_fc_info *fc = &hw->fc;
1648	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1649	u16 hwm;
1650
1651	/* Repartition the PBA for MTUs greater than 9k.
1652	 * CTRL.RST is required for the change to take effect.
1653	 */
1654	switch (mac->type) {
1655	case e1000_i350:
1656	case e1000_82580:
1657		pba = rd32(E1000_RXPBS);
1658		pba = igb_rxpbs_adjust_82580(pba);
1659		break;
1660	case e1000_82576:
1661		pba = rd32(E1000_RXPBS);
1662		pba &= E1000_RXPBS_SIZE_MASK_82576;
1663		break;
1664	case e1000_82575:
1665	default:
1666		pba = E1000_PBA_34K;
1667		break;
1668	}
1669
1670	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1671	    (mac->type < e1000_82576)) {
1672		/* adjust PBA for jumbo frames */
1673		wr32(E1000_PBA, pba);
1674
1675		/* To maintain wire speed transmits, the Tx FIFO should be
1676		 * large enough to accommodate two full transmit packets,
1677		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1678		 * the Rx FIFO should be large enough to accommodate at least
1679		 * one full receive packet and is similarly rounded up and
1680		 * expressed in KB. */
1681		pba = rd32(E1000_PBA);
1682		/* upper 16 bits has Tx packet buffer allocation size in KB */
1683		tx_space = pba >> 16;
1684		/* lower 16 bits has Rx packet buffer allocation size in KB */
1685		pba &= 0xffff;
1686		/* the Tx FIFO also stores 16 bytes of descriptor information per
1687		 * packet, but doesn't include the Ethernet FCS, which hardware appends */
1688		min_tx_space = (adapter->max_frame_size +
1689				sizeof(union e1000_adv_tx_desc) -
1690				ETH_FCS_LEN) * 2;
1691		min_tx_space = ALIGN(min_tx_space, 1024);
1692		min_tx_space >>= 10;
1693		/* software strips receive CRC, so leave room for it */
1694		min_rx_space = adapter->max_frame_size;
1695		min_rx_space = ALIGN(min_rx_space, 1024);
1696		min_rx_space >>= 10;
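		/* Worked example (assuming a 9018-byte jumbo frame and the
		 * 16-byte advanced Tx descriptor): min_tx_space =
		 * (9018 + 16 - 4) * 2 = 18060 -> 18432 after alignment -> 18 KB,
		 * and min_rx_space = 9018 -> 9216 -> 9 KB.
		 */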
1697
1698		/* If current Tx allocation is less than the min Tx FIFO size,
1699		 * and the min Tx FIFO size is less than the current Rx FIFO
1700		 * allocation, take space away from current Rx allocation */
1701		if (tx_space < min_tx_space &&
1702		    ((min_tx_space - tx_space) < pba)) {
1703			pba = pba - (min_tx_space - tx_space);
1704
1705			/* if short on rx space, rx wins and must trump tx
1706			 * adjustment */
1707			if (pba < min_rx_space)
1708				pba = min_rx_space;
1709		}
1710		wr32(E1000_PBA, pba);
1711	}
1712
1713	/* flow control settings */
1714	/* The high water mark must be low enough to fit one full frame
1715	 * (or the size used for early receive) above it in the Rx FIFO.
1716	 * Set it to the lower of:
1717	 * - 90% of the Rx FIFO size, or
1718	 * - the full Rx FIFO size minus one full frame */
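	/* Worked example (assuming a 1500-byte MTU, i.e. max_frame_size of
	 * 1522 bytes, and pba = 34 KB): 90% of the FIFO is 31334 bytes and
	 * the FIFO minus two frames is 31772, so hwm = 31334; rounding to
	 * 16-byte granularity below gives high_water = 31328 and
	 * low_water = 31312.
	 */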
1719	hwm = min(((pba << 10) * 9 / 10),
1720			((pba << 10) - 2 * adapter->max_frame_size));
1721
1722	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1723	fc->low_water = fc->high_water - 16;
1724	fc->pause_time = 0xFFFF;
1725	fc->send_xon = 1;
1726	fc->current_mode = fc->requested_mode;
1727
1728	/* disable receive for all VFs and wait one second */
1729	if (adapter->vfs_allocated_count) {
1730		int i;
1731		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1732			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1733
1734		/* ping all the active vfs to let them know we are going down */
1735		igb_ping_all_vfs(adapter);
1736
1737		/* disable transmits and receives */
1738		wr32(E1000_VFRE, 0);
1739		wr32(E1000_VFTE, 0);
1740	}
1741
1742	/* Allow time for pending master requests to run */
1743	hw->mac.ops.reset_hw(hw);
1744	wr32(E1000_WUC, 0);
1745
1746	if (hw->mac.ops.init_hw(hw))
1747		dev_err(&pdev->dev, "Hardware Error\n");
1748
1749	igb_init_dmac(adapter, pba);
1750	if (!netif_running(adapter->netdev))
1751		igb_power_down_link(adapter);
1752
1753	igb_update_mng_vlan(adapter);
1754
1755	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1756	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1757
1758	igb_get_phy_info(hw);
1759}
1760
1761static netdev_features_t igb_fix_features(struct net_device *netdev,
1762	netdev_features_t features)
1763{
1764	/*
1765	 * Since there is no support for separate Rx/Tx VLAN accel
1766	 * enable/disable, make sure the Tx flag always matches the Rx flag.
1767	 */
1768	if (features & NETIF_F_HW_VLAN_RX)
1769		features |= NETIF_F_HW_VLAN_TX;
1770	else
1771		features &= ~NETIF_F_HW_VLAN_TX;
1772
1773	return features;
1774}
1775
1776static int igb_set_features(struct net_device *netdev,
1777	netdev_features_t features)
1778{
1779	netdev_features_t changed = netdev->features ^ features;
1780	struct igb_adapter *adapter = netdev_priv(netdev);
1781
1782	if (changed & NETIF_F_HW_VLAN_RX)
1783		igb_vlan_mode(netdev, features);
1784
1785	if (!(changed & NETIF_F_RXALL))
1786		return 0;
1787
1788	netdev->features = features;
1789
1790	if (netif_running(netdev))
1791		igb_reinit_locked(adapter);
1792	else
1793		igb_reset(adapter);
1794
1795	return 0;
1796}
1797
1798static const struct net_device_ops igb_netdev_ops = {
1799	.ndo_open		= igb_open,
1800	.ndo_stop		= igb_close,
1801	.ndo_start_xmit		= igb_xmit_frame,
1802	.ndo_get_stats64	= igb_get_stats64,
1803	.ndo_set_rx_mode	= igb_set_rx_mode,
1804	.ndo_set_mac_address	= igb_set_mac,
1805	.ndo_change_mtu		= igb_change_mtu,
1806	.ndo_do_ioctl		= igb_ioctl,
1807	.ndo_tx_timeout		= igb_tx_timeout,
1808	.ndo_validate_addr	= eth_validate_addr,
1809	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1810	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1811	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1812	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1813	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1814	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1815#ifdef CONFIG_NET_POLL_CONTROLLER
1816	.ndo_poll_controller	= igb_netpoll,
1817#endif
1818	.ndo_fix_features	= igb_fix_features,
1819	.ndo_set_features	= igb_set_features,
1820};
1821
1822/**
1823 * igb_probe - Device Initialization Routine
1824 * @pdev: PCI device information struct
1825 * @ent: entry in igb_pci_tbl
1826 *
1827 * Returns 0 on success, negative on failure
1828 *
1829 * igb_probe initializes an adapter identified by a pci_dev structure.
1830 * The OS initialization, configuring of the adapter private structure,
1831 * and a hardware reset occur.
1832 **/
1833static int __devinit igb_probe(struct pci_dev *pdev,
1834			       const struct pci_device_id *ent)
1835{
1836	struct net_device *netdev;
1837	struct igb_adapter *adapter;
1838	struct e1000_hw *hw;
1839	u16 eeprom_data = 0;
1840	s32 ret_val;
1841	static int global_quad_port_a; /* global quad port a indication */
1842	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1843	unsigned long mmio_start, mmio_len;
1844	int err, pci_using_dac;
1845	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1846	u8 part_str[E1000_PBANUM_LENGTH];
1847
1848	/* Catch broken hardware that put the wrong VF device ID in
1849	 * the PCIe SR-IOV capability.
1850	 */
1851	if (pdev->is_virtfn) {
1852		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1853		     pci_name(pdev), pdev->vendor, pdev->device);
1854		return -EINVAL;
1855	}
1856
1857	err = pci_enable_device_mem(pdev);
1858	if (err)
1859		return err;
1860
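	/* prefer 64-bit DMA; fall back to a 32-bit mask if that fails */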
1861	pci_using_dac = 0;
1862	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1863	if (!err) {
1864		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1865		if (!err)
1866			pci_using_dac = 1;
1867	} else {
1868		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1869		if (err) {
1870			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1871			if (err) {
1872				dev_err(&pdev->dev, "No usable DMA "
1873					"configuration, aborting\n");
1874				goto err_dma;
1875			}
1876		}
1877	}
1878
1879	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1880	                                   IORESOURCE_MEM),
1881	                                   igb_driver_name);
1882	if (err)
1883		goto err_pci_reg;
1884
1885	pci_enable_pcie_error_reporting(pdev);
1886
1887	pci_set_master(pdev);
1888	pci_save_state(pdev);
1889
1890	err = -ENOMEM;
1891	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1892				   IGB_MAX_TX_QUEUES);
1893	if (!netdev)
1894		goto err_alloc_etherdev;
1895
1896	SET_NETDEV_DEV(netdev, &pdev->dev);
1897
1898	pci_set_drvdata(pdev, netdev);
1899	adapter = netdev_priv(netdev);
1900	adapter->netdev = netdev;
1901	adapter->pdev = pdev;
1902	hw = &adapter->hw;
1903	hw->back = adapter;
1904	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1905
1906	mmio_start = pci_resource_start(pdev, 0);
1907	mmio_len = pci_resource_len(pdev, 0);
1908
1909	err = -EIO;
1910	hw->hw_addr = ioremap(mmio_start, mmio_len);
1911	if (!hw->hw_addr)
1912		goto err_ioremap;
1913
1914	netdev->netdev_ops = &igb_netdev_ops;
1915	igb_set_ethtool_ops(netdev);
1916	netdev->watchdog_timeo = 5 * HZ;
1917
1918	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1919
1920	netdev->mem_start = mmio_start;
1921	netdev->mem_end = mmio_start + mmio_len;
1922
1923	/* PCI config space info */
1924	hw->vendor_id = pdev->vendor;
1925	hw->device_id = pdev->device;
1926	hw->revision_id = pdev->revision;
1927	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1928	hw->subsystem_device_id = pdev->subsystem_device;
1929
1930	/* Copy the default MAC, PHY and NVM function pointers */
1931	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1932	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1933	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1934	/* Initialize skew-specific constants */
1935	err = ei->get_invariants(hw);
1936	if (err)
1937		goto err_sw_init;
1938
1939	/* setup the private structure */
1940	err = igb_sw_init(adapter);
1941	if (err)
1942		goto err_sw_init;
1943
1944	igb_get_bus_info_pcie(hw);
1945
1946	hw->phy.autoneg_wait_to_complete = false;
1947
1948	/* Copper options */
1949	if (hw->phy.media_type == e1000_media_type_copper) {
1950		hw->phy.mdix = AUTO_ALL_MODES;
1951		hw->phy.disable_polarity_correction = false;
1952		hw->phy.ms_type = e1000_ms_hw_default;
1953	}
1954
1955	if (igb_check_reset_block(hw))
1956		dev_info(&pdev->dev,
1957			"PHY reset is blocked due to SOL/IDER session.\n");
1958
1959	/*
1960	 * features is initialized to 0 at allocation, but it might have bits
1961	 * set by igb_sw_init, so we should use an OR instead of an
1962	 * assignment.
1963	 */
1964	netdev->features |= NETIF_F_SG |
1965			    NETIF_F_IP_CSUM |
1966			    NETIF_F_IPV6_CSUM |
1967			    NETIF_F_TSO |
1968			    NETIF_F_TSO6 |
1969			    NETIF_F_RXHASH |
1970			    NETIF_F_RXCSUM |
1971			    NETIF_F_HW_VLAN_RX |
1972			    NETIF_F_HW_VLAN_TX;
1973
1974	/* copy netdev features into list of user selectable features */
1975	netdev->hw_features |= netdev->features;
1976	netdev->hw_features |= NETIF_F_RXALL;
1977
1978	/* set this bit last since it cannot be part of hw_features */
1979	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1980
1981	netdev->vlan_features |= NETIF_F_TSO |
1982				 NETIF_F_TSO6 |
1983				 NETIF_F_IP_CSUM |
1984				 NETIF_F_IPV6_CSUM |
1985				 NETIF_F_SG;
1986
1987	netdev->priv_flags |= IFF_SUPP_NOFCS;
1988
1989	if (pci_using_dac) {
1990		netdev->features |= NETIF_F_HIGHDMA;
1991		netdev->vlan_features |= NETIF_F_HIGHDMA;
1992	}
1993
1994	if (hw->mac.type >= e1000_82576) {
1995		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1996		netdev->features |= NETIF_F_SCTP_CSUM;
1997	}
1998
1999	netdev->priv_flags |= IFF_UNICAST_FLT;
2000
2001	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2002
2003	/* before reading the NVM, reset the controller to put the device in a
2004	 * known good starting state */
2005	hw->mac.ops.reset_hw(hw);
2006
2007	/* make sure the NVM is good */
2008	if (hw->nvm.ops.validate(hw) < 0) {
2009		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2010		err = -EIO;
2011		goto err_eeprom;
2012	}
2013
2014	/* copy the MAC address out of the NVM */
2015	if (hw->mac.ops.read_mac_addr(hw))
2016		dev_err(&pdev->dev, "NVM Read Error\n");
2017
2018	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2019	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2020
2021	if (!is_valid_ether_addr(netdev->perm_addr)) {
2022		dev_err(&pdev->dev, "Invalid MAC Address\n");
2023		err = -EIO;
2024		goto err_eeprom;
2025	}
2026
2027	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2028	            (unsigned long) adapter);
2029	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2030	            (unsigned long) adapter);
2031
2032	INIT_WORK(&adapter->reset_task, igb_reset_task);
2033	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2034
2035	/* Initialize link properties that are user-changeable */
2036	adapter->fc_autoneg = true;
2037	hw->mac.autoneg = true;
2038	hw->phy.autoneg_advertised = 0x2f;
2039
2040	hw->fc.requested_mode = e1000_fc_default;
2041	hw->fc.current_mode = e1000_fc_default;
2042
2043	igb_validate_mdi_setting(hw);
2044
2045	/* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
2046	 * enable the ACPI Magic Packet filter
2047	 */
2048
2049	if (hw->bus.func == 0)
2050		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2051	else if (hw->mac.type >= e1000_82580)
2052		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2053		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2054		                 &eeprom_data);
2055	else if (hw->bus.func == 1)
2056		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2057
2058	if (eeprom_data & eeprom_apme_mask)
2059		adapter->eeprom_wol |= E1000_WUFC_MAG;
2060
2061	/* now that we have the eeprom settings, apply the special cases where
2062	 * the eeprom may be wrong or the board simply won't support wake on
2063	 * lan on a particular port */
2064	switch (pdev->device) {
2065	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2066		adapter->eeprom_wol = 0;
2067		break;
2068	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2069	case E1000_DEV_ID_82576_FIBER:
2070	case E1000_DEV_ID_82576_SERDES:
2071		/* Wake events are only supported on port A for dual fiber
2072		 * adapters, regardless of the EEPROM setting */
2073		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2074			adapter->eeprom_wol = 0;
2075		break;
2076	case E1000_DEV_ID_82576_QUAD_COPPER:
2077	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2078		/* if quad port adapter, disable WoL on all but port A */
2079		if (global_quad_port_a != 0)
2080			adapter->eeprom_wol = 0;
2081		else
2082			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2083		/* Reset for multiple quad port adapters */
2084		if (++global_quad_port_a == 4)
2085			global_quad_port_a = 0;
2086		break;
2087	}
2088
2089	/* initialize the wol settings based on the eeprom settings */
2090	adapter->wol = adapter->eeprom_wol;
2091	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2092
2093	/* reset the hardware with the new settings */
2094	igb_reset(adapter);
2095
2096	/* let the f/w know that the h/w is now under the control of the
2097	 * driver. */
2098	igb_get_hw_control(adapter);
2099
2100	strcpy(netdev->name, "eth%d");
2101	err = register_netdev(netdev);
2102	if (err)
2103		goto err_register;
2104
2105	/* carrier off reporting is important to ethtool even BEFORE open */
2106	netif_carrier_off(netdev);
2107
2108#ifdef CONFIG_IGB_DCA
2109	if (dca_add_requester(&pdev->dev) == 0) {
2110		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2111		dev_info(&pdev->dev, "DCA enabled\n");
2112		igb_setup_dca(adapter);
2113	}
2114
2115#endif
2116	/* do hw tstamp init after resetting */
2117	igb_init_hw_timer(adapter);
2118
2119	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2120	/* print bus type/speed/width info */
2121	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2122		 netdev->name,
2123		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2124		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2125		                                            "unknown"),
2126		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2127		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2128		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2129		   "unknown"),
2130		 netdev->dev_addr);
2131
2132	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2133	if (ret_val)
2134		strcpy(part_str, "Unknown");
2135	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2136	dev_info(&pdev->dev,
2137		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2138		adapter->msix_entries ? "MSI-X" :
2139		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2140		adapter->num_rx_queues, adapter->num_tx_queues);
2141	switch (hw->mac.type) {
2142	case e1000_i350:
2143		igb_set_eee_i350(hw);
2144		break;
2145	default:
2146		break;
2147	}
2148
2149	pm_runtime_put_noidle(&pdev->dev);
2150	return 0;
2151
2152err_register:
2153	igb_release_hw_control(adapter);
2154err_eeprom:
2155	if (!igb_check_reset_block(hw))
2156		igb_reset_phy(hw);
2157
2158	if (hw->flash_address)
2159		iounmap(hw->flash_address);
2160err_sw_init:
2161	igb_clear_interrupt_scheme(adapter);
2162	iounmap(hw->hw_addr);
2163err_ioremap:
2164	free_netdev(netdev);
2165err_alloc_etherdev:
2166	pci_release_selected_regions(pdev,
2167	                             pci_select_bars(pdev, IORESOURCE_MEM));
2168err_pci_reg:
2169err_dma:
2170	pci_disable_device(pdev);
2171	return err;
2172}
2173
2174/**
2175 * igb_remove - Device Removal Routine
2176 * @pdev: PCI device information struct
2177 *
2178 * igb_remove is called by the PCI subsystem to alert the driver
2179 * that it should release a PCI device.  This could be caused by a
2180 * Hot-Plug event, or because the driver is going to be removed from
2181 * memory.
2182 **/
2183static void __devexit igb_remove(struct pci_dev *pdev)
2184{
2185	struct net_device *netdev = pci_get_drvdata(pdev);
2186	struct igb_adapter *adapter = netdev_priv(netdev);
2187	struct e1000_hw *hw = &adapter->hw;
2188
2189	pm_runtime_get_noresume(&pdev->dev);
2190
2191	/*
2192	 * The watchdog timer may be rescheduled, so explicitly
2193	 * disable watchdog from being rescheduled.
2194	 */
2195	set_bit(__IGB_DOWN, &adapter->state);
2196	del_timer_sync(&adapter->watchdog_timer);
2197	del_timer_sync(&adapter->phy_info_timer);
2198
2199	cancel_work_sync(&adapter->reset_task);
2200	cancel_work_sync(&adapter->watchdog_task);
2201
2202#ifdef CONFIG_IGB_DCA
2203	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2204		dev_info(&pdev->dev, "DCA disabled\n");
2205		dca_remove_requester(&pdev->dev);
2206		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2207		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2208	}
2209#endif
2210
2211	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2212	 * would have already happened in close and is redundant. */
2213	igb_release_hw_control(adapter);
2214
2215	unregister_netdev(netdev);
2216
2217	igb_clear_interrupt_scheme(adapter);
2218
2219#ifdef CONFIG_PCI_IOV
2220	/* reclaim resources allocated to VFs */
2221	if (adapter->vf_data) {
2222		/* disable iov and allow time for transactions to clear */
2223		if (!igb_check_vf_assignment(adapter)) {
2224			pci_disable_sriov(pdev);
2225			msleep(500);
2226		} else {
2227			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2228		}
2229
2230		kfree(adapter->vf_data);
2231		adapter->vf_data = NULL;
2232		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2233		wrfl();
2234		msleep(100);
2235		dev_info(&pdev->dev, "IOV Disabled\n");
2236	}
2237#endif
2238
2239	iounmap(hw->hw_addr);
2240	if (hw->flash_address)
2241		iounmap(hw->flash_address);
2242	pci_release_selected_regions(pdev,
2243	                             pci_select_bars(pdev, IORESOURCE_MEM));
2244
2245	kfree(adapter->shadow_vfta);
2246	free_netdev(netdev);
2247
2248	pci_disable_pcie_error_reporting(pdev);
2249
2250	pci_disable_device(pdev);
2251}
2252
2253/**
2254 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2255 * @adapter: board private structure to initialize
2256 *
2257 * This function initializes the vf specific data storage and then attempts to
2258 * allocate the VFs.  The reason for ordering it this way is because it is much
2259 * allocate the VFs.  The reason for this ordering is that it is much more
2260 * expensive time-wise to disable SR-IOV than it is to allocate and free
2261 **/
2262static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2263{
2264#ifdef CONFIG_PCI_IOV
2265	struct pci_dev *pdev = adapter->pdev;
2266	int old_vfs = igb_find_enabled_vfs(adapter);
2267	int i;
2268
2269	if (old_vfs) {
2270		dev_info(&pdev->dev, "%d pre-allocated VFs found - overriding "
2271			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2272		adapter->vfs_allocated_count = old_vfs;
2273	}
2274
2275	if (!adapter->vfs_allocated_count)
2276		return;
2277
2278	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2279				sizeof(struct vf_data_storage), GFP_KERNEL);
2280	/* if allocation failed then we do not support SR-IOV */
2281	if (!adapter->vf_data) {
2282		adapter->vfs_allocated_count = 0;
2283		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2284			"Data Storage\n");
2285		goto out;
2286	}
2287
2288	if (!old_vfs) {
2289		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2290			goto err_out;
2291	}
2292	dev_info(&pdev->dev, "%d VFs allocated\n",
2293		 adapter->vfs_allocated_count);
2294	for (i = 0; i < adapter->vfs_allocated_count; i++)
2295		igb_vf_configure(adapter, i);
2296
2297	/* DMA Coalescing is not supported in IOV mode. */
2298	adapter->flags &= ~IGB_FLAG_DMAC;
2299	goto out;
2300err_out:
2301	kfree(adapter->vf_data);
2302	adapter->vf_data = NULL;
2303	adapter->vfs_allocated_count = 0;
2304out:
2305	return;
2306#endif /* CONFIG_PCI_IOV */
2307}
2308
2309/**
2310 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2311 * @adapter: board private structure to initialize
2312 *
2313 * igb_init_hw_timer initializes the function pointers and values for the
2314 * hardware timer.
2315 **/
2316static void igb_init_hw_timer(struct igb_adapter *adapter)
2317{
2318	struct e1000_hw *hw = &adapter->hw;
2319
2320	switch (hw->mac.type) {
2321	case e1000_i350:
2322	case e1000_82580:
2323		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2324		adapter->cycles.read = igb_read_clock;
2325		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2326		adapter->cycles.mult = 1;
2327		/*
2328		 * The 82580 timesync increments the system timer by 8ns every 8ns
2329		 * and the value cannot be shifted.  Instead we need to shift
2330		 * the registers to generate a 64bit timer value.  As a result
2331		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2332		 * 24 in order to generate a larger value for synchronization.
2333		 */
2334		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2335		/* disable system timer temporarily by setting bit 31 */
2336		wr32(E1000_TSAUXC, 0x80000000);
2337		wrfl();
2338
2339		/* Set registers so that rollover occurs soon to test this. */
2340		wr32(E1000_SYSTIMR, 0x00000000);
2341		wr32(E1000_SYSTIML, 0x80000000);
2342		wr32(E1000_SYSTIMH, 0x000000FF);
2343		wrfl();
2344
2345		/* enable system timer by clearing bit 31 */
2346		wr32(E1000_TSAUXC, 0x0);
2347		wrfl();
2348
2349		timecounter_init(&adapter->clock,
2350				 &adapter->cycles,
2351				 ktime_to_ns(ktime_get_real()));
2352		/*
2353		 * Synchronize our NIC clock against system wall clock. NIC
2354		 * time stamp reading requires ~3us per sample, and each sample
2355		 * was quite stable even under load, so only 10 samples are
2356		 * required for each offset comparison.
2357		 */
2358		memset(&adapter->compare, 0, sizeof(adapter->compare));
2359		adapter->compare.source = &adapter->clock;
2360		adapter->compare.target = ktime_get_real;
2361		adapter->compare.num_samples = 10;
2362		timecompare_update(&adapter->compare, 0);
2363		break;
2364	case e1000_82576:
2365		/*
2366		 * Initialize hardware timer: we keep it running just in case
2367		 * that some program needs it later on.
2368		 */
2369		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2370		adapter->cycles.read = igb_read_clock;
2371		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2372		adapter->cycles.mult = 1;
2373		/*
2374		 * Scale the NIC clock cycle by a large factor so that
2375		 * relatively small clock corrections can be added or
2376		 * subtracted at each clock tick. The drawbacks of a large
2377		 * factor are a) that the clock register overflows more quickly
2378		 * (not such a big deal) and b) that the increment per tick has
2379		 * to fit into 24 bits.  As a result we need to use a shift of
2380		 * 19 so we can fit a value of 16 into the TIMINCA register.
2381		 */
2382		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2383		wr32(E1000_TIMINCA,
2384		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2385		                (16 << IGB_82576_TSYNC_SHIFT));
2386
2387		/* Set registers so that rollover occurs soon to test this. */
2388		wr32(E1000_SYSTIML, 0x00000000);
2389		wr32(E1000_SYSTIMH, 0xFF800000);
2390		wrfl();
2391
2392		timecounter_init(&adapter->clock,
2393				 &adapter->cycles,
2394				 ktime_to_ns(ktime_get_real()));
2395		/*
2396		 * Synchronize our NIC clock against system wall clock. NIC
2397		 * time stamp reading requires ~3us per sample, and each sample
2398		 * was quite stable even under load, so only 10 samples are
2399		 * required for each offset comparison.
2400		 */
2401		memset(&adapter->compare, 0, sizeof(adapter->compare));
2402		adapter->compare.source = &adapter->clock;
2403		adapter->compare.target = ktime_get_real;
2404		adapter->compare.num_samples = 10;
2405		timecompare_update(&adapter->compare, 0);
2406		break;
2407	case e1000_82575:
2408		/* 82575 does not support timesync */
2409	default:
2410		break;
2411	}
2412
2413}
2414
2415/**
2416 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2417 * @adapter: board private structure to initialize
2418 *
2419 * igb_sw_init initializes the Adapter private data structure.
2420 * Fields are initialized based on PCI device information and
2421 * OS network device settings (MTU size).
2422 **/
2423static int __devinit igb_sw_init(struct igb_adapter *adapter)
2424{
2425	struct e1000_hw *hw = &adapter->hw;
2426	struct net_device *netdev = adapter->netdev;
2427	struct pci_dev *pdev = adapter->pdev;
2428
2429	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2430
2431	/* set default ring sizes */
2432	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2433	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2434
2435	/* set default ITR values */
2436	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2437	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2438
2439	/* set default work limits */
2440	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2441
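	/* largest frame = MTU + Ethernet header + FCS + one VLAN tag */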
2442	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2443				  VLAN_HLEN;
2444	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2445
2446	adapter->node = -1;
2447
2448	spin_lock_init(&adapter->stats64_lock);
2449#ifdef CONFIG_PCI_IOV
2450	switch (hw->mac.type) {
2451	case e1000_82576:
2452	case e1000_i350:
2453		if (max_vfs > 7) {
2454			dev_warn(&pdev->dev,
2455				 "Maximum of 7 VFs per PF, using max\n");
2456			adapter->vfs_allocated_count = 7;
2457		} else
2458			adapter->vfs_allocated_count = max_vfs;
2459		break;
2460	default:
2461		break;
2462	}
2463#endif /* CONFIG_PCI_IOV */
2464	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2465	/* i350 cannot do RSS and SR-IOV at the same time */
2466	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2467		adapter->rss_queues = 1;
2468
2469	/*
2470	 * if rss_queues > 4, or if more than 6 VFs are allocated while using
2471	 * more than one rss_queue, then we should pair up the queues in order
2472	 * to conserve the limited supply of interrupts
2473	 */
2474	if ((adapter->rss_queues > 4) ||
2475	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2476		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2477
2478	/* Setup and initialize a copy of the hw vlan table array */
2479	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2480				E1000_VLAN_FILTER_TBL_SIZE,
2481				GFP_ATOMIC);
2482
2483	/* This call may decrease the number of queues */
2484	if (igb_init_interrupt_scheme(adapter)) {
2485		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2486		return -ENOMEM;
2487	}
2488
2489	igb_probe_vfs(adapter);
2490
2491	/* Explicitly disable IRQ since the NIC can be in any state. */
2492	igb_irq_disable(adapter);
2493
2494	if (hw->mac.type == e1000_i350)
2495		adapter->flags &= ~IGB_FLAG_DMAC;
2496
2497	set_bit(__IGB_DOWN, &adapter->state);
2498	return 0;
2499}
2500
2501/**
2502 * igb_open - Called when a network interface is made active
2503 * @netdev: network interface device structure
2504 *
2505 * Returns 0 on success, negative value on failure
2506 *
2507 * The open entry point is called when a network interface is made
2508 * active by the system (IFF_UP).  At this point all resources needed
2509 * for transmit and receive operations are allocated, the interrupt
2510 * handler is registered with the OS, the watchdog timer is started,
2511 * and the stack is notified that the interface is ready.
2512 **/
2513static int __igb_open(struct net_device *netdev, bool resuming)
2514{
2515	struct igb_adapter *adapter = netdev_priv(netdev);
2516	struct e1000_hw *hw = &adapter->hw;
2517	struct pci_dev *pdev = adapter->pdev;
2518	int err;
2519	int i;
2520
2521	/* disallow open during test */
2522	if (test_bit(__IGB_TESTING, &adapter->state)) {
2523		WARN_ON(resuming);
2524		return -EBUSY;
2525	}
2526
2527	if (!resuming)
2528		pm_runtime_get_sync(&pdev->dev);
2529
2530	netif_carrier_off(netdev);
2531
2532	/* allocate transmit descriptors */
2533	err = igb_setup_all_tx_resources(adapter);
2534	if (err)
2535		goto err_setup_tx;
2536
2537	/* allocate receive descriptors */
2538	err = igb_setup_all_rx_resources(adapter);
2539	if (err)
2540		goto err_setup_rx;
2541
2542	igb_power_up_link(adapter);
2543
2544	/* before we allocate an interrupt, we must be ready to handle it.
2545	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2546	 * as soon as we request the IRQ, so we have to set up our
2547	 * clean_rx handler before we do so.  */
2548	igb_configure(adapter);
2549
2550	err = igb_request_irq(adapter);
2551	if (err)
2552		goto err_req_irq;
2553
2554	/* From here on the code is the same as igb_up() */
2555	clear_bit(__IGB_DOWN, &adapter->state);
2556
2557	for (i = 0; i < adapter->num_q_vectors; i++)
2558		napi_enable(&(adapter->q_vector[i]->napi));
2559
2560	/* Clear any pending interrupts. */
2561	rd32(E1000_ICR);
2562
2563	igb_irq_enable(adapter);
2564
2565	/* notify VFs that reset has been completed */
2566	if (adapter->vfs_allocated_count) {
2567		u32 reg_data = rd32(E1000_CTRL_EXT);
2568		reg_data |= E1000_CTRL_EXT_PFRSTD;
2569		wr32(E1000_CTRL_EXT, reg_data);
2570	}
2571
2572	netif_tx_start_all_queues(netdev);
2573
2574	if (!resuming)
2575		pm_runtime_put(&pdev->dev);
2576
2577	/* start the watchdog. */
2578	hw->mac.get_link_status = 1;
2579	schedule_work(&adapter->watchdog_task);
2580
2581	return 0;
2582
2583err_req_irq:
2584	igb_release_hw_control(adapter);
2585	igb_power_down_link(adapter);
2586	igb_free_all_rx_resources(adapter);
2587err_setup_rx:
2588	igb_free_all_tx_resources(adapter);
2589err_setup_tx:
2590	igb_reset(adapter);
2591	if (!resuming)
2592		pm_runtime_put(&pdev->dev);
2593
2594	return err;
2595}
2596
2597static int igb_open(struct net_device *netdev)
2598{
2599	return __igb_open(netdev, false);
2600}
2601
2602/**
2603 * igb_close - Disables a network interface
2604 * @netdev: network interface device structure
2605 *
2606 * Returns 0, this is not allowed to fail
2607 *
2608 * The close entry point is called when an interface is de-activated
2609 * by the OS.  The hardware is still under the driver's control, but
2610 * needs to be disabled.  A global MAC reset is issued to stop the
2611 * hardware, and all transmit and receive resources are freed.
2612 **/
2613static int __igb_close(struct net_device *netdev, bool suspending)
2614{
2615	struct igb_adapter *adapter = netdev_priv(netdev);
2616	struct pci_dev *pdev = adapter->pdev;
2617
2618	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2619
2620	if (!suspending)
2621		pm_runtime_get_sync(&pdev->dev);
2622
2623	igb_down(adapter);
2624	igb_free_irq(adapter);
2625
2626	igb_free_all_tx_resources(adapter);
2627	igb_free_all_rx_resources(adapter);
2628
2629	if (!suspending)
2630		pm_runtime_put_sync(&pdev->dev);
2631	return 0;
2632}
2633
2634static int igb_close(struct net_device *netdev)
2635{
2636	return __igb_close(netdev, false);
2637}
2638
2639/**
2640 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2641 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2642 *
2643 * Return 0 on success, negative on failure
2644 **/
2645int igb_setup_tx_resources(struct igb_ring *tx_ring)
2646{
2647	struct device *dev = tx_ring->dev;
2648	int orig_node = dev_to_node(dev);
2649	int size;
2650
2651	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
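	/* try the ring's preferred NUMA node first, then fall back to any node */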
2652	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2653	if (!tx_ring->tx_buffer_info)
2654		tx_ring->tx_buffer_info = vzalloc(size);
2655	if (!tx_ring->tx_buffer_info)
2656		goto err;
2657
2658	/* round up to nearest 4K */
2659	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2660	tx_ring->size = ALIGN(tx_ring->size, 4096);
2661
2662	set_dev_node(dev, tx_ring->numa_node);
2663	tx_ring->desc = dma_alloc_coherent(dev,
2664					   tx_ring->size,
2665					   &tx_ring->dma,
2666					   GFP_KERNEL);
2667	set_dev_node(dev, orig_node);
2668	if (!tx_ring->desc)
2669		tx_ring->desc = dma_alloc_coherent(dev,
2670						   tx_ring->size,
2671						   &tx_ring->dma,
2672						   GFP_KERNEL);
2673
2674	if (!tx_ring->desc)
2675		goto err;
2676
2677	tx_ring->next_to_use = 0;
2678	tx_ring->next_to_clean = 0;
2679
2680	return 0;
2681
2682err:
2683	vfree(tx_ring->tx_buffer_info);
2684	dev_err(dev,
2685		"Unable to allocate memory for the transmit descriptor ring\n");
2686	return -ENOMEM;
2687}
2688
2689/**
2690 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2691 *				  (Descriptors) for all queues
2692 * @adapter: board private structure
2693 *
2694 * Return 0 on success, negative on failure
2695 **/
2696static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2697{
2698	struct pci_dev *pdev = adapter->pdev;
2699	int i, err = 0;
2700
2701	for (i = 0; i < adapter->num_tx_queues; i++) {
2702		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2703		if (err) {
2704			dev_err(&pdev->dev,
2705				"Allocation for Tx Queue %u failed\n", i);
2706			for (i--; i >= 0; i--)
2707				igb_free_tx_resources(adapter->tx_ring[i]);
2708			break;
2709		}
2710	}
2711
2712	return err;
2713}
2714
2715/**
2716 * igb_setup_tctl - configure the transmit control registers
2717 * @adapter: Board private structure
2718 **/
2719void igb_setup_tctl(struct igb_adapter *adapter)
2720{
2721	struct e1000_hw *hw = &adapter->hw;
2722	u32 tctl;
2723
2724	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2725	wr32(E1000_TXDCTL(0), 0);
2726
2727	/* Program the Transmit Control Register */
2728	tctl = rd32(E1000_TCTL);
2729	tctl &= ~E1000_TCTL_CT;
2730	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2731		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2732
2733	igb_config_collision_dist(hw);
2734
2735	/* Enable transmits */
2736	tctl |= E1000_TCTL_EN;
2737
2738	wr32(E1000_TCTL, tctl);
2739}
2740
2741/**
2742 * igb_configure_tx_ring - Configure transmit ring after Reset
2743 * @adapter: board private structure
2744 * @ring: tx ring to configure
2745 *
2746 * Configure a transmit ring after a reset.
2747 **/
2748void igb_configure_tx_ring(struct igb_adapter *adapter,
2749                           struct igb_ring *ring)
2750{
2751	struct e1000_hw *hw = &adapter->hw;
2752	u32 txdctl = 0;
2753	u64 tdba = ring->dma;
2754	int reg_idx = ring->reg_idx;
2755
2756	/* disable the queue */
2757	wr32(E1000_TXDCTL(reg_idx), 0);
2758	wrfl();
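	/* brief delay to let the queue disable take effect before reprogramming */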
2759	mdelay(10);
2760
2761	wr32(E1000_TDLEN(reg_idx),
2762	                ring->count * sizeof(union e1000_adv_tx_desc));
2763	wr32(E1000_TDBAL(reg_idx),
2764	                tdba & 0x00000000ffffffffULL);
2765	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2766
2767	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2768	wr32(E1000_TDH(reg_idx), 0);
2769	writel(0, ring->tail);
2770
2771	txdctl |= IGB_TX_PTHRESH;
2772	txdctl |= IGB_TX_HTHRESH << 8;
2773	txdctl |= IGB_TX_WTHRESH << 16;
2774
2775	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2776	wr32(E1000_TXDCTL(reg_idx), txdctl);
2777}
2778
2779/**
2780 * igb_configure_tx - Configure transmit Unit after Reset
2781 * @adapter: board private structure
2782 *
2783 * Configure the Tx unit of the MAC after a reset.
2784 **/
2785static void igb_configure_tx(struct igb_adapter *adapter)
2786{
2787	int i;
2788
2789	for (i = 0; i < adapter->num_tx_queues; i++)
2790		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2791}
2792
2793/**
2794 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2795 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2796 *
2797 * Returns 0 on success, negative on failure
2798 **/
2799int igb_setup_rx_resources(struct igb_ring *rx_ring)
2800{
2801	struct device *dev = rx_ring->dev;
2802	int orig_node = dev_to_node(dev);
2803	int size, desc_len;
2804
2805	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
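	/* as with Tx, prefer the ring's NUMA node, then fall back to any node */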
2806	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2807	if (!rx_ring->rx_buffer_info)
2808		rx_ring->rx_buffer_info = vzalloc(size);
2809	if (!rx_ring->rx_buffer_info)
2810		goto err;
2811
2812	desc_len = sizeof(union e1000_adv_rx_desc);
2813
2814	/* Round up to nearest 4K */
2815	rx_ring->size = rx_ring->count * desc_len;
2816	rx_ring->size = ALIGN(rx_ring->size, 4096);
2817
2818	set_dev_node(dev, rx_ring->numa_node);
2819	rx_ring->desc = dma_alloc_coherent(dev,
2820					   rx_ring->size,
2821					   &rx_ring->dma,
2822					   GFP_KERNEL);
2823	set_dev_node(dev, orig_node);
2824	if (!rx_ring->desc)
2825		rx_ring->desc = dma_alloc_coherent(dev,
2826						   rx_ring->size,
2827						   &rx_ring->dma,
2828						   GFP_KERNEL);
2829
2830	if (!rx_ring->desc)
2831		goto err;
2832
2833	rx_ring->next_to_clean = 0;
2834	rx_ring->next_to_use = 0;
2835
2836	return 0;
2837
2838err:
2839	vfree(rx_ring->rx_buffer_info);
2840	rx_ring->rx_buffer_info = NULL;
2841	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2842		" ring\n");
2843	return -ENOMEM;
2844}
2845
2846/**
2847 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2848 *				  (Descriptors) for all queues
2849 * @adapter: board private structure
2850 *
2851 * Return 0 on success, negative on failure
2852 **/
2853static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2854{
2855	struct pci_dev *pdev = adapter->pdev;
2856	int i, err = 0;
2857
2858	for (i = 0; i < adapter->num_rx_queues; i++) {
2859		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2860		if (err) {
2861			dev_err(&pdev->dev,
2862				"Allocation for Rx Queue %u failed\n", i);
2863			for (i--; i >= 0; i--)
2864				igb_free_rx_resources(adapter->rx_ring[i]);
2865			break;
2866		}
2867	}
2868
2869	return err;
2870}
2871
2872/**
2873 * igb_setup_mrqc - configure the multiple receive queue control registers
2874 * @adapter: Board private structure
2875 **/
2876static void igb_setup_mrqc(struct igb_adapter *adapter)
2877{
2878	struct e1000_hw *hw = &adapter->hw;
2879	u32 mrqc, rxcsum;
2880	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2881	union e1000_reta {
2882		u32 dword;
2883		u8  bytes[4];
2884	} reta;
2885	static const u8 rsshash[40] = {
2886		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2887		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2888		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2889		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2890
2891	/* Fill out hash function seeds */
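	/* the 40-byte RSS key is packed LSB-first into ten 32-bit RSSRK registers */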
2892	for (j = 0; j < 10; j++) {
2893		u32 rsskey = rsshash[(j * 4)];
2894		rsskey |= rsshash[(j * 4) + 1] << 8;
2895		rsskey |= rsshash[(j * 4) + 2] << 16;
2896		rsskey |= rsshash[(j * 4) + 3] << 24;
2897		array_wr32(E1000_RSSRK(0), j, rsskey);
2898	}
2899
2900	num_rx_queues = adapter->rss_queues;
2901
2902	if (adapter->vfs_allocated_count) {
2903		/* 82575 and 82576 support 2 RSS queues for VMDq */
2904		switch (hw->mac.type) {
2905		case e1000_i350:
2906		case e1000_82580:
2907			num_rx_queues = 1;
2908			shift = 0;
2909			break;
2910		case e1000_82576:
2911			shift = 3;
2912			num_rx_queues = 2;
2913			break;
2914		case e1000_82575:
2915			shift = 2;
2916			shift2 = 6;
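			/* fall through */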
2917		default:
2918			break;
2919		}
2920	} else {
2921		if (hw->mac.type == e1000_82575)
2922			shift = 6;
2923	}
2924
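	/* program the 128-entry redirection table, four one-byte entries
	 * per RETA register, spreading flows round-robin across num_rx_queues */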
2925	for (j = 0; j < (32 * 4); j++) {
2926		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2927		if (shift2)
2928			reta.bytes[j & 3] |= num_rx_queues << shift2;
2929		if ((j & 3) == 3)
2930			wr32(E1000_RETA(j >> 2), reta.dword);
2931	}
2932
2933	/*
2934	 * Disable raw packet checksumming so that RSS hash is placed in
2935	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2936	 * offloads as they are enabled by default
2937	 */
2938	rxcsum = rd32(E1000_RXCSUM);
2939	rxcsum |= E1000_RXCSUM_PCSD;
2940
2941	if (adapter->hw.mac.type >= e1000_82576)
2942		/* Enable Receive Checksum Offload for SCTP */
2943		rxcsum |= E1000_RXCSUM_CRCOFL;
2944
2945	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2946	wr32(E1000_RXCSUM, rxcsum);
2947
2948	/* If VMDq is enabled then we set the appropriate mode for that, else
2949	 * we default to RSS so that an RSS hash is calculated per packet even
2950	 * if we are only using one queue */
2951	if (adapter->vfs_allocated_count) {
2952		if (hw->mac.type > e1000_82575) {
2953			/* Set the default pool for the PF's first queue */
2954			u32 vtctl = rd32(E1000_VT_CTL);
2955			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2956				   E1000_VT_CTL_DISABLE_DEF_POOL);
2957			vtctl |= adapter->vfs_allocated_count <<
2958				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2959			wr32(E1000_VT_CTL, vtctl);
2960		}
2961		if (adapter->rss_queues > 1)
2962			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2963		else
2964			mrqc = E1000_MRQC_ENABLE_VMDQ;
2965	} else {
2966		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2967	}
2968	igb_vmm_control(adapter);
2969
2970	/*
2971	 * Generate RSS hash based on TCP port numbers and/or
2972	 * IPv4/v6 src and dst addresses since UDP cannot be
2973	 * hashed reliably due to IP fragmentation
2974	 */
2975	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2976		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2977		E1000_MRQC_RSS_FIELD_IPV6 |
2978		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2979		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2980
2981	wr32(E1000_MRQC, mrqc);
2982}
2983
2984/**
2985 * igb_setup_rctl - configure the receive control registers
2986 * @adapter: Board private structure
2987 **/
2988void igb_setup_rctl(struct igb_adapter *adapter)
2989{
2990	struct e1000_hw *hw = &adapter->hw;
2991	u32 rctl;
2992
2993	rctl = rd32(E1000_RCTL);
2994
2995	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2996	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2997
2998	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2999		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3000
3001	/*
3002	 * enable stripping of CRC. It's unlikely this will break BMC
3003	 * redirection as it did with e1000. Newer features require
3004	 * that the HW strips the CRC.
3005	 */
3006	rctl |= E1000_RCTL_SECRC;
3007
3008	/* disable store bad packets and clear size bits. */
3009	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3010
3011	/* enable LPE to prevent packets larger than max_frame_size */
3012	rctl |= E1000_RCTL_LPE;
3013
3014	/* disable queue 0 to prevent tail write w/o re-config */
3015	wr32(E1000_RXDCTL(0), 0);
3016
3017	/* Attention!!!  For SR-IOV PF driver operations you must enable
3018	 * queue drop for all VF and PF queues to prevent head-of-line blocking
3019	 * if an untrusted VF does not provide descriptors to hardware.
3020	 */
3021	if (adapter->vfs_allocated_count) {
3022		/* set all queue drop enable bits */
3023		wr32(E1000_QDE, ALL_QUEUES);
3024	}
3025
3026	/* This is useful for sniffing bad packets. */
3027	if (adapter->netdev->features & NETIF_F_RXALL) {
3028		/* UPE and MPE will be handled by normal PROMISC logic
3029		 * in igb_set_rx_mode */
3030		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3031			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3032			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3033
3034		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3035			  E1000_RCTL_DPF | /* Allow filtered pause */
3036			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3037		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3038		 * and that breaks VLANs.
3039		 */
3040	}
3041
3042	wr32(E1000_RCTL, rctl);
3043}
3044
3045static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3046                                   int vfn)
3047{
3048	struct e1000_hw *hw = &adapter->hw;
3049	u32 vmolr;
3050
3051	/* if this is a VF (not the PF), check whether VLANs are enabled for
3052	 * it and, if so, increase the size to allow for VLAN tags */
3053	if (vfn < adapter->vfs_allocated_count &&
3054	    adapter->vf_data[vfn].vlans_enabled)
3055		size += VLAN_TAG_SIZE;
3056
3057	vmolr = rd32(E1000_VMOLR(vfn));
3058	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3059	vmolr |= size | E1000_VMOLR_LPE;
3060	wr32(E1000_VMOLR(vfn), vmolr);
3061
3062	return 0;
3063}
3064
3065/**
3066 * igb_rlpml_set - set maximum receive packet size
3067 * @adapter: board private structure
3068 *
3069 * Configure maximum receivable packet size.
3070 **/
3071static void igb_rlpml_set(struct igb_adapter *adapter)
3072{
3073	u32 max_frame_size = adapter->max_frame_size;
3074	struct e1000_hw *hw = &adapter->hw;
3075	u16 pf_id = adapter->vfs_allocated_count;
3076
3077	if (pf_id) {
3078		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3079		/*
3080		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3081		 * to our max jumbo frame size, in case we need to enable
3082		 * jumbo frames on one of the rings later.
3083		 * This will not pass over-length frames into the default
3084		 * queue because it's gated by the VMOLR.RLPML.
3085		 */
3086		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3087	}
3088
3089	wr32(E1000_RLPML, max_frame_size);
3090}
3091
3092static inline void igb_set_vmolr(struct igb_adapter *adapter,
3093				 int vfn, bool aupe)
3094{
3095	struct e1000_hw *hw = &adapter->hw;
3096	u32 vmolr;
3097
3098	/*
3099	 * This register exists only on 82576 and newer, so if the MAC is older
3100	 * we should exit and do nothing
3101	 */
3102	if (hw->mac.type < e1000_82576)
3103		return;
3104
3105	vmolr = rd32(E1000_VMOLR(vfn));
3106	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3107	if (aupe)
3108		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3109	else
3110		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3111
3112	/* clear all bits that might not be set */
3113	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3114
3115	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3116		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3117	/*
3118	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3119	 * multicast packets
3120	 */
3121	if (vfn <= adapter->vfs_allocated_count)
3122		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3123
3124	wr32(E1000_VMOLR(vfn), vmolr);
3125}
3126
3127/**
3128 * igb_configure_rx_ring - Configure a receive ring after Reset
3129 * @adapter: board private structure
3130 * @ring: receive ring to be configured
3131 *
3132 * Configure the Rx unit of the MAC after a reset.
3133 **/
3134void igb_configure_rx_ring(struct igb_adapter *adapter,
3135                           struct igb_ring *ring)
3136{
3137	struct e1000_hw *hw = &adapter->hw;
3138	u64 rdba = ring->dma;
3139	int reg_idx = ring->reg_idx;
3140	u32 srrctl = 0, rxdctl = 0;
3141
3142	/* disable the queue */
3143	wr32(E1000_RXDCTL(reg_idx), 0);
3144
3145	/* Set DMA base address registers */
3146	wr32(E1000_RDBAL(reg_idx),
3147	     rdba & 0x00000000ffffffffULL);
3148	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3149	wr32(E1000_RDLEN(reg_idx),
3150	               ring->count * sizeof(union e1000_adv_rx_desc));
3151
3152	/* initialize head and tail */
3153	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3154	wr32(E1000_RDH(reg_idx), 0);
3155	writel(0, ring->tail);
3156
3157	/* set descriptor configuration */
3158	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
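	/* packet buffer is half a page, capped at IGB_RXBUFFER_16384 */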
3159#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3160	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3161#else
3162	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3163#endif
3164	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3165	if (hw->mac.type >= e1000_82580)
3166		srrctl |= E1000_SRRCTL_TIMESTAMP;
3167	/* Only set Drop Enable if we are supporting multiple queues */
3168	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3169		srrctl |= E1000_SRRCTL_DROP_EN;
3170
3171	wr32(E1000_SRRCTL(reg_idx), srrctl);
3172
3173	/* set filtering for VMDQ pools */
3174	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3175
3176	rxdctl |= IGB_RX_PTHRESH;
3177	rxdctl |= IGB_RX_HTHRESH << 8;
3178	rxdctl |= IGB_RX_WTHRESH << 16;
3179
3180	/* enable receive descriptor fetching */
3181	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3182	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3183}
3184
3185/**
3186 * igb_configure_rx - Configure receive Unit after Reset
3187 * @adapter: board private structure
3188 *
3189 * Configure the Rx unit of the MAC after a reset.
3190 **/
3191static void igb_configure_rx(struct igb_adapter *adapter)
3192{
3193	int i;
3194
3195	/* set UTA to appropriate mode */
3196	igb_set_uta(adapter);
3197
3198	/* set the correct pool for the PF default MAC address in entry 0 */
3199	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3200	                 adapter->vfs_allocated_count);
3201
3202	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3203	 * the Base and Length of the Rx Descriptor Ring */
3204	for (i = 0; i < adapter->num_rx_queues; i++)
3205		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3206}
3207
3208/**
3209 * igb_free_tx_resources - Free Tx Resources per Queue
3210 * @tx_ring: Tx descriptor ring for a specific queue
3211 *
3212 * Free all transmit software resources
3213 **/
3214void igb_free_tx_resources(struct igb_ring *tx_ring)
3215{
3216	igb_clean_tx_ring(tx_ring);
3217
3218	vfree(tx_ring->tx_buffer_info);
3219	tx_ring->tx_buffer_info = NULL;
3220
3221	/* if not set, then don't free */
3222	if (!tx_ring->desc)
3223		return;
3224
3225	dma_free_coherent(tx_ring->dev, tx_ring->size,
3226			  tx_ring->desc, tx_ring->dma);
3227
3228	tx_ring->desc = NULL;
3229}
3230
3231/**
3232 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3233 * @adapter: board private structure
3234 *
3235 * Free all transmit software resources
3236 **/
3237static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3238{
3239	int i;
3240
3241	for (i = 0; i < adapter->num_tx_queues; i++)
3242		igb_free_tx_resources(adapter->tx_ring[i]);
3243}
3244
3245void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3246				    struct igb_tx_buffer *tx_buffer)
3247{
3248	if (tx_buffer->skb) {
3249		dev_kfree_skb_any(tx_buffer->skb);
3250		if (tx_buffer->dma)
3251			dma_unmap_single(ring->dev,
3252					 tx_buffer->dma,
3253					 tx_buffer->length,
3254					 DMA_TO_DEVICE);
3255	} else if (tx_buffer->dma) {
3256		dma_unmap_page(ring->dev,
3257			       tx_buffer->dma,
3258			       tx_buffer->length,
3259			       DMA_TO_DEVICE);
3260	}
3261	tx_buffer->next_to_watch = NULL;
3262	tx_buffer->skb = NULL;
3263	tx_buffer->dma = 0;
3264	/* buffer_info must be completely set up in the transmit path */
3265}
3266
3267/**
3268 * igb_clean_tx_ring - Free Tx Buffers
3269 * @tx_ring: ring to be cleaned
3270 **/
3271static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3272{
3273	struct igb_tx_buffer *buffer_info;
3274	unsigned long size;
3275	u16 i;
3276
3277	if (!tx_ring->tx_buffer_info)
3278		return;
3279	/* Free all the Tx ring sk_buffs */
3280
3281	for (i = 0; i < tx_ring->count; i++) {
3282		buffer_info = &tx_ring->tx_buffer_info[i];
3283		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3284	}
3285
3286	netdev_tx_reset_queue(txring_txq(tx_ring));
3287
3288	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3289	memset(tx_ring->tx_buffer_info, 0, size);
3290
3291	/* Zero out the descriptor ring */
3292	memset(tx_ring->desc, 0, tx_ring->size);
3293
3294	tx_ring->next_to_use = 0;
3295	tx_ring->next_to_clean = 0;
3296}
3297
3298/**
3299 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3300 * @adapter: board private structure
3301 **/
3302static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3303{
3304	int i;
3305
3306	for (i = 0; i < adapter->num_tx_queues; i++)
3307		igb_clean_tx_ring(adapter->tx_ring[i]);
3308}
3309
3310/**
3311 * igb_free_rx_resources - Free Rx Resources
3312 * @rx_ring: ring to clean the resources from
3313 *
3314 * Free all receive software resources
3315 **/
3316void igb_free_rx_resources(struct igb_ring *rx_ring)
3317{
3318	igb_clean_rx_ring(rx_ring);
3319
3320	vfree(rx_ring->rx_buffer_info);
3321	rx_ring->rx_buffer_info = NULL;
3322
3323	/* if not set, then don't free */
3324	if (!rx_ring->desc)
3325		return;
3326
3327	dma_free_coherent(rx_ring->dev, rx_ring->size,
3328			  rx_ring->desc, rx_ring->dma);
3329
3330	rx_ring->desc = NULL;
3331}
3332
3333/**
3334 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3335 * @adapter: board private structure
3336 *
3337 * Free all receive software resources
3338 **/
3339static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3340{
3341	int i;
3342
3343	for (i = 0; i < adapter->num_rx_queues; i++)
3344		igb_free_rx_resources(adapter->rx_ring[i]);
3345}
3346
3347/**
3348 * igb_clean_rx_ring - Free Rx Buffers per Queue
3349 * @rx_ring: ring to free buffers from
3350 **/
3351static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3352{
3353	unsigned long size;
3354	u16 i;
3355
3356	if (!rx_ring->rx_buffer_info)
3357		return;
3358
3359	/* Free all the Rx ring sk_buffs */
3360	for (i = 0; i < rx_ring->count; i++) {
3361		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3362		if (buffer_info->dma) {
3363			dma_unmap_single(rx_ring->dev,
3364			                 buffer_info->dma,
3365					 IGB_RX_HDR_LEN,
3366					 DMA_FROM_DEVICE);
3367			buffer_info->dma = 0;
3368		}
3369
3370		if (buffer_info->skb) {
3371			dev_kfree_skb(buffer_info->skb);
3372			buffer_info->skb = NULL;
3373		}
3374		if (buffer_info->page_dma) {
3375			dma_unmap_page(rx_ring->dev,
3376			               buffer_info->page_dma,
3377				       PAGE_SIZE / 2,
3378				       DMA_FROM_DEVICE);
3379			buffer_info->page_dma = 0;
3380		}
3381		if (buffer_info->page) {
3382			put_page(buffer_info->page);
3383			buffer_info->page = NULL;
3384			buffer_info->page_offset = 0;
3385		}
3386	}
3387
3388	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3389	memset(rx_ring->rx_buffer_info, 0, size);
3390
3391	/* Zero out the descriptor ring */
3392	memset(rx_ring->desc, 0, rx_ring->size);
3393
3394	rx_ring->next_to_clean = 0;
3395	rx_ring->next_to_use = 0;
3396}
3397
3398/**
3399 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3400 * @adapter: board private structure
3401 **/
3402static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3403{
3404	int i;
3405
3406	for (i = 0; i < adapter->num_rx_queues; i++)
3407		igb_clean_rx_ring(adapter->rx_ring[i]);
3408}
3409
3410/**
3411 * igb_set_mac - Change the Ethernet Address of the NIC
3412 * @netdev: network interface device structure
3413 * @p: pointer to an address structure
3414 *
3415 * Returns 0 on success, negative on failure
3416 **/
3417static int igb_set_mac(struct net_device *netdev, void *p)
3418{
3419	struct igb_adapter *adapter = netdev_priv(netdev);
3420	struct e1000_hw *hw = &adapter->hw;
3421	struct sockaddr *addr = p;
3422
3423	if (!is_valid_ether_addr(addr->sa_data))
3424		return -EADDRNOTAVAIL;
3425
3426	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3427	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3428
3429	/* set the correct pool for the new PF MAC address in entry 0 */
3430	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3431	                 adapter->vfs_allocated_count);
3432
3433	return 0;
3434}
3435
3436/**
3437 * igb_write_mc_addr_list - write multicast addresses to MTA
3438 * @netdev: network interface device structure
3439 *
3440 * Writes multicast address list to the MTA hash table.
3441 * Returns: -ENOMEM on failure
3442 *                0 on no addresses written
3443 *                X on writing X addresses to MTA
3444 **/
3445static int igb_write_mc_addr_list(struct net_device *netdev)
3446{
3447	struct igb_adapter *adapter = netdev_priv(netdev);
3448	struct e1000_hw *hw = &adapter->hw;
3449	struct netdev_hw_addr *ha;
3450	u8  *mta_list;
3451	int i;
3452
3453	if (netdev_mc_empty(netdev)) {
3454		/* nothing to program, so clear mc list */
3455		igb_update_mc_addr_list(hw, NULL, 0);
3456		igb_restore_vf_multicasts(adapter);
3457		return 0;
3458	}
3459
3460	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3461	if (!mta_list)
3462		return -ENOMEM;
3463
3464	/* The shared function expects a packed array of only addresses. */
3465	i = 0;
3466	netdev_for_each_mc_addr(ha, netdev)
3467		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3468
3469	igb_update_mc_addr_list(hw, mta_list, i);
3470	kfree(mta_list);
3471
3472	return netdev_mc_count(netdev);
3473}
3474
3475/**
3476 * igb_write_uc_addr_list - write unicast addresses to RAR table
3477 * @netdev: network interface device structure
3478 *
3479 * Writes unicast address list to the RAR table.
3480 * Returns: -ENOMEM on failure/insufficient address space
3481 *                0 on no addresses written
3482 *                X on writing X addresses to the RAR table
3483 **/
3484static int igb_write_uc_addr_list(struct net_device *netdev)
3485{
3486	struct igb_adapter *adapter = netdev_priv(netdev);
3487	struct e1000_hw *hw = &adapter->hw;
3488	unsigned int vfn = adapter->vfs_allocated_count;
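	/* reserve RAR entry 0 for the PF MAC plus one RAR entry per VF */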
3489	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3490	int count = 0;
3491
3492	/* return ENOMEM indicating insufficient memory for addresses */
3493	if (netdev_uc_count(netdev) > rar_entries)
3494		return -ENOMEM;
3495
3496	if (!netdev_uc_empty(netdev) && rar_entries) {
3497		struct netdev_hw_addr *ha;
3498
3499		netdev_for_each_uc_addr(ha, netdev) {
3500			if (!rar_entries)
3501				break;
3502			igb_rar_set_qsel(adapter, ha->addr,
3503			                 rar_entries--,
3504			                 vfn);
3505			count++;
3506		}
3507	}
3508	/* clear the remaining unused entries, in reverse order to avoid write combining */
3509	for (; rar_entries > 0 ; rar_entries--) {
3510		wr32(E1000_RAH(rar_entries), 0);
3511		wr32(E1000_RAL(rar_entries), 0);
3512	}
3513	wrfl();
3514
3515	return count;
3516}
3517
3518/**
3519 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3520 * @netdev: network interface device structure
3521 *
3522 * The set_rx_mode entry point is called whenever the unicast or multicast
3523 * address lists or the network interface flags are updated.  This routine is
3524 * responsible for configuring the hardware for proper unicast, multicast,
3525 * promiscuous mode, and all-multi behavior.
3526 **/
3527static void igb_set_rx_mode(struct net_device *netdev)
3528{
3529	struct igb_adapter *adapter = netdev_priv(netdev);
3530	struct e1000_hw *hw = &adapter->hw;
3531	unsigned int vfn = adapter->vfs_allocated_count;
3532	u32 rctl, vmolr = 0;
3533	int count;
3534
3535	/* Check for Promiscuous and All Multicast modes */
3536	rctl = rd32(E1000_RCTL);
3537
3538	/* clear the affected bits */
3539	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3540
3541	if (netdev->flags & IFF_PROMISC) {
3542		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3543		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3544	} else {
3545		if (netdev->flags & IFF_ALLMULTI) {
3546			rctl |= E1000_RCTL_MPE;
3547			vmolr |= E1000_VMOLR_MPME;
3548		} else {
3549			/*
3550			 * Write addresses to the MTA, if the attempt fails
3551			 * then we should just turn on promiscuous mode so
3552			 * that we can at least receive multicast traffic
3553			 */
3554			count = igb_write_mc_addr_list(netdev);
3555			if (count < 0) {
3556				rctl |= E1000_RCTL_MPE;
3557				vmolr |= E1000_VMOLR_MPME;
3558			} else if (count) {
3559				vmolr |= E1000_VMOLR_ROMPE;
3560			}
3561		}
3562		/*
3563		 * Write addresses to available RAR registers, if there is not
3564		 * sufficient space to store all the addresses then enable
3565		 * unicast promiscuous mode
3566		 */
3567		count = igb_write_uc_addr_list(netdev);
3568		if (count < 0) {
3569			rctl |= E1000_RCTL_UPE;
3570			vmolr |= E1000_VMOLR_ROPE;
3571		}
3572		rctl |= E1000_RCTL_VFE;
3573	}
3574	wr32(E1000_RCTL, rctl);
3575
3576	/*
3577	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3578	 * the VMOLR to enable the appropriate modes.  Without this workaround
3579	 * we will have issues with VLAN tag stripping not being done for frames
3580	 * that are only arriving because we are the default pool
3581	 */
3582	if (hw->mac.type < e1000_82576)
3583		return;
3584
3585	vmolr |= rd32(E1000_VMOLR(vfn)) &
3586	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3587	wr32(E1000_VMOLR(vfn), vmolr);
3588	igb_restore_vf_multicasts(adapter);
3589}
3590
3591static void igb_check_wvbr(struct igb_adapter *adapter)
3592{
3593	struct e1000_hw *hw = &adapter->hw;
3594	u32 wvbr = 0;
3595
3596	switch (hw->mac.type) {
3597	case e1000_82576:
3598	case e1000_i350:
3599		if (!(wvbr = rd32(E1000_WVBR)))
3600			return;
3601		break;
3602	default:
3603		break;
3604	}
3605
3606	adapter->wvbr |= wvbr;
3607}
3608
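/* WVBR seems to report spoof events per VF transmit queue, with the second
 * queue's bits offset by 8; igb_spoof_check() therefore tests bit j and
 * bit (j + IGB_STAGGERED_QUEUE_OFFSET) for each VF.
 */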
3609#define IGB_STAGGERED_QUEUE_OFFSET 8
3610
3611static void igb_spoof_check(struct igb_adapter *adapter)
3612{
3613	int j;
3614
3615	if (!adapter->wvbr)
3616		return;
3617
3618	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3619		if (adapter->wvbr & (1 << j) ||
3620		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3621			dev_warn(&adapter->pdev->dev,
3622				"Spoof event(s) detected on VF %d\n", j);
3623			adapter->wvbr &=
3624				~((1 << j) |
3625				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3626		}
3627	}
3628}
3629
3630/* Need to wait a few seconds after link up to get diagnostic information from
3631 * the phy */
3632static void igb_update_phy_info(unsigned long data)
3633{
3634	struct igb_adapter *adapter = (struct igb_adapter *) data;
3635	igb_get_phy_info(&adapter->hw);
3636}
3637
3638/**
3639 * igb_has_link - check shared code for link and determine up/down
3640 * @adapter: pointer to driver private info
3641 **/
3642bool igb_has_link(struct igb_adapter *adapter)
3643{
3644	struct e1000_hw *hw = &adapter->hw;
3645	bool link_active = false;
3646	s32 ret_val = 0;
3647
3648	/* get_link_status is set on LSC (link status) interrupt or
3649	 * rx sequence error interrupt.  get_link_status will remain
3650	 * set until e1000_check_for_link establishes link, for
3651	 * copper adapters ONLY
3652	 */
3653	switch (hw->phy.media_type) {
3654	case e1000_media_type_copper:
3655		if (hw->mac.get_link_status) {
3656			ret_val = hw->mac.ops.check_for_link(hw);
3657			link_active = !hw->mac.get_link_status;
3658		} else {
3659			link_active = true;
3660		}
3661		break;
3662	case e1000_media_type_internal_serdes:
3663		ret_val = hw->mac.ops.check_for_link(hw);
3664		link_active = hw->mac.serdes_has_link;
3665		break;
3666	default:
3667	case e1000_media_type_unknown:
3668		break;
3669	}
3670
3671	return link_active;
3672}
3673
3674static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3675{
3676	bool ret = false;
3677	u32 ctrl_ext, thstat;
3678
3679	/* check for thermal sensor event on i350, copper only */
3680	if (hw->mac.type == e1000_i350) {
3681		thstat = rd32(E1000_THSTAT);
3682		ctrl_ext = rd32(E1000_CTRL_EXT);
3683
3684		if ((hw->phy.media_type == e1000_media_type_copper) &&
3685		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3686			ret = !!(thstat & event);
3687		}
3688	}
3689
3690	return ret;
3691}
3692
3693/**
3694 * igb_watchdog - Timer Call-back
3695 * @data: pointer to adapter cast into an unsigned long
3696 **/
3697static void igb_watchdog(unsigned long data)
3698{
3699	struct igb_adapter *adapter = (struct igb_adapter *)data;
3700	/* Do the rest outside of interrupt context */
3701	schedule_work(&adapter->watchdog_task);
3702}
3703
3704static void igb_watchdog_task(struct work_struct *work)
3705{
3706	struct igb_adapter *adapter = container_of(work,
3707	                                           struct igb_adapter,
3708                                                   watchdog_task);
3709	struct e1000_hw *hw = &adapter->hw;
3710	struct net_device *netdev = adapter->netdev;
3711	u32 link;
3712	int i;
3713
3714	link = igb_has_link(adapter);
3715	if (link) {
3716		/* Cancel scheduled suspend requests. */
3717		pm_runtime_resume(netdev->dev.parent);
3718
3719		if (!netif_carrier_ok(netdev)) {
3720			u32 ctrl;
3721			hw->mac.ops.get_speed_and_duplex(hw,
3722			                                 &adapter->link_speed,
3723			                                 &adapter->link_duplex);
3724
3725			ctrl = rd32(E1000_CTRL);
3726			/* Link status message must follow this format */
3727			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3728			       "Duplex, Flow Control: %s\n",
3729			       netdev->name,
3730			       adapter->link_speed,
3731			       adapter->link_duplex == FULL_DUPLEX ?
3732			       "Full" : "Half",
3733			       (ctrl & E1000_CTRL_TFCE) &&
3734			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3735			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3736			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3737
3738			/* check for thermal sensor event */
3739			if (igb_thermal_sensor_event(hw,
3740			    E1000_THSTAT_LINK_THROTTLE)) {
3741				netdev_info(netdev, "The network adapter link "
3742					    "speed was downshifted because it "
3743					    "overheated\n");
3744			}
3745
3746			/* adjust timeout factor according to speed/duplex */
3747			adapter->tx_timeout_factor = 1;
3748			switch (adapter->link_speed) {
3749			case SPEED_10:
3750				adapter->tx_timeout_factor = 14;
3751				break;
3752			case SPEED_100:
3753				/* maybe add some timeout factor ? */
3754				break;
3755			}
3756
3757			netif_carrier_on(netdev);
3758
3759			igb_ping_all_vfs(adapter);
3760			igb_check_vf_rate_limit(adapter);
3761
3762			/* link state has changed, schedule phy info update */
3763			if (!test_bit(__IGB_DOWN, &adapter->state))
3764				mod_timer(&adapter->phy_info_timer,
3765					  round_jiffies(jiffies + 2 * HZ));
3766		}
3767	} else {
3768		if (netif_carrier_ok(netdev)) {
3769			adapter->link_speed = 0;
3770			adapter->link_duplex = 0;
3771
3772			/* check for thermal sensor event */
3773			if (igb_thermal_sensor_event(hw,
3774			    E1000_THSTAT_PWR_DOWN)) {
3775				netdev_err(netdev, "The network adapter was "
3776					   "stopped because it overheated\n");
3777			}
3778
3779			/* Link status message must follow this format */
3780			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3781			       netdev->name);
3782			netif_carrier_off(netdev);
3783
3784			igb_ping_all_vfs(adapter);
3785
3786			/* link state has changed, schedule phy info update */
3787			if (!test_bit(__IGB_DOWN, &adapter->state))
3788				mod_timer(&adapter->phy_info_timer,
3789					  round_jiffies(jiffies + 2 * HZ));
3790
3791			pm_schedule_suspend(netdev->dev.parent,
3792					    MSEC_PER_SEC * 5);
3793		}
3794	}
3795
3796	spin_lock(&adapter->stats64_lock);
3797	igb_update_stats(adapter, &adapter->stats64);
3798	spin_unlock(&adapter->stats64_lock);
3799
3800	for (i = 0; i < adapter->num_tx_queues; i++) {
3801		struct igb_ring *tx_ring = adapter->tx_ring[i];
3802		if (!netif_carrier_ok(netdev)) {
3803			/* We've lost link, so the controller stops DMA,
3804			 * but we've got queued Tx work that's never going
3805			 * to get done, so reset controller to flush Tx.
3806			 * (Do the reset outside of interrupt context). */
3807			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3808				adapter->tx_timeout_count++;
3809				schedule_work(&adapter->reset_task);
3810				/* return immediately since reset is imminent */
3811				return;
3812			}
3813		}
3814
3815		/* Force detection of hung controller every watchdog period */
3816		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3817	}
3818
3819	/* Cause software interrupt to ensure rx ring is cleaned */
3820	if (adapter->msix_entries) {
3821		u32 eics = 0;
3822		for (i = 0; i < adapter->num_q_vectors; i++)
3823			eics |= adapter->q_vector[i]->eims_value;
3824		wr32(E1000_EICS, eics);
3825	} else {
3826		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3827	}
3828
3829	igb_spoof_check(adapter);
3830
3831	/* Reset the timer */
3832	if (!test_bit(__IGB_DOWN, &adapter->state))
3833		mod_timer(&adapter->watchdog_timer,
3834			  round_jiffies(jiffies + 2 * HZ));
3835}
3836
3837enum latency_range {
3838	lowest_latency = 0,
3839	low_latency = 1,
3840	bulk_latency = 2,
3841	latency_invalid = 255
3842};
3843
3844/**
3845 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3846 *
3847 *      Stores a new ITR value based strictly on packet size.  This
3848 *      algorithm is less sophisticated than that used in igb_update_itr,
3849 *      due to the difficulty of synchronizing statistics across multiple
3850 *      receive rings.  The divisors and thresholds used by this function
3851 *      were determined based on theoretical maximum wire speed and testing
3852 *      data, in order to minimize response time while increasing bulk
3853 *      throughput.
3854 *      This functionality is controlled by the InterruptThrottleRate module
3855 *      parameter (see igb_param.c)
3856 *      NOTE:  This function is called only when operating in a multiqueue
3857 *             receive environment.
3858 * @q_vector: pointer to q_vector
3859 **/
3860static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3861{
3862	int new_val = q_vector->itr_val;
3863	int avg_wire_size = 0;
3864	struct igb_adapter *adapter = q_vector->adapter;
3865	unsigned int packets;
3866
3867	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3868	 * ints/sec (IGB_4K_ITR).
3869	 */
3870	if (adapter->link_speed != SPEED_1000) {
3871		new_val = IGB_4K_ITR;
3872		goto set_itr_val;
3873	}
3874
3875	packets = q_vector->rx.total_packets;
3876	if (packets)
3877		avg_wire_size = q_vector->rx.total_bytes / packets;
3878
3879	packets = q_vector->tx.total_packets;
3880	if (packets)
3881		avg_wire_size = max_t(u32, avg_wire_size,
3882				      q_vector->tx.total_bytes / packets);
3883
3884	/* if avg_wire_size isn't set no work was done */
3885	if (!avg_wire_size)
3886		goto clear_counts;
3887
3888	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3889	avg_wire_size += 24;
3890
3891	/* Don't starve jumbo frames */
3892	avg_wire_size = min(avg_wire_size, 3000);
3893
3894	/* Give a little boost to mid-size frames */
3895	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3896		new_val = avg_wire_size / 3;
3897	else
3898		new_val = avg_wire_size / 2;
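	/* e.g. an average frame of 600 bytes maps to (600 + 24) / 3 = 208,
	 * while 1500 bytes maps to 1524 / 2 = 762; a smaller value means a
	 * shorter EITR interval and therefore more interrupts per second.
	 */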
3899
3900	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3901	if (new_val < IGB_20K_ITR &&
3902	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3903	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3904		new_val = IGB_20K_ITR;
3905
3906set_itr_val:
3907	if (new_val != q_vector->itr_val) {
3908		q_vector->itr_val = new_val;
3909		q_vector->set_itr = 1;
3910	}
3911clear_counts:
3912	q_vector->rx.total_bytes = 0;
3913	q_vector->rx.total_packets = 0;
3914	q_vector->tx.total_bytes = 0;
3915	q_vector->tx.total_packets = 0;
3916}
3917
3918/**
3919 * igb_update_itr - update the dynamic ITR value based on statistics
3920 *      Stores a new ITR value based on packets and byte
3921 *      counts during the last interrupt.  The advantage of per interrupt
3922 *      computation is faster updates and more accurate ITR for the current
3923 *      traffic pattern.  Constants in this function were computed
3924 *      based on theoretical maximum wire speed and thresholds were set based
3925 *      on testing data as well as attempting to minimize response time
3926 *      while increasing bulk throughput.
3927 *      This functionality is controlled by the InterruptThrottleRate module
3928 *      parameter (see igb_param.c)
3929 *      NOTE:  These calculations are only valid when operating in a single-
3930 *             queue environment.
3931 * @q_vector: pointer to q_vector
3932 * @ring_container: ring info to update the itr for
3933 **/
3934static void igb_update_itr(struct igb_q_vector *q_vector,
3935			   struct igb_ring_container *ring_container)
3936{
3937	unsigned int packets = ring_container->total_packets;
3938	unsigned int bytes = ring_container->total_bytes;
3939	u8 itrval = ring_container->itr;
3940
3941	/* no packets, exit with status unchanged */
3942	if (packets == 0)
3943		return;
3944
3945	switch (itrval) {
3946	case lowest_latency:
3947		/* handle TSO and jumbo frames */
3948		if (bytes/packets > 8000)
3949			itrval = bulk_latency;
3950		else if ((packets < 5) && (bytes > 512))
3951			itrval = low_latency;
3952		break;
3953	case low_latency:  /* 50 usec aka 20000 ints/s */
3954		if (bytes > 10000) {
3955			/* this if handles the TSO accounting */
3956			if (bytes/packets > 8000) {
3957				itrval = bulk_latency;
3958			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3959				itrval = bulk_latency;
3960			} else if (packets > 35) {
3961				itrval = lowest_latency;
3962			}
3963		} else if (bytes/packets > 2000) {
3964			itrval = bulk_latency;
3965		} else if (packets <= 2 && bytes < 512) {
3966			itrval = lowest_latency;
3967		}
3968		break;
3969	case bulk_latency: /* 250 usec aka 4000 ints/s */
3970		if (bytes > 25000) {
3971			if (packets > 35)
3972				itrval = low_latency;
3973		} else if (bytes < 1500) {
3974			itrval = low_latency;
3975		}
3976		break;
3977	}
3978
3979	/* clear work counters since we have the values we need */
3980	ring_container->total_bytes = 0;
3981	ring_container->total_packets = 0;
3982
3983	/* write updated itr to ring container */
3984	ring_container->itr = itrval;
3985}
3986
3987static void igb_set_itr(struct igb_q_vector *q_vector)
3988{
3989	struct igb_adapter *adapter = q_vector->adapter;
3990	u32 new_itr = q_vector->itr_val;
3991	u8 current_itr = 0;
3992
3993	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3994	if (adapter->link_speed != SPEED_1000) {
3995		current_itr = 0;
3996		new_itr = IGB_4K_ITR;
3997		goto set_itr_now;
3998	}
3999
4000	igb_update_itr(q_vector, &q_vector->tx);
4001	igb_update_itr(q_vector, &q_vector->rx);
4002
4003	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4004
4005	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4006	if (current_itr == lowest_latency &&
4007	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4008	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4009		current_itr = low_latency;
4010
4011	switch (current_itr) {
4012	/* counts and packets in update_itr are dependent on these numbers */
4013	case lowest_latency:
4014		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4015		break;
4016	case low_latency:
4017		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4018		break;
4019	case bulk_latency:
4020		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4021		break;
4022	default:
4023		break;
4024	}
4025
4026set_itr_now:
4027	if (new_itr != q_vector->itr_val) {
4028		/* this attempts to bias the interrupt rate towards Bulk
4029		 * by adding intermediate steps when interrupt rate is
4030		 * increasing */
4031		new_itr = new_itr > q_vector->itr_val ?
4032		             max((new_itr * q_vector->itr_val) /
4033		                 (new_itr + (q_vector->itr_val >> 2)),
4034				 new_itr) :
4035			     new_itr;
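		/* e.g. stepping from an itr_val of 980 toward a new_itr of
		 * 196 gives max((196 * 980) / (196 + 245), 196) = 435 on the
		 * first pass, so the interrupt rate ramps up gradually.
		 */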
4036		/* Don't write the value here; it resets the adapter's
4037		 * internal timer, and causes us to delay far longer than
4038		 * we should between interrupts.  Instead, we write the ITR
4039		 * value at the beginning of the next interrupt so the timing
4040		 * ends up being correct.
4041		 */
4042		q_vector->itr_val = new_itr;
4043		q_vector->set_itr = 1;
4044	}
4045}
4046
4047static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4048			    u32 type_tucmd, u32 mss_l4len_idx)
4049{
4050	struct e1000_adv_tx_context_desc *context_desc;
4051	u16 i = tx_ring->next_to_use;
4052
4053	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4054
4055	i++;
4056	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4057
4058	/* set bits to identify this as an advanced context descriptor */
4059	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4060
4061	/* For 82575, context index must be unique per ring. */
4062	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4063		mss_l4len_idx |= tx_ring->reg_idx << 4;
4064
4065	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4066	context_desc->seqnum_seed	= 0;
4067	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4068	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4069}
4070
4071static int igb_tso(struct igb_ring *tx_ring,
4072		   struct igb_tx_buffer *first,
4073		   u8 *hdr_len)
4074{
4075	struct sk_buff *skb = first->skb;
4076	u32 vlan_macip_lens, type_tucmd;
4077	u32 mss_l4len_idx, l4len;
4078
4079	if (!skb_is_gso(skb))
4080		return 0;
4081
4082	if (skb_header_cloned(skb)) {
4083		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4084		if (err)
4085			return err;
4086	}
4087
4088	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4089	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4090
4091	if (first->protocol == __constant_htons(ETH_P_IP)) {
4092		struct iphdr *iph = ip_hdr(skb);
4093		iph->tot_len = 0;
4094		iph->check = 0;
4095		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4096							 iph->daddr, 0,
4097							 IPPROTO_TCP,
4098							 0);
4099		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4100		first->tx_flags |= IGB_TX_FLAGS_TSO |
4101				   IGB_TX_FLAGS_CSUM |
4102				   IGB_TX_FLAGS_IPV4;
4103	} else if (skb_is_gso_v6(skb)) {
4104		ipv6_hdr(skb)->payload_len = 0;
4105		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4106						       &ipv6_hdr(skb)->daddr,
4107						       0, IPPROTO_TCP, 0);
4108		first->tx_flags |= IGB_TX_FLAGS_TSO |
4109				   IGB_TX_FLAGS_CSUM;
4110	}
4111
4112	/* compute header lengths */
4113	l4len = tcp_hdrlen(skb);
4114	*hdr_len = skb_transport_offset(skb) + l4len;
4115
4116	/* update gso size and bytecount with header size */
4117	first->gso_segs = skb_shinfo(skb)->gso_segs;
4118	first->bytecount += (first->gso_segs - 1) * *hdr_len;
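	/* each additional TSO segment repeats the protocol headers on the
	 * wire, so bytecount grows by hdr_len for every segment after the
	 * first
	 */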
4119
4120	/* MSS L4LEN IDX */
4121	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4122	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4123
4124	/* VLAN MACLEN IPLEN */
4125	vlan_macip_lens = skb_network_header_len(skb);
4126	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4127	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4128
4129	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4130
4131	return 1;
4132}
4133
4134static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4135{
4136	struct sk_buff *skb = first->skb;
4137	u32 vlan_macip_lens = 0;
4138	u32 mss_l4len_idx = 0;
4139	u32 type_tucmd = 0;
4140
4141	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4142		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4143			return;
4144	} else {
4145		u8 l4_hdr = 0;
4146		switch (first->protocol) {
4147		case __constant_htons(ETH_P_IP):
4148			vlan_macip_lens |= skb_network_header_len(skb);
4149			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4150			l4_hdr = ip_hdr(skb)->protocol;
4151			break;
4152		case __constant_htons(ETH_P_IPV6):
4153			vlan_macip_lens |= skb_network_header_len(skb);
4154			l4_hdr = ipv6_hdr(skb)->nexthdr;
4155			break;
4156		default:
4157			if (unlikely(net_ratelimit())) {
4158				dev_warn(tx_ring->dev,
4159				 "partial checksum but proto=%x!\n",
4160				 first->protocol);
4161			}
4162			break;
4163		}
4164
4165		switch (l4_hdr) {
4166		case IPPROTO_TCP:
4167			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4168			mss_l4len_idx = tcp_hdrlen(skb) <<
4169					E1000_ADVTXD_L4LEN_SHIFT;
4170			break;
4171		case IPPROTO_SCTP:
4172			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4173			mss_l4len_idx = sizeof(struct sctphdr) <<
4174					E1000_ADVTXD_L4LEN_SHIFT;
4175			break;
4176		case IPPROTO_UDP:
4177			mss_l4len_idx = sizeof(struct udphdr) <<
4178					E1000_ADVTXD_L4LEN_SHIFT;
4179			break;
4180		default:
4181			if (unlikely(net_ratelimit())) {
4182				dev_warn(tx_ring->dev,
4183				 "partial checksum but l4 proto=%x!\n",
4184				 l4_hdr);
4185			}
4186			break;
4187		}
4188
4189		/* update TX checksum flag */
4190		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4191	}
4192
4193	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4194	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4195
4196	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4197}
4198
4199static __le32 igb_tx_cmd_type(u32 tx_flags)
4200{
4201	/* set type for advanced descriptor with frame checksum insertion */
4202	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4203				      E1000_ADVTXD_DCMD_IFCS |
4204				      E1000_ADVTXD_DCMD_DEXT);
4205
4206	/* set HW vlan bit if vlan is present */
4207	if (tx_flags & IGB_TX_FLAGS_VLAN)
4208		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4209
4210	/* set timestamp bit if present */
4211	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4212		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4213
4214	/* set segmentation bits for TSO */
4215	if (tx_flags & IGB_TX_FLAGS_TSO)
4216		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4217
4218	return cmd_type;
4219}
4220
4221static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4222				 union e1000_adv_tx_desc *tx_desc,
4223				 u32 tx_flags, unsigned int paylen)
4224{
4225	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4226
4227	/* 82575 requires a unique index per ring if any offload is enabled */
4228	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4229	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4230		olinfo_status |= tx_ring->reg_idx << 4;
4231
4232	/* insert L4 checksum */
4233	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4234		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4235
4236		/* insert IPv4 checksum */
4237		if (tx_flags & IGB_TX_FLAGS_IPV4)
4238			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4239	}
4240
4241	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4242}
4243
4244/*
4245 * The largest size we can write to the descriptor is 65535.  In order to
4246 * maintain a power of two alignment we have to limit ourselves to 32K.
4247 */
4248#define IGB_MAX_TXD_PWR	15
4249#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
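/* e.g. a 45K fragment is handed to hardware as a 32K descriptor followed by
 * a 13K descriptor by the mapping loop in igb_tx_map() below
 */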
4250
4251static void igb_tx_map(struct igb_ring *tx_ring,
4252		       struct igb_tx_buffer *first,
4253		       const u8 hdr_len)
4254{
4255	struct sk_buff *skb = first->skb;
4256	struct igb_tx_buffer *tx_buffer_info;
4257	union e1000_adv_tx_desc *tx_desc;
4258	dma_addr_t dma;
4259	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4260	unsigned int data_len = skb->data_len;
4261	unsigned int size = skb_headlen(skb);
4262	unsigned int paylen = skb->len - hdr_len;
4263	__le32 cmd_type;
4264	u32 tx_flags = first->tx_flags;
4265	u16 i = tx_ring->next_to_use;
4266
4267	tx_desc = IGB_TX_DESC(tx_ring, i);
4268
4269	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4270	cmd_type = igb_tx_cmd_type(tx_flags);
4271
4272	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4273	if (dma_mapping_error(tx_ring->dev, dma))
4274		goto dma_error;
4275
4276	/* record length, and DMA address */
4277	first->length = size;
4278	first->dma = dma;
4279	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4280
4281	for (;;) {
4282		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4283			tx_desc->read.cmd_type_len =
4284				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4285
4286			i++;
4287			tx_desc++;
4288			if (i == tx_ring->count) {
4289				tx_desc = IGB_TX_DESC(tx_ring, 0);
4290				i = 0;
4291			}
4292
4293			dma += IGB_MAX_DATA_PER_TXD;
4294			size -= IGB_MAX_DATA_PER_TXD;
4295
4296			tx_desc->read.olinfo_status = 0;
4297			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4298		}
4299
4300		if (likely(!data_len))
4301			break;
4302
4303		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4304
4305		i++;
4306		tx_desc++;
4307		if (i == tx_ring->count) {
4308			tx_desc = IGB_TX_DESC(tx_ring, 0);
4309			i = 0;
4310		}
4311
4312		size = skb_frag_size(frag);
4313		data_len -= size;
4314
4315		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4316				   size, DMA_TO_DEVICE);
4317		if (dma_mapping_error(tx_ring->dev, dma))
4318			goto dma_error;
4319
4320		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4321		tx_buffer_info->length = size;
4322		tx_buffer_info->dma = dma;
4323
4324		tx_desc->read.olinfo_status = 0;
4325		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4326
4327		frag++;
4328	}
4329
4330	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4331
4332	/* write last descriptor with RS and EOP bits */
4333	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4334	if (unlikely(skb->no_fcs))
4335		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4336	tx_desc->read.cmd_type_len = cmd_type;
4337
4338	/* set the timestamp */
4339	first->time_stamp = jiffies;
4340
4341	/*
4342	 * Force memory writes to complete before letting h/w know there
4343	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4344	 * memory model archs, such as IA-64).
4345	 *
4346	 * We also need this memory barrier to make certain all of the
4347	 * status bits have been updated before next_to_watch is written.
4348	 */
4349	wmb();
4350
4351	/* set next_to_watch value indicating a packet is present */
4352	first->next_to_watch = tx_desc;
4353
4354	i++;
4355	if (i == tx_ring->count)
4356		i = 0;
4357
4358	tx_ring->next_to_use = i;
4359
4360	writel(i, tx_ring->tail);
4361
4362	/* we need this if more than one processor can write to our tail
4363	 * at a time, it synchronizes IO on IA64/Altix systems */
4364	mmiowb();
4365
4366	return;
4367
4368dma_error:
4369	dev_err(tx_ring->dev, "TX DMA map failed\n");
4370
4371	/* clear dma mappings for failed tx_buffer_info map */
4372	for (;;) {
4373		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4374		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4375		if (tx_buffer_info == first)
4376			break;
4377		if (i == 0)
4378			i = tx_ring->count;
4379		i--;
4380	}
4381
4382	tx_ring->next_to_use = i;
4383}
4384
4385static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4386{
4387	struct net_device *netdev = tx_ring->netdev;
4388
4389	netif_stop_subqueue(netdev, tx_ring->queue_index);
4390
4391	/* Herbert's original patch had:
4392	 *  smp_mb__after_netif_stop_queue();
4393	 * but since that doesn't exist yet, just open code it. */
4394	smp_mb();
4395
4396	/* We need to check again in case another CPU has just
4397	 * made room available. */
4398	if (igb_desc_unused(tx_ring) < size)
4399		return -EBUSY;
4400
4401	/* A reprieve! */
4402	netif_wake_subqueue(netdev, tx_ring->queue_index);
4403
4404	u64_stats_update_begin(&tx_ring->tx_syncp2);
4405	tx_ring->tx_stats.restart_queue2++;
4406	u64_stats_update_end(&tx_ring->tx_syncp2);
4407
4408	return 0;
4409}
4410
4411static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4412{
4413	if (igb_desc_unused(tx_ring) >= size)
4414		return 0;
4415	return __igb_maybe_stop_tx(tx_ring, size);
4416}
4417
4418netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4419				struct igb_ring *tx_ring)
4420{
4421	struct igb_tx_buffer *first;
4422	int tso;
4423	u32 tx_flags = 0;
4424	__be16 protocol = vlan_get_protocol(skb);
4425	u8 hdr_len = 0;
4426
4427	/* need: 1 descriptor per page,
4428	 *       + 2 desc gap to keep tail from touching head,
4429	 *       + 1 desc for skb->data,
4430	 *       + 1 desc for context descriptor,
4431	 * otherwise try next time */
4432	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4433		/* this is a hard error */
4434		return NETDEV_TX_BUSY;
4435	}
4436
4437	/* record the location of the first descriptor for this packet */
4438	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4439	first->skb = skb;
4440	first->bytecount = skb->len;
4441	first->gso_segs = 1;
4442
4443	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4444		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4445		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4446	}
4447
4448	if (vlan_tx_tag_present(skb)) {
4449		tx_flags |= IGB_TX_FLAGS_VLAN;
4450		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4451	}
4452
4453	/* record initial flags and protocol */
4454	first->tx_flags = tx_flags;
4455	first->protocol = protocol;
4456
4457	tso = igb_tso(tx_ring, first, &hdr_len);
4458	if (tso < 0)
4459		goto out_drop;
4460	else if (!tso)
4461		igb_tx_csum(tx_ring, first);
4462
4463	igb_tx_map(tx_ring, first, hdr_len);
4464
4465	/* Make sure there is space in the ring for the next send. */
4466	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4467
4468	return NETDEV_TX_OK;
4469
4470out_drop:
4471	igb_unmap_and_free_tx_resource(tx_ring, first);
4472
4473	return NETDEV_TX_OK;
4474}
4475
4476static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4477						    struct sk_buff *skb)
4478{
4479	unsigned int r_idx = skb->queue_mapping;
4480
4481	if (r_idx >= adapter->num_tx_queues)
4482		r_idx = r_idx % adapter->num_tx_queues;
4483
4484	return adapter->tx_ring[r_idx];
4485}
4486
4487static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4488				  struct net_device *netdev)
4489{
4490	struct igb_adapter *adapter = netdev_priv(netdev);
4491
4492	if (test_bit(__IGB_DOWN, &adapter->state)) {
4493		dev_kfree_skb_any(skb);
4494		return NETDEV_TX_OK;
4495	}
4496
4497	if (skb->len <= 0) {
4498		dev_kfree_skb_any(skb);
4499		return NETDEV_TX_OK;
4500	}
4501
4502	/*
4503	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4504	 * in order to meet this minimum size requirement.
4505	 */
4506	if (skb->len < 17) {
4507		if (skb_padto(skb, 17))
4508			return NETDEV_TX_OK;
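		/* skb_padto() zero-pads the data but does not update
		 * skb->len, so bump the length by hand */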
4509		skb->len = 17;
4510	}
4511
4512	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4513}
4514
4515/**
4516 * igb_tx_timeout - Respond to a Tx Hang
4517 * @netdev: network interface device structure
4518 **/
4519static void igb_tx_timeout(struct net_device *netdev)
4520{
4521	struct igb_adapter *adapter = netdev_priv(netdev);
4522	struct e1000_hw *hw = &adapter->hw;
4523
4524	/* Do the reset outside of interrupt context */
4525	adapter->tx_timeout_count++;
4526
4527	if (hw->mac.type >= e1000_82580)
4528		hw->dev_spec._82575.global_device_reset = true;
4529
4530	schedule_work(&adapter->reset_task);
4531	wr32(E1000_EICS,
4532	     (adapter->eims_enable_mask & ~adapter->eims_other));
4533}
4534
4535static void igb_reset_task(struct work_struct *work)
4536{
4537	struct igb_adapter *adapter;
4538	adapter = container_of(work, struct igb_adapter, reset_task);
4539
4540	igb_dump(adapter);
4541	netdev_err(adapter->netdev, "Reset adapter\n");
4542	igb_reinit_locked(adapter);
4543}
4544
4545/**
4546 * igb_get_stats64 - Get System Network Statistics
4547 * @netdev: network interface device structure
4548 * @stats: rtnl_link_stats64 pointer
4549 *
4550 **/
4551static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4552						 struct rtnl_link_stats64 *stats)
4553{
4554	struct igb_adapter *adapter = netdev_priv(netdev);
4555
4556	spin_lock(&adapter->stats64_lock);
4557	igb_update_stats(adapter, &adapter->stats64);
4558	memcpy(stats, &adapter->stats64, sizeof(*stats));
4559	spin_unlock(&adapter->stats64_lock);
4560
4561	return stats;
4562}
4563
4564/**
4565 * igb_change_mtu - Change the Maximum Transfer Unit
4566 * @netdev: network interface device structure
4567 * @new_mtu: new value for maximum frame size
4568 *
4569 * Returns 0 on success, negative on failure
4570 **/
4571static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4572{
4573	struct igb_adapter *adapter = netdev_priv(netdev);
4574	struct pci_dev *pdev = adapter->pdev;
4575	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4576
4577	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4578		dev_err(&pdev->dev, "Invalid MTU setting\n");
4579		return -EINVAL;
4580	}
4581
4582#define MAX_STD_JUMBO_FRAME_SIZE 9238
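/* 9238 appears to be a 9216-byte MTU plus ETH_HLEN (14) + ETH_FCS_LEN (4) +
 * VLAN_HLEN (4), matching the max_frame calculation above
 */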
4583	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4584		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4585		return -EINVAL;
4586	}
4587
4588	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4589		msleep(1);
4590
4591	/* igb_down has a dependency on max_frame_size */
4592	adapter->max_frame_size = max_frame;
4593
4594	if (netif_running(netdev))
4595		igb_down(adapter);
4596
4597	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4598		 netdev->mtu, new_mtu);
4599	netdev->mtu = new_mtu;
4600
4601	if (netif_running(netdev))
4602		igb_up(adapter);
4603	else
4604		igb_reset(adapter);
4605
4606	clear_bit(__IGB_RESETTING, &adapter->state);
4607
4608	return 0;
4609}
4610
4611/**
4612 * igb_update_stats - Update the board statistics counters
4613 * @adapter: board private structure
4614 **/
4615
4616void igb_update_stats(struct igb_adapter *adapter,
4617		      struct rtnl_link_stats64 *net_stats)
4618{
4619	struct e1000_hw *hw = &adapter->hw;
4620	struct pci_dev *pdev = adapter->pdev;
4621	u32 reg, mpc;
4622	u16 phy_tmp;
4623	int i;
4624	u64 bytes, packets;
4625	unsigned int start;
4626	u64 _bytes, _packets;
4627
4628#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4629
4630	/*
4631	 * Prevent stats update while adapter is being reset, or if the pci
4632	 * connection is down.
4633	 */
4634	if (adapter->link_speed == 0)
4635		return;
4636	if (pci_channel_offline(pdev))
4637		return;
4638
4639	bytes = 0;
4640	packets = 0;
4641	for (i = 0; i < adapter->num_rx_queues; i++) {
4642		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4643		struct igb_ring *ring = adapter->rx_ring[i];
4644
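		/* RQDPC is the per-queue Rx packet drop count; the 0x0FFF
		 * mask presumably keeps just the 12-bit counter field
		 */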
4645		ring->rx_stats.drops += rqdpc_tmp;
4646		net_stats->rx_fifo_errors += rqdpc_tmp;
4647
4648		do {
4649			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4650			_bytes = ring->rx_stats.bytes;
4651			_packets = ring->rx_stats.packets;
4652		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4653		bytes += _bytes;
4654		packets += _packets;
4655	}
4656
4657	net_stats->rx_bytes = bytes;
4658	net_stats->rx_packets = packets;
4659
4660	bytes = 0;
4661	packets = 0;
4662	for (i = 0; i < adapter->num_tx_queues; i++) {
4663		struct igb_ring *ring = adapter->tx_ring[i];
4664		do {
4665			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4666			_bytes = ring->tx_stats.bytes;
4667			_packets = ring->tx_stats.packets;
4668		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4669		bytes += _bytes;
4670		packets += _packets;
4671	}
4672	net_stats->tx_bytes = bytes;
4673	net_stats->tx_packets = packets;
4674
4675	/* read stats registers */
4676	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4677	adapter->stats.gprc += rd32(E1000_GPRC);
4678	adapter->stats.gorc += rd32(E1000_GORCL);
4679	rd32(E1000_GORCH); /* clear GORCL */
4680	adapter->stats.bprc += rd32(E1000_BPRC);
4681	adapter->stats.mprc += rd32(E1000_MPRC);
4682	adapter->stats.roc += rd32(E1000_ROC);
4683
4684	adapter->stats.prc64 += rd32(E1000_PRC64);
4685	adapter->stats.prc127 += rd32(E1000_PRC127);
4686	adapter->stats.prc255 += rd32(E1000_PRC255);
4687	adapter->stats.prc511 += rd32(E1000_PRC511);
4688	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4689	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4690	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4691	adapter->stats.sec += rd32(E1000_SEC);
4692
4693	mpc = rd32(E1000_MPC);
4694	adapter->stats.mpc += mpc;
4695	net_stats->rx_fifo_errors += mpc;
4696	adapter->stats.scc += rd32(E1000_SCC);
4697	adapter->stats.ecol += rd32(E1000_ECOL);
4698	adapter->stats.mcc += rd32(E1000_MCC);
4699	adapter->stats.latecol += rd32(E1000_LATECOL);
4700	adapter->stats.dc += rd32(E1000_DC);
4701	adapter->stats.rlec += rd32(E1000_RLEC);
4702	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4703	adapter->stats.xontxc += rd32(E1000_XONTXC);
4704	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4705	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4706	adapter->stats.fcruc += rd32(E1000_FCRUC);
4707	adapter->stats.gptc += rd32(E1000_GPTC);
4708	adapter->stats.gotc += rd32(E1000_GOTCL);
4709	rd32(E1000_GOTCH); /* clear GOTCL */
4710	adapter->stats.rnbc += rd32(E1000_RNBC);
4711	adapter->stats.ruc += rd32(E1000_RUC);
4712	adapter->stats.rfc += rd32(E1000_RFC);
4713	adapter->stats.rjc += rd32(E1000_RJC);
4714	adapter->stats.tor += rd32(E1000_TORH);
4715	adapter->stats.tot += rd32(E1000_TOTH);
4716	adapter->stats.tpr += rd32(E1000_TPR);
4717
4718	adapter->stats.ptc64 += rd32(E1000_PTC64);
4719	adapter->stats.ptc127 += rd32(E1000_PTC127);
4720	adapter->stats.ptc255 += rd32(E1000_PTC255);
4721	adapter->stats.ptc511 += rd32(E1000_PTC511);
4722	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4723	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4724
4725	adapter->stats.mptc += rd32(E1000_MPTC);
4726	adapter->stats.bptc += rd32(E1000_BPTC);
4727
4728	adapter->stats.tpt += rd32(E1000_TPT);
4729	adapter->stats.colc += rd32(E1000_COLC);
4730
4731	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4732	/* read internal phy specific stats */
4733	reg = rd32(E1000_CTRL_EXT);
4734	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4735		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4736		adapter->stats.tncrs += rd32(E1000_TNCRS);
4737	}
4738
4739	adapter->stats.tsctc += rd32(E1000_TSCTC);
4740	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4741
4742	adapter->stats.iac += rd32(E1000_IAC);
4743	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4744	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4745	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4746	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4747	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4748	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4749	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4750	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4751
4752	/* Fill out the OS statistics structure */
4753	net_stats->multicast = adapter->stats.mprc;
4754	net_stats->collisions = adapter->stats.colc;
4755
4756	/* Rx Errors */
4757
4758	/* RLEC on some newer hardware can be incorrect so build
4759	 * our own version based on RUC and ROC */
4760	net_stats->rx_errors = adapter->stats.rxerrc +
4761		adapter->stats.crcerrs + adapter->stats.algnerrc +
4762		adapter->stats.ruc + adapter->stats.roc +
4763		adapter->stats.cexterr;
4764	net_stats->rx_length_errors = adapter->stats.ruc +
4765				      adapter->stats.roc;
4766	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4767	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4768	net_stats->rx_missed_errors = adapter->stats.mpc;
4769
4770	/* Tx Errors */
4771	net_stats->tx_errors = adapter->stats.ecol +
4772			       adapter->stats.latecol;
4773	net_stats->tx_aborted_errors = adapter->stats.ecol;
4774	net_stats->tx_window_errors = adapter->stats.latecol;
4775	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4776
4777	/* Tx Dropped needs to be maintained elsewhere */
4778
4779	/* Phy Stats */
4780	if (hw->phy.media_type == e1000_media_type_copper) {
4781		if ((adapter->link_speed == SPEED_1000) &&
4782		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4783			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4784			adapter->phy_stats.idle_errors += phy_tmp;
4785		}
4786	}
4787
4788	/* Management Stats */
4789	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4790	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4791	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4792
4793	/* OS2BMC Stats */
4794	reg = rd32(E1000_MANC);
4795	if (reg & E1000_MANC_EN_BMC2OS) {
4796		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4797		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4798		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4799		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4800	}
4801}
4802
4803static irqreturn_t igb_msix_other(int irq, void *data)
4804{
4805	struct igb_adapter *adapter = data;
4806	struct e1000_hw *hw = &adapter->hw;
4807	u32 icr = rd32(E1000_ICR);
4808	/* reading ICR causes bit 31 of EICR to be cleared */
4809
4810	if (icr & E1000_ICR_DRSTA)
4811		schedule_work(&adapter->reset_task);
4812
4813	if (icr & E1000_ICR_DOUTSYNC) {
4814		/* HW is reporting DMA is out of sync */
4815		adapter->stats.doosync++;
4816		/* The DMA Out of Sync is also indication of a spoof event
4817		/* The DMA Out of Sync is also an indication of a spoof event
4818		 * see if it is really a spoof event. */
4819		igb_check_wvbr(adapter);
4820	}
4821
4822	/* Check for a mailbox event */
4823	if (icr & E1000_ICR_VMMB)
4824		igb_msg_task(adapter);
4825
4826	if (icr & E1000_ICR_LSC) {
4827		hw->mac.get_link_status = 1;
4828		/* guard against interrupt when we're going down */
4829		if (!test_bit(__IGB_DOWN, &adapter->state))
4830			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4831	}
4832
4833	wr32(E1000_EIMS, adapter->eims_other);
4834
4835	return IRQ_HANDLED;
4836}
4837
4838static void igb_write_itr(struct igb_q_vector *q_vector)
4839{
4840	struct igb_adapter *adapter = q_vector->adapter;
4841	u32 itr_val = q_vector->itr_val & 0x7FFC;
4842
4843	if (!q_vector->set_itr)
4844		return;
4845
4846	if (!itr_val)
4847		itr_val = 0x4;
4848
4849	if (adapter->hw.mac.type == e1000_82575)
4850		itr_val |= itr_val << 16;
4851	else
4852		itr_val |= E1000_EITR_CNT_IGNR;
4853
4854	writel(itr_val, q_vector->itr_register);
4855	q_vector->set_itr = 0;
4856}
4857
4858static irqreturn_t igb_msix_ring(int irq, void *data)
4859{
4860	struct igb_q_vector *q_vector = data;
4861
4862	/* Write the ITR value calculated from the previous interrupt. */
4863	igb_write_itr(q_vector);
4864
4865	napi_schedule(&q_vector->napi);
4866
4867	return IRQ_HANDLED;
4868}
4869
4870#ifdef CONFIG_IGB_DCA
4871static void igb_update_dca(struct igb_q_vector *q_vector)
4872{
4873	struct igb_adapter *adapter = q_vector->adapter;
4874	struct e1000_hw *hw = &adapter->hw;
4875	int cpu = get_cpu();
4876
4877	if (q_vector->cpu == cpu)
4878		goto out_no_update;
4879
4880	if (q_vector->tx.ring) {
4881		int q = q_vector->tx.ring->reg_idx;
4882		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4883		if (hw->mac.type == e1000_82575) {
4884			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4885			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4886		} else {
4887			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4888			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4889			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4890		}
4891		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4892		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4893	}
4894	if (q_vector->rx.ring) {
4895		int q = q_vector->rx.ring->reg_idx;
4896		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4897		if (hw->mac.type == e1000_82575) {
4898			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4899			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4900		} else {
4901			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4902			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4903			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4904		}
4905		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4906		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4907		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4908		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4909	}
4910	q_vector->cpu = cpu;
4911out_no_update:
4912	put_cpu();
4913}
4914
4915static void igb_setup_dca(struct igb_adapter *adapter)
4916{
4917	struct e1000_hw *hw = &adapter->hw;
4918	int i;
4919
4920	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4921		return;
4922
4923	/* Always use CB2 mode, difference is masked in the CB driver. */
4924	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4925
4926	for (i = 0; i < adapter->num_q_vectors; i++) {
4927		adapter->q_vector[i]->cpu = -1;
4928		igb_update_dca(adapter->q_vector[i]);
4929	}
4930}
4931
4932static int __igb_notify_dca(struct device *dev, void *data)
4933{
4934	struct net_device *netdev = dev_get_drvdata(dev);
4935	struct igb_adapter *adapter = netdev_priv(netdev);
4936	struct pci_dev *pdev = adapter->pdev;
4937	struct e1000_hw *hw = &adapter->hw;
4938	unsigned long event = *(unsigned long *)data;
4939
4940	switch (event) {
4941	case DCA_PROVIDER_ADD:
4942		/* if already enabled, don't do it again */
4943		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4944			break;
4945		if (dca_add_requester(dev) == 0) {
4946			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4947			dev_info(&pdev->dev, "DCA enabled\n");
4948			igb_setup_dca(adapter);
4949			break;
4950		}
4951		/* Fall Through since DCA is disabled. */
4952	case DCA_PROVIDER_REMOVE:
4953		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4954			/* without this a class_device is left
4955			 * hanging around in the sysfs model */
4956			dca_remove_requester(dev);
4957			dev_info(&pdev->dev, "DCA disabled\n");
4958			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4959			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4960		}
4961		break;
4962	}
4963
4964	return 0;
4965}
4966
4967static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4968                          void *p)
4969{
4970	int ret_val;
4971
4972	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4973	                                 __igb_notify_dca);
4974
4975	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4976}
4977#endif /* CONFIG_IGB_DCA */
4978
4979#ifdef CONFIG_PCI_IOV
4980static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4981{
4982	unsigned char mac_addr[ETH_ALEN];
4983	struct pci_dev *pdev = adapter->pdev;
4984	struct e1000_hw *hw = &adapter->hw;
4985	struct pci_dev *pvfdev;
4986	unsigned int device_id;
4987	u16 thisvf_devfn;
4988
4989	random_ether_addr(mac_addr);
4990	igb_set_vf_mac(adapter, vf, mac_addr);
4991
4992	switch (adapter->hw.mac.type) {
4993	case e1000_82576:
4994		device_id = IGB_82576_VF_DEV_ID;
4995		/* VF Stride for 82576 is 2 */
4996		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4997			(pdev->devfn & 1);
4998		break;
4999	case e1000_i350:
5000		device_id = IGB_I350_VF_DEV_ID;
5001		/* VF Stride for I350 is 4 */
5002		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5003				(pdev->devfn & 3);
5004		break;
5005	default:
5006		device_id = 0;
5007		thisvf_devfn = 0;
5008		break;
5009	}
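	/* e.g. with the 82576's stride of 2, a PF at devfn 0x00 would expect
	 * VF 0 at devfn 0x80 and VF 1 at 0x82; the loop below then looks for
	 * the pci_dev that actually sits at that devfn
	 */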
5010
5011	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5012	while (pvfdev) {
5013		if (pvfdev->devfn == thisvf_devfn)
5014			break;
5015		pvfdev = pci_get_device(hw->vendor_id,
5016					device_id, pvfdev);
5017	}
5018
5019	if (pvfdev)
5020		adapter->vf_data[vf].vfdev = pvfdev;
5021	else
5022		dev_err(&pdev->dev,
5023			"Couldn't find pci dev ptr for VF %4.4x\n",
5024			thisvf_devfn);
5025	return pvfdev != NULL;
5026}
5027
5028static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5029{
5030	struct e1000_hw *hw = &adapter->hw;
5031	struct pci_dev *pdev = adapter->pdev;
5032	struct pci_dev *pvfdev;
5033	u16 vf_devfn = 0;
5034	u16 vf_stride;
5035	unsigned int device_id;
5036	int vfs_found = 0;
5037
5038	switch (adapter->hw.mac.type) {
5039	case e1000_82576:
5040		device_id = IGB_82576_VF_DEV_ID;
5041		/* VF Stride for 82576 is 2 */
5042		vf_stride = 2;
5043		break;
5044	case e1000_i350:
5045		device_id = IGB_I350_VF_DEV_ID;
5046		/* VF Stride for I350 is 4 */
5047		vf_stride = 4;
5048		break;
5049	default:
5050		device_id = 0;
5051		vf_stride = 0;
5052		break;
5053	}
5054
5055	vf_devfn = pdev->devfn + 0x80;
5056	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5057	while (pvfdev) {
5058		if (pvfdev->devfn == vf_devfn &&
5059		    (pvfdev->bus->number >= pdev->bus->number))
5060			vfs_found++;
5061		vf_devfn += vf_stride;
5062		pvfdev = pci_get_device(hw->vendor_id,
5063					device_id, pvfdev);
5064	}
5065
5066	return vfs_found;
5067}
5068
5069static int igb_check_vf_assignment(struct igb_adapter *adapter)
5070{
5071	int i;
5072	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5073		if (adapter->vf_data[i].vfdev) {
5074			if (adapter->vf_data[i].vfdev->dev_flags &
5075			    PCI_DEV_FLAGS_ASSIGNED)
5076				return true;
5077		}
5078	}
5079	return false;
5080}
5081
5082#endif
5083static void igb_ping_all_vfs(struct igb_adapter *adapter)
5084{
5085	struct e1000_hw *hw = &adapter->hw;
5086	u32 ping;
5087	int i;
5088
5089	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5090		ping = E1000_PF_CONTROL_MSG;
5091		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5092			ping |= E1000_VT_MSGTYPE_CTS;
5093		igb_write_mbx(hw, &ping, 1, i);
5094	}
5095}
5096
5097static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5098{
5099	struct e1000_hw *hw = &adapter->hw;
5100	u32 vmolr = rd32(E1000_VMOLR(vf));
5101	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5102
5103	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5104	                    IGB_VF_FLAG_MULTI_PROMISC);
5105	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5106
5107	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5108		vmolr |= E1000_VMOLR_MPME;
5109		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5110		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5111	} else {
5112		/*
5113		 * if we have hashes and we are clearing a multicast promisc
5114		 * flag we need to write the hashes to the MTA as this step
5115		 * was previously skipped
5116		 */
5117		if (vf_data->num_vf_mc_hashes > 30) {
5118			vmolr |= E1000_VMOLR_MPME;
5119		} else if (vf_data->num_vf_mc_hashes) {
5120			int j;
5121			vmolr |= E1000_VMOLR_ROMPE;
5122			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5123				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5124		}
5125	}
5126
5127	wr32(E1000_VMOLR(vf), vmolr);
5128
5129	/* there are flags left unprocessed, likely not supported */
5130	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5131		return -EINVAL;
5132
5133	return 0;
5134
5135}
5136
5137static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5138				  u32 *msgbuf, u32 vf)
5139{
5140	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5141	u16 *hash_list = (u16 *)&msgbuf[1];
5142	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5143	int i;
5144
5145	/* salt away the number of multicast addresses assigned
5146	 * to this VF for later use to restore when the PF multicast
5147	 * list changes
5148	 */
5149	vf_data->num_vf_mc_hashes = n;
5150
5151	/* only up to 30 hash values supported */
5152	if (n > 30)
5153		n = 30;
5154
5155	/* store the hashes for later use */
5156	for (i = 0; i < n; i++)
5157		vf_data->vf_mc_hashes[i] = hash_list[i];
5158
5159	/* Flush and reset the mta with the new values */
5160	igb_set_rx_mode(adapter->netdev);
5161
5162	return 0;
5163}
5164
5165static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5166{
5167	struct e1000_hw *hw = &adapter->hw;
5168	struct vf_data_storage *vf_data;
5169	int i, j;
5170
5171	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5172		u32 vmolr = rd32(E1000_VMOLR(i));
5173		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5174
5175		vf_data = &adapter->vf_data[i];
5176
5177		if ((vf_data->num_vf_mc_hashes > 30) ||
5178		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5179			vmolr |= E1000_VMOLR_MPME;
5180		} else if (vf_data->num_vf_mc_hashes) {
5181			vmolr |= E1000_VMOLR_ROMPE;
5182			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5183				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5184		}
5185		wr32(E1000_VMOLR(i), vmolr);
5186	}
5187}
5188
5189static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5190{
5191	struct e1000_hw *hw = &adapter->hw;
5192	u32 pool_mask, reg, vid;
5193	int i;
5194
5195	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5196
5197	/* Find the vlan filter for this id */
5198	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5199		reg = rd32(E1000_VLVF(i));
5200
5201		/* remove the vf from the pool */
5202		reg &= ~pool_mask;
5203
5204		/* if pool is empty then remove entry from vfta */
5205		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5206		    (reg & E1000_VLVF_VLANID_ENABLE)) {
5207			vid = reg & E1000_VLVF_VLANID_MASK;
5208			reg = 0;
5209			igb_vfta_set(hw, vid, false);
5210		}
5211
5212		wr32(E1000_VLVF(i), reg);
5213	}
5214
5215	adapter->vf_data[vf].vlans_enabled = 0;
5216}
5217
5218static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5219{
5220	struct e1000_hw *hw = &adapter->hw;
5221	u32 reg, i;
5222
5223	/* The vlvf table only exists on 82576 hardware and newer */
5224	if (hw->mac.type < e1000_82576)
5225		return -1;
5226
5227	/* we only need to do this if VMDq is enabled */
5228	if (!adapter->vfs_allocated_count)
5229		return -1;
5230
5231	/* Find the vlan filter for this id */
5232	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5233		reg = rd32(E1000_VLVF(i));
5234		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5235		    vid == (reg & E1000_VLVF_VLANID_MASK))
5236			break;
5237	}
5238
5239	if (add) {
5240		if (i == E1000_VLVF_ARRAY_SIZE) {
5241			/* Did not find a matching VLAN ID entry that was
5242			 * enabled.  Search for a free filter entry, i.e.
5243			 * one without the enable bit set
5244			 */
5245			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5246				reg = rd32(E1000_VLVF(i));
5247				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5248					break;
5249			}
5250		}
5251		if (i < E1000_VLVF_ARRAY_SIZE) {
5252			/* Found an enabled/available entry */
5253			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5254
5255			/* if !enabled we need to set this up in vfta */
5256			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5257				/* add VID to filter table */
5258				igb_vfta_set(hw, vid, true);
5259				reg |= E1000_VLVF_VLANID_ENABLE;
5260			}
5261			reg &= ~E1000_VLVF_VLANID_MASK;
5262			reg |= vid;
5263			wr32(E1000_VLVF(i), reg);
5264
5265			/* do not modify RLPML for PF devices */
5266			if (vf >= adapter->vfs_allocated_count)
5267				return 0;
5268
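			/* the first VLAN enabled on this pool grows its
			 * RLPML (max frame size) by 4 bytes, presumably to
			 * make room for the VLAN tag
			 */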
5269			if (!adapter->vf_data[vf].vlans_enabled) {
5270				u32 size;
5271				reg = rd32(E1000_VMOLR(vf));
5272				size = reg & E1000_VMOLR_RLPML_MASK;
5273				size += 4;
5274				reg &= ~E1000_VMOLR_RLPML_MASK;
5275				reg |= size;
5276				wr32(E1000_VMOLR(vf), reg);
5277			}
5278
5279			adapter->vf_data[vf].vlans_enabled++;
5280		}
5281	} else {
5282		if (i < E1000_VLVF_ARRAY_SIZE) {
5283			/* remove vf from the pool */
5284			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5285			/* if pool is empty then remove entry from vfta */
5286			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5287				reg = 0;
5288				igb_vfta_set(hw, vid, false);
5289			}
5290			wr32(E1000_VLVF(i), reg);
5291
5292			/* do not modify RLPML for PF devices */
5293			if (vf >= adapter->vfs_allocated_count)
5294				return 0;
5295
5296			adapter->vf_data[vf].vlans_enabled--;
5297			if (!adapter->vf_data[vf].vlans_enabled) {
5298				u32 size;
5299				reg = rd32(E1000_VMOLR(vf));
5300				size = reg & E1000_VMOLR_RLPML_MASK;
5301				size -= 4;
5302				reg &= ~E1000_VMOLR_RLPML_MASK;
5303				reg |= size;
5304				wr32(E1000_VMOLR(vf), reg);
5305			}
5306		}
5307	}
5308	return 0;
5309}
5310
5311static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5312{
5313	struct e1000_hw *hw = &adapter->hw;
5314
5315	if (vid)
5316		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5317	else
5318		wr32(E1000_VMVIR(vf), 0);
5319}
5320
5321static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5322			       int vf, u16 vlan, u8 qos)
5323{
5324	int err = 0;
5325	struct igb_adapter *adapter = netdev_priv(netdev);
5326
5327	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5328		return -EINVAL;
5329	if (vlan || qos) {
5330		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5331		if (err)
5332			goto out;
5333		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5334		igb_set_vmolr(adapter, vf, !vlan);
5335		adapter->vf_data[vf].pf_vlan = vlan;
5336		adapter->vf_data[vf].pf_qos = qos;
5337		dev_info(&adapter->pdev->dev,
5338			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5339		if (test_bit(__IGB_DOWN, &adapter->state)) {
5340			dev_warn(&adapter->pdev->dev,
5341				 "The VF VLAN has been set,"
5342				 " but the PF device is not up.\n");
5343			dev_warn(&adapter->pdev->dev,
5344				 "Bring the PF device up before"
5345				 " attempting to use the VF device.\n");
5346		}
5347	} else {
5348		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5349				   false, vf);
5350		igb_set_vmvir(adapter, vlan, vf);
5351		igb_set_vmolr(adapter, vf, true);
5352		adapter->vf_data[vf].pf_vlan = 0;
5353		adapter->vf_data[vf].pf_qos = 0;
5354	}
5355out:
5356	return err;
5357}
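
/* igb_ndo_set_vf_vlan() is reached through the ndo_set_vf_vlan hook; an
 * administrator typically drives it from iproute2, e.g. (assuming the PF
 * is eth0 and VF 0 exists):
 *
 *	ip link set dev eth0 vf 0 vlan 100 qos 4
 *
 * Passing "vlan 0 qos 0" clears the port VLAN again, as handled above.
 */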
5358
5359static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5360{
5361	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5362	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5363
5364	return igb_vlvf_set(adapter, vid, add, vf);
5365}
5366
5367static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5368{
5369	/* clear flags - except flag that indicates PF has set the MAC */
5370	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5371	adapter->vf_data[vf].last_nack = jiffies;
5372
5373	/* reset offloads to defaults */
5374	igb_set_vmolr(adapter, vf, true);
5375
5376	/* reset vlans for device */
5377	igb_clear_vf_vfta(adapter, vf);
5378	if (adapter->vf_data[vf].pf_vlan)
5379		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5380				    adapter->vf_data[vf].pf_vlan,
5381				    adapter->vf_data[vf].pf_qos);
5382	else
5383		igb_clear_vf_vfta(adapter, vf);
5384
5385	/* reset multicast table array for vf */
5386	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5387
5388	/* Flush and reset the mta with the new values */
5389	igb_set_rx_mode(adapter->netdev);
5390}
5391
5392static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5393{
5394	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5395
5396	/* generate a new mac address as we were hotplug removed/added */
5397	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5398		random_ether_addr(vf_mac);
5399
5400	/* process remaining reset events */
5401	igb_vf_reset(adapter, vf);
5402}
5403
5404static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5405{
5406	struct e1000_hw *hw = &adapter->hw;
5407	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5408	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5409	u32 reg, msgbuf[3];
5410	u8 *addr = (u8 *)(&msgbuf[1]);
5411
5412	/* process all the same items cleared in a function level reset */
5413	igb_vf_reset(adapter, vf);
5414
5415	/* set vf mac address */
5416	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5417
5418	/* enable transmit and receive for vf */
5419	reg = rd32(E1000_VFTE);
5420	wr32(E1000_VFTE, reg | (1 << vf));
5421	reg = rd32(E1000_VFRE);
5422	wr32(E1000_VFRE, reg | (1 << vf));
5423
5424	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5425
5426	/* reply to reset with ack and vf mac address */
5427	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5428	memcpy(addr, vf_mac, 6);
5429	igb_write_mbx(hw, msgbuf, 3, vf);
5430}
5431
5432static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5433{
5434	/*
5435	 * The VF MAC Address is stored in a packed array of bytes
5436	 * starting at the second 32 bit word of the msg array
5437	 */
5438	unsigned char *addr = (unsigned char *)&msg[1];
5439	int err = -1;
5440
5441	if (is_valid_ether_addr(addr))
5442		err = igb_set_vf_mac(adapter, vf, addr);
5443
5444	return err;
5445}
5446
5447static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5448{
5449	struct e1000_hw *hw = &adapter->hw;
5450	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5451	u32 msg = E1000_VT_MSGTYPE_NACK;
5452
5453	/* if device isn't clear to send it shouldn't be reading either */
5454	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5455	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5456		igb_write_mbx(hw, &msg, 1, vf);
5457		vf_data->last_nack = jiffies;
5458	}
5459}
5460
5461static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5462{
5463	struct pci_dev *pdev = adapter->pdev;
5464	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5465	struct e1000_hw *hw = &adapter->hw;
5466	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5467	s32 retval;
5468
5469	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5470
5471	if (retval) {
5472		/* if receive failed revoke VF CTS stats and restart init */
5473		dev_err(&pdev->dev, "Error receiving message from VF\n");
5474		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5475		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5476			return;
5477		goto out;
5478	}
5479
5480	/* this is a message we already processed, do nothing */
5481	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5482		return;
5483
5484	/*
5485	 * until the vf completes a reset it should not be
5486	 * allowed to start any configuration.
5487	 */
5488
5489	if (msgbuf[0] == E1000_VF_RESET) {
5490		igb_vf_reset_msg(adapter, vf);
5491		return;
5492	}
5493
5494	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5495		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5496			return;
5497		retval = -1;
5498		goto out;
5499	}
5500
5501	switch ((msgbuf[0] & 0xFFFF)) {
5502	case E1000_VF_SET_MAC_ADDR:
5503		retval = -EINVAL;
5504		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5505			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5506		else
5507			dev_warn(&pdev->dev,
5508				 "VF %d attempted to override administratively "
5509				 "set MAC address\nReload the VF driver to "
5510				 "resume operations\n", vf);
5511		break;
5512	case E1000_VF_SET_PROMISC:
5513		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5514		break;
5515	case E1000_VF_SET_MULTICAST:
5516		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5517		break;
5518	case E1000_VF_SET_LPE:
5519		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5520		break;
5521	case E1000_VF_SET_VLAN:
5522		retval = -1;
5523		if (vf_data->pf_vlan)
5524			dev_warn(&pdev->dev,
5525				 "VF %d attempted to override administratively "
5526				 "set VLAN tag\nReload the VF driver to "
5527				 "resume operations\n", vf);
5528		else
5529			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5530		break;
5531	default:
5532		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5533		retval = -1;
5534		break;
5535	}
5536
5537	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5538out:
5539	/* notify the VF of the results of what it sent us */
5540	if (retval)
5541		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5542	else
5543		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5544
5545	igb_write_mbx(hw, msgbuf, 1, vf);
5546}
5547
5548static void igb_msg_task(struct igb_adapter *adapter)
5549{
5550	struct e1000_hw *hw = &adapter->hw;
5551	u32 vf;
5552
5553	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5554		/* process any reset requests */
5555		if (!igb_check_for_rst(hw, vf))
5556			igb_vf_reset_event(adapter, vf);
5557
5558		/* process any messages pending */
5559		if (!igb_check_for_msg(hw, vf))
5560			igb_rcv_msg_from_vf(adapter, vf);
5561
5562		/* process any acks */
5563		if (!igb_check_for_ack(hw, vf))
5564			igb_rcv_ack_from_vf(adapter, vf);
5565	}
5566}
5567
5568/**
5569 *  igb_set_uta - Set unicast filter table address
5570 *  @adapter: board private structure
5571 *
5572 *  The unicast table address is a register array of 32-bit registers.
5573 *  The table is meant to be used in a way similar to how the MTA is used;
5574 *  however, due to certain limitations in the hardware it is necessary to
5575 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5576 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5577 **/
5578static void igb_set_uta(struct igb_adapter *adapter)
5579{
5580	struct e1000_hw *hw = &adapter->hw;
5581	int i;
5582
5583	/* The UTA table only exists on 82576 hardware and newer */
5584	if (hw->mac.type < e1000_82576)
5585		return;
5586
5587	/* we only need to do this if VMDq is enabled */
5588	if (!adapter->vfs_allocated_count)
5589		return;
5590
5591	for (i = 0; i < hw->mac.uta_reg_count; i++)
5592		array_wr32(E1000_UTA, i, ~0);
5593}
5594
5595/**
5596 * igb_intr_msi - Interrupt Handler
5597 * @irq: interrupt number
5598 * @data: pointer to a network interface device structure
5599 **/
5600static irqreturn_t igb_intr_msi(int irq, void *data)
5601{
5602	struct igb_adapter *adapter = data;
5603	struct igb_q_vector *q_vector = adapter->q_vector[0];
5604	struct e1000_hw *hw = &adapter->hw;
5605	/* read ICR disables interrupts using IAM */
5606	u32 icr = rd32(E1000_ICR);
5607
5608	igb_write_itr(q_vector);
5609
5610	if (icr & E1000_ICR_DRSTA)
5611		schedule_work(&adapter->reset_task);
5612
5613	if (icr & E1000_ICR_DOUTSYNC) {
5614		/* HW is reporting DMA is out of sync */
5615		adapter->stats.doosync++;
5616	}
5617
5618	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5619		hw->mac.get_link_status = 1;
5620		if (!test_bit(__IGB_DOWN, &adapter->state))
5621			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5622	}
5623
5624	napi_schedule(&q_vector->napi);
5625
5626	return IRQ_HANDLED;
5627}
5628
5629/**
5630 * igb_intr - Legacy Interrupt Handler
5631 * @irq: interrupt number
5632 * @data: pointer to a network interface device structure
5633 **/
5634static irqreturn_t igb_intr(int irq, void *data)
5635{
5636	struct igb_adapter *adapter = data;
5637	struct igb_q_vector *q_vector = adapter->q_vector[0];
5638	struct e1000_hw *hw = &adapter->hw;
5639	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5640	 * need for the IMC write */
5641	u32 icr = rd32(E1000_ICR);
5642
5643	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5644	 * not set, then the adapter didn't send an interrupt */
5645	if (!(icr & E1000_ICR_INT_ASSERTED))
5646		return IRQ_NONE;
5647
5648	igb_write_itr(q_vector);
5649
5650	if (icr & E1000_ICR_DRSTA)
5651		schedule_work(&adapter->reset_task);
5652
5653	if (icr & E1000_ICR_DOUTSYNC) {
5654		/* HW is reporting DMA is out of sync */
5655		adapter->stats.doosync++;
5656	}
5657
5658	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5659		hw->mac.get_link_status = 1;
5660		/* guard against interrupt when we're going down */
5661		if (!test_bit(__IGB_DOWN, &adapter->state))
5662			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5663	}
5664
5665	napi_schedule(&q_vector->napi);
5666
5667	return IRQ_HANDLED;
5668}
5669
5670static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5671{
5672	struct igb_adapter *adapter = q_vector->adapter;
5673	struct e1000_hw *hw = &adapter->hw;
5674
5675	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5676	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5677		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5678			igb_set_itr(q_vector);
5679		else
5680			igb_update_ring_itr(q_vector);
5681	}
5682
5683	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5684		if (adapter->msix_entries)
5685			wr32(E1000_EIMS, q_vector->eims_value);
5686		else
5687			igb_irq_enable(adapter);
5688	}
5689}
5690
5691/**
5692 * igb_poll - NAPI Rx polling callback
5693 * @napi: napi polling structure
5694 * @budget: count of how many packets we should handle
5695 **/
5696static int igb_poll(struct napi_struct *napi, int budget)
5697{
5698	struct igb_q_vector *q_vector = container_of(napi,
5699	                                             struct igb_q_vector,
5700	                                             napi);
5701	bool clean_complete = true;
5702
5703#ifdef CONFIG_IGB_DCA
5704	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5705		igb_update_dca(q_vector);
5706#endif
5707	if (q_vector->tx.ring)
5708		clean_complete = igb_clean_tx_irq(q_vector);
5709
5710	if (q_vector->rx.ring)
5711		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5712
5713	/* If all work not completed, return budget and keep polling */
5714	if (!clean_complete)
5715		return budget;
5716
5717	/* If not enough Rx work done, exit the polling mode */
5718	napi_complete(napi);
5719	igb_ring_irq_enable(q_vector);
5720
5721	return 0;
5722}
5723
5724/**
5725 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5726 * @adapter: board private structure
5727 * @shhwtstamps: timestamp structure to update
5728 * @regval: unsigned 64bit system time value.
5729 *
5730 * We need to convert the system time value stored in the RX/TXSTMP registers
5731 * into a hwtstamp which can be used by the upper level timestamping functions
5732 */
5733static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5734                                   struct skb_shared_hwtstamps *shhwtstamps,
5735                                   u64 regval)
5736{
5737	u64 ns;
5738
5739	/*
5740	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift this up by
5741	 * 24 bits to match the clock shift we set up earlier.
5742	 */
5743	if (adapter->hw.mac.type >= e1000_82580)
5744		regval <<= IGB_82580_TSYNC_SHIFT;
5745
5746	ns = timecounter_cyc2time(&adapter->clock, regval);
5747	timecompare_update(&adapter->compare, ns);
5748	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5749	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5750	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5751}
5752
5753/**
5754 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5755 * @q_vector: pointer to q_vector containing needed info
5756 * @buffer_info: pointer to igb_tx_buffer structure
5757 *
5758 * If we were asked to do hardware stamping and such a time stamp is
5759 * available, then it must have been for this skb here because we allow
5760 * only one such packet into the queue.
5761 */
5762static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5763			    struct igb_tx_buffer *buffer_info)
5764{
5765	struct igb_adapter *adapter = q_vector->adapter;
5766	struct e1000_hw *hw = &adapter->hw;
5767	struct skb_shared_hwtstamps shhwtstamps;
5768	u64 regval;
5769
5770	/* if skb does not support hw timestamp or TX stamp not valid exit */
5771	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5772	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5773		return;
5774
5775	regval = rd32(E1000_TXSTMPL);
5776	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5777
5778	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5779	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5780}
5781
5782/**
5783 * igb_clean_tx_irq - Reclaim resources after transmit completes
5784 * @q_vector: pointer to q_vector containing needed info
5785 * returns true if ring is completely cleaned
5786 **/
5787static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5788{
5789	struct igb_adapter *adapter = q_vector->adapter;
5790	struct igb_ring *tx_ring = q_vector->tx.ring;
5791	struct igb_tx_buffer *tx_buffer;
5792	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5793	unsigned int total_bytes = 0, total_packets = 0;
5794	unsigned int budget = q_vector->tx.work_limit;
5795	unsigned int i = tx_ring->next_to_clean;
5796
5797	if (test_bit(__IGB_DOWN, &adapter->state))
5798		return true;
5799
5800	tx_buffer = &tx_ring->tx_buffer_info[i];
5801	tx_desc = IGB_TX_DESC(tx_ring, i);
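	/* track the clean index as a negative offset from the end of the
	 * ring; this lets the wrap checks below reduce to a simple !i test
	 */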
5802	i -= tx_ring->count;
5803
5804	for (; budget; budget--) {
5805		eop_desc = tx_buffer->next_to_watch;
5806
5807		/* prevent any other reads prior to eop_desc */
5808		rmb();
5809
5810		/* if next_to_watch is not set then there is no work pending */
5811		if (!eop_desc)
5812			break;
5813
5814		/* if DD is not set pending work has not been completed */
5815		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5816			break;
5817
5818		/* clear next_to_watch to prevent false hangs */
5819		tx_buffer->next_to_watch = NULL;
5820
5821		/* update the statistics for this packet */
5822		total_bytes += tx_buffer->bytecount;
5823		total_packets += tx_buffer->gso_segs;
5824
5825		/* retrieve hardware timestamp */
5826		igb_tx_hwtstamp(q_vector, tx_buffer);
5827
5828		/* free the skb */
5829		dev_kfree_skb_any(tx_buffer->skb);
5830		tx_buffer->skb = NULL;
5831
5832		/* unmap skb header data */
5833		dma_unmap_single(tx_ring->dev,
5834				 tx_buffer->dma,
5835				 tx_buffer->length,
5836				 DMA_TO_DEVICE);
5837
5838		/* clear last DMA location and unmap remaining buffers */
5839		while (tx_desc != eop_desc) {
5840			tx_buffer->dma = 0;
5841
5842			tx_buffer++;
5843			tx_desc++;
5844			i++;
5845			if (unlikely(!i)) {
5846				i -= tx_ring->count;
5847				tx_buffer = tx_ring->tx_buffer_info;
5848				tx_desc = IGB_TX_DESC(tx_ring, 0);
5849			}
5850
5851			/* unmap any remaining paged data */
5852			if (tx_buffer->dma) {
5853				dma_unmap_page(tx_ring->dev,
5854					       tx_buffer->dma,
5855					       tx_buffer->length,
5856					       DMA_TO_DEVICE);
5857			}
5858		}
5859
5860		/* clear last DMA location */
5861		tx_buffer->dma = 0;
5862
5863		/* move us one more past the eop_desc for start of next pkt */
5864		tx_buffer++;
5865		tx_desc++;
5866		i++;
5867		if (unlikely(!i)) {
5868			i -= tx_ring->count;
5869			tx_buffer = tx_ring->tx_buffer_info;
5870			tx_desc = IGB_TX_DESC(tx_ring, 0);
5871		}
5872	}
5873
5874	netdev_tx_completed_queue(txring_txq(tx_ring),
5875				  total_packets, total_bytes);
5876	i += tx_ring->count;
5877	tx_ring->next_to_clean = i;
5878	u64_stats_update_begin(&tx_ring->tx_syncp);
5879	tx_ring->tx_stats.bytes += total_bytes;
5880	tx_ring->tx_stats.packets += total_packets;
5881	u64_stats_update_end(&tx_ring->tx_syncp);
5882	q_vector->tx.total_bytes += total_bytes;
5883	q_vector->tx.total_packets += total_packets;
5884
5885	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5886		struct e1000_hw *hw = &adapter->hw;
5887
5888		eop_desc = tx_buffer->next_to_watch;
5889
5890		/* Detect a transmit hang in hardware; this serializes the
5891		 * check with the clearing of time_stamp and movement of i */
5892		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5893		if (eop_desc &&
5894		    time_after(jiffies, tx_buffer->time_stamp +
5895			       (adapter->tx_timeout_factor * HZ)) &&
5896		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5897
5898			/* detected Tx unit hang */
5899			dev_err(tx_ring->dev,
5900				"Detected Tx Unit Hang\n"
5901				"  Tx Queue             <%d>\n"
5902				"  TDH                  <%x>\n"
5903				"  TDT                  <%x>\n"
5904				"  next_to_use          <%x>\n"
5905				"  next_to_clean        <%x>\n"
5906				"buffer_info[next_to_clean]\n"
5907				"  time_stamp           <%lx>\n"
5908				"  next_to_watch        <%p>\n"
5909				"  jiffies              <%lx>\n"
5910				"  desc.status          <%x>\n",
5911				tx_ring->queue_index,
5912				rd32(E1000_TDH(tx_ring->reg_idx)),
5913				readl(tx_ring->tail),
5914				tx_ring->next_to_use,
5915				tx_ring->next_to_clean,
5916				tx_buffer->time_stamp,
5917				eop_desc,
5918				jiffies,
5919				eop_desc->wb.status);
5920			netif_stop_subqueue(tx_ring->netdev,
5921					    tx_ring->queue_index);
5922
5923			/* we are about to reset, no point in enabling stuff */
5924			return true;
5925		}
5926	}
5927
5928	if (unlikely(total_packets &&
5929		     netif_carrier_ok(tx_ring->netdev) &&
5930		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5931		/* Make sure that anybody stopping the queue after this
5932		 * sees the new next_to_clean.
5933		 */
5934		smp_mb();
5935		if (__netif_subqueue_stopped(tx_ring->netdev,
5936					     tx_ring->queue_index) &&
5937		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5938			netif_wake_subqueue(tx_ring->netdev,
5939					    tx_ring->queue_index);
5940
5941			u64_stats_update_begin(&tx_ring->tx_syncp);
5942			tx_ring->tx_stats.restart_queue++;
5943			u64_stats_update_end(&tx_ring->tx_syncp);
5944		}
5945	}
5946
5947	return !!budget;
5948}
5949
5950static inline void igb_rx_checksum(struct igb_ring *ring,
5951				   union e1000_adv_rx_desc *rx_desc,
5952				   struct sk_buff *skb)
5953{
5954	skb_checksum_none_assert(skb);
5955
5956	/* Ignore Checksum bit is set */
5957	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5958		return;
5959
5960	/* Rx checksum disabled via ethtool */
5961	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5962		return;
5963
5964	/* TCP/UDP checksum error bit is set */
5965	if (igb_test_staterr(rx_desc,
5966			     E1000_RXDEXT_STATERR_TCPE |
5967			     E1000_RXDEXT_STATERR_IPE)) {
5968		/*
5969		 * work around errata with sctp packets where the TCPE aka
5970		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5971		 * packets, (aka let the stack check the crc32c)
5972		 * packets (i.e. let the stack check the crc32c)
5973		if (!((skb->len == 60) &&
5974		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5975			u64_stats_update_begin(&ring->rx_syncp);
5976			ring->rx_stats.csum_err++;
5977			u64_stats_update_end(&ring->rx_syncp);
5978		}
5979		/* let the stack verify checksum errors */
5980		return;
5981	}
5982	/* It must be a TCP or UDP packet with a valid checksum */
5983	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5984				      E1000_RXD_STAT_UDPCS))
5985		skb->ip_summed = CHECKSUM_UNNECESSARY;
5986
5987	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5988		le32_to_cpu(rx_desc->wb.upper.status_error));
5989}
5990
5991static inline void igb_rx_hash(struct igb_ring *ring,
5992			       union e1000_adv_rx_desc *rx_desc,
5993			       struct sk_buff *skb)
5994{
5995	if (ring->netdev->features & NETIF_F_RXHASH)
5996		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5997}
5998
5999static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6000			    union e1000_adv_rx_desc *rx_desc,
6001			    struct sk_buff *skb)
6002{
6003	struct igb_adapter *adapter = q_vector->adapter;
6004	struct e1000_hw *hw = &adapter->hw;
6005	u64 regval;
6006
6007	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6008				       E1000_RXDADV_STAT_TS))
6009		return;
6010
6011	/*
6012	 * If this bit is set, then the RX registers contain the time stamp. No
6013	 * other packet will be time stamped until we read these registers, so
6014	 * read the registers to make them available again. Because only one
6015	 * packet can be time stamped at a time, we know that the register
6016	 * values must belong to this one here and therefore we don't need to
6017	 * compare any of the additional attributes stored for it.
6018	 *
6019	 * If nothing went wrong, then it should have a shared tx_flags that we
6020	 * can turn into a skb_shared_hwtstamps.
6021	 */
6022	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6023		u32 *stamp = (u32 *)skb->data;
6024		regval = le32_to_cpu(*(stamp + 2));
6025		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6026		skb_pull(skb, IGB_TS_HDR_LEN);
6027	} else {
6028		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6029			return;
6030
6031		regval = rd32(E1000_RXSTMPL);
6032		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6033	}
6034
6035	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6036}
6037
6038static void igb_rx_vlan(struct igb_ring *ring,
6039			union e1000_adv_rx_desc *rx_desc,
6040			struct sk_buff *skb)
6041{
6042	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6043		u16 vid;
6044		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6045		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6046			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6047		else
6048			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6049
6050		__vlan_hwaccel_put_tag(skb, vid);
6051	}
6052}
6053
6054static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6055{
6056	/* HW will not DMA in data larger than the given buffer, even if it
6057	 * parses the (NFS, of course) header to be larger.  In that case, it
6058	 * fills the header buffer and spills the rest into the page.
6059	 */
6060	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6061	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6062	if (hlen > IGB_RX_HDR_LEN)
6063		hlen = IGB_RX_HDR_LEN;
6064	return hlen;
6065}
6066
6067static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6068{
6069	struct igb_ring *rx_ring = q_vector->rx.ring;
6070	union e1000_adv_rx_desc *rx_desc;
6071	const int current_node = numa_node_id();
6072	unsigned int total_bytes = 0, total_packets = 0;
6073	u16 cleaned_count = igb_desc_unused(rx_ring);
6074	u16 i = rx_ring->next_to_clean;
6075
6076	rx_desc = IGB_RX_DESC(rx_ring, i);
6077
6078	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6079		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6080		struct sk_buff *skb = buffer_info->skb;
6081		union e1000_adv_rx_desc *next_rxd;
6082
6083		buffer_info->skb = NULL;
6084		prefetch(skb->data);
6085
6086		i++;
6087		if (i == rx_ring->count)
6088			i = 0;
6089
6090		next_rxd = IGB_RX_DESC(rx_ring, i);
6091		prefetch(next_rxd);
6092
6093		/*
6094		 * This memory barrier is needed to keep us from reading
6095		 * any other fields out of the rx_desc until we know the
6096		 * RXD_STAT_DD bit is set
6097		 */
6098		rmb();
6099
6100		if (!skb_is_nonlinear(skb)) {
6101			__skb_put(skb, igb_get_hlen(rx_desc));
6102			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6103					 IGB_RX_HDR_LEN,
6104					 DMA_FROM_DEVICE);
6105			buffer_info->dma = 0;
6106		}
6107
6108		if (rx_desc->wb.upper.length) {
6109			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6110
6111			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6112						buffer_info->page,
6113						buffer_info->page_offset,
6114						length);
6115
6116			skb->len += length;
6117			skb->data_len += length;
6118			skb->truesize += PAGE_SIZE / 2;
6119
6120			if ((page_count(buffer_info->page) != 1) ||
6121			    (page_to_nid(buffer_info->page) != current_node))
6122				buffer_info->page = NULL;
6123			else
6124				get_page(buffer_info->page);
6125
6126			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6127				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6128			buffer_info->page_dma = 0;
6129		}
6130
6131		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6132			struct igb_rx_buffer *next_buffer;
6133			next_buffer = &rx_ring->rx_buffer_info[i];
6134			buffer_info->skb = next_buffer->skb;
6135			buffer_info->dma = next_buffer->dma;
6136			next_buffer->skb = skb;
6137			next_buffer->dma = 0;
6138			goto next_desc;
6139		}
6140
6141		if (unlikely((igb_test_staterr(rx_desc,
6142					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6143			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6144			dev_kfree_skb_any(skb);
6145			goto next_desc;
6146		}
6147
6148		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6149		igb_rx_hash(rx_ring, rx_desc, skb);
6150		igb_rx_checksum(rx_ring, rx_desc, skb);
6151		igb_rx_vlan(rx_ring, rx_desc, skb);
6152
6153		total_bytes += skb->len;
6154		total_packets++;
6155
6156		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6157
6158		napi_gro_receive(&q_vector->napi, skb);
6159
6160		budget--;
6161next_desc:
6162		if (!budget)
6163			break;
6164
6165		cleaned_count++;
6166		/* return some buffers to hardware, one at a time is too slow */
6167		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6168			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6169			cleaned_count = 0;
6170		}
6171
6172		/* use prefetched values */
6173		rx_desc = next_rxd;
6174	}
6175
6176	rx_ring->next_to_clean = i;
6177	u64_stats_update_begin(&rx_ring->rx_syncp);
6178	rx_ring->rx_stats.packets += total_packets;
6179	rx_ring->rx_stats.bytes += total_bytes;
6180	u64_stats_update_end(&rx_ring->rx_syncp);
6181	q_vector->rx.total_packets += total_packets;
6182	q_vector->rx.total_bytes += total_bytes;
6183
6184	if (cleaned_count)
6185		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6186
6187	return !!budget;
6188}
6189
6190static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6191				 struct igb_rx_buffer *bi)
6192{
6193	struct sk_buff *skb = bi->skb;
6194	dma_addr_t dma = bi->dma;
6195
6196	if (dma)
6197		return true;
6198
6199	if (likely(!skb)) {
6200		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6201						IGB_RX_HDR_LEN);
6202		bi->skb = skb;
6203		if (!skb) {
6204			rx_ring->rx_stats.alloc_failed++;
6205			return false;
6206		}
6207
6208		/* initialize skb for ring */
6209		skb_record_rx_queue(skb, rx_ring->queue_index);
6210	}
6211
6212	dma = dma_map_single(rx_ring->dev, skb->data,
6213			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6214
6215	if (dma_mapping_error(rx_ring->dev, dma)) {
6216		rx_ring->rx_stats.alloc_failed++;
6217		return false;
6218	}
6219
6220	bi->dma = dma;
6221	return true;
6222}
6223
6224static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6225				  struct igb_rx_buffer *bi)
6226{
6227	struct page *page = bi->page;
6228	dma_addr_t page_dma = bi->page_dma;
6229	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6230
6231	if (page_dma)
6232		return true;
6233
6234	if (!page) {
6235		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6236		bi->page = page;
6237		if (unlikely(!page)) {
6238			rx_ring->rx_stats.alloc_failed++;
6239			return false;
6240		}
6241	}
6242
6243	page_dma = dma_map_page(rx_ring->dev, page,
6244				page_offset, PAGE_SIZE / 2,
6245				DMA_FROM_DEVICE);
6246
6247	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6248		rx_ring->rx_stats.alloc_failed++;
6249		return false;
6250	}
6251
6252	bi->page_dma = page_dma;
6253	bi->page_offset = page_offset;
6254	return true;
6255}
6256
6257/**
6258 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6259 * @rx_ring: pointer to the ring to place buffers on
 * @cleaned_count: number of buffers to replace
6260 **/
6261void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6262{
6263	union e1000_adv_rx_desc *rx_desc;
6264	struct igb_rx_buffer *bi;
6265	u16 i = rx_ring->next_to_use;
6266
6267	rx_desc = IGB_RX_DESC(rx_ring, i);
6268	bi = &rx_ring->rx_buffer_info[i];
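	/* as in the Tx clean path, track i as a negative offset from the
	 * end of the ring so the wrap check below is just !i
	 */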
6269	i -= rx_ring->count;
6270
6271	while (cleaned_count--) {
6272		if (!igb_alloc_mapped_skb(rx_ring, bi))
6273			break;
6274
6275		/* Refresh the desc even if buffer_addrs didn't change
6276		 * because each write-back erases this info. */
6277		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6278
6279		if (!igb_alloc_mapped_page(rx_ring, bi))
6280			break;
6281
6282		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6283
6284		rx_desc++;
6285		bi++;
6286		i++;
6287		if (unlikely(!i)) {
6288			rx_desc = IGB_RX_DESC(rx_ring, 0);
6289			bi = rx_ring->rx_buffer_info;
6290			i -= rx_ring->count;
6291		}
6292
6293		/* clear the hdr_addr for the next_to_use descriptor */
6294		rx_desc->read.hdr_addr = 0;
6295	}
6296
6297	i += rx_ring->count;
6298
6299	if (rx_ring->next_to_use != i) {
6300		rx_ring->next_to_use = i;
6301
6302		/* Force memory writes to complete before letting h/w
6303		 * know there are new descriptors to fetch.  (Only
6304		 * applicable for weak-ordered memory model archs,
6305		 * such as IA-64). */
6306		wmb();
6307		writel(i, rx_ring->tail);
6308	}
6309}
6310
6311/**
6312 * igb_mii_ioctl - read or write MII PHY registers
6313 * @netdev: network interface device structure
6314 * @ifr: interface request structure holding the MII data
6315 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6316 **/
6317static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6318{
6319	struct igb_adapter *adapter = netdev_priv(netdev);
6320	struct mii_ioctl_data *data = if_mii(ifr);
6321
6322	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6323		return -EOPNOTSUPP;
6324
6325	switch (cmd) {
6326	case SIOCGMIIPHY:
6327		data->phy_id = adapter->hw.phy.addr;
6328		break;
6329	case SIOCGMIIREG:
6330		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6331		                     &data->val_out))
6332			return -EIO;
6333		break;
6334	case SIOCSMIIREG:
6335	default:
6336		return -EOPNOTSUPP;
6337	}
6338	return 0;
6339}
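
/* A minimal user-space sketch (not part of this driver) of how the MII
 * ioctls above are typically exercised; it assumes an open socket fd and
 * an interface name, and reads one PHY register (e.g. BMSR, register 1).
 */
#if 0	/* illustration only */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/mii.h>
#include <linux/sockios.h>

static int read_phy_reg(int fd, const char *ifname, int reg, unsigned int *val)
{
	struct ifreq ifr;
	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

	if (ioctl(fd, SIOCGMIIPHY, &ifr) < 0)	/* fills in mii->phy_id */
		return -1;

	mii->reg_num = reg;
	if (ioctl(fd, SIOCGMIIREG, &ifr) < 0)
		return -1;

	*val = mii->val_out;
	return 0;
}
#endif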
6340
6341/**
6342 * igb_hwtstamp_ioctl - control hardware time stamping
6343 * @netdev: network interface device structure
6344 * @ifr: interface request structure holding the hwtstamp_config
6345 * @cmd: ioctl command (SIOCSHWTSTAMP)
6346 *
6347 * Outgoing time stamping can be enabled and disabled. Play nice and
6348 * disable it when requested, although it shouldn't cause any overhead
6349 * when no packet needs it. At most one packet in the queue may be
6350 * marked for time stamping, otherwise it would be impossible to tell
6351 * for sure to which packet the hardware time stamp belongs.
6352 *
6353 * Incoming time stamping has to be configured via the hardware
6354 * filters. Not all combinations are supported, in particular event
6355 * type has to be specified. Matching the kind of event packet is
6356 * not supported, with the exception of "all V2 events regardless of
6357 * level 2 or 4".
6358 *
6359 **/
6360static int igb_hwtstamp_ioctl(struct net_device *netdev,
6361			      struct ifreq *ifr, int cmd)
6362{
6363	struct igb_adapter *adapter = netdev_priv(netdev);
6364	struct e1000_hw *hw = &adapter->hw;
6365	struct hwtstamp_config config;
6366	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6367	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6368	u32 tsync_rx_cfg = 0;
6369	bool is_l4 = false;
6370	bool is_l2 = false;
6371	u32 regval;
6372
6373	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6374		return -EFAULT;
6375
6376	/* reserved for future extensions */
6377	if (config.flags)
6378		return -EINVAL;
6379
6380	switch (config.tx_type) {
6381	case HWTSTAMP_TX_OFF:
6382		tsync_tx_ctl = 0;
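		/* fall through */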
6383	case HWTSTAMP_TX_ON:
6384		break;
6385	default:
6386		return -ERANGE;
6387	}
6388
6389	switch (config.rx_filter) {
6390	case HWTSTAMP_FILTER_NONE:
6391		tsync_rx_ctl = 0;
6392		break;
6393	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6394	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6395	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6396	case HWTSTAMP_FILTER_ALL:
6397		/*
6398		 * register TSYNCRXCFG must be set, therefore it is not
6399		 * possible to time stamp both Sync and Delay_Req messages
6400		 * => fall back to time stamping all packets
6401		 */
6402		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6403		config.rx_filter = HWTSTAMP_FILTER_ALL;
6404		break;
6405	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6406		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6407		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6408		is_l4 = true;
6409		break;
6410	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6411		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6412		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6413		is_l4 = true;
6414		break;
6415	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6416	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6417		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6418		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6419		is_l2 = true;
6420		is_l4 = true;
6421		config.rx_filter = HWTSTAMP_FILTER_SOME;
6422		break;
6423	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6424	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6425		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6426		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6427		is_l2 = true;
6428		is_l4 = true;
6429		config.rx_filter = HWTSTAMP_FILTER_SOME;
6430		break;
6431	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6432	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6433	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6434		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6435		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6436		is_l2 = true;
6437		is_l4 = true;
6438		break;
6439	default:
6440		return -ERANGE;
6441	}
6442
6443	if (hw->mac.type == e1000_82575) {
6444		if (tsync_rx_ctl | tsync_tx_ctl)
6445			return -EINVAL;
6446		return 0;
6447	}
6448
6449	/*
6450	 * Per-packet timestamping only works if all packets are
6451	 * timestamped, so enable timestamping in all packets as
6452	 * long as one rx filter was configured.
6453	 */
6454	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6455		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6456		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6457	}
6458
6459	/* enable/disable TX */
6460	regval = rd32(E1000_TSYNCTXCTL);
6461	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6462	regval |= tsync_tx_ctl;
6463	wr32(E1000_TSYNCTXCTL, regval);
6464
6465	/* enable/disable RX */
6466	regval = rd32(E1000_TSYNCRXCTL);
6467	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6468	regval |= tsync_rx_ctl;
6469	wr32(E1000_TSYNCRXCTL, regval);
6470
6471	/* define which PTP packets are time stamped */
6472	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6473
6474	/* define ethertype filter for timestamped packets */
6475	if (is_l2)
6476		wr32(E1000_ETQF(3),
6477		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6478		                 E1000_ETQF_1588 | /* enable timestamping */
6479		                 ETH_P_1588));     /* 1588 eth protocol type */
6480	else
6481		wr32(E1000_ETQF(3), 0);
6482
6483#define PTP_PORT 319
6484	/* L4 Queue Filter[3]: filter by destination port and protocol */
6485	if (is_l4) {
6486		u32 ftqf = (IPPROTO_UDP /* UDP */
6487			| E1000_FTQF_VF_BP /* VF not compared */
6488			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6489			| E1000_FTQF_MASK); /* mask all inputs */
6490		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6491
6492		wr32(E1000_IMIR(3), htons(PTP_PORT));
6493		wr32(E1000_IMIREXT(3),
6494		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6495		if (hw->mac.type == e1000_82576) {
6496			/* enable source port check */
6497			wr32(E1000_SPQF(3), htons(PTP_PORT));
6498			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6499		}
6500		wr32(E1000_FTQF(3), ftqf);
6501	} else {
6502		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6503	}
6504	wrfl();
6505
6506	adapter->hwtstamp_config = config;
6507
6508	/* clear TX/RX time stamp registers, just to be sure */
6509	regval = rd32(E1000_TXSTMPH);
6510	regval = rd32(E1000_RXSTMPH);
6511
6512	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6513		-EFAULT : 0;
6514}
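
/* A minimal user-space sketch (not part of this driver) of how the
 * SIOCSHWTSTAMP path above is usually driven; it assumes an open socket
 * fd (e.g. AF_INET/SOCK_DGRAM) and an interface name, and requests Tx
 * timestamping plus timestamping of all Rx packets.
 */
#if 0	/* illustration only */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

static int enable_hw_timestamping(int fd, const char *ifname)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type = HWTSTAMP_TX_ON;
	cfg.rx_filter = HWTSTAMP_FILTER_ALL;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&cfg;

	/* on return the driver may have adjusted cfg.rx_filter */
	return ioctl(fd, SIOCSHWTSTAMP, &ifr);
}
#endif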
6515
6516/**
6517 * igb_ioctl - entry point for the driver's ioctl calls
6518 * @netdev: network interface device structure
6519 * @ifr: interface request structure
6520 * @cmd: ioctl command
6521 **/
6522static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6523{
6524	switch (cmd) {
6525	case SIOCGMIIPHY:
6526	case SIOCGMIIREG:
6527	case SIOCSMIIREG:
6528		return igb_mii_ioctl(netdev, ifr, cmd);
6529	case SIOCSHWTSTAMP:
6530		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6531	default:
6532		return -EOPNOTSUPP;
6533	}
6534}
6535
6536s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6537{
6538	struct igb_adapter *adapter = hw->back;
6539	u16 cap_offset;
6540
6541	cap_offset = adapter->pdev->pcie_cap;
6542	if (!cap_offset)
6543		return -E1000_ERR_CONFIG;
6544
6545	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6546
6547	return 0;
6548}
6549
6550s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6551{
6552	struct igb_adapter *adapter = hw->back;
6553	u16 cap_offset;
6554
6555	cap_offset = adapter->pdev->pcie_cap;
6556	if (!cap_offset)
6557		return -E1000_ERR_CONFIG;
6558
6559	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6560
6561	return 0;
6562}
6563
6564static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6565{
6566	struct igb_adapter *adapter = netdev_priv(netdev);
6567	struct e1000_hw *hw = &adapter->hw;
6568	u32 ctrl, rctl;
6569	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6570
6571	if (enable) {
6572		/* enable VLAN tag insert/strip */
6573		ctrl = rd32(E1000_CTRL);
6574		ctrl |= E1000_CTRL_VME;
6575		wr32(E1000_CTRL, ctrl);
6576
6577		/* Disable CFI check */
6578		rctl = rd32(E1000_RCTL);
6579		rctl &= ~E1000_RCTL_CFIEN;
6580		wr32(E1000_RCTL, rctl);
6581	} else {
6582		/* disable VLAN tag insert/strip */
6583		ctrl = rd32(E1000_CTRL);
6584		ctrl &= ~E1000_CTRL_VME;
6585		wr32(E1000_CTRL, ctrl);
6586	}
6587
6588	igb_rlpml_set(adapter);
6589}
6590
6591static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6592{
6593	struct igb_adapter *adapter = netdev_priv(netdev);
6594	struct e1000_hw *hw = &adapter->hw;
6595	int pf_id = adapter->vfs_allocated_count;
6596
6597	/* attempt to add filter to vlvf array */
6598	igb_vlvf_set(adapter, vid, true, pf_id);
6599
6600	/* add the filter since PF can receive vlans w/o entry in vlvf */
6601	igb_vfta_set(hw, vid, true);
6602
6603	set_bit(vid, adapter->active_vlans);
6604
6605	return 0;
6606}
6607
6608static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6609{
6610	struct igb_adapter *adapter = netdev_priv(netdev);
6611	struct e1000_hw *hw = &adapter->hw;
6612	int pf_id = adapter->vfs_allocated_count;
6613	s32 err;
6614
6615	/* remove vlan from VLVF table array */
6616	err = igb_vlvf_set(adapter, vid, false, pf_id);
6617
6618	/* if vid was not present in VLVF just remove it from table */
6619	if (err)
6620		igb_vfta_set(hw, vid, false);
6621
6622	clear_bit(vid, adapter->active_vlans);
6623
6624	return 0;
6625}
6626
6627static void igb_restore_vlan(struct igb_adapter *adapter)
6628{
6629	u16 vid;
6630
6631	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6632
6633	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6634		igb_vlan_rx_add_vid(adapter->netdev, vid);
6635}
6636
6637int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6638{
6639	struct pci_dev *pdev = adapter->pdev;
6640	struct e1000_mac_info *mac = &adapter->hw.mac;
6641
6642	mac->autoneg = 0;
6643
6644	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6645	 * for the switch() below to work */
6646	if ((spd & 1) || (dplx & ~1))
6647		goto err_inval;
6648
6649	/* Fiber NICs only allow 1000 Mbps full duplex */
6650	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6651	    (spd != SPEED_1000 ||
6652	     dplx != DUPLEX_FULL))
6653		goto err_inval;
6654
6655	switch (spd + dplx) {
6656	case SPEED_10 + DUPLEX_HALF:
6657		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6658		break;
6659	case SPEED_10 + DUPLEX_FULL:
6660		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6661		break;
6662	case SPEED_100 + DUPLEX_HALF:
6663		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6664		break;
6665	case SPEED_100 + DUPLEX_FULL:
6666		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6667		break;
6668	case SPEED_1000 + DUPLEX_FULL:
6669		mac->autoneg = 1;
6670		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6671		break;
6672	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6673	default:
6674		goto err_inval;
6675	}
6676	return 0;
6677
6678err_inval:
6679	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6680	return -EINVAL;
6681}
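
/* igb_set_spd_dplx() is reached from the ethtool set_settings path; a
 * typical forced-speed request (assuming the interface is eth0) is:
 *
 *	ethtool -s eth0 speed 100 duplex full autoneg off
 *
 * 1000 Mbps keeps autoneg enabled, as handled above.
 */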
6682
6683static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6684			  bool runtime)
6685{
6686	struct net_device *netdev = pci_get_drvdata(pdev);
6687	struct igb_adapter *adapter = netdev_priv(netdev);
6688	struct e1000_hw *hw = &adapter->hw;
6689	u32 ctrl, rctl, status;
6690	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6691#ifdef CONFIG_PM
6692	int retval = 0;
6693#endif
6694
6695	netif_device_detach(netdev);
6696
6697	if (netif_running(netdev))
6698		__igb_close(netdev, true);
6699
6700	igb_clear_interrupt_scheme(adapter);
6701
6702#ifdef CONFIG_PM
6703	retval = pci_save_state(pdev);
6704	if (retval)
6705		return retval;
6706#endif
6707
6708	status = rd32(E1000_STATUS);
6709	if (status & E1000_STATUS_LU)
6710		wufc &= ~E1000_WUFC_LNKC;
6711
6712	if (wufc) {
6713		igb_setup_rctl(adapter);
6714		igb_set_rx_mode(netdev);
6715
6716		/* turn on all-multi mode if wake on multicast is enabled */
6717		if (wufc & E1000_WUFC_MC) {
6718			rctl = rd32(E1000_RCTL);
6719			rctl |= E1000_RCTL_MPE;
6720			wr32(E1000_RCTL, rctl);
6721		}
6722
6723		ctrl = rd32(E1000_CTRL);
6724		/* advertise wake from D3Cold */
6725		#define E1000_CTRL_ADVD3WUC 0x00100000
6726		/* phy power management enable */
6727		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6728		ctrl |= E1000_CTRL_ADVD3WUC;
6729		wr32(E1000_CTRL, ctrl);
6730
6731		/* Allow time for pending master requests to run */
6732		igb_disable_pcie_master(hw);
6733
6734		wr32(E1000_WUC, E1000_WUC_PME_EN);
6735		wr32(E1000_WUFC, wufc);
6736	} else {
6737		wr32(E1000_WUC, 0);
6738		wr32(E1000_WUFC, 0);
6739	}
6740
6741	*enable_wake = wufc || adapter->en_mng_pt;
6742	if (!*enable_wake)
6743		igb_power_down_link(adapter);
6744	else
6745		igb_power_up_link(adapter);
6746
6747	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6748	 * would have already happened in close and is redundant. */
6749	igb_release_hw_control(adapter);
6750
6751	pci_disable_device(pdev);
6752
6753	return 0;
6754}
6755
6756#ifdef CONFIG_PM
6757#ifdef CONFIG_PM_SLEEP
6758static int igb_suspend(struct device *dev)
6759{
6760	int retval;
6761	bool wake;
6762	struct pci_dev *pdev = to_pci_dev(dev);
6763
6764	retval = __igb_shutdown(pdev, &wake, 0);
6765	if (retval)
6766		return retval;
6767
6768	if (wake) {
6769		pci_prepare_to_sleep(pdev);
6770	} else {
6771		pci_wake_from_d3(pdev, false);
6772		pci_set_power_state(pdev, PCI_D3hot);
6773	}
6774
6775	return 0;
6776}
6777#endif /* CONFIG_PM_SLEEP */
6778
6779static int igb_resume(struct device *dev)
6780{
6781	struct pci_dev *pdev = to_pci_dev(dev);
6782	struct net_device *netdev = pci_get_drvdata(pdev);
6783	struct igb_adapter *adapter = netdev_priv(netdev);
6784	struct e1000_hw *hw = &adapter->hw;
6785	u32 err;
6786
6787	pci_set_power_state(pdev, PCI_D0);
6788	pci_restore_state(pdev);
6789	pci_save_state(pdev);
6790
6791	err = pci_enable_device_mem(pdev);
6792	if (err) {
6793		dev_err(&pdev->dev,
6794			"igb: Cannot enable PCI device from suspend\n");
6795		return err;
6796	}
6797	pci_set_master(pdev);
6798
6799	pci_enable_wake(pdev, PCI_D3hot, 0);
6800	pci_enable_wake(pdev, PCI_D3cold, 0);
6801
6802	if (igb_init_interrupt_scheme(adapter)) {
6803		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6804		return -ENOMEM;
6805	}
6806
6807	igb_reset(adapter);
6808
6809	/* let the f/w know that the h/w is now under the control of the
6810	 * driver. */
6811	igb_get_hw_control(adapter);
6812
6813	wr32(E1000_WUS, ~0);
6814
6815	if (netdev->flags & IFF_UP) {
6816		err = __igb_open(netdev, true);
6817		if (err)
6818			return err;
6819	}
6820
6821	netif_device_attach(netdev);
6822	return 0;
6823}
6824
6825#ifdef CONFIG_PM_RUNTIME
6826static int igb_runtime_idle(struct device *dev)
6827{
6828	struct pci_dev *pdev = to_pci_dev(dev);
6829	struct net_device *netdev = pci_get_drvdata(pdev);
6830	struct igb_adapter *adapter = netdev_priv(netdev);
6831
6832	if (!igb_has_link(adapter))
6833		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6834
6835	return -EBUSY;
6836}
6837
6838static int igb_runtime_suspend(struct device *dev)
6839{
6840	struct pci_dev *pdev = to_pci_dev(dev);
6841	int retval;
6842	bool wake;
6843
6844	retval = __igb_shutdown(pdev, &wake, 1);
6845	if (retval)
6846		return retval;
6847
6848	if (wake) {
6849		pci_prepare_to_sleep(pdev);
6850	} else {
6851		pci_wake_from_d3(pdev, false);
6852		pci_set_power_state(pdev, PCI_D3hot);
6853	}
6854
6855	return 0;
6856}
6857
6858static int igb_runtime_resume(struct device *dev)
6859{
6860	return igb_resume(dev);
6861}
6862#endif /* CONFIG_PM_RUNTIME */
6863#endif
6864
6865static void igb_shutdown(struct pci_dev *pdev)
6866{
6867	bool wake;
6868
6869	__igb_shutdown(pdev, &wake, 0);
6870
6871	if (system_state == SYSTEM_POWER_OFF) {
6872		pci_wake_from_d3(pdev, wake);
6873		pci_set_power_state(pdev, PCI_D3hot);
6874	}
6875}
6876
6877#ifdef CONFIG_NET_POLL_CONTROLLER
6878/*
6879 * Polling 'interrupt' - used by things like netconsole to send skbs
6880 * without having to re-enable interrupts. It's not called while
6881 * the interrupt routine is executing.
6882 */
6883static void igb_netpoll(struct net_device *netdev)
6884{
6885	struct igb_adapter *adapter = netdev_priv(netdev);
6886	struct e1000_hw *hw = &adapter->hw;
6887	struct igb_q_vector *q_vector;
6888	int i;
6889
6890	for (i = 0; i < adapter->num_q_vectors; i++) {
6891		q_vector = adapter->q_vector[i];
6892		if (adapter->msix_entries)
6893			wr32(E1000_EIMC, q_vector->eims_value);
6894		else
6895			igb_irq_disable(adapter);
6896		napi_schedule(&q_vector->napi);
6897	}
6898}
6899#endif /* CONFIG_NET_POLL_CONTROLLER */
6900
6901/**
6902 * igb_io_error_detected - called when PCI error is detected
6903 * @pdev: Pointer to PCI device
6904 * @state: The current pci connection state
6905 *
6906 * This function is called after a PCI bus error affecting
6907 * this device has been detected.
6908 */
6909static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6910					      pci_channel_state_t state)
6911{
6912	struct net_device *netdev = pci_get_drvdata(pdev);
6913	struct igb_adapter *adapter = netdev_priv(netdev);
6914
6915	netif_device_detach(netdev);
6916
6917	if (state == pci_channel_io_perm_failure)
6918		return PCI_ERS_RESULT_DISCONNECT;
6919
6920	if (netif_running(netdev))
6921		igb_down(adapter);
6922	pci_disable_device(pdev);
6923
6924	/* Request a slot reset. */
6925	return PCI_ERS_RESULT_NEED_RESET;
6926}
6927
6928/**
6929 * igb_io_slot_reset - called after the pci bus has been reset.
6930 * @pdev: Pointer to PCI device
6931 *
6932 * Restart the card from scratch, as if from a cold-boot. Implementation
6933 * resembles the first-half of the igb_resume routine.
6934 */
6935static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6936{
6937	struct net_device *netdev = pci_get_drvdata(pdev);
6938	struct igb_adapter *adapter = netdev_priv(netdev);
6939	struct e1000_hw *hw = &adapter->hw;
6940	pci_ers_result_t result;
6941	int err;
6942
6943	if (pci_enable_device_mem(pdev)) {
6944		dev_err(&pdev->dev,
6945			"Cannot re-enable PCI device after reset.\n");
6946		result = PCI_ERS_RESULT_DISCONNECT;
6947	} else {
6948		pci_set_master(pdev);
6949		pci_restore_state(pdev);
6950		pci_save_state(pdev);
6951
6952		pci_enable_wake(pdev, PCI_D3hot, 0);
6953		pci_enable_wake(pdev, PCI_D3cold, 0);
6954
6955		igb_reset(adapter);
6956		wr32(E1000_WUS, ~0);
6957		result = PCI_ERS_RESULT_RECOVERED;
6958	}
6959
6960	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6961	if (err) {
6962		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6963		        "failed 0x%0x\n", err);
6964		/* non-fatal, continue */
6965	}
6966
6967	return result;
6968}
6969
6970/**
6971 * igb_io_resume - called when traffic can start flowing again.
6972 * @pdev: Pointer to PCI device
6973 *
6974 * This callback is called when the error recovery driver tells us that
6975 * its OK to resume normal operation. Implementation resembles the
6976 * second-half of the igb_resume routine.
6977 */
6978static void igb_io_resume(struct pci_dev *pdev)
6979{
6980	struct net_device *netdev = pci_get_drvdata(pdev);
6981	struct igb_adapter *adapter = netdev_priv(netdev);
6982
6983	if (netif_running(netdev)) {
6984		if (igb_up(adapter)) {
6985			dev_err(&pdev->dev, "igb_up failed after reset\n");
6986			return;
6987		}
6988	}
6989
6990	netif_device_attach(netdev);
6991
6992	/* let the f/w know that the h/w is now under the control of the
6993	 * driver. */
6994	igb_get_hw_control(adapter);
6995}
6996
6997static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6998                             u8 qsel)
6999{
7000	u32 rar_low, rar_high;
7001	struct e1000_hw *hw = &adapter->hw;
7002
7003	/* HW expects these in little endian so we reverse the byte order
7004	 * from network order (big endian) to little endian
7005	 */
7006	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7007	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7008	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7009
7010	/* Indicate to hardware the Address is Valid. */
7011	rar_high |= E1000_RAH_AV;
7012
7013	if (hw->mac.type == e1000_82575)
7014		rar_high |= E1000_RAH_POOL_1 * qsel;
7015	else
7016		rar_high |= E1000_RAH_POOL_1 << qsel;
7017
7018	wr32(E1000_RAL(index), rar_low);
7019	wrfl();
7020	wr32(E1000_RAH(index), rar_high);
7021	wrfl();
7022}
7023
7024static int igb_set_vf_mac(struct igb_adapter *adapter,
7025                          int vf, unsigned char *mac_addr)
7026{
7027	struct e1000_hw *hw = &adapter->hw;
7028	/* VF MAC addresses start at the end of the receive addresses and move
7029	 * towards the first; as a result a collision should not be possible */
7030	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7031
7032	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7033
7034	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7035
7036	return 0;
7037}
7038
7039static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7040{
7041	struct igb_adapter *adapter = netdev_priv(netdev);
7042	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7043		return -EINVAL;
7044	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7045	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7046	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7047				      " change effective.");
7048	if (test_bit(__IGB_DOWN, &adapter->state)) {
7049		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7050			 " but the PF device is not up.\n");
7051		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7052			 " attempting to use the VF device.\n");
7053	}
7054	return igb_set_vf_mac(adapter, vf, mac);
7055}
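
/* As with the VLAN hook, the administrative MAC address is normally set
 * from user space through iproute2 (assuming PF eth0 and VF 0):
 *
 *	ip link set dev eth0 vf 0 mac 52:54:00:12:34:56
 */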
7056
7057static int igb_link_mbps(int internal_link_speed)
7058{
7059	switch (internal_link_speed) {
7060	case SPEED_100:
7061		return 100;
7062	case SPEED_1000:
7063		return 1000;
7064	default:
7065		return 0;
7066	}
7067}
7068
7069static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7070				  int link_speed)
7071{
7072	int rf_dec, rf_int;
7073	u32 bcnrc_val;
7074
7075	if (tx_rate != 0) {
7076		/* Calculate the rate factor values to set */
7077		rf_int = link_speed / tx_rate;
7078		rf_dec = (link_speed - (rf_int * tx_rate));
7079		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7080
7081		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7082		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7083		               E1000_RTTBCNRC_RF_INT_MASK);
7084		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7085	} else {
7086		bcnrc_val = 0;
7087	}
7088
7089	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7090	wr32(E1000_RTTBCNRC, bcnrc_val);
7091}
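
/* Worked example of the rate-factor math above, assuming the usual 14-bit
 * fractional shift (E1000_RTTBCNRC_RF_INT_SHIFT == 14): with link_speed =
 * 1000 and tx_rate = 300, rf_int = 3 and rf_dec = (1000 - 900) * 16384 /
 * 300 = 5461, so the hardware divides the line rate by roughly
 * 3 + 5461/16384 ~= 3.33 and the VF is held to about 300 Mbps.
 */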
7092
7093static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7094{
7095	int actual_link_speed, i;
7096	bool reset_rate = false;
7097
7098	/* VF TX rate limit was not set or not supported */
7099	if ((adapter->vf_rate_link_speed == 0) ||
7100	    (adapter->hw.mac.type != e1000_82576))
7101		return;
7102
7103	actual_link_speed = igb_link_mbps(adapter->link_speed);
7104	if (actual_link_speed != adapter->vf_rate_link_speed) {
7105		reset_rate = true;
7106		adapter->vf_rate_link_speed = 0;
7107		dev_info(&adapter->pdev->dev,
7108		         "Link speed has been changed. VF Transmit "
7109		         "rate is disabled\n");
7110	}
7111
7112	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7113		if (reset_rate)
7114			adapter->vf_data[i].tx_rate = 0;
7115
7116		igb_set_vf_rate_limit(&adapter->hw, i,
7117		                      adapter->vf_data[i].tx_rate,
7118		                      actual_link_speed);
7119	}
7120}
7121
7122static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7123{
7124	struct igb_adapter *adapter = netdev_priv(netdev);
7125	struct e1000_hw *hw = &adapter->hw;
7126	int actual_link_speed;
7127
7128	if (hw->mac.type != e1000_82576)
7129		return -EOPNOTSUPP;
7130
7131	actual_link_speed = igb_link_mbps(adapter->link_speed);
7132	if ((vf >= adapter->vfs_allocated_count) ||
7133	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7134	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7135		return -EINVAL;
7136
7137	adapter->vf_rate_link_speed = actual_link_speed;
7138	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7139	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7140
7141	return 0;
7142}
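
/* The per-VF limit is likewise set from iproute2 (assuming PF eth0 and
 * VF 0); a rate of 0 removes the limit:
 *
 *	ip link set dev eth0 vf 0 rate 300
 */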
7143
7144static int igb_ndo_get_vf_config(struct net_device *netdev,
7145				 int vf, struct ifla_vf_info *ivi)
7146{
7147	struct igb_adapter *adapter = netdev_priv(netdev);
7148	if (vf >= adapter->vfs_allocated_count)
7149		return -EINVAL;
7150	ivi->vf = vf;
7151	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7152	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7153	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7154	ivi->qos = adapter->vf_data[vf].pf_qos;
7155	return 0;
7156}
7157
7158static void igb_vmm_control(struct igb_adapter *adapter)
7159{
7160	struct e1000_hw *hw = &adapter->hw;
7161	u32 reg;
7162
7163	switch (hw->mac.type) {
7164	case e1000_82575:
7165	default:
7166		/* replication is not supported for 82575 */
7167		return;
7168	case e1000_82576:
7169		/* notify HW that the MAC is adding vlan tags */
7170		reg = rd32(E1000_DTXCTL);
7171		reg |= E1000_DTXCTL_VLAN_ADDED;
7172		wr32(E1000_DTXCTL, reg);
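		/* fall through */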
7173	case e1000_82580:
7174		/* enable replication vlan tag stripping */
7175		reg = rd32(E1000_RPLOLR);
7176		reg |= E1000_RPLOLR_STRVLAN;
7177		wr32(E1000_RPLOLR, reg);
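		/* fall through */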
7178	case e1000_i350:
7179		/* none of the above registers are supported by i350 */
7180		break;
7181	}
7182
7183	if (adapter->vfs_allocated_count) {
7184		igb_vmdq_set_loopback_pf(hw, true);
7185		igb_vmdq_set_replication_pf(hw, true);
7186		igb_vmdq_set_anti_spoofing_pf(hw, true,
7187						adapter->vfs_allocated_count);
7188	} else {
7189		igb_vmdq_set_loopback_pf(hw, false);
7190		igb_vmdq_set_replication_pf(hw, false);
7191	}
7192}
7193
7194static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7195{
7196	struct e1000_hw *hw = &adapter->hw;
7197	u32 dmac_thr;
7198	u16 hwm;
7199
7200	if (hw->mac.type > e1000_82580) {
7201		if (adapter->flags & IGB_FLAG_DMAC) {
7202			u32 reg;
7203
7204			/* force threshold to 0. */
7205			wr32(E1000_DMCTXTH, 0);
7206
7207			/*
7208			 * DMA Coalescing high water mark needs to be greater
7209			 * than the Rx threshold. Set hwm to PBA - max frame
7210			 * size in 16B units, capping it at PBA - 6KB.
7211			 */
7212			hwm = 64 * pba - adapter->max_frame_size / 16;
7213			if (hwm < 64 * (pba - 6))
7214				hwm = 64 * (pba - 6);
7215			reg = rd32(E1000_FCRTC);
7216			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7217			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7218				& E1000_FCRTC_RTH_COAL_MASK);
7219			wr32(E1000_FCRTC, reg);
7220
7221			/*
7222			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7223			 * frame size, capping it at PBA - 10KB.
7224			 */
7225			dmac_thr = pba - adapter->max_frame_size / 512;
7226			if (dmac_thr < pba - 10)
7227				dmac_thr = pba - 10;
7228			reg = rd32(E1000_DMACR);
7229			reg &= ~E1000_DMACR_DMACTHR_MASK;
7230			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7231				& E1000_DMACR_DMACTHR_MASK);
7232
7233			/* transition to L0s or L1 if available */
7234			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7235
7236			/* watchdog timer = ~1000 usec, in units of 32 usec */
7237			reg |= (1000 >> 5);
7238			wr32(E1000_DMACR, reg);
7239
7240			/*
7241			 * no lower threshold to disable
7242			 * coalescing (smart FIFO); UTRESH=0
7243			 */
7244			wr32(E1000_DMCRTRH, 0);
7245
7246			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7247
7248			wr32(E1000_DMCTLX, reg);
7249
7250			/*
7251			 * free space in tx packet buffer to wake from
7252			 * DMA coal
7253			 */
7254			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7255			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7256
7257			/*
7258			 * make low power state decision controlled
7259			 * by DMA coal
7260			 */
7261			reg = rd32(E1000_PCIEMISC);
7262			reg &= ~E1000_PCIEMISC_LX_DECISION;
7263			wr32(E1000_PCIEMISC, reg);
7264		} /* endif adapter->dmac is not disabled */
7265	} else if (hw->mac.type == e1000_82580) {
7266		u32 reg = rd32(E1000_PCIEMISC);
7267		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7268		wr32(E1000_DMACR, 0);
7269	}
7270}
7271
7272/* igb_main.c */
7273