igb_main.c revision d9dd966d7fc088a6bed991c2b1e2fba4485e0a31
1/******************************************************************************* 2 3 Intel(R) Gigabit Ethernet Linux driver 4 Copyright(c) 2007-2012 Intel Corporation. 5 6 This program is free software; you can redistribute it and/or modify it 7 under the terms and conditions of the GNU General Public License, 8 version 2, as published by the Free Software Foundation. 9 10 This program is distributed in the hope it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 more details. 14 15 You should have received a copy of the GNU General Public License along with 16 this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 18 19 The full GNU General Public License is included in this distribution in 20 the file called "COPYING". 21 22 Contact Information: 23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 25 26*******************************************************************************/ 27 28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 29 30#include <linux/module.h> 31#include <linux/types.h> 32#include <linux/init.h> 33#include <linux/bitops.h> 34#include <linux/vmalloc.h> 35#include <linux/pagemap.h> 36#include <linux/netdevice.h> 37#include <linux/ipv6.h> 38#include <linux/slab.h> 39#include <net/checksum.h> 40#include <net/ip6_checksum.h> 41#include <linux/net_tstamp.h> 42#include <linux/mii.h> 43#include <linux/ethtool.h> 44#include <linux/if.h> 45#include <linux/if_vlan.h> 46#include <linux/pci.h> 47#include <linux/pci-aspm.h> 48#include <linux/delay.h> 49#include <linux/interrupt.h> 50#include <linux/ip.h> 51#include <linux/tcp.h> 52#include <linux/sctp.h> 53#include <linux/if_ether.h> 54#include <linux/aer.h> 55#include <linux/prefetch.h> 56#include <linux/pm_runtime.h> 57#ifdef CONFIG_IGB_DCA 58#include <linux/dca.h> 59#endif 60#include "igb.h" 61 62#define MAJ 3 63#define MIN 2 64#define BUILD 10 65#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ 66__stringify(BUILD) "-k" 67char igb_driver_name[] = "igb"; 68char igb_driver_version[] = DRV_VERSION; 69static const char igb_driver_string[] = 70 "Intel(R) Gigabit Ethernet Network Driver"; 71static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation."; 72 73static const struct e1000_info *igb_info_tbl[] = { 74 [board_82575] = &e1000_82575_info, 75}; 76 77static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { 78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, 79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, 80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, 81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, 82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, 83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, 84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }, 85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, 86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, 87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, 88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 }, 89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 }, 90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 }, 91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 }, 92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, 93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, 94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 }, 95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, 96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, 97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, 98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, 99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, 100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, 101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, 102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, 103 /* required last entry */ 104 {0, } 105}; 106 107MODULE_DEVICE_TABLE(pci, igb_pci_tbl); 108 109void igb_reset(struct igb_adapter *); 110static int igb_setup_all_tx_resources(struct igb_adapter *); 111static int igb_setup_all_rx_resources(struct igb_adapter *); 112static void igb_free_all_tx_resources(struct igb_adapter *); 113static void igb_free_all_rx_resources(struct igb_adapter *); 114static void igb_setup_mrqc(struct igb_adapter *); 115static int igb_probe(struct pci_dev *, const struct pci_device_id *); 116static void __devexit igb_remove(struct pci_dev *pdev); 117static void igb_init_hw_timer(struct igb_adapter *adapter); 118static int igb_sw_init(struct igb_adapter *); 119static int igb_open(struct net_device *); 120static int igb_close(struct net_device *); 121static void igb_configure_tx(struct igb_adapter *); 122static void igb_configure_rx(struct igb_adapter *); 123static void igb_clean_all_tx_rings(struct igb_adapter *); 124static void igb_clean_all_rx_rings(struct igb_adapter *); 125static void igb_clean_tx_ring(struct igb_ring *); 126static void igb_clean_rx_ring(struct igb_ring *); 127static void igb_set_rx_mode(struct net_device *); 128static void igb_update_phy_info(unsigned long); 129static void igb_watchdog(unsigned long); 130static void igb_watchdog_task(struct work_struct *); 131static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); 132static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, 133 struct rtnl_link_stats64 *stats); 134static int igb_change_mtu(struct net_device *, int); 135static int igb_set_mac(struct net_device *, void *); 136static void igb_set_uta(struct igb_adapter *adapter); 137static irqreturn_t igb_intr(int irq, void *); 138static irqreturn_t igb_intr_msi(int irq, void *); 139static irqreturn_t igb_msix_other(int irq, void *); 140static irqreturn_t igb_msix_ring(int irq, void *); 141#ifdef CONFIG_IGB_DCA 142static void igb_update_dca(struct igb_q_vector *); 143static void igb_setup_dca(struct igb_adapter *); 144#endif /* CONFIG_IGB_DCA */ 145static int igb_poll(struct napi_struct *, int); 146static bool igb_clean_tx_irq(struct igb_q_vector *); 147static bool igb_clean_rx_irq(struct igb_q_vector *, int); 148static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); 149static void igb_tx_timeout(struct net_device *); 150static void igb_reset_task(struct work_struct *); 151static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); 152static int igb_vlan_rx_add_vid(struct net_device *, u16); 153static int igb_vlan_rx_kill_vid(struct net_device *, u16); 154static void igb_restore_vlan(struct igb_adapter *); 155static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); 156static void igb_ping_all_vfs(struct igb_adapter *); 157static void igb_msg_task(struct igb_adapter *); 158static void igb_vmm_control(struct igb_adapter *); 159static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); 160static void igb_restore_vf_multicasts(struct igb_adapter *adapter); 161static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); 162static int igb_ndo_set_vf_vlan(struct net_device *netdev, 163 int vf, u16 vlan, u8 qos); 164static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); 165static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, 166 struct ifla_vf_info *ivi); 167static void igb_check_vf_rate_limit(struct igb_adapter *); 168 169#ifdef CONFIG_PCI_IOV 170static int igb_vf_configure(struct igb_adapter *adapter, int vf); 171static int igb_find_enabled_vfs(struct igb_adapter *adapter); 172static int igb_check_vf_assignment(struct igb_adapter *adapter); 173#endif 174 175#ifdef CONFIG_PM 176#ifdef CONFIG_PM_SLEEP 177static int igb_suspend(struct device *); 178#endif 179static int igb_resume(struct device *); 180#ifdef CONFIG_PM_RUNTIME 181static int igb_runtime_suspend(struct device *dev); 182static int igb_runtime_resume(struct device *dev); 183static int igb_runtime_idle(struct device *dev); 184#endif 185static const struct dev_pm_ops igb_pm_ops = { 186 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) 187 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, 188 igb_runtime_idle) 189}; 190#endif 191static void igb_shutdown(struct pci_dev *); 192#ifdef CONFIG_IGB_DCA 193static int igb_notify_dca(struct notifier_block *, unsigned long, void *); 194static struct notifier_block dca_notifier = { 195 .notifier_call = igb_notify_dca, 196 .next = NULL, 197 .priority = 0 198}; 199#endif 200#ifdef CONFIG_NET_POLL_CONTROLLER 201/* for netdump / net console */ 202static void igb_netpoll(struct net_device *); 203#endif 204#ifdef CONFIG_PCI_IOV 205static unsigned int max_vfs = 0; 206module_param(max_vfs, uint, 0); 207MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate " 208 "per physical function"); 209#endif /* CONFIG_PCI_IOV */ 210 211static pci_ers_result_t igb_io_error_detected(struct pci_dev *, 212 pci_channel_state_t); 213static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); 214static void igb_io_resume(struct pci_dev *); 215 216static struct pci_error_handlers igb_err_handler = { 217 .error_detected = igb_io_error_detected, 218 .slot_reset = igb_io_slot_reset, 219 .resume = igb_io_resume, 220}; 221 222static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); 223 224static struct pci_driver igb_driver = { 225 .name = igb_driver_name, 226 .id_table = igb_pci_tbl, 227 .probe = igb_probe, 228 .remove = __devexit_p(igb_remove), 229#ifdef CONFIG_PM 230 .driver.pm = &igb_pm_ops, 231#endif 232 .shutdown = igb_shutdown, 233 .err_handler = &igb_err_handler 234}; 235 236MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); 237MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); 238MODULE_LICENSE("GPL"); 239MODULE_VERSION(DRV_VERSION); 240 241struct igb_reg_info { 242 u32 ofs; 243 char *name; 244}; 245 246static const struct igb_reg_info igb_reg_info_tbl[] = { 247 248 /* General Registers */ 249 {E1000_CTRL, "CTRL"}, 250 {E1000_STATUS, "STATUS"}, 251 {E1000_CTRL_EXT, "CTRL_EXT"}, 252 253 /* Interrupt Registers */ 254 {E1000_ICR, "ICR"}, 255 256 /* RX Registers */ 257 {E1000_RCTL, "RCTL"}, 258 {E1000_RDLEN(0), "RDLEN"}, 259 {E1000_RDH(0), "RDH"}, 260 {E1000_RDT(0), "RDT"}, 261 {E1000_RXDCTL(0), "RXDCTL"}, 262 {E1000_RDBAL(0), "RDBAL"}, 263 {E1000_RDBAH(0), "RDBAH"}, 264 265 /* TX Registers */ 266 {E1000_TCTL, "TCTL"}, 267 {E1000_TDBAL(0), "TDBAL"}, 268 {E1000_TDBAH(0), "TDBAH"}, 269 {E1000_TDLEN(0), "TDLEN"}, 270 {E1000_TDH(0), "TDH"}, 271 {E1000_TDT(0), "TDT"}, 272 {E1000_TXDCTL(0), "TXDCTL"}, 273 {E1000_TDFH, "TDFH"}, 274 {E1000_TDFT, "TDFT"}, 275 {E1000_TDFHS, "TDFHS"}, 276 {E1000_TDFPC, "TDFPC"}, 277 278 /* List Terminator */ 279 {} 280}; 281 282/* 283 * igb_regdump - register printout routine 284 */ 285static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) 286{ 287 int n = 0; 288 char rname[16]; 289 u32 regs[8]; 290 291 switch (reginfo->ofs) { 292 case E1000_RDLEN(0): 293 for (n = 0; n < 4; n++) 294 regs[n] = rd32(E1000_RDLEN(n)); 295 break; 296 case E1000_RDH(0): 297 for (n = 0; n < 4; n++) 298 regs[n] = rd32(E1000_RDH(n)); 299 break; 300 case E1000_RDT(0): 301 for (n = 0; n < 4; n++) 302 regs[n] = rd32(E1000_RDT(n)); 303 break; 304 case E1000_RXDCTL(0): 305 for (n = 0; n < 4; n++) 306 regs[n] = rd32(E1000_RXDCTL(n)); 307 break; 308 case E1000_RDBAL(0): 309 for (n = 0; n < 4; n++) 310 regs[n] = rd32(E1000_RDBAL(n)); 311 break; 312 case E1000_RDBAH(0): 313 for (n = 0; n < 4; n++) 314 regs[n] = rd32(E1000_RDBAH(n)); 315 break; 316 case E1000_TDBAL(0): 317 for (n = 0; n < 4; n++) 318 regs[n] = rd32(E1000_RDBAL(n)); 319 break; 320 case E1000_TDBAH(0): 321 for (n = 0; n < 4; n++) 322 regs[n] = rd32(E1000_TDBAH(n)); 323 break; 324 case E1000_TDLEN(0): 325 for (n = 0; n < 4; n++) 326 regs[n] = rd32(E1000_TDLEN(n)); 327 break; 328 case E1000_TDH(0): 329 for (n = 0; n < 4; n++) 330 regs[n] = rd32(E1000_TDH(n)); 331 break; 332 case E1000_TDT(0): 333 for (n = 0; n < 4; n++) 334 regs[n] = rd32(E1000_TDT(n)); 335 break; 336 case E1000_TXDCTL(0): 337 for (n = 0; n < 4; n++) 338 regs[n] = rd32(E1000_TXDCTL(n)); 339 break; 340 default: 341 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs)); 342 return; 343 } 344 345 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]"); 346 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], 347 regs[2], regs[3]); 348} 349 350/* 351 * igb_dump - Print registers, tx-rings and rx-rings 352 */ 353static void igb_dump(struct igb_adapter *adapter) 354{ 355 struct net_device *netdev = adapter->netdev; 356 struct e1000_hw *hw = &adapter->hw; 357 struct igb_reg_info *reginfo; 358 struct igb_ring *tx_ring; 359 union e1000_adv_tx_desc *tx_desc; 360 struct my_u0 { u64 a; u64 b; } *u0; 361 struct igb_ring *rx_ring; 362 union e1000_adv_rx_desc *rx_desc; 363 u32 staterr; 364 u16 i, n; 365 366 if (!netif_msg_hw(adapter)) 367 return; 368 369 /* Print netdevice Info */ 370 if (netdev) { 371 dev_info(&adapter->pdev->dev, "Net device Info\n"); 372 pr_info("Device Name state trans_start " 373 "last_rx\n"); 374 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, 375 netdev->state, netdev->trans_start, netdev->last_rx); 376 } 377 378 /* Print Registers */ 379 dev_info(&adapter->pdev->dev, "Register Dump\n"); 380 pr_info(" Register Name Value\n"); 381 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl; 382 reginfo->name; reginfo++) { 383 igb_regdump(hw, reginfo); 384 } 385 386 /* Print TX Ring Summary */ 387 if (!netdev || !netif_running(netdev)) 388 goto exit; 389 390 dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); 391 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); 392 for (n = 0; n < adapter->num_tx_queues; n++) { 393 struct igb_tx_buffer *buffer_info; 394 tx_ring = adapter->tx_ring[n]; 395 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; 396 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", 397 n, tx_ring->next_to_use, tx_ring->next_to_clean, 398 (u64)buffer_info->dma, 399 buffer_info->length, 400 buffer_info->next_to_watch, 401 (u64)buffer_info->time_stamp); 402 } 403 404 /* Print TX Rings */ 405 if (!netif_msg_tx_done(adapter)) 406 goto rx_ring_summary; 407 408 dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); 409 410 /* Transmit Descriptor Formats 411 * 412 * Advanced Transmit Descriptor 413 * +--------------------------------------------------------------+ 414 * 0 | Buffer Address [63:0] | 415 * +--------------------------------------------------------------+ 416 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN | 417 * +--------------------------------------------------------------+ 418 * 63 46 45 40 39 38 36 35 32 31 24 15 0 419 */ 420 421 for (n = 0; n < adapter->num_tx_queues; n++) { 422 tx_ring = adapter->tx_ring[n]; 423 pr_info("------------------------------------\n"); 424 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); 425 pr_info("------------------------------------\n"); 426 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " 427 "[bi->dma ] leng ntw timestamp " 428 "bi->skb\n"); 429 430 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { 431 const char *next_desc; 432 struct igb_tx_buffer *buffer_info; 433 tx_desc = IGB_TX_DESC(tx_ring, i); 434 buffer_info = &tx_ring->tx_buffer_info[i]; 435 u0 = (struct my_u0 *)tx_desc; 436 if (i == tx_ring->next_to_use && 437 i == tx_ring->next_to_clean) 438 next_desc = " NTC/U"; 439 else if (i == tx_ring->next_to_use) 440 next_desc = " NTU"; 441 else if (i == tx_ring->next_to_clean) 442 next_desc = " NTC"; 443 else 444 next_desc = ""; 445 446 pr_info("T [0x%03X] %016llX %016llX %016llX" 447 " %04X %p %016llX %p%s\n", i, 448 le64_to_cpu(u0->a), 449 le64_to_cpu(u0->b), 450 (u64)buffer_info->dma, 451 buffer_info->length, 452 buffer_info->next_to_watch, 453 (u64)buffer_info->time_stamp, 454 buffer_info->skb, next_desc); 455 456 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) 457 print_hex_dump(KERN_INFO, "", 458 DUMP_PREFIX_ADDRESS, 459 16, 1, phys_to_virt(buffer_info->dma), 460 buffer_info->length, true); 461 } 462 } 463 464 /* Print RX Rings Summary */ 465rx_ring_summary: 466 dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); 467 pr_info("Queue [NTU] [NTC]\n"); 468 for (n = 0; n < adapter->num_rx_queues; n++) { 469 rx_ring = adapter->rx_ring[n]; 470 pr_info(" %5d %5X %5X\n", 471 n, rx_ring->next_to_use, rx_ring->next_to_clean); 472 } 473 474 /* Print RX Rings */ 475 if (!netif_msg_rx_status(adapter)) 476 goto exit; 477 478 dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); 479 480 /* Advanced Receive Descriptor (Read) Format 481 * 63 1 0 482 * +-----------------------------------------------------+ 483 * 0 | Packet Buffer Address [63:1] |A0/NSE| 484 * +----------------------------------------------+------+ 485 * 8 | Header Buffer Address [63:1] | DD | 486 * +-----------------------------------------------------+ 487 * 488 * 489 * Advanced Receive Descriptor (Write-Back) Format 490 * 491 * 63 48 47 32 31 30 21 20 17 16 4 3 0 492 * +------------------------------------------------------+ 493 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | 494 * | Checksum Ident | | | | Type | Type | 495 * +------------------------------------------------------+ 496 * 8 | VLAN Tag | Length | Extended Error | Extended Status | 497 * +------------------------------------------------------+ 498 * 63 48 47 32 31 20 19 0 499 */ 500 501 for (n = 0; n < adapter->num_rx_queues; n++) { 502 rx_ring = adapter->rx_ring[n]; 503 pr_info("------------------------------------\n"); 504 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); 505 pr_info("------------------------------------\n"); 506 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " 507 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); 508 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" 509 "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); 510 511 for (i = 0; i < rx_ring->count; i++) { 512 const char *next_desc; 513 struct igb_rx_buffer *buffer_info; 514 buffer_info = &rx_ring->rx_buffer_info[i]; 515 rx_desc = IGB_RX_DESC(rx_ring, i); 516 u0 = (struct my_u0 *)rx_desc; 517 staterr = le32_to_cpu(rx_desc->wb.upper.status_error); 518 519 if (i == rx_ring->next_to_use) 520 next_desc = " NTU"; 521 else if (i == rx_ring->next_to_clean) 522 next_desc = " NTC"; 523 else 524 next_desc = ""; 525 526 if (staterr & E1000_RXD_STAT_DD) { 527 /* Descriptor Done */ 528 pr_info("%s[0x%03X] %016llX %016llX -------" 529 "--------- %p%s\n", "RWB", i, 530 le64_to_cpu(u0->a), 531 le64_to_cpu(u0->b), 532 buffer_info->skb, next_desc); 533 } else { 534 pr_info("%s[0x%03X] %016llX %016llX %016llX" 535 " %p%s\n", "R ", i, 536 le64_to_cpu(u0->a), 537 le64_to_cpu(u0->b), 538 (u64)buffer_info->dma, 539 buffer_info->skb, next_desc); 540 541 if (netif_msg_pktdata(adapter)) { 542 print_hex_dump(KERN_INFO, "", 543 DUMP_PREFIX_ADDRESS, 544 16, 1, 545 phys_to_virt(buffer_info->dma), 546 IGB_RX_HDR_LEN, true); 547 print_hex_dump(KERN_INFO, "", 548 DUMP_PREFIX_ADDRESS, 549 16, 1, 550 phys_to_virt( 551 buffer_info->page_dma + 552 buffer_info->page_offset), 553 PAGE_SIZE/2, true); 554 } 555 } 556 } 557 } 558 559exit: 560 return; 561} 562 563 564/** 565 * igb_read_clock - read raw cycle counter (to be used by time counter) 566 */ 567static cycle_t igb_read_clock(const struct cyclecounter *tc) 568{ 569 struct igb_adapter *adapter = 570 container_of(tc, struct igb_adapter, cycles); 571 struct e1000_hw *hw = &adapter->hw; 572 u64 stamp = 0; 573 int shift = 0; 574 575 /* 576 * The timestamp latches on lowest register read. For the 82580 577 * the lowest register is SYSTIMR instead of SYSTIML. However we never 578 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it. 579 */ 580 if (hw->mac.type >= e1000_82580) { 581 stamp = rd32(E1000_SYSTIMR) >> 8; 582 shift = IGB_82580_TSYNC_SHIFT; 583 } 584 585 stamp |= (u64)rd32(E1000_SYSTIML) << shift; 586 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32); 587 return stamp; 588} 589 590/** 591 * igb_get_hw_dev - return device 592 * used by hardware layer to print debugging information 593 **/ 594struct net_device *igb_get_hw_dev(struct e1000_hw *hw) 595{ 596 struct igb_adapter *adapter = hw->back; 597 return adapter->netdev; 598} 599 600/** 601 * igb_init_module - Driver Registration Routine 602 * 603 * igb_init_module is the first routine called when the driver is 604 * loaded. All it does is register with the PCI subsystem. 605 **/ 606static int __init igb_init_module(void) 607{ 608 int ret; 609 pr_info("%s - version %s\n", 610 igb_driver_string, igb_driver_version); 611 612 pr_info("%s\n", igb_copyright); 613 614#ifdef CONFIG_IGB_DCA 615 dca_register_notify(&dca_notifier); 616#endif 617 ret = pci_register_driver(&igb_driver); 618 return ret; 619} 620 621module_init(igb_init_module); 622 623/** 624 * igb_exit_module - Driver Exit Cleanup Routine 625 * 626 * igb_exit_module is called just before the driver is removed 627 * from memory. 628 **/ 629static void __exit igb_exit_module(void) 630{ 631#ifdef CONFIG_IGB_DCA 632 dca_unregister_notify(&dca_notifier); 633#endif 634 pci_unregister_driver(&igb_driver); 635} 636 637module_exit(igb_exit_module); 638 639#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) 640/** 641 * igb_cache_ring_register - Descriptor ring to register mapping 642 * @adapter: board private structure to initialize 643 * 644 * Once we know the feature-set enabled for the device, we'll cache 645 * the register offset the descriptor ring is assigned to. 646 **/ 647static void igb_cache_ring_register(struct igb_adapter *adapter) 648{ 649 int i = 0, j = 0; 650 u32 rbase_offset = adapter->vfs_allocated_count; 651 652 switch (adapter->hw.mac.type) { 653 case e1000_82576: 654 /* The queues are allocated for virtualization such that VF 0 655 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. 656 * In order to avoid collision we start at the first free queue 657 * and continue consuming queues in the same sequence 658 */ 659 if (adapter->vfs_allocated_count) { 660 for (; i < adapter->rss_queues; i++) 661 adapter->rx_ring[i]->reg_idx = rbase_offset + 662 Q_IDX_82576(i); 663 } 664 case e1000_82575: 665 case e1000_82580: 666 case e1000_i350: 667 default: 668 for (; i < adapter->num_rx_queues; i++) 669 adapter->rx_ring[i]->reg_idx = rbase_offset + i; 670 for (; j < adapter->num_tx_queues; j++) 671 adapter->tx_ring[j]->reg_idx = rbase_offset + j; 672 break; 673 } 674} 675 676static void igb_free_queues(struct igb_adapter *adapter) 677{ 678 int i; 679 680 for (i = 0; i < adapter->num_tx_queues; i++) { 681 kfree(adapter->tx_ring[i]); 682 adapter->tx_ring[i] = NULL; 683 } 684 for (i = 0; i < adapter->num_rx_queues; i++) { 685 kfree(adapter->rx_ring[i]); 686 adapter->rx_ring[i] = NULL; 687 } 688 adapter->num_rx_queues = 0; 689 adapter->num_tx_queues = 0; 690} 691 692/** 693 * igb_alloc_queues - Allocate memory for all rings 694 * @adapter: board private structure to initialize 695 * 696 * We allocate one ring per queue at run-time since we don't know the 697 * number of queues at compile-time. 698 **/ 699static int igb_alloc_queues(struct igb_adapter *adapter) 700{ 701 struct igb_ring *ring; 702 int i; 703 int orig_node = adapter->node; 704 705 for (i = 0; i < adapter->num_tx_queues; i++) { 706 if (orig_node == -1) { 707 int cur_node = next_online_node(adapter->node); 708 if (cur_node == MAX_NUMNODES) 709 cur_node = first_online_node; 710 adapter->node = cur_node; 711 } 712 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, 713 adapter->node); 714 if (!ring) 715 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); 716 if (!ring) 717 goto err; 718 ring->count = adapter->tx_ring_count; 719 ring->queue_index = i; 720 ring->dev = &adapter->pdev->dev; 721 ring->netdev = adapter->netdev; 722 ring->numa_node = adapter->node; 723 /* For 82575, context index must be unique per ring. */ 724 if (adapter->hw.mac.type == e1000_82575) 725 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); 726 adapter->tx_ring[i] = ring; 727 } 728 /* Restore the adapter's original node */ 729 adapter->node = orig_node; 730 731 for (i = 0; i < adapter->num_rx_queues; i++) { 732 if (orig_node == -1) { 733 int cur_node = next_online_node(adapter->node); 734 if (cur_node == MAX_NUMNODES) 735 cur_node = first_online_node; 736 adapter->node = cur_node; 737 } 738 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, 739 adapter->node); 740 if (!ring) 741 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); 742 if (!ring) 743 goto err; 744 ring->count = adapter->rx_ring_count; 745 ring->queue_index = i; 746 ring->dev = &adapter->pdev->dev; 747 ring->netdev = adapter->netdev; 748 ring->numa_node = adapter->node; 749 /* set flag indicating ring supports SCTP checksum offload */ 750 if (adapter->hw.mac.type >= e1000_82576) 751 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); 752 753 /* On i350, loopback VLAN packets have the tag byte-swapped. */ 754 if (adapter->hw.mac.type == e1000_i350) 755 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); 756 757 adapter->rx_ring[i] = ring; 758 } 759 /* Restore the adapter's original node */ 760 adapter->node = orig_node; 761 762 igb_cache_ring_register(adapter); 763 764 return 0; 765 766err: 767 /* Restore the adapter's original node */ 768 adapter->node = orig_node; 769 igb_free_queues(adapter); 770 771 return -ENOMEM; 772} 773 774/** 775 * igb_write_ivar - configure ivar for given MSI-X vector 776 * @hw: pointer to the HW structure 777 * @msix_vector: vector number we are allocating to a given ring 778 * @index: row index of IVAR register to write within IVAR table 779 * @offset: column offset of in IVAR, should be multiple of 8 780 * 781 * This function is intended to handle the writing of the IVAR register 782 * for adapters 82576 and newer. The IVAR table consists of 2 columns, 783 * each containing an cause allocation for an Rx and Tx ring, and a 784 * variable number of rows depending on the number of queues supported. 785 **/ 786static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, 787 int index, int offset) 788{ 789 u32 ivar = array_rd32(E1000_IVAR0, index); 790 791 /* clear any bits that are currently set */ 792 ivar &= ~((u32)0xFF << offset); 793 794 /* write vector and valid bit */ 795 ivar |= (msix_vector | E1000_IVAR_VALID) << offset; 796 797 array_wr32(E1000_IVAR0, index, ivar); 798} 799 800#define IGB_N0_QUEUE -1 801static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) 802{ 803 struct igb_adapter *adapter = q_vector->adapter; 804 struct e1000_hw *hw = &adapter->hw; 805 int rx_queue = IGB_N0_QUEUE; 806 int tx_queue = IGB_N0_QUEUE; 807 u32 msixbm = 0; 808 809 if (q_vector->rx.ring) 810 rx_queue = q_vector->rx.ring->reg_idx; 811 if (q_vector->tx.ring) 812 tx_queue = q_vector->tx.ring->reg_idx; 813 814 switch (hw->mac.type) { 815 case e1000_82575: 816 /* The 82575 assigns vectors using a bitmask, which matches the 817 bitmask for the EICR/EIMS/EIMC registers. To assign one 818 or more queues to a vector, we write the appropriate bits 819 into the MSIXBM register for that vector. */ 820 if (rx_queue > IGB_N0_QUEUE) 821 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; 822 if (tx_queue > IGB_N0_QUEUE) 823 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; 824 if (!adapter->msix_entries && msix_vector == 0) 825 msixbm |= E1000_EIMS_OTHER; 826 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); 827 q_vector->eims_value = msixbm; 828 break; 829 case e1000_82576: 830 /* 831 * 82576 uses a table that essentially consists of 2 columns 832 * with 8 rows. The ordering is column-major so we use the 833 * lower 3 bits as the row index, and the 4th bit as the 834 * column offset. 835 */ 836 if (rx_queue > IGB_N0_QUEUE) 837 igb_write_ivar(hw, msix_vector, 838 rx_queue & 0x7, 839 (rx_queue & 0x8) << 1); 840 if (tx_queue > IGB_N0_QUEUE) 841 igb_write_ivar(hw, msix_vector, 842 tx_queue & 0x7, 843 ((tx_queue & 0x8) << 1) + 8); 844 q_vector->eims_value = 1 << msix_vector; 845 break; 846 case e1000_82580: 847 case e1000_i350: 848 /* 849 * On 82580 and newer adapters the scheme is similar to 82576 850 * however instead of ordering column-major we have things 851 * ordered row-major. So we traverse the table by using 852 * bit 0 as the column offset, and the remaining bits as the 853 * row index. 854 */ 855 if (rx_queue > IGB_N0_QUEUE) 856 igb_write_ivar(hw, msix_vector, 857 rx_queue >> 1, 858 (rx_queue & 0x1) << 4); 859 if (tx_queue > IGB_N0_QUEUE) 860 igb_write_ivar(hw, msix_vector, 861 tx_queue >> 1, 862 ((tx_queue & 0x1) << 4) + 8); 863 q_vector->eims_value = 1 << msix_vector; 864 break; 865 default: 866 BUG(); 867 break; 868 } 869 870 /* add q_vector eims value to global eims_enable_mask */ 871 adapter->eims_enable_mask |= q_vector->eims_value; 872 873 /* configure q_vector to set itr on first interrupt */ 874 q_vector->set_itr = 1; 875} 876 877/** 878 * igb_configure_msix - Configure MSI-X hardware 879 * 880 * igb_configure_msix sets up the hardware to properly 881 * generate MSI-X interrupts. 882 **/ 883static void igb_configure_msix(struct igb_adapter *adapter) 884{ 885 u32 tmp; 886 int i, vector = 0; 887 struct e1000_hw *hw = &adapter->hw; 888 889 adapter->eims_enable_mask = 0; 890 891 /* set vector for other causes, i.e. link changes */ 892 switch (hw->mac.type) { 893 case e1000_82575: 894 tmp = rd32(E1000_CTRL_EXT); 895 /* enable MSI-X PBA support*/ 896 tmp |= E1000_CTRL_EXT_PBA_CLR; 897 898 /* Auto-Mask interrupts upon ICR read. */ 899 tmp |= E1000_CTRL_EXT_EIAME; 900 tmp |= E1000_CTRL_EXT_IRCA; 901 902 wr32(E1000_CTRL_EXT, tmp); 903 904 /* enable msix_other interrupt */ 905 array_wr32(E1000_MSIXBM(0), vector++, 906 E1000_EIMS_OTHER); 907 adapter->eims_other = E1000_EIMS_OTHER; 908 909 break; 910 911 case e1000_82576: 912 case e1000_82580: 913 case e1000_i350: 914 /* Turn on MSI-X capability first, or our settings 915 * won't stick. And it will take days to debug. */ 916 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | 917 E1000_GPIE_PBA | E1000_GPIE_EIAME | 918 E1000_GPIE_NSICR); 919 920 /* enable msix_other interrupt */ 921 adapter->eims_other = 1 << vector; 922 tmp = (vector++ | E1000_IVAR_VALID) << 8; 923 924 wr32(E1000_IVAR_MISC, tmp); 925 break; 926 default: 927 /* do nothing, since nothing else supports MSI-X */ 928 break; 929 } /* switch (hw->mac.type) */ 930 931 adapter->eims_enable_mask |= adapter->eims_other; 932 933 for (i = 0; i < adapter->num_q_vectors; i++) 934 igb_assign_vector(adapter->q_vector[i], vector++); 935 936 wrfl(); 937} 938 939/** 940 * igb_request_msix - Initialize MSI-X interrupts 941 * 942 * igb_request_msix allocates MSI-X vectors and requests interrupts from the 943 * kernel. 944 **/ 945static int igb_request_msix(struct igb_adapter *adapter) 946{ 947 struct net_device *netdev = adapter->netdev; 948 struct e1000_hw *hw = &adapter->hw; 949 int i, err = 0, vector = 0; 950 951 err = request_irq(adapter->msix_entries[vector].vector, 952 igb_msix_other, 0, netdev->name, adapter); 953 if (err) 954 goto out; 955 vector++; 956 957 for (i = 0; i < adapter->num_q_vectors; i++) { 958 struct igb_q_vector *q_vector = adapter->q_vector[i]; 959 960 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); 961 962 if (q_vector->rx.ring && q_vector->tx.ring) 963 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 964 q_vector->rx.ring->queue_index); 965 else if (q_vector->tx.ring) 966 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 967 q_vector->tx.ring->queue_index); 968 else if (q_vector->rx.ring) 969 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 970 q_vector->rx.ring->queue_index); 971 else 972 sprintf(q_vector->name, "%s-unused", netdev->name); 973 974 err = request_irq(adapter->msix_entries[vector].vector, 975 igb_msix_ring, 0, q_vector->name, 976 q_vector); 977 if (err) 978 goto out; 979 vector++; 980 } 981 982 igb_configure_msix(adapter); 983 return 0; 984out: 985 return err; 986} 987 988static void igb_reset_interrupt_capability(struct igb_adapter *adapter) 989{ 990 if (adapter->msix_entries) { 991 pci_disable_msix(adapter->pdev); 992 kfree(adapter->msix_entries); 993 adapter->msix_entries = NULL; 994 } else if (adapter->flags & IGB_FLAG_HAS_MSI) { 995 pci_disable_msi(adapter->pdev); 996 } 997} 998 999/** 1000 * igb_free_q_vectors - Free memory allocated for interrupt vectors 1001 * @adapter: board private structure to initialize 1002 * 1003 * This function frees the memory allocated to the q_vectors. In addition if 1004 * NAPI is enabled it will delete any references to the NAPI struct prior 1005 * to freeing the q_vector. 1006 **/ 1007static void igb_free_q_vectors(struct igb_adapter *adapter) 1008{ 1009 int v_idx; 1010 1011 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { 1012 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 1013 adapter->q_vector[v_idx] = NULL; 1014 if (!q_vector) 1015 continue; 1016 netif_napi_del(&q_vector->napi); 1017 kfree(q_vector); 1018 } 1019 adapter->num_q_vectors = 0; 1020} 1021 1022/** 1023 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts 1024 * 1025 * This function resets the device so that it has 0 rx queues, tx queues, and 1026 * MSI-X interrupts allocated. 1027 */ 1028static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) 1029{ 1030 igb_free_queues(adapter); 1031 igb_free_q_vectors(adapter); 1032 igb_reset_interrupt_capability(adapter); 1033} 1034 1035/** 1036 * igb_set_interrupt_capability - set MSI or MSI-X if supported 1037 * 1038 * Attempt to configure interrupts using the best available 1039 * capabilities of the hardware and kernel. 1040 **/ 1041static int igb_set_interrupt_capability(struct igb_adapter *adapter) 1042{ 1043 int err; 1044 int numvecs, i; 1045 1046 /* Number of supported queues. */ 1047 adapter->num_rx_queues = adapter->rss_queues; 1048 if (adapter->vfs_allocated_count) 1049 adapter->num_tx_queues = 1; 1050 else 1051 adapter->num_tx_queues = adapter->rss_queues; 1052 1053 /* start with one vector for every rx queue */ 1054 numvecs = adapter->num_rx_queues; 1055 1056 /* if tx handler is separate add 1 for every tx queue */ 1057 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) 1058 numvecs += adapter->num_tx_queues; 1059 1060 /* store the number of vectors reserved for queues */ 1061 adapter->num_q_vectors = numvecs; 1062 1063 /* add 1 vector for link status interrupts */ 1064 numvecs++; 1065 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 1066 GFP_KERNEL); 1067 if (!adapter->msix_entries) 1068 goto msi_only; 1069 1070 for (i = 0; i < numvecs; i++) 1071 adapter->msix_entries[i].entry = i; 1072 1073 err = pci_enable_msix(adapter->pdev, 1074 adapter->msix_entries, 1075 numvecs); 1076 if (err == 0) 1077 goto out; 1078 1079 igb_reset_interrupt_capability(adapter); 1080 1081 /* If we can't do MSI-X, try MSI */ 1082msi_only: 1083#ifdef CONFIG_PCI_IOV 1084 /* disable SR-IOV for non MSI-X configurations */ 1085 if (adapter->vf_data) { 1086 struct e1000_hw *hw = &adapter->hw; 1087 /* disable iov and allow time for transactions to clear */ 1088 pci_disable_sriov(adapter->pdev); 1089 msleep(500); 1090 1091 kfree(adapter->vf_data); 1092 adapter->vf_data = NULL; 1093 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 1094 wrfl(); 1095 msleep(100); 1096 dev_info(&adapter->pdev->dev, "IOV Disabled\n"); 1097 } 1098#endif 1099 adapter->vfs_allocated_count = 0; 1100 adapter->rss_queues = 1; 1101 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 1102 adapter->num_rx_queues = 1; 1103 adapter->num_tx_queues = 1; 1104 adapter->num_q_vectors = 1; 1105 if (!pci_enable_msi(adapter->pdev)) 1106 adapter->flags |= IGB_FLAG_HAS_MSI; 1107out: 1108 /* Notify the stack of the (possibly) reduced queue counts. */ 1109 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); 1110 return netif_set_real_num_rx_queues(adapter->netdev, 1111 adapter->num_rx_queues); 1112} 1113 1114/** 1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors 1116 * @adapter: board private structure to initialize 1117 * 1118 * We allocate one q_vector per queue interrupt. If allocation fails we 1119 * return -ENOMEM. 1120 **/ 1121static int igb_alloc_q_vectors(struct igb_adapter *adapter) 1122{ 1123 struct igb_q_vector *q_vector; 1124 struct e1000_hw *hw = &adapter->hw; 1125 int v_idx; 1126 int orig_node = adapter->node; 1127 1128 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { 1129 if ((adapter->num_q_vectors == (adapter->num_rx_queues + 1130 adapter->num_tx_queues)) && 1131 (adapter->num_rx_queues == v_idx)) 1132 adapter->node = orig_node; 1133 if (orig_node == -1) { 1134 int cur_node = next_online_node(adapter->node); 1135 if (cur_node == MAX_NUMNODES) 1136 cur_node = first_online_node; 1137 adapter->node = cur_node; 1138 } 1139 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL, 1140 adapter->node); 1141 if (!q_vector) 1142 q_vector = kzalloc(sizeof(struct igb_q_vector), 1143 GFP_KERNEL); 1144 if (!q_vector) 1145 goto err_out; 1146 q_vector->adapter = adapter; 1147 q_vector->itr_register = hw->hw_addr + E1000_EITR(0); 1148 q_vector->itr_val = IGB_START_ITR; 1149 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); 1150 adapter->q_vector[v_idx] = q_vector; 1151 } 1152 /* Restore the adapter's original node */ 1153 adapter->node = orig_node; 1154 1155 return 0; 1156 1157err_out: 1158 /* Restore the adapter's original node */ 1159 adapter->node = orig_node; 1160 igb_free_q_vectors(adapter); 1161 return -ENOMEM; 1162} 1163 1164static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter, 1165 int ring_idx, int v_idx) 1166{ 1167 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 1168 1169 q_vector->rx.ring = adapter->rx_ring[ring_idx]; 1170 q_vector->rx.ring->q_vector = q_vector; 1171 q_vector->rx.count++; 1172 q_vector->itr_val = adapter->rx_itr_setting; 1173 if (q_vector->itr_val && q_vector->itr_val <= 3) 1174 q_vector->itr_val = IGB_START_ITR; 1175} 1176 1177static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter, 1178 int ring_idx, int v_idx) 1179{ 1180 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 1181 1182 q_vector->tx.ring = adapter->tx_ring[ring_idx]; 1183 q_vector->tx.ring->q_vector = q_vector; 1184 q_vector->tx.count++; 1185 q_vector->itr_val = adapter->tx_itr_setting; 1186 q_vector->tx.work_limit = adapter->tx_work_limit; 1187 if (q_vector->itr_val && q_vector->itr_val <= 3) 1188 q_vector->itr_val = IGB_START_ITR; 1189} 1190 1191/** 1192 * igb_map_ring_to_vector - maps allocated queues to vectors 1193 * 1194 * This function maps the recently allocated queues to vectors. 1195 **/ 1196static int igb_map_ring_to_vector(struct igb_adapter *adapter) 1197{ 1198 int i; 1199 int v_idx = 0; 1200 1201 if ((adapter->num_q_vectors < adapter->num_rx_queues) || 1202 (adapter->num_q_vectors < adapter->num_tx_queues)) 1203 return -ENOMEM; 1204 1205 if (adapter->num_q_vectors >= 1206 (adapter->num_rx_queues + adapter->num_tx_queues)) { 1207 for (i = 0; i < adapter->num_rx_queues; i++) 1208 igb_map_rx_ring_to_vector(adapter, i, v_idx++); 1209 for (i = 0; i < adapter->num_tx_queues; i++) 1210 igb_map_tx_ring_to_vector(adapter, i, v_idx++); 1211 } else { 1212 for (i = 0; i < adapter->num_rx_queues; i++) { 1213 if (i < adapter->num_tx_queues) 1214 igb_map_tx_ring_to_vector(adapter, i, v_idx); 1215 igb_map_rx_ring_to_vector(adapter, i, v_idx++); 1216 } 1217 for (; i < adapter->num_tx_queues; i++) 1218 igb_map_tx_ring_to_vector(adapter, i, v_idx++); 1219 } 1220 return 0; 1221} 1222 1223/** 1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 1225 * 1226 * This function initializes the interrupts and allocates all of the queues. 1227 **/ 1228static int igb_init_interrupt_scheme(struct igb_adapter *adapter) 1229{ 1230 struct pci_dev *pdev = adapter->pdev; 1231 int err; 1232 1233 err = igb_set_interrupt_capability(adapter); 1234 if (err) 1235 return err; 1236 1237 err = igb_alloc_q_vectors(adapter); 1238 if (err) { 1239 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); 1240 goto err_alloc_q_vectors; 1241 } 1242 1243 err = igb_alloc_queues(adapter); 1244 if (err) { 1245 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 1246 goto err_alloc_queues; 1247 } 1248 1249 err = igb_map_ring_to_vector(adapter); 1250 if (err) { 1251 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n"); 1252 goto err_map_queues; 1253 } 1254 1255 1256 return 0; 1257err_map_queues: 1258 igb_free_queues(adapter); 1259err_alloc_queues: 1260 igb_free_q_vectors(adapter); 1261err_alloc_q_vectors: 1262 igb_reset_interrupt_capability(adapter); 1263 return err; 1264} 1265 1266/** 1267 * igb_request_irq - initialize interrupts 1268 * 1269 * Attempts to configure interrupts using the best available 1270 * capabilities of the hardware and kernel. 1271 **/ 1272static int igb_request_irq(struct igb_adapter *adapter) 1273{ 1274 struct net_device *netdev = adapter->netdev; 1275 struct pci_dev *pdev = adapter->pdev; 1276 int err = 0; 1277 1278 if (adapter->msix_entries) { 1279 err = igb_request_msix(adapter); 1280 if (!err) 1281 goto request_done; 1282 /* fall back to MSI */ 1283 igb_clear_interrupt_scheme(adapter); 1284 if (!pci_enable_msi(pdev)) 1285 adapter->flags |= IGB_FLAG_HAS_MSI; 1286 igb_free_all_tx_resources(adapter); 1287 igb_free_all_rx_resources(adapter); 1288 adapter->num_tx_queues = 1; 1289 adapter->num_rx_queues = 1; 1290 adapter->num_q_vectors = 1; 1291 err = igb_alloc_q_vectors(adapter); 1292 if (err) { 1293 dev_err(&pdev->dev, 1294 "Unable to allocate memory for vectors\n"); 1295 goto request_done; 1296 } 1297 err = igb_alloc_queues(adapter); 1298 if (err) { 1299 dev_err(&pdev->dev, 1300 "Unable to allocate memory for queues\n"); 1301 igb_free_q_vectors(adapter); 1302 goto request_done; 1303 } 1304 igb_setup_all_tx_resources(adapter); 1305 igb_setup_all_rx_resources(adapter); 1306 } 1307 1308 igb_assign_vector(adapter->q_vector[0], 0); 1309 1310 if (adapter->flags & IGB_FLAG_HAS_MSI) { 1311 err = request_irq(pdev->irq, igb_intr_msi, 0, 1312 netdev->name, adapter); 1313 if (!err) 1314 goto request_done; 1315 1316 /* fall back to legacy interrupts */ 1317 igb_reset_interrupt_capability(adapter); 1318 adapter->flags &= ~IGB_FLAG_HAS_MSI; 1319 } 1320 1321 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED, 1322 netdev->name, adapter); 1323 1324 if (err) 1325 dev_err(&pdev->dev, "Error %d getting interrupt\n", 1326 err); 1327 1328request_done: 1329 return err; 1330} 1331 1332static void igb_free_irq(struct igb_adapter *adapter) 1333{ 1334 if (adapter->msix_entries) { 1335 int vector = 0, i; 1336 1337 free_irq(adapter->msix_entries[vector++].vector, adapter); 1338 1339 for (i = 0; i < adapter->num_q_vectors; i++) 1340 free_irq(adapter->msix_entries[vector++].vector, 1341 adapter->q_vector[i]); 1342 } else { 1343 free_irq(adapter->pdev->irq, adapter); 1344 } 1345} 1346 1347/** 1348 * igb_irq_disable - Mask off interrupt generation on the NIC 1349 * @adapter: board private structure 1350 **/ 1351static void igb_irq_disable(struct igb_adapter *adapter) 1352{ 1353 struct e1000_hw *hw = &adapter->hw; 1354 1355 /* 1356 * we need to be careful when disabling interrupts. The VFs are also 1357 * mapped into these registers and so clearing the bits can cause 1358 * issues on the VF drivers so we only need to clear what we set 1359 */ 1360 if (adapter->msix_entries) { 1361 u32 regval = rd32(E1000_EIAM); 1362 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); 1363 wr32(E1000_EIMC, adapter->eims_enable_mask); 1364 regval = rd32(E1000_EIAC); 1365 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask); 1366 } 1367 1368 wr32(E1000_IAM, 0); 1369 wr32(E1000_IMC, ~0); 1370 wrfl(); 1371 if (adapter->msix_entries) { 1372 int i; 1373 for (i = 0; i < adapter->num_q_vectors; i++) 1374 synchronize_irq(adapter->msix_entries[i].vector); 1375 } else { 1376 synchronize_irq(adapter->pdev->irq); 1377 } 1378} 1379 1380/** 1381 * igb_irq_enable - Enable default interrupt generation settings 1382 * @adapter: board private structure 1383 **/ 1384static void igb_irq_enable(struct igb_adapter *adapter) 1385{ 1386 struct e1000_hw *hw = &adapter->hw; 1387 1388 if (adapter->msix_entries) { 1389 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; 1390 u32 regval = rd32(E1000_EIAC); 1391 wr32(E1000_EIAC, regval | adapter->eims_enable_mask); 1392 regval = rd32(E1000_EIAM); 1393 wr32(E1000_EIAM, regval | adapter->eims_enable_mask); 1394 wr32(E1000_EIMS, adapter->eims_enable_mask); 1395 if (adapter->vfs_allocated_count) { 1396 wr32(E1000_MBVFIMR, 0xFF); 1397 ims |= E1000_IMS_VMMB; 1398 } 1399 wr32(E1000_IMS, ims); 1400 } else { 1401 wr32(E1000_IMS, IMS_ENABLE_MASK | 1402 E1000_IMS_DRSTA); 1403 wr32(E1000_IAM, IMS_ENABLE_MASK | 1404 E1000_IMS_DRSTA); 1405 } 1406} 1407 1408static void igb_update_mng_vlan(struct igb_adapter *adapter) 1409{ 1410 struct e1000_hw *hw = &adapter->hw; 1411 u16 vid = adapter->hw.mng_cookie.vlan_id; 1412 u16 old_vid = adapter->mng_vlan_id; 1413 1414 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { 1415 /* add VID to filter table */ 1416 igb_vfta_set(hw, vid, true); 1417 adapter->mng_vlan_id = vid; 1418 } else { 1419 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; 1420 } 1421 1422 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && 1423 (vid != old_vid) && 1424 !test_bit(old_vid, adapter->active_vlans)) { 1425 /* remove VID from filter table */ 1426 igb_vfta_set(hw, old_vid, false); 1427 } 1428} 1429 1430/** 1431 * igb_release_hw_control - release control of the h/w to f/w 1432 * @adapter: address of board private structure 1433 * 1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 1435 * For ASF and Pass Through versions of f/w this means that the 1436 * driver is no longer loaded. 1437 * 1438 **/ 1439static void igb_release_hw_control(struct igb_adapter *adapter) 1440{ 1441 struct e1000_hw *hw = &adapter->hw; 1442 u32 ctrl_ext; 1443 1444 /* Let firmware take over control of h/w */ 1445 ctrl_ext = rd32(E1000_CTRL_EXT); 1446 wr32(E1000_CTRL_EXT, 1447 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 1448} 1449 1450/** 1451 * igb_get_hw_control - get control of the h/w from f/w 1452 * @adapter: address of board private structure 1453 * 1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 1455 * For ASF and Pass Through versions of f/w this means that 1456 * the driver is loaded. 1457 * 1458 **/ 1459static void igb_get_hw_control(struct igb_adapter *adapter) 1460{ 1461 struct e1000_hw *hw = &adapter->hw; 1462 u32 ctrl_ext; 1463 1464 /* Let firmware know the driver has taken over */ 1465 ctrl_ext = rd32(E1000_CTRL_EXT); 1466 wr32(E1000_CTRL_EXT, 1467 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 1468} 1469 1470/** 1471 * igb_configure - configure the hardware for RX and TX 1472 * @adapter: private board structure 1473 **/ 1474static void igb_configure(struct igb_adapter *adapter) 1475{ 1476 struct net_device *netdev = adapter->netdev; 1477 int i; 1478 1479 igb_get_hw_control(adapter); 1480 igb_set_rx_mode(netdev); 1481 1482 igb_restore_vlan(adapter); 1483 1484 igb_setup_tctl(adapter); 1485 igb_setup_mrqc(adapter); 1486 igb_setup_rctl(adapter); 1487 1488 igb_configure_tx(adapter); 1489 igb_configure_rx(adapter); 1490 1491 igb_rx_fifo_flush_82575(&adapter->hw); 1492 1493 /* call igb_desc_unused which always leaves 1494 * at least 1 descriptor unused to make sure 1495 * next_to_use != next_to_clean */ 1496 for (i = 0; i < adapter->num_rx_queues; i++) { 1497 struct igb_ring *ring = adapter->rx_ring[i]; 1498 igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); 1499 } 1500} 1501 1502/** 1503 * igb_power_up_link - Power up the phy/serdes link 1504 * @adapter: address of board private structure 1505 **/ 1506void igb_power_up_link(struct igb_adapter *adapter) 1507{ 1508 if (adapter->hw.phy.media_type == e1000_media_type_copper) 1509 igb_power_up_phy_copper(&adapter->hw); 1510 else 1511 igb_power_up_serdes_link_82575(&adapter->hw); 1512 igb_reset_phy(&adapter->hw); 1513} 1514 1515/** 1516 * igb_power_down_link - Power down the phy/serdes link 1517 * @adapter: address of board private structure 1518 */ 1519static void igb_power_down_link(struct igb_adapter *adapter) 1520{ 1521 if (adapter->hw.phy.media_type == e1000_media_type_copper) 1522 igb_power_down_phy_copper_82575(&adapter->hw); 1523 else 1524 igb_shutdown_serdes_link_82575(&adapter->hw); 1525} 1526 1527/** 1528 * igb_up - Open the interface and prepare it to handle traffic 1529 * @adapter: board private structure 1530 **/ 1531int igb_up(struct igb_adapter *adapter) 1532{ 1533 struct e1000_hw *hw = &adapter->hw; 1534 int i; 1535 1536 /* hardware has been reset, we need to reload some things */ 1537 igb_configure(adapter); 1538 1539 clear_bit(__IGB_DOWN, &adapter->state); 1540 1541 for (i = 0; i < adapter->num_q_vectors; i++) 1542 napi_enable(&(adapter->q_vector[i]->napi)); 1543 1544 if (adapter->msix_entries) 1545 igb_configure_msix(adapter); 1546 else 1547 igb_assign_vector(adapter->q_vector[0], 0); 1548 1549 /* Clear any pending interrupts. */ 1550 rd32(E1000_ICR); 1551 igb_irq_enable(adapter); 1552 1553 /* notify VFs that reset has been completed */ 1554 if (adapter->vfs_allocated_count) { 1555 u32 reg_data = rd32(E1000_CTRL_EXT); 1556 reg_data |= E1000_CTRL_EXT_PFRSTD; 1557 wr32(E1000_CTRL_EXT, reg_data); 1558 } 1559 1560 netif_tx_start_all_queues(adapter->netdev); 1561 1562 /* start the watchdog. */ 1563 hw->mac.get_link_status = 1; 1564 schedule_work(&adapter->watchdog_task); 1565 1566 return 0; 1567} 1568 1569void igb_down(struct igb_adapter *adapter) 1570{ 1571 struct net_device *netdev = adapter->netdev; 1572 struct e1000_hw *hw = &adapter->hw; 1573 u32 tctl, rctl; 1574 int i; 1575 1576 /* signal that we're down so the interrupt handler does not 1577 * reschedule our watchdog timer */ 1578 set_bit(__IGB_DOWN, &adapter->state); 1579 1580 /* disable receives in the hardware */ 1581 rctl = rd32(E1000_RCTL); 1582 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); 1583 /* flush and sleep below */ 1584 1585 netif_tx_stop_all_queues(netdev); 1586 1587 /* disable transmits in the hardware */ 1588 tctl = rd32(E1000_TCTL); 1589 tctl &= ~E1000_TCTL_EN; 1590 wr32(E1000_TCTL, tctl); 1591 /* flush both disables and wait for them to finish */ 1592 wrfl(); 1593 msleep(10); 1594 1595 for (i = 0; i < adapter->num_q_vectors; i++) 1596 napi_disable(&(adapter->q_vector[i]->napi)); 1597 1598 igb_irq_disable(adapter); 1599 1600 del_timer_sync(&adapter->watchdog_timer); 1601 del_timer_sync(&adapter->phy_info_timer); 1602 1603 netif_carrier_off(netdev); 1604 1605 /* record the stats before reset*/ 1606 spin_lock(&adapter->stats64_lock); 1607 igb_update_stats(adapter, &adapter->stats64); 1608 spin_unlock(&adapter->stats64_lock); 1609 1610 adapter->link_speed = 0; 1611 adapter->link_duplex = 0; 1612 1613 if (!pci_channel_offline(adapter->pdev)) 1614 igb_reset(adapter); 1615 igb_clean_all_tx_rings(adapter); 1616 igb_clean_all_rx_rings(adapter); 1617#ifdef CONFIG_IGB_DCA 1618 1619 /* since we reset the hardware DCA settings were cleared */ 1620 igb_setup_dca(adapter); 1621#endif 1622} 1623 1624void igb_reinit_locked(struct igb_adapter *adapter) 1625{ 1626 WARN_ON(in_interrupt()); 1627 while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) 1628 msleep(1); 1629 igb_down(adapter); 1630 igb_up(adapter); 1631 clear_bit(__IGB_RESETTING, &adapter->state); 1632} 1633 1634void igb_reset(struct igb_adapter *adapter) 1635{ 1636 struct pci_dev *pdev = adapter->pdev; 1637 struct e1000_hw *hw = &adapter->hw; 1638 struct e1000_mac_info *mac = &hw->mac; 1639 struct e1000_fc_info *fc = &hw->fc; 1640 u32 pba = 0, tx_space, min_tx_space, min_rx_space; 1641 u16 hwm; 1642 1643 /* Repartition Pba for greater than 9k mtu 1644 * To take effect CTRL.RST is required. 1645 */ 1646 switch (mac->type) { 1647 case e1000_i350: 1648 case e1000_82580: 1649 pba = rd32(E1000_RXPBS); 1650 pba = igb_rxpbs_adjust_82580(pba); 1651 break; 1652 case e1000_82576: 1653 pba = rd32(E1000_RXPBS); 1654 pba &= E1000_RXPBS_SIZE_MASK_82576; 1655 break; 1656 case e1000_82575: 1657 default: 1658 pba = E1000_PBA_34K; 1659 break; 1660 } 1661 1662 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && 1663 (mac->type < e1000_82576)) { 1664 /* adjust PBA for jumbo frames */ 1665 wr32(E1000_PBA, pba); 1666 1667 /* To maintain wire speed transmits, the Tx FIFO should be 1668 * large enough to accommodate two full transmit packets, 1669 * rounded up to the next 1KB and expressed in KB. Likewise, 1670 * the Rx FIFO should be large enough to accommodate at least 1671 * one full receive packet and is similarly rounded up and 1672 * expressed in KB. */ 1673 pba = rd32(E1000_PBA); 1674 /* upper 16 bits has Tx packet buffer allocation size in KB */ 1675 tx_space = pba >> 16; 1676 /* lower 16 bits has Rx packet buffer allocation size in KB */ 1677 pba &= 0xffff; 1678 /* the tx fifo also stores 16 bytes of information about the tx 1679 * but don't include ethernet FCS because hardware appends it */ 1680 min_tx_space = (adapter->max_frame_size + 1681 sizeof(union e1000_adv_tx_desc) - 1682 ETH_FCS_LEN) * 2; 1683 min_tx_space = ALIGN(min_tx_space, 1024); 1684 min_tx_space >>= 10; 1685 /* software strips receive CRC, so leave room for it */ 1686 min_rx_space = adapter->max_frame_size; 1687 min_rx_space = ALIGN(min_rx_space, 1024); 1688 min_rx_space >>= 10; 1689 1690 /* If current Tx allocation is less than the min Tx FIFO size, 1691 * and the min Tx FIFO size is less than the current Rx FIFO 1692 * allocation, take space away from current Rx allocation */ 1693 if (tx_space < min_tx_space && 1694 ((min_tx_space - tx_space) < pba)) { 1695 pba = pba - (min_tx_space - tx_space); 1696 1697 /* if short on rx space, rx wins and must trump tx 1698 * adjustment */ 1699 if (pba < min_rx_space) 1700 pba = min_rx_space; 1701 } 1702 wr32(E1000_PBA, pba); 1703 } 1704 1705 /* flow control settings */ 1706 /* The high water mark must be low enough to fit one full frame 1707 * (or the size used for early receive) above it in the Rx FIFO. 1708 * Set it to the lower of: 1709 * - 90% of the Rx FIFO size, or 1710 * - the full Rx FIFO size minus one full frame */ 1711 hwm = min(((pba << 10) * 9 / 10), 1712 ((pba << 10) - 2 * adapter->max_frame_size)); 1713 1714 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ 1715 fc->low_water = fc->high_water - 16; 1716 fc->pause_time = 0xFFFF; 1717 fc->send_xon = 1; 1718 fc->current_mode = fc->requested_mode; 1719 1720 /* disable receive for all VFs and wait one second */ 1721 if (adapter->vfs_allocated_count) { 1722 int i; 1723 for (i = 0 ; i < adapter->vfs_allocated_count; i++) 1724 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; 1725 1726 /* ping all the active vfs to let them know we are going down */ 1727 igb_ping_all_vfs(adapter); 1728 1729 /* disable transmits and receives */ 1730 wr32(E1000_VFRE, 0); 1731 wr32(E1000_VFTE, 0); 1732 } 1733 1734 /* Allow time for pending master requests to run */ 1735 hw->mac.ops.reset_hw(hw); 1736 wr32(E1000_WUC, 0); 1737 1738 if (hw->mac.ops.init_hw(hw)) 1739 dev_err(&pdev->dev, "Hardware Error\n"); 1740 1741 igb_init_dmac(adapter, pba); 1742 if (!netif_running(adapter->netdev)) 1743 igb_power_down_link(adapter); 1744 1745 igb_update_mng_vlan(adapter); 1746 1747 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ 1748 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); 1749 1750 igb_get_phy_info(hw); 1751} 1752 1753static netdev_features_t igb_fix_features(struct net_device *netdev, 1754 netdev_features_t features) 1755{ 1756 /* 1757 * Since there is no support for separate rx/tx vlan accel 1758 * enable/disable make sure tx flag is always in same state as rx. 1759 */ 1760 if (features & NETIF_F_HW_VLAN_RX) 1761 features |= NETIF_F_HW_VLAN_TX; 1762 else 1763 features &= ~NETIF_F_HW_VLAN_TX; 1764 1765 return features; 1766} 1767 1768static int igb_set_features(struct net_device *netdev, 1769 netdev_features_t features) 1770{ 1771 netdev_features_t changed = netdev->features ^ features; 1772 1773 if (changed & NETIF_F_HW_VLAN_RX) 1774 igb_vlan_mode(netdev, features); 1775 1776 return 0; 1777} 1778 1779static const struct net_device_ops igb_netdev_ops = { 1780 .ndo_open = igb_open, 1781 .ndo_stop = igb_close, 1782 .ndo_start_xmit = igb_xmit_frame, 1783 .ndo_get_stats64 = igb_get_stats64, 1784 .ndo_set_rx_mode = igb_set_rx_mode, 1785 .ndo_set_mac_address = igb_set_mac, 1786 .ndo_change_mtu = igb_change_mtu, 1787 .ndo_do_ioctl = igb_ioctl, 1788 .ndo_tx_timeout = igb_tx_timeout, 1789 .ndo_validate_addr = eth_validate_addr, 1790 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, 1791 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, 1792 .ndo_set_vf_mac = igb_ndo_set_vf_mac, 1793 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, 1794 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, 1795 .ndo_get_vf_config = igb_ndo_get_vf_config, 1796#ifdef CONFIG_NET_POLL_CONTROLLER 1797 .ndo_poll_controller = igb_netpoll, 1798#endif 1799 .ndo_fix_features = igb_fix_features, 1800 .ndo_set_features = igb_set_features, 1801}; 1802 1803/** 1804 * igb_probe - Device Initialization Routine 1805 * @pdev: PCI device information struct 1806 * @ent: entry in igb_pci_tbl 1807 * 1808 * Returns 0 on success, negative on failure 1809 * 1810 * igb_probe initializes an adapter identified by a pci_dev structure. 1811 * The OS initialization, configuring of the adapter private structure, 1812 * and a hardware reset occur. 1813 **/ 1814static int __devinit igb_probe(struct pci_dev *pdev, 1815 const struct pci_device_id *ent) 1816{ 1817 struct net_device *netdev; 1818 struct igb_adapter *adapter; 1819 struct e1000_hw *hw; 1820 u16 eeprom_data = 0; 1821 s32 ret_val; 1822 static int global_quad_port_a; /* global quad port a indication */ 1823 const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; 1824 unsigned long mmio_start, mmio_len; 1825 int err, pci_using_dac; 1826 u16 eeprom_apme_mask = IGB_EEPROM_APME; 1827 u8 part_str[E1000_PBANUM_LENGTH]; 1828 1829 /* Catch broken hardware that put the wrong VF device ID in 1830 * the PCIe SR-IOV capability. 1831 */ 1832 if (pdev->is_virtfn) { 1833 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", 1834 pci_name(pdev), pdev->vendor, pdev->device); 1835 return -EINVAL; 1836 } 1837 1838 err = pci_enable_device_mem(pdev); 1839 if (err) 1840 return err; 1841 1842 pci_using_dac = 0; 1843 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); 1844 if (!err) { 1845 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); 1846 if (!err) 1847 pci_using_dac = 1; 1848 } else { 1849 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); 1850 if (err) { 1851 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); 1852 if (err) { 1853 dev_err(&pdev->dev, "No usable DMA " 1854 "configuration, aborting\n"); 1855 goto err_dma; 1856 } 1857 } 1858 } 1859 1860 err = pci_request_selected_regions(pdev, pci_select_bars(pdev, 1861 IORESOURCE_MEM), 1862 igb_driver_name); 1863 if (err) 1864 goto err_pci_reg; 1865 1866 pci_enable_pcie_error_reporting(pdev); 1867 1868 pci_set_master(pdev); 1869 pci_save_state(pdev); 1870 1871 err = -ENOMEM; 1872 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), 1873 IGB_MAX_TX_QUEUES); 1874 if (!netdev) 1875 goto err_alloc_etherdev; 1876 1877 SET_NETDEV_DEV(netdev, &pdev->dev); 1878 1879 pci_set_drvdata(pdev, netdev); 1880 adapter = netdev_priv(netdev); 1881 adapter->netdev = netdev; 1882 adapter->pdev = pdev; 1883 hw = &adapter->hw; 1884 hw->back = adapter; 1885 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE; 1886 1887 mmio_start = pci_resource_start(pdev, 0); 1888 mmio_len = pci_resource_len(pdev, 0); 1889 1890 err = -EIO; 1891 hw->hw_addr = ioremap(mmio_start, mmio_len); 1892 if (!hw->hw_addr) 1893 goto err_ioremap; 1894 1895 netdev->netdev_ops = &igb_netdev_ops; 1896 igb_set_ethtool_ops(netdev); 1897 netdev->watchdog_timeo = 5 * HZ; 1898 1899 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); 1900 1901 netdev->mem_start = mmio_start; 1902 netdev->mem_end = mmio_start + mmio_len; 1903 1904 /* PCI config space info */ 1905 hw->vendor_id = pdev->vendor; 1906 hw->device_id = pdev->device; 1907 hw->revision_id = pdev->revision; 1908 hw->subsystem_vendor_id = pdev->subsystem_vendor; 1909 hw->subsystem_device_id = pdev->subsystem_device; 1910 1911 /* Copy the default MAC, PHY and NVM function pointers */ 1912 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 1913 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 1914 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); 1915 /* Initialize skew-specific constants */ 1916 err = ei->get_invariants(hw); 1917 if (err) 1918 goto err_sw_init; 1919 1920 /* setup the private structure */ 1921 err = igb_sw_init(adapter); 1922 if (err) 1923 goto err_sw_init; 1924 1925 igb_get_bus_info_pcie(hw); 1926 1927 hw->phy.autoneg_wait_to_complete = false; 1928 1929 /* Copper options */ 1930 if (hw->phy.media_type == e1000_media_type_copper) { 1931 hw->phy.mdix = AUTO_ALL_MODES; 1932 hw->phy.disable_polarity_correction = false; 1933 hw->phy.ms_type = e1000_ms_hw_default; 1934 } 1935 1936 if (igb_check_reset_block(hw)) 1937 dev_info(&pdev->dev, 1938 "PHY reset is blocked due to SOL/IDER session.\n"); 1939 1940 /* 1941 * features is initialized to 0 in allocation, it might have bits 1942 * set by igb_sw_init so we should use an or instead of an 1943 * assignment. 1944 */ 1945 netdev->features |= NETIF_F_SG | 1946 NETIF_F_IP_CSUM | 1947 NETIF_F_IPV6_CSUM | 1948 NETIF_F_TSO | 1949 NETIF_F_TSO6 | 1950 NETIF_F_RXHASH | 1951 NETIF_F_RXCSUM | 1952 NETIF_F_HW_VLAN_RX | 1953 NETIF_F_HW_VLAN_TX; 1954 1955 /* copy netdev features into list of user selectable features */ 1956 netdev->hw_features |= netdev->features; 1957 1958 /* set this bit last since it cannot be part of hw_features */ 1959 netdev->features |= NETIF_F_HW_VLAN_FILTER; 1960 1961 netdev->vlan_features |= NETIF_F_TSO | 1962 NETIF_F_TSO6 | 1963 NETIF_F_IP_CSUM | 1964 NETIF_F_IPV6_CSUM | 1965 NETIF_F_SG; 1966 1967 if (pci_using_dac) { 1968 netdev->features |= NETIF_F_HIGHDMA; 1969 netdev->vlan_features |= NETIF_F_HIGHDMA; 1970 } 1971 1972 if (hw->mac.type >= e1000_82576) { 1973 netdev->hw_features |= NETIF_F_SCTP_CSUM; 1974 netdev->features |= NETIF_F_SCTP_CSUM; 1975 } 1976 1977 netdev->priv_flags |= IFF_UNICAST_FLT; 1978 1979 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); 1980 1981 /* before reading the NVM, reset the controller to put the device in a 1982 * known good starting state */ 1983 hw->mac.ops.reset_hw(hw); 1984 1985 /* make sure the NVM is good */ 1986 if (hw->nvm.ops.validate(hw) < 0) { 1987 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 1988 err = -EIO; 1989 goto err_eeprom; 1990 } 1991 1992 /* copy the MAC address out of the NVM */ 1993 if (hw->mac.ops.read_mac_addr(hw)) 1994 dev_err(&pdev->dev, "NVM Read Error\n"); 1995 1996 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); 1997 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); 1998 1999 if (!is_valid_ether_addr(netdev->perm_addr)) { 2000 dev_err(&pdev->dev, "Invalid MAC Address\n"); 2001 err = -EIO; 2002 goto err_eeprom; 2003 } 2004 2005 setup_timer(&adapter->watchdog_timer, igb_watchdog, 2006 (unsigned long) adapter); 2007 setup_timer(&adapter->phy_info_timer, igb_update_phy_info, 2008 (unsigned long) adapter); 2009 2010 INIT_WORK(&adapter->reset_task, igb_reset_task); 2011 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); 2012 2013 /* Initialize link properties that are user-changeable */ 2014 adapter->fc_autoneg = true; 2015 hw->mac.autoneg = true; 2016 hw->phy.autoneg_advertised = 0x2f; 2017 2018 hw->fc.requested_mode = e1000_fc_default; 2019 hw->fc.current_mode = e1000_fc_default; 2020 2021 igb_validate_mdi_setting(hw); 2022 2023 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM, 2024 * enable the ACPI Magic Packet filter 2025 */ 2026 2027 if (hw->bus.func == 0) 2028 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 2029 else if (hw->mac.type >= e1000_82580) 2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + 2031 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, 2032 &eeprom_data); 2033 else if (hw->bus.func == 1) 2034 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 2035 2036 if (eeprom_data & eeprom_apme_mask) 2037 adapter->eeprom_wol |= E1000_WUFC_MAG; 2038 2039 /* now that we have the eeprom settings, apply the special cases where 2040 * the eeprom may be wrong or the board simply won't support wake on 2041 * lan on a particular port */ 2042 switch (pdev->device) { 2043 case E1000_DEV_ID_82575GB_QUAD_COPPER: 2044 adapter->eeprom_wol = 0; 2045 break; 2046 case E1000_DEV_ID_82575EB_FIBER_SERDES: 2047 case E1000_DEV_ID_82576_FIBER: 2048 case E1000_DEV_ID_82576_SERDES: 2049 /* Wake events only supported on port A for dual fiber 2050 * regardless of eeprom setting */ 2051 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) 2052 adapter->eeprom_wol = 0; 2053 break; 2054 case E1000_DEV_ID_82576_QUAD_COPPER: 2055 case E1000_DEV_ID_82576_QUAD_COPPER_ET2: 2056 /* if quad port adapter, disable WoL on all but port A */ 2057 if (global_quad_port_a != 0) 2058 adapter->eeprom_wol = 0; 2059 else 2060 adapter->flags |= IGB_FLAG_QUAD_PORT_A; 2061 /* Reset for multiple quad port adapters */ 2062 if (++global_quad_port_a == 4) 2063 global_quad_port_a = 0; 2064 break; 2065 } 2066 2067 /* initialize the wol settings based on the eeprom settings */ 2068 adapter->wol = adapter->eeprom_wol; 2069 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); 2070 2071 /* reset the hardware with the new settings */ 2072 igb_reset(adapter); 2073 2074 /* let the f/w know that the h/w is now under the control of the 2075 * driver. */ 2076 igb_get_hw_control(adapter); 2077 2078 strcpy(netdev->name, "eth%d"); 2079 err = register_netdev(netdev); 2080 if (err) 2081 goto err_register; 2082 2083 /* carrier off reporting is important to ethtool even BEFORE open */ 2084 netif_carrier_off(netdev); 2085 2086#ifdef CONFIG_IGB_DCA 2087 if (dca_add_requester(&pdev->dev) == 0) { 2088 adapter->flags |= IGB_FLAG_DCA_ENABLED; 2089 dev_info(&pdev->dev, "DCA enabled\n"); 2090 igb_setup_dca(adapter); 2091 } 2092 2093#endif 2094 /* do hw tstamp init after resetting */ 2095 igb_init_hw_timer(adapter); 2096 2097 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); 2098 /* print bus type/speed/width info */ 2099 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", 2100 netdev->name, 2101 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : 2102 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : 2103 "unknown"), 2104 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : 2105 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : 2106 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : 2107 "unknown"), 2108 netdev->dev_addr); 2109 2110 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); 2111 if (ret_val) 2112 strcpy(part_str, "Unknown"); 2113 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); 2114 dev_info(&pdev->dev, 2115 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", 2116 adapter->msix_entries ? "MSI-X" : 2117 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", 2118 adapter->num_rx_queues, adapter->num_tx_queues); 2119 switch (hw->mac.type) { 2120 case e1000_i350: 2121 igb_set_eee_i350(hw); 2122 break; 2123 default: 2124 break; 2125 } 2126 2127 pm_runtime_put_noidle(&pdev->dev); 2128 return 0; 2129 2130err_register: 2131 igb_release_hw_control(adapter); 2132err_eeprom: 2133 if (!igb_check_reset_block(hw)) 2134 igb_reset_phy(hw); 2135 2136 if (hw->flash_address) 2137 iounmap(hw->flash_address); 2138err_sw_init: 2139 igb_clear_interrupt_scheme(adapter); 2140 iounmap(hw->hw_addr); 2141err_ioremap: 2142 free_netdev(netdev); 2143err_alloc_etherdev: 2144 pci_release_selected_regions(pdev, 2145 pci_select_bars(pdev, IORESOURCE_MEM)); 2146err_pci_reg: 2147err_dma: 2148 pci_disable_device(pdev); 2149 return err; 2150} 2151 2152/** 2153 * igb_remove - Device Removal Routine 2154 * @pdev: PCI device information struct 2155 * 2156 * igb_remove is called by the PCI subsystem to alert the driver 2157 * that it should release a PCI device. The could be caused by a 2158 * Hot-Plug event, or because the driver is going to be removed from 2159 * memory. 2160 **/ 2161static void __devexit igb_remove(struct pci_dev *pdev) 2162{ 2163 struct net_device *netdev = pci_get_drvdata(pdev); 2164 struct igb_adapter *adapter = netdev_priv(netdev); 2165 struct e1000_hw *hw = &adapter->hw; 2166 2167 pm_runtime_get_noresume(&pdev->dev); 2168 2169 /* 2170 * The watchdog timer may be rescheduled, so explicitly 2171 * disable watchdog from being rescheduled. 2172 */ 2173 set_bit(__IGB_DOWN, &adapter->state); 2174 del_timer_sync(&adapter->watchdog_timer); 2175 del_timer_sync(&adapter->phy_info_timer); 2176 2177 cancel_work_sync(&adapter->reset_task); 2178 cancel_work_sync(&adapter->watchdog_task); 2179 2180#ifdef CONFIG_IGB_DCA 2181 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 2182 dev_info(&pdev->dev, "DCA disabled\n"); 2183 dca_remove_requester(&pdev->dev); 2184 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 2185 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 2186 } 2187#endif 2188 2189 /* Release control of h/w to f/w. If f/w is AMT enabled, this 2190 * would have already happened in close and is redundant. */ 2191 igb_release_hw_control(adapter); 2192 2193 unregister_netdev(netdev); 2194 2195 igb_clear_interrupt_scheme(adapter); 2196 2197#ifdef CONFIG_PCI_IOV 2198 /* reclaim resources allocated to VFs */ 2199 if (adapter->vf_data) { 2200 /* disable iov and allow time for transactions to clear */ 2201 if (!igb_check_vf_assignment(adapter)) { 2202 pci_disable_sriov(pdev); 2203 msleep(500); 2204 } else { 2205 dev_info(&pdev->dev, "VF(s) assigned to guests!\n"); 2206 } 2207 2208 kfree(adapter->vf_data); 2209 adapter->vf_data = NULL; 2210 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 2211 wrfl(); 2212 msleep(100); 2213 dev_info(&pdev->dev, "IOV Disabled\n"); 2214 } 2215#endif 2216 2217 iounmap(hw->hw_addr); 2218 if (hw->flash_address) 2219 iounmap(hw->flash_address); 2220 pci_release_selected_regions(pdev, 2221 pci_select_bars(pdev, IORESOURCE_MEM)); 2222 2223 kfree(adapter->shadow_vfta); 2224 free_netdev(netdev); 2225 2226 pci_disable_pcie_error_reporting(pdev); 2227 2228 pci_disable_device(pdev); 2229} 2230 2231/** 2232 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space 2233 * @adapter: board private structure to initialize 2234 * 2235 * This function initializes the vf specific data storage and then attempts to 2236 * allocate the VFs. The reason for ordering it this way is because it is much 2237 * mor expensive time wise to disable SR-IOV than it is to allocate and free 2238 * the memory for the VFs. 2239 **/ 2240static void __devinit igb_probe_vfs(struct igb_adapter * adapter) 2241{ 2242#ifdef CONFIG_PCI_IOV 2243 struct pci_dev *pdev = adapter->pdev; 2244 int old_vfs = igb_find_enabled_vfs(adapter); 2245 int i; 2246 2247 if (old_vfs) { 2248 dev_info(&pdev->dev, "%d pre-allocated VFs found - override " 2249 "max_vfs setting of %d\n", old_vfs, max_vfs); 2250 adapter->vfs_allocated_count = old_vfs; 2251 } 2252 2253 if (!adapter->vfs_allocated_count) 2254 return; 2255 2256 adapter->vf_data = kcalloc(adapter->vfs_allocated_count, 2257 sizeof(struct vf_data_storage), GFP_KERNEL); 2258 /* if allocation failed then we do not support SR-IOV */ 2259 if (!adapter->vf_data) { 2260 adapter->vfs_allocated_count = 0; 2261 dev_err(&pdev->dev, "Unable to allocate memory for VF " 2262 "Data Storage\n"); 2263 goto out; 2264 } 2265 2266 if (!old_vfs) { 2267 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) 2268 goto err_out; 2269 } 2270 dev_info(&pdev->dev, "%d VFs allocated\n", 2271 adapter->vfs_allocated_count); 2272 for (i = 0; i < adapter->vfs_allocated_count; i++) 2273 igb_vf_configure(adapter, i); 2274 2275 /* DMA Coalescing is not supported in IOV mode. */ 2276 adapter->flags &= ~IGB_FLAG_DMAC; 2277 goto out; 2278err_out: 2279 kfree(adapter->vf_data); 2280 adapter->vf_data = NULL; 2281 adapter->vfs_allocated_count = 0; 2282out: 2283 return; 2284#endif /* CONFIG_PCI_IOV */ 2285} 2286 2287/** 2288 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp 2289 * @adapter: board private structure to initialize 2290 * 2291 * igb_init_hw_timer initializes the function pointer and values for the hw 2292 * timer found in hardware. 2293 **/ 2294static void igb_init_hw_timer(struct igb_adapter *adapter) 2295{ 2296 struct e1000_hw *hw = &adapter->hw; 2297 2298 switch (hw->mac.type) { 2299 case e1000_i350: 2300 case e1000_82580: 2301 memset(&adapter->cycles, 0, sizeof(adapter->cycles)); 2302 adapter->cycles.read = igb_read_clock; 2303 adapter->cycles.mask = CLOCKSOURCE_MASK(64); 2304 adapter->cycles.mult = 1; 2305 /* 2306 * The 82580 timesync updates the system timer every 8ns by 8ns 2307 * and the value cannot be shifted. Instead we need to shift 2308 * the registers to generate a 64bit timer value. As a result 2309 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by 2310 * 24 in order to generate a larger value for synchronization. 2311 */ 2312 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT; 2313 /* disable system timer temporarily by setting bit 31 */ 2314 wr32(E1000_TSAUXC, 0x80000000); 2315 wrfl(); 2316 2317 /* Set registers so that rollover occurs soon to test this. */ 2318 wr32(E1000_SYSTIMR, 0x00000000); 2319 wr32(E1000_SYSTIML, 0x80000000); 2320 wr32(E1000_SYSTIMH, 0x000000FF); 2321 wrfl(); 2322 2323 /* enable system timer by clearing bit 31 */ 2324 wr32(E1000_TSAUXC, 0x0); 2325 wrfl(); 2326 2327 timecounter_init(&adapter->clock, 2328 &adapter->cycles, 2329 ktime_to_ns(ktime_get_real())); 2330 /* 2331 * Synchronize our NIC clock against system wall clock. NIC 2332 * time stamp reading requires ~3us per sample, each sample 2333 * was pretty stable even under load => only require 10 2334 * samples for each offset comparison. 2335 */ 2336 memset(&adapter->compare, 0, sizeof(adapter->compare)); 2337 adapter->compare.source = &adapter->clock; 2338 adapter->compare.target = ktime_get_real; 2339 adapter->compare.num_samples = 10; 2340 timecompare_update(&adapter->compare, 0); 2341 break; 2342 case e1000_82576: 2343 /* 2344 * Initialize hardware timer: we keep it running just in case 2345 * that some program needs it later on. 2346 */ 2347 memset(&adapter->cycles, 0, sizeof(adapter->cycles)); 2348 adapter->cycles.read = igb_read_clock; 2349 adapter->cycles.mask = CLOCKSOURCE_MASK(64); 2350 adapter->cycles.mult = 1; 2351 /** 2352 * Scale the NIC clock cycle by a large factor so that 2353 * relatively small clock corrections can be added or 2354 * subtracted at each clock tick. The drawbacks of a large 2355 * factor are a) that the clock register overflows more quickly 2356 * (not such a big deal) and b) that the increment per tick has 2357 * to fit into 24 bits. As a result we need to use a shift of 2358 * 19 so we can fit a value of 16 into the TIMINCA register. 2359 */ 2360 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT; 2361 wr32(E1000_TIMINCA, 2362 (1 << E1000_TIMINCA_16NS_SHIFT) | 2363 (16 << IGB_82576_TSYNC_SHIFT)); 2364 2365 /* Set registers so that rollover occurs soon to test this. */ 2366 wr32(E1000_SYSTIML, 0x00000000); 2367 wr32(E1000_SYSTIMH, 0xFF800000); 2368 wrfl(); 2369 2370 timecounter_init(&adapter->clock, 2371 &adapter->cycles, 2372 ktime_to_ns(ktime_get_real())); 2373 /* 2374 * Synchronize our NIC clock against system wall clock. NIC 2375 * time stamp reading requires ~3us per sample, each sample 2376 * was pretty stable even under load => only require 10 2377 * samples for each offset comparison. 2378 */ 2379 memset(&adapter->compare, 0, sizeof(adapter->compare)); 2380 adapter->compare.source = &adapter->clock; 2381 adapter->compare.target = ktime_get_real; 2382 adapter->compare.num_samples = 10; 2383 timecompare_update(&adapter->compare, 0); 2384 break; 2385 case e1000_82575: 2386 /* 82575 does not support timesync */ 2387 default: 2388 break; 2389 } 2390 2391} 2392 2393/** 2394 * igb_sw_init - Initialize general software structures (struct igb_adapter) 2395 * @adapter: board private structure to initialize 2396 * 2397 * igb_sw_init initializes the Adapter private data structure. 2398 * Fields are initialized based on PCI device information and 2399 * OS network device settings (MTU size). 2400 **/ 2401static int __devinit igb_sw_init(struct igb_adapter *adapter) 2402{ 2403 struct e1000_hw *hw = &adapter->hw; 2404 struct net_device *netdev = adapter->netdev; 2405 struct pci_dev *pdev = adapter->pdev; 2406 2407 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 2408 2409 /* set default ring sizes */ 2410 adapter->tx_ring_count = IGB_DEFAULT_TXD; 2411 adapter->rx_ring_count = IGB_DEFAULT_RXD; 2412 2413 /* set default ITR values */ 2414 adapter->rx_itr_setting = IGB_DEFAULT_ITR; 2415 adapter->tx_itr_setting = IGB_DEFAULT_ITR; 2416 2417 /* set default work limits */ 2418 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; 2419 2420 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 2421 VLAN_HLEN; 2422 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 2423 2424 adapter->node = -1; 2425 2426 spin_lock_init(&adapter->stats64_lock); 2427#ifdef CONFIG_PCI_IOV 2428 switch (hw->mac.type) { 2429 case e1000_82576: 2430 case e1000_i350: 2431 if (max_vfs > 7) { 2432 dev_warn(&pdev->dev, 2433 "Maximum of 7 VFs per PF, using max\n"); 2434 adapter->vfs_allocated_count = 7; 2435 } else 2436 adapter->vfs_allocated_count = max_vfs; 2437 break; 2438 default: 2439 break; 2440 } 2441#endif /* CONFIG_PCI_IOV */ 2442 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus()); 2443 /* i350 cannot do RSS and SR-IOV at the same time */ 2444 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count) 2445 adapter->rss_queues = 1; 2446 2447 /* 2448 * if rss_queues > 4 or vfs are going to be allocated with rss_queues 2449 * then we should combine the queues into a queue pair in order to 2450 * conserve interrupts due to limited supply 2451 */ 2452 if ((adapter->rss_queues > 4) || 2453 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6))) 2454 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 2455 2456 /* Setup and initialize a copy of the hw vlan table array */ 2457 adapter->shadow_vfta = kzalloc(sizeof(u32) * 2458 E1000_VLAN_FILTER_TBL_SIZE, 2459 GFP_ATOMIC); 2460 2461 /* This call may decrease the number of queues */ 2462 if (igb_init_interrupt_scheme(adapter)) { 2463 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 2464 return -ENOMEM; 2465 } 2466 2467 igb_probe_vfs(adapter); 2468 2469 /* Explicitly disable IRQ since the NIC can be in any state. */ 2470 igb_irq_disable(adapter); 2471 2472 if (hw->mac.type == e1000_i350) 2473 adapter->flags &= ~IGB_FLAG_DMAC; 2474 2475 set_bit(__IGB_DOWN, &adapter->state); 2476 return 0; 2477} 2478 2479/** 2480 * igb_open - Called when a network interface is made active 2481 * @netdev: network interface device structure 2482 * 2483 * Returns 0 on success, negative value on failure 2484 * 2485 * The open entry point is called when a network interface is made 2486 * active by the system (IFF_UP). At this point all resources needed 2487 * for transmit and receive operations are allocated, the interrupt 2488 * handler is registered with the OS, the watchdog timer is started, 2489 * and the stack is notified that the interface is ready. 2490 **/ 2491static int __igb_open(struct net_device *netdev, bool resuming) 2492{ 2493 struct igb_adapter *adapter = netdev_priv(netdev); 2494 struct e1000_hw *hw = &adapter->hw; 2495 struct pci_dev *pdev = adapter->pdev; 2496 int err; 2497 int i; 2498 2499 /* disallow open during test */ 2500 if (test_bit(__IGB_TESTING, &adapter->state)) { 2501 WARN_ON(resuming); 2502 return -EBUSY; 2503 } 2504 2505 if (!resuming) 2506 pm_runtime_get_sync(&pdev->dev); 2507 2508 netif_carrier_off(netdev); 2509 2510 /* allocate transmit descriptors */ 2511 err = igb_setup_all_tx_resources(adapter); 2512 if (err) 2513 goto err_setup_tx; 2514 2515 /* allocate receive descriptors */ 2516 err = igb_setup_all_rx_resources(adapter); 2517 if (err) 2518 goto err_setup_rx; 2519 2520 igb_power_up_link(adapter); 2521 2522 /* before we allocate an interrupt, we must be ready to handle it. 2523 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt 2524 * as soon as we call pci_request_irq, so we have to setup our 2525 * clean_rx handler before we do so. */ 2526 igb_configure(adapter); 2527 2528 err = igb_request_irq(adapter); 2529 if (err) 2530 goto err_req_irq; 2531 2532 /* From here on the code is the same as igb_up() */ 2533 clear_bit(__IGB_DOWN, &adapter->state); 2534 2535 for (i = 0; i < adapter->num_q_vectors; i++) 2536 napi_enable(&(adapter->q_vector[i]->napi)); 2537 2538 /* Clear any pending interrupts. */ 2539 rd32(E1000_ICR); 2540 2541 igb_irq_enable(adapter); 2542 2543 /* notify VFs that reset has been completed */ 2544 if (adapter->vfs_allocated_count) { 2545 u32 reg_data = rd32(E1000_CTRL_EXT); 2546 reg_data |= E1000_CTRL_EXT_PFRSTD; 2547 wr32(E1000_CTRL_EXT, reg_data); 2548 } 2549 2550 netif_tx_start_all_queues(netdev); 2551 2552 if (!resuming) 2553 pm_runtime_put(&pdev->dev); 2554 2555 /* start the watchdog. */ 2556 hw->mac.get_link_status = 1; 2557 schedule_work(&adapter->watchdog_task); 2558 2559 return 0; 2560 2561err_req_irq: 2562 igb_release_hw_control(adapter); 2563 igb_power_down_link(adapter); 2564 igb_free_all_rx_resources(adapter); 2565err_setup_rx: 2566 igb_free_all_tx_resources(adapter); 2567err_setup_tx: 2568 igb_reset(adapter); 2569 if (!resuming) 2570 pm_runtime_put(&pdev->dev); 2571 2572 return err; 2573} 2574 2575static int igb_open(struct net_device *netdev) 2576{ 2577 return __igb_open(netdev, false); 2578} 2579 2580/** 2581 * igb_close - Disables a network interface 2582 * @netdev: network interface device structure 2583 * 2584 * Returns 0, this is not allowed to fail 2585 * 2586 * The close entry point is called when an interface is de-activated 2587 * by the OS. The hardware is still under the driver's control, but 2588 * needs to be disabled. A global MAC reset is issued to stop the 2589 * hardware, and all transmit and receive resources are freed. 2590 **/ 2591static int __igb_close(struct net_device *netdev, bool suspending) 2592{ 2593 struct igb_adapter *adapter = netdev_priv(netdev); 2594 struct pci_dev *pdev = adapter->pdev; 2595 2596 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); 2597 2598 if (!suspending) 2599 pm_runtime_get_sync(&pdev->dev); 2600 2601 igb_down(adapter); 2602 igb_free_irq(adapter); 2603 2604 igb_free_all_tx_resources(adapter); 2605 igb_free_all_rx_resources(adapter); 2606 2607 if (!suspending) 2608 pm_runtime_put_sync(&pdev->dev); 2609 return 0; 2610} 2611 2612static int igb_close(struct net_device *netdev) 2613{ 2614 return __igb_close(netdev, false); 2615} 2616 2617/** 2618 * igb_setup_tx_resources - allocate Tx resources (Descriptors) 2619 * @tx_ring: tx descriptor ring (for a specific queue) to setup 2620 * 2621 * Return 0 on success, negative on failure 2622 **/ 2623int igb_setup_tx_resources(struct igb_ring *tx_ring) 2624{ 2625 struct device *dev = tx_ring->dev; 2626 int orig_node = dev_to_node(dev); 2627 int size; 2628 2629 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 2630 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); 2631 if (!tx_ring->tx_buffer_info) 2632 tx_ring->tx_buffer_info = vzalloc(size); 2633 if (!tx_ring->tx_buffer_info) 2634 goto err; 2635 2636 /* round up to nearest 4K */ 2637 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); 2638 tx_ring->size = ALIGN(tx_ring->size, 4096); 2639 2640 set_dev_node(dev, tx_ring->numa_node); 2641 tx_ring->desc = dma_alloc_coherent(dev, 2642 tx_ring->size, 2643 &tx_ring->dma, 2644 GFP_KERNEL); 2645 set_dev_node(dev, orig_node); 2646 if (!tx_ring->desc) 2647 tx_ring->desc = dma_alloc_coherent(dev, 2648 tx_ring->size, 2649 &tx_ring->dma, 2650 GFP_KERNEL); 2651 2652 if (!tx_ring->desc) 2653 goto err; 2654 2655 tx_ring->next_to_use = 0; 2656 tx_ring->next_to_clean = 0; 2657 2658 return 0; 2659 2660err: 2661 vfree(tx_ring->tx_buffer_info); 2662 dev_err(dev, 2663 "Unable to allocate memory for the transmit descriptor ring\n"); 2664 return -ENOMEM; 2665} 2666 2667/** 2668 * igb_setup_all_tx_resources - wrapper to allocate Tx resources 2669 * (Descriptors) for all queues 2670 * @adapter: board private structure 2671 * 2672 * Return 0 on success, negative on failure 2673 **/ 2674static int igb_setup_all_tx_resources(struct igb_adapter *adapter) 2675{ 2676 struct pci_dev *pdev = adapter->pdev; 2677 int i, err = 0; 2678 2679 for (i = 0; i < adapter->num_tx_queues; i++) { 2680 err = igb_setup_tx_resources(adapter->tx_ring[i]); 2681 if (err) { 2682 dev_err(&pdev->dev, 2683 "Allocation for Tx Queue %u failed\n", i); 2684 for (i--; i >= 0; i--) 2685 igb_free_tx_resources(adapter->tx_ring[i]); 2686 break; 2687 } 2688 } 2689 2690 return err; 2691} 2692 2693/** 2694 * igb_setup_tctl - configure the transmit control registers 2695 * @adapter: Board private structure 2696 **/ 2697void igb_setup_tctl(struct igb_adapter *adapter) 2698{ 2699 struct e1000_hw *hw = &adapter->hw; 2700 u32 tctl; 2701 2702 /* disable queue 0 which is enabled by default on 82575 and 82576 */ 2703 wr32(E1000_TXDCTL(0), 0); 2704 2705 /* Program the Transmit Control Register */ 2706 tctl = rd32(E1000_TCTL); 2707 tctl &= ~E1000_TCTL_CT; 2708 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | 2709 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); 2710 2711 igb_config_collision_dist(hw); 2712 2713 /* Enable transmits */ 2714 tctl |= E1000_TCTL_EN; 2715 2716 wr32(E1000_TCTL, tctl); 2717} 2718 2719/** 2720 * igb_configure_tx_ring - Configure transmit ring after Reset 2721 * @adapter: board private structure 2722 * @ring: tx ring to configure 2723 * 2724 * Configure a transmit ring after a reset. 2725 **/ 2726void igb_configure_tx_ring(struct igb_adapter *adapter, 2727 struct igb_ring *ring) 2728{ 2729 struct e1000_hw *hw = &adapter->hw; 2730 u32 txdctl = 0; 2731 u64 tdba = ring->dma; 2732 int reg_idx = ring->reg_idx; 2733 2734 /* disable the queue */ 2735 wr32(E1000_TXDCTL(reg_idx), 0); 2736 wrfl(); 2737 mdelay(10); 2738 2739 wr32(E1000_TDLEN(reg_idx), 2740 ring->count * sizeof(union e1000_adv_tx_desc)); 2741 wr32(E1000_TDBAL(reg_idx), 2742 tdba & 0x00000000ffffffffULL); 2743 wr32(E1000_TDBAH(reg_idx), tdba >> 32); 2744 2745 ring->tail = hw->hw_addr + E1000_TDT(reg_idx); 2746 wr32(E1000_TDH(reg_idx), 0); 2747 writel(0, ring->tail); 2748 2749 txdctl |= IGB_TX_PTHRESH; 2750 txdctl |= IGB_TX_HTHRESH << 8; 2751 txdctl |= IGB_TX_WTHRESH << 16; 2752 2753 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2754 wr32(E1000_TXDCTL(reg_idx), txdctl); 2755} 2756 2757/** 2758 * igb_configure_tx - Configure transmit Unit after Reset 2759 * @adapter: board private structure 2760 * 2761 * Configure the Tx unit of the MAC after a reset. 2762 **/ 2763static void igb_configure_tx(struct igb_adapter *adapter) 2764{ 2765 int i; 2766 2767 for (i = 0; i < adapter->num_tx_queues; i++) 2768 igb_configure_tx_ring(adapter, adapter->tx_ring[i]); 2769} 2770 2771/** 2772 * igb_setup_rx_resources - allocate Rx resources (Descriptors) 2773 * @rx_ring: rx descriptor ring (for a specific queue) to setup 2774 * 2775 * Returns 0 on success, negative on failure 2776 **/ 2777int igb_setup_rx_resources(struct igb_ring *rx_ring) 2778{ 2779 struct device *dev = rx_ring->dev; 2780 int orig_node = dev_to_node(dev); 2781 int size, desc_len; 2782 2783 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 2784 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); 2785 if (!rx_ring->rx_buffer_info) 2786 rx_ring->rx_buffer_info = vzalloc(size); 2787 if (!rx_ring->rx_buffer_info) 2788 goto err; 2789 2790 desc_len = sizeof(union e1000_adv_rx_desc); 2791 2792 /* Round up to nearest 4K */ 2793 rx_ring->size = rx_ring->count * desc_len; 2794 rx_ring->size = ALIGN(rx_ring->size, 4096); 2795 2796 set_dev_node(dev, rx_ring->numa_node); 2797 rx_ring->desc = dma_alloc_coherent(dev, 2798 rx_ring->size, 2799 &rx_ring->dma, 2800 GFP_KERNEL); 2801 set_dev_node(dev, orig_node); 2802 if (!rx_ring->desc) 2803 rx_ring->desc = dma_alloc_coherent(dev, 2804 rx_ring->size, 2805 &rx_ring->dma, 2806 GFP_KERNEL); 2807 2808 if (!rx_ring->desc) 2809 goto err; 2810 2811 rx_ring->next_to_clean = 0; 2812 rx_ring->next_to_use = 0; 2813 2814 return 0; 2815 2816err: 2817 vfree(rx_ring->rx_buffer_info); 2818 rx_ring->rx_buffer_info = NULL; 2819 dev_err(dev, "Unable to allocate memory for the receive descriptor" 2820 " ring\n"); 2821 return -ENOMEM; 2822} 2823 2824/** 2825 * igb_setup_all_rx_resources - wrapper to allocate Rx resources 2826 * (Descriptors) for all queues 2827 * @adapter: board private structure 2828 * 2829 * Return 0 on success, negative on failure 2830 **/ 2831static int igb_setup_all_rx_resources(struct igb_adapter *adapter) 2832{ 2833 struct pci_dev *pdev = adapter->pdev; 2834 int i, err = 0; 2835 2836 for (i = 0; i < adapter->num_rx_queues; i++) { 2837 err = igb_setup_rx_resources(adapter->rx_ring[i]); 2838 if (err) { 2839 dev_err(&pdev->dev, 2840 "Allocation for Rx Queue %u failed\n", i); 2841 for (i--; i >= 0; i--) 2842 igb_free_rx_resources(adapter->rx_ring[i]); 2843 break; 2844 } 2845 } 2846 2847 return err; 2848} 2849 2850/** 2851 * igb_setup_mrqc - configure the multiple receive queue control registers 2852 * @adapter: Board private structure 2853 **/ 2854static void igb_setup_mrqc(struct igb_adapter *adapter) 2855{ 2856 struct e1000_hw *hw = &adapter->hw; 2857 u32 mrqc, rxcsum; 2858 u32 j, num_rx_queues, shift = 0, shift2 = 0; 2859 union e1000_reta { 2860 u32 dword; 2861 u8 bytes[4]; 2862 } reta; 2863 static const u8 rsshash[40] = { 2864 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 2865 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 2866 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 2867 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; 2868 2869 /* Fill out hash function seeds */ 2870 for (j = 0; j < 10; j++) { 2871 u32 rsskey = rsshash[(j * 4)]; 2872 rsskey |= rsshash[(j * 4) + 1] << 8; 2873 rsskey |= rsshash[(j * 4) + 2] << 16; 2874 rsskey |= rsshash[(j * 4) + 3] << 24; 2875 array_wr32(E1000_RSSRK(0), j, rsskey); 2876 } 2877 2878 num_rx_queues = adapter->rss_queues; 2879 2880 if (adapter->vfs_allocated_count) { 2881 /* 82575 and 82576 supports 2 RSS queues for VMDq */ 2882 switch (hw->mac.type) { 2883 case e1000_i350: 2884 case e1000_82580: 2885 num_rx_queues = 1; 2886 shift = 0; 2887 break; 2888 case e1000_82576: 2889 shift = 3; 2890 num_rx_queues = 2; 2891 break; 2892 case e1000_82575: 2893 shift = 2; 2894 shift2 = 6; 2895 default: 2896 break; 2897 } 2898 } else { 2899 if (hw->mac.type == e1000_82575) 2900 shift = 6; 2901 } 2902 2903 for (j = 0; j < (32 * 4); j++) { 2904 reta.bytes[j & 3] = (j % num_rx_queues) << shift; 2905 if (shift2) 2906 reta.bytes[j & 3] |= num_rx_queues << shift2; 2907 if ((j & 3) == 3) 2908 wr32(E1000_RETA(j >> 2), reta.dword); 2909 } 2910 2911 /* 2912 * Disable raw packet checksumming so that RSS hash is placed in 2913 * descriptor on writeback. No need to enable TCP/UDP/IP checksum 2914 * offloads as they are enabled by default 2915 */ 2916 rxcsum = rd32(E1000_RXCSUM); 2917 rxcsum |= E1000_RXCSUM_PCSD; 2918 2919 if (adapter->hw.mac.type >= e1000_82576) 2920 /* Enable Receive Checksum Offload for SCTP */ 2921 rxcsum |= E1000_RXCSUM_CRCOFL; 2922 2923 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 2924 wr32(E1000_RXCSUM, rxcsum); 2925 2926 /* If VMDq is enabled then we set the appropriate mode for that, else 2927 * we default to RSS so that an RSS hash is calculated per packet even 2928 * if we are only using one queue */ 2929 if (adapter->vfs_allocated_count) { 2930 if (hw->mac.type > e1000_82575) { 2931 /* Set the default pool for the PF's first queue */ 2932 u32 vtctl = rd32(E1000_VT_CTL); 2933 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | 2934 E1000_VT_CTL_DISABLE_DEF_POOL); 2935 vtctl |= adapter->vfs_allocated_count << 2936 E1000_VT_CTL_DEFAULT_POOL_SHIFT; 2937 wr32(E1000_VT_CTL, vtctl); 2938 } 2939 if (adapter->rss_queues > 1) 2940 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q; 2941 else 2942 mrqc = E1000_MRQC_ENABLE_VMDQ; 2943 } else { 2944 mrqc = E1000_MRQC_ENABLE_RSS_4Q; 2945 } 2946 igb_vmm_control(adapter); 2947 2948 /* 2949 * Generate RSS hash based on TCP port numbers and/or 2950 * IPv4/v6 src and dst addresses since UDP cannot be 2951 * hashed reliably due to IP fragmentation 2952 */ 2953 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | 2954 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2955 E1000_MRQC_RSS_FIELD_IPV6 | 2956 E1000_MRQC_RSS_FIELD_IPV6_TCP | 2957 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; 2958 2959 wr32(E1000_MRQC, mrqc); 2960} 2961 2962/** 2963 * igb_setup_rctl - configure the receive control registers 2964 * @adapter: Board private structure 2965 **/ 2966void igb_setup_rctl(struct igb_adapter *adapter) 2967{ 2968 struct e1000_hw *hw = &adapter->hw; 2969 u32 rctl; 2970 2971 rctl = rd32(E1000_RCTL); 2972 2973 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2974 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); 2975 2976 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | 2977 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2978 2979 /* 2980 * enable stripping of CRC. It's unlikely this will break BMC 2981 * redirection as it did with e1000. Newer features require 2982 * that the HW strips the CRC. 2983 */ 2984 rctl |= E1000_RCTL_SECRC; 2985 2986 /* disable store bad packets and clear size bits. */ 2987 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); 2988 2989 /* enable LPE to prevent packets larger than max_frame_size */ 2990 rctl |= E1000_RCTL_LPE; 2991 2992 /* disable queue 0 to prevent tail write w/o re-config */ 2993 wr32(E1000_RXDCTL(0), 0); 2994 2995 /* Attention!!! For SR-IOV PF driver operations you must enable 2996 * queue drop for all VF and PF queues to prevent head of line blocking 2997 * if an un-trusted VF does not provide descriptors to hardware. 2998 */ 2999 if (adapter->vfs_allocated_count) { 3000 /* set all queue drop enable bits */ 3001 wr32(E1000_QDE, ALL_QUEUES); 3002 } 3003 3004 wr32(E1000_RCTL, rctl); 3005} 3006 3007static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, 3008 int vfn) 3009{ 3010 struct e1000_hw *hw = &adapter->hw; 3011 u32 vmolr; 3012 3013 /* if it isn't the PF check to see if VFs are enabled and 3014 * increase the size to support vlan tags */ 3015 if (vfn < adapter->vfs_allocated_count && 3016 adapter->vf_data[vfn].vlans_enabled) 3017 size += VLAN_TAG_SIZE; 3018 3019 vmolr = rd32(E1000_VMOLR(vfn)); 3020 vmolr &= ~E1000_VMOLR_RLPML_MASK; 3021 vmolr |= size | E1000_VMOLR_LPE; 3022 wr32(E1000_VMOLR(vfn), vmolr); 3023 3024 return 0; 3025} 3026 3027/** 3028 * igb_rlpml_set - set maximum receive packet size 3029 * @adapter: board private structure 3030 * 3031 * Configure maximum receivable packet size. 3032 **/ 3033static void igb_rlpml_set(struct igb_adapter *adapter) 3034{ 3035 u32 max_frame_size = adapter->max_frame_size; 3036 struct e1000_hw *hw = &adapter->hw; 3037 u16 pf_id = adapter->vfs_allocated_count; 3038 3039 if (pf_id) { 3040 igb_set_vf_rlpml(adapter, max_frame_size, pf_id); 3041 /* 3042 * If we're in VMDQ or SR-IOV mode, then set global RLPML 3043 * to our max jumbo frame size, in case we need to enable 3044 * jumbo frames on one of the rings later. 3045 * This will not pass over-length frames into the default 3046 * queue because it's gated by the VMOLR.RLPML. 3047 */ 3048 max_frame_size = MAX_JUMBO_FRAME_SIZE; 3049 } 3050 3051 wr32(E1000_RLPML, max_frame_size); 3052} 3053 3054static inline void igb_set_vmolr(struct igb_adapter *adapter, 3055 int vfn, bool aupe) 3056{ 3057 struct e1000_hw *hw = &adapter->hw; 3058 u32 vmolr; 3059 3060 /* 3061 * This register exists only on 82576 and newer so if we are older then 3062 * we should exit and do nothing 3063 */ 3064 if (hw->mac.type < e1000_82576) 3065 return; 3066 3067 vmolr = rd32(E1000_VMOLR(vfn)); 3068 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ 3069 if (aupe) 3070 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ 3071 else 3072 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ 3073 3074 /* clear all bits that might not be set */ 3075 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE); 3076 3077 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) 3078 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ 3079 /* 3080 * for VMDq only allow the VFs and pool 0 to accept broadcast and 3081 * multicast packets 3082 */ 3083 if (vfn <= adapter->vfs_allocated_count) 3084 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ 3085 3086 wr32(E1000_VMOLR(vfn), vmolr); 3087} 3088 3089/** 3090 * igb_configure_rx_ring - Configure a receive ring after Reset 3091 * @adapter: board private structure 3092 * @ring: receive ring to be configured 3093 * 3094 * Configure the Rx unit of the MAC after a reset. 3095 **/ 3096void igb_configure_rx_ring(struct igb_adapter *adapter, 3097 struct igb_ring *ring) 3098{ 3099 struct e1000_hw *hw = &adapter->hw; 3100 u64 rdba = ring->dma; 3101 int reg_idx = ring->reg_idx; 3102 u32 srrctl = 0, rxdctl = 0; 3103 3104 /* disable the queue */ 3105 wr32(E1000_RXDCTL(reg_idx), 0); 3106 3107 /* Set DMA base address registers */ 3108 wr32(E1000_RDBAL(reg_idx), 3109 rdba & 0x00000000ffffffffULL); 3110 wr32(E1000_RDBAH(reg_idx), rdba >> 32); 3111 wr32(E1000_RDLEN(reg_idx), 3112 ring->count * sizeof(union e1000_adv_rx_desc)); 3113 3114 /* initialize head and tail */ 3115 ring->tail = hw->hw_addr + E1000_RDT(reg_idx); 3116 wr32(E1000_RDH(reg_idx), 0); 3117 writel(0, ring->tail); 3118 3119 /* set descriptor configuration */ 3120 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 3121#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384 3122 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 3123#else 3124 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; 3125#endif 3126 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 3127 if (hw->mac.type >= e1000_82580) 3128 srrctl |= E1000_SRRCTL_TIMESTAMP; 3129 /* Only set Drop Enable if we are supporting multiple queues */ 3130 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) 3131 srrctl |= E1000_SRRCTL_DROP_EN; 3132 3133 wr32(E1000_SRRCTL(reg_idx), srrctl); 3134 3135 /* set filtering for VMDQ pools */ 3136 igb_set_vmolr(adapter, reg_idx & 0x7, true); 3137 3138 rxdctl |= IGB_RX_PTHRESH; 3139 rxdctl |= IGB_RX_HTHRESH << 8; 3140 rxdctl |= IGB_RX_WTHRESH << 16; 3141 3142 /* enable receive descriptor fetching */ 3143 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 3144 wr32(E1000_RXDCTL(reg_idx), rxdctl); 3145} 3146 3147/** 3148 * igb_configure_rx - Configure receive Unit after Reset 3149 * @adapter: board private structure 3150 * 3151 * Configure the Rx unit of the MAC after a reset. 3152 **/ 3153static void igb_configure_rx(struct igb_adapter *adapter) 3154{ 3155 int i; 3156 3157 /* set UTA to appropriate mode */ 3158 igb_set_uta(adapter); 3159 3160 /* set the correct pool for the PF default MAC address in entry 0 */ 3161 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, 3162 adapter->vfs_allocated_count); 3163 3164 /* Setup the HW Rx Head and Tail Descriptor Pointers and 3165 * the Base and Length of the Rx Descriptor Ring */ 3166 for (i = 0; i < adapter->num_rx_queues; i++) 3167 igb_configure_rx_ring(adapter, adapter->rx_ring[i]); 3168} 3169 3170/** 3171 * igb_free_tx_resources - Free Tx Resources per Queue 3172 * @tx_ring: Tx descriptor ring for a specific queue 3173 * 3174 * Free all transmit software resources 3175 **/ 3176void igb_free_tx_resources(struct igb_ring *tx_ring) 3177{ 3178 igb_clean_tx_ring(tx_ring); 3179 3180 vfree(tx_ring->tx_buffer_info); 3181 tx_ring->tx_buffer_info = NULL; 3182 3183 /* if not set, then don't free */ 3184 if (!tx_ring->desc) 3185 return; 3186 3187 dma_free_coherent(tx_ring->dev, tx_ring->size, 3188 tx_ring->desc, tx_ring->dma); 3189 3190 tx_ring->desc = NULL; 3191} 3192 3193/** 3194 * igb_free_all_tx_resources - Free Tx Resources for All Queues 3195 * @adapter: board private structure 3196 * 3197 * Free all transmit software resources 3198 **/ 3199static void igb_free_all_tx_resources(struct igb_adapter *adapter) 3200{ 3201 int i; 3202 3203 for (i = 0; i < adapter->num_tx_queues; i++) 3204 igb_free_tx_resources(adapter->tx_ring[i]); 3205} 3206 3207void igb_unmap_and_free_tx_resource(struct igb_ring *ring, 3208 struct igb_tx_buffer *tx_buffer) 3209{ 3210 if (tx_buffer->skb) { 3211 dev_kfree_skb_any(tx_buffer->skb); 3212 if (tx_buffer->dma) 3213 dma_unmap_single(ring->dev, 3214 tx_buffer->dma, 3215 tx_buffer->length, 3216 DMA_TO_DEVICE); 3217 } else if (tx_buffer->dma) { 3218 dma_unmap_page(ring->dev, 3219 tx_buffer->dma, 3220 tx_buffer->length, 3221 DMA_TO_DEVICE); 3222 } 3223 tx_buffer->next_to_watch = NULL; 3224 tx_buffer->skb = NULL; 3225 tx_buffer->dma = 0; 3226 /* buffer_info must be completely set up in the transmit path */ 3227} 3228 3229/** 3230 * igb_clean_tx_ring - Free Tx Buffers 3231 * @tx_ring: ring to be cleaned 3232 **/ 3233static void igb_clean_tx_ring(struct igb_ring *tx_ring) 3234{ 3235 struct igb_tx_buffer *buffer_info; 3236 unsigned long size; 3237 u16 i; 3238 3239 if (!tx_ring->tx_buffer_info) 3240 return; 3241 /* Free all the Tx ring sk_buffs */ 3242 3243 for (i = 0; i < tx_ring->count; i++) { 3244 buffer_info = &tx_ring->tx_buffer_info[i]; 3245 igb_unmap_and_free_tx_resource(tx_ring, buffer_info); 3246 } 3247 netdev_tx_reset_queue(txring_txq(tx_ring)); 3248 3249 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 3250 memset(tx_ring->tx_buffer_info, 0, size); 3251 3252 /* Zero out the descriptor ring */ 3253 memset(tx_ring->desc, 0, tx_ring->size); 3254 3255 tx_ring->next_to_use = 0; 3256 tx_ring->next_to_clean = 0; 3257} 3258 3259/** 3260 * igb_clean_all_tx_rings - Free Tx Buffers for all queues 3261 * @adapter: board private structure 3262 **/ 3263static void igb_clean_all_tx_rings(struct igb_adapter *adapter) 3264{ 3265 int i; 3266 3267 for (i = 0; i < adapter->num_tx_queues; i++) 3268 igb_clean_tx_ring(adapter->tx_ring[i]); 3269} 3270 3271/** 3272 * igb_free_rx_resources - Free Rx Resources 3273 * @rx_ring: ring to clean the resources from 3274 * 3275 * Free all receive software resources 3276 **/ 3277void igb_free_rx_resources(struct igb_ring *rx_ring) 3278{ 3279 igb_clean_rx_ring(rx_ring); 3280 3281 vfree(rx_ring->rx_buffer_info); 3282 rx_ring->rx_buffer_info = NULL; 3283 3284 /* if not set, then don't free */ 3285 if (!rx_ring->desc) 3286 return; 3287 3288 dma_free_coherent(rx_ring->dev, rx_ring->size, 3289 rx_ring->desc, rx_ring->dma); 3290 3291 rx_ring->desc = NULL; 3292} 3293 3294/** 3295 * igb_free_all_rx_resources - Free Rx Resources for All Queues 3296 * @adapter: board private structure 3297 * 3298 * Free all receive software resources 3299 **/ 3300static void igb_free_all_rx_resources(struct igb_adapter *adapter) 3301{ 3302 int i; 3303 3304 for (i = 0; i < adapter->num_rx_queues; i++) 3305 igb_free_rx_resources(adapter->rx_ring[i]); 3306} 3307 3308/** 3309 * igb_clean_rx_ring - Free Rx Buffers per Queue 3310 * @rx_ring: ring to free buffers from 3311 **/ 3312static void igb_clean_rx_ring(struct igb_ring *rx_ring) 3313{ 3314 unsigned long size; 3315 u16 i; 3316 3317 if (!rx_ring->rx_buffer_info) 3318 return; 3319 3320 /* Free all the Rx ring sk_buffs */ 3321 for (i = 0; i < rx_ring->count; i++) { 3322 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 3323 if (buffer_info->dma) { 3324 dma_unmap_single(rx_ring->dev, 3325 buffer_info->dma, 3326 IGB_RX_HDR_LEN, 3327 DMA_FROM_DEVICE); 3328 buffer_info->dma = 0; 3329 } 3330 3331 if (buffer_info->skb) { 3332 dev_kfree_skb(buffer_info->skb); 3333 buffer_info->skb = NULL; 3334 } 3335 if (buffer_info->page_dma) { 3336 dma_unmap_page(rx_ring->dev, 3337 buffer_info->page_dma, 3338 PAGE_SIZE / 2, 3339 DMA_FROM_DEVICE); 3340 buffer_info->page_dma = 0; 3341 } 3342 if (buffer_info->page) { 3343 put_page(buffer_info->page); 3344 buffer_info->page = NULL; 3345 buffer_info->page_offset = 0; 3346 } 3347 } 3348 3349 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 3350 memset(rx_ring->rx_buffer_info, 0, size); 3351 3352 /* Zero out the descriptor ring */ 3353 memset(rx_ring->desc, 0, rx_ring->size); 3354 3355 rx_ring->next_to_clean = 0; 3356 rx_ring->next_to_use = 0; 3357} 3358 3359/** 3360 * igb_clean_all_rx_rings - Free Rx Buffers for all queues 3361 * @adapter: board private structure 3362 **/ 3363static void igb_clean_all_rx_rings(struct igb_adapter *adapter) 3364{ 3365 int i; 3366 3367 for (i = 0; i < adapter->num_rx_queues; i++) 3368 igb_clean_rx_ring(adapter->rx_ring[i]); 3369} 3370 3371/** 3372 * igb_set_mac - Change the Ethernet Address of the NIC 3373 * @netdev: network interface device structure 3374 * @p: pointer to an address structure 3375 * 3376 * Returns 0 on success, negative on failure 3377 **/ 3378static int igb_set_mac(struct net_device *netdev, void *p) 3379{ 3380 struct igb_adapter *adapter = netdev_priv(netdev); 3381 struct e1000_hw *hw = &adapter->hw; 3382 struct sockaddr *addr = p; 3383 3384 if (!is_valid_ether_addr(addr->sa_data)) 3385 return -EADDRNOTAVAIL; 3386 3387 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 3388 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 3389 3390 /* set the correct pool for the new PF MAC address in entry 0 */ 3391 igb_rar_set_qsel(adapter, hw->mac.addr, 0, 3392 adapter->vfs_allocated_count); 3393 3394 return 0; 3395} 3396 3397/** 3398 * igb_write_mc_addr_list - write multicast addresses to MTA 3399 * @netdev: network interface device structure 3400 * 3401 * Writes multicast address list to the MTA hash table. 3402 * Returns: -ENOMEM on failure 3403 * 0 on no addresses written 3404 * X on writing X addresses to MTA 3405 **/ 3406static int igb_write_mc_addr_list(struct net_device *netdev) 3407{ 3408 struct igb_adapter *adapter = netdev_priv(netdev); 3409 struct e1000_hw *hw = &adapter->hw; 3410 struct netdev_hw_addr *ha; 3411 u8 *mta_list; 3412 int i; 3413 3414 if (netdev_mc_empty(netdev)) { 3415 /* nothing to program, so clear mc list */ 3416 igb_update_mc_addr_list(hw, NULL, 0); 3417 igb_restore_vf_multicasts(adapter); 3418 return 0; 3419 } 3420 3421 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); 3422 if (!mta_list) 3423 return -ENOMEM; 3424 3425 /* The shared function expects a packed array of only addresses. */ 3426 i = 0; 3427 netdev_for_each_mc_addr(ha, netdev) 3428 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 3429 3430 igb_update_mc_addr_list(hw, mta_list, i); 3431 kfree(mta_list); 3432 3433 return netdev_mc_count(netdev); 3434} 3435 3436/** 3437 * igb_write_uc_addr_list - write unicast addresses to RAR table 3438 * @netdev: network interface device structure 3439 * 3440 * Writes unicast address list to the RAR table. 3441 * Returns: -ENOMEM on failure/insufficient address space 3442 * 0 on no addresses written 3443 * X on writing X addresses to the RAR table 3444 **/ 3445static int igb_write_uc_addr_list(struct net_device *netdev) 3446{ 3447 struct igb_adapter *adapter = netdev_priv(netdev); 3448 struct e1000_hw *hw = &adapter->hw; 3449 unsigned int vfn = adapter->vfs_allocated_count; 3450 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1); 3451 int count = 0; 3452 3453 /* return ENOMEM indicating insufficient memory for addresses */ 3454 if (netdev_uc_count(netdev) > rar_entries) 3455 return -ENOMEM; 3456 3457 if (!netdev_uc_empty(netdev) && rar_entries) { 3458 struct netdev_hw_addr *ha; 3459 3460 netdev_for_each_uc_addr(ha, netdev) { 3461 if (!rar_entries) 3462 break; 3463 igb_rar_set_qsel(adapter, ha->addr, 3464 rar_entries--, 3465 vfn); 3466 count++; 3467 } 3468 } 3469 /* write the addresses in reverse order to avoid write combining */ 3470 for (; rar_entries > 0 ; rar_entries--) { 3471 wr32(E1000_RAH(rar_entries), 0); 3472 wr32(E1000_RAL(rar_entries), 0); 3473 } 3474 wrfl(); 3475 3476 return count; 3477} 3478 3479/** 3480 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 3481 * @netdev: network interface device structure 3482 * 3483 * The set_rx_mode entry point is called whenever the unicast or multicast 3484 * address lists or the network interface flags are updated. This routine is 3485 * responsible for configuring the hardware for proper unicast, multicast, 3486 * promiscuous mode, and all-multi behavior. 3487 **/ 3488static void igb_set_rx_mode(struct net_device *netdev) 3489{ 3490 struct igb_adapter *adapter = netdev_priv(netdev); 3491 struct e1000_hw *hw = &adapter->hw; 3492 unsigned int vfn = adapter->vfs_allocated_count; 3493 u32 rctl, vmolr = 0; 3494 int count; 3495 3496 /* Check for Promiscuous and All Multicast modes */ 3497 rctl = rd32(E1000_RCTL); 3498 3499 /* clear the effected bits */ 3500 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); 3501 3502 if (netdev->flags & IFF_PROMISC) { 3503 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 3504 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); 3505 } else { 3506 if (netdev->flags & IFF_ALLMULTI) { 3507 rctl |= E1000_RCTL_MPE; 3508 vmolr |= E1000_VMOLR_MPME; 3509 } else { 3510 /* 3511 * Write addresses to the MTA, if the attempt fails 3512 * then we should just turn on promiscuous mode so 3513 * that we can at least receive multicast traffic 3514 */ 3515 count = igb_write_mc_addr_list(netdev); 3516 if (count < 0) { 3517 rctl |= E1000_RCTL_MPE; 3518 vmolr |= E1000_VMOLR_MPME; 3519 } else if (count) { 3520 vmolr |= E1000_VMOLR_ROMPE; 3521 } 3522 } 3523 /* 3524 * Write addresses to available RAR registers, if there is not 3525 * sufficient space to store all the addresses then enable 3526 * unicast promiscuous mode 3527 */ 3528 count = igb_write_uc_addr_list(netdev); 3529 if (count < 0) { 3530 rctl |= E1000_RCTL_UPE; 3531 vmolr |= E1000_VMOLR_ROPE; 3532 } 3533 rctl |= E1000_RCTL_VFE; 3534 } 3535 wr32(E1000_RCTL, rctl); 3536 3537 /* 3538 * In order to support SR-IOV and eventually VMDq it is necessary to set 3539 * the VMOLR to enable the appropriate modes. Without this workaround 3540 * we will have issues with VLAN tag stripping not being done for frames 3541 * that are only arriving because we are the default pool 3542 */ 3543 if (hw->mac.type < e1000_82576) 3544 return; 3545 3546 vmolr |= rd32(E1000_VMOLR(vfn)) & 3547 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); 3548 wr32(E1000_VMOLR(vfn), vmolr); 3549 igb_restore_vf_multicasts(adapter); 3550} 3551 3552static void igb_check_wvbr(struct igb_adapter *adapter) 3553{ 3554 struct e1000_hw *hw = &adapter->hw; 3555 u32 wvbr = 0; 3556 3557 switch (hw->mac.type) { 3558 case e1000_82576: 3559 case e1000_i350: 3560 if (!(wvbr = rd32(E1000_WVBR))) 3561 return; 3562 break; 3563 default: 3564 break; 3565 } 3566 3567 adapter->wvbr |= wvbr; 3568} 3569 3570#define IGB_STAGGERED_QUEUE_OFFSET 8 3571 3572static void igb_spoof_check(struct igb_adapter *adapter) 3573{ 3574 int j; 3575 3576 if (!adapter->wvbr) 3577 return; 3578 3579 for(j = 0; j < adapter->vfs_allocated_count; j++) { 3580 if (adapter->wvbr & (1 << j) || 3581 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { 3582 dev_warn(&adapter->pdev->dev, 3583 "Spoof event(s) detected on VF %d\n", j); 3584 adapter->wvbr &= 3585 ~((1 << j) | 3586 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); 3587 } 3588 } 3589} 3590 3591/* Need to wait a few seconds after link up to get diagnostic information from 3592 * the phy */ 3593static void igb_update_phy_info(unsigned long data) 3594{ 3595 struct igb_adapter *adapter = (struct igb_adapter *) data; 3596 igb_get_phy_info(&adapter->hw); 3597} 3598 3599/** 3600 * igb_has_link - check shared code for link and determine up/down 3601 * @adapter: pointer to driver private info 3602 **/ 3603bool igb_has_link(struct igb_adapter *adapter) 3604{ 3605 struct e1000_hw *hw = &adapter->hw; 3606 bool link_active = false; 3607 s32 ret_val = 0; 3608 3609 /* get_link_status is set on LSC (link status) interrupt or 3610 * rx sequence error interrupt. get_link_status will stay 3611 * false until the e1000_check_for_link establishes link 3612 * for copper adapters ONLY 3613 */ 3614 switch (hw->phy.media_type) { 3615 case e1000_media_type_copper: 3616 if (hw->mac.get_link_status) { 3617 ret_val = hw->mac.ops.check_for_link(hw); 3618 link_active = !hw->mac.get_link_status; 3619 } else { 3620 link_active = true; 3621 } 3622 break; 3623 case e1000_media_type_internal_serdes: 3624 ret_val = hw->mac.ops.check_for_link(hw); 3625 link_active = hw->mac.serdes_has_link; 3626 break; 3627 default: 3628 case e1000_media_type_unknown: 3629 break; 3630 } 3631 3632 return link_active; 3633} 3634 3635static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) 3636{ 3637 bool ret = false; 3638 u32 ctrl_ext, thstat; 3639 3640 /* check for thermal sensor event on i350, copper only */ 3641 if (hw->mac.type == e1000_i350) { 3642 thstat = rd32(E1000_THSTAT); 3643 ctrl_ext = rd32(E1000_CTRL_EXT); 3644 3645 if ((hw->phy.media_type == e1000_media_type_copper) && 3646 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) { 3647 ret = !!(thstat & event); 3648 } 3649 } 3650 3651 return ret; 3652} 3653 3654/** 3655 * igb_watchdog - Timer Call-back 3656 * @data: pointer to adapter cast into an unsigned long 3657 **/ 3658static void igb_watchdog(unsigned long data) 3659{ 3660 struct igb_adapter *adapter = (struct igb_adapter *)data; 3661 /* Do the rest outside of interrupt context */ 3662 schedule_work(&adapter->watchdog_task); 3663} 3664 3665static void igb_watchdog_task(struct work_struct *work) 3666{ 3667 struct igb_adapter *adapter = container_of(work, 3668 struct igb_adapter, 3669 watchdog_task); 3670 struct e1000_hw *hw = &adapter->hw; 3671 struct net_device *netdev = adapter->netdev; 3672 u32 link; 3673 int i; 3674 3675 link = igb_has_link(adapter); 3676 if (link) { 3677 /* Cancel scheduled suspend requests. */ 3678 pm_runtime_resume(netdev->dev.parent); 3679 3680 if (!netif_carrier_ok(netdev)) { 3681 u32 ctrl; 3682 hw->mac.ops.get_speed_and_duplex(hw, 3683 &adapter->link_speed, 3684 &adapter->link_duplex); 3685 3686 ctrl = rd32(E1000_CTRL); 3687 /* Links status message must follow this format */ 3688 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s " 3689 "Duplex, Flow Control: %s\n", 3690 netdev->name, 3691 adapter->link_speed, 3692 adapter->link_duplex == FULL_DUPLEX ? 3693 "Full" : "Half", 3694 (ctrl & E1000_CTRL_TFCE) && 3695 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" : 3696 (ctrl & E1000_CTRL_RFCE) ? "RX" : 3697 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None"); 3698 3699 /* check for thermal sensor event */ 3700 if (igb_thermal_sensor_event(hw, 3701 E1000_THSTAT_LINK_THROTTLE)) { 3702 netdev_info(netdev, "The network adapter link " 3703 "speed was downshifted because it " 3704 "overheated\n"); 3705 } 3706 3707 /* adjust timeout factor according to speed/duplex */ 3708 adapter->tx_timeout_factor = 1; 3709 switch (adapter->link_speed) { 3710 case SPEED_10: 3711 adapter->tx_timeout_factor = 14; 3712 break; 3713 case SPEED_100: 3714 /* maybe add some timeout factor ? */ 3715 break; 3716 } 3717 3718 netif_carrier_on(netdev); 3719 3720 igb_ping_all_vfs(adapter); 3721 igb_check_vf_rate_limit(adapter); 3722 3723 /* link state has changed, schedule phy info update */ 3724 if (!test_bit(__IGB_DOWN, &adapter->state)) 3725 mod_timer(&adapter->phy_info_timer, 3726 round_jiffies(jiffies + 2 * HZ)); 3727 } 3728 } else { 3729 if (netif_carrier_ok(netdev)) { 3730 adapter->link_speed = 0; 3731 adapter->link_duplex = 0; 3732 3733 /* check for thermal sensor event */ 3734 if (igb_thermal_sensor_event(hw, 3735 E1000_THSTAT_PWR_DOWN)) { 3736 netdev_err(netdev, "The network adapter was " 3737 "stopped because it overheated\n"); 3738 } 3739 3740 /* Links status message must follow this format */ 3741 printk(KERN_INFO "igb: %s NIC Link is Down\n", 3742 netdev->name); 3743 netif_carrier_off(netdev); 3744 3745 igb_ping_all_vfs(adapter); 3746 3747 /* link state has changed, schedule phy info update */ 3748 if (!test_bit(__IGB_DOWN, &adapter->state)) 3749 mod_timer(&adapter->phy_info_timer, 3750 round_jiffies(jiffies + 2 * HZ)); 3751 3752 pm_schedule_suspend(netdev->dev.parent, 3753 MSEC_PER_SEC * 5); 3754 } 3755 } 3756 3757 spin_lock(&adapter->stats64_lock); 3758 igb_update_stats(adapter, &adapter->stats64); 3759 spin_unlock(&adapter->stats64_lock); 3760 3761 for (i = 0; i < adapter->num_tx_queues; i++) { 3762 struct igb_ring *tx_ring = adapter->tx_ring[i]; 3763 if (!netif_carrier_ok(netdev)) { 3764 /* We've lost link, so the controller stops DMA, 3765 * but we've got queued Tx work that's never going 3766 * to get done, so reset controller to flush Tx. 3767 * (Do the reset outside of interrupt context). */ 3768 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { 3769 adapter->tx_timeout_count++; 3770 schedule_work(&adapter->reset_task); 3771 /* return immediately since reset is imminent */ 3772 return; 3773 } 3774 } 3775 3776 /* Force detection of hung controller every watchdog period */ 3777 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3778 } 3779 3780 /* Cause software interrupt to ensure rx ring is cleaned */ 3781 if (adapter->msix_entries) { 3782 u32 eics = 0; 3783 for (i = 0; i < adapter->num_q_vectors; i++) 3784 eics |= adapter->q_vector[i]->eims_value; 3785 wr32(E1000_EICS, eics); 3786 } else { 3787 wr32(E1000_ICS, E1000_ICS_RXDMT0); 3788 } 3789 3790 igb_spoof_check(adapter); 3791 3792 /* Reset the timer */ 3793 if (!test_bit(__IGB_DOWN, &adapter->state)) 3794 mod_timer(&adapter->watchdog_timer, 3795 round_jiffies(jiffies + 2 * HZ)); 3796} 3797 3798enum latency_range { 3799 lowest_latency = 0, 3800 low_latency = 1, 3801 bulk_latency = 2, 3802 latency_invalid = 255 3803}; 3804 3805/** 3806 * igb_update_ring_itr - update the dynamic ITR value based on packet size 3807 * 3808 * Stores a new ITR value based on strictly on packet size. This 3809 * algorithm is less sophisticated than that used in igb_update_itr, 3810 * due to the difficulty of synchronizing statistics across multiple 3811 * receive rings. The divisors and thresholds used by this function 3812 * were determined based on theoretical maximum wire speed and testing 3813 * data, in order to minimize response time while increasing bulk 3814 * throughput. 3815 * This functionality is controlled by the InterruptThrottleRate module 3816 * parameter (see igb_param.c) 3817 * NOTE: This function is called only when operating in a multiqueue 3818 * receive environment. 3819 * @q_vector: pointer to q_vector 3820 **/ 3821static void igb_update_ring_itr(struct igb_q_vector *q_vector) 3822{ 3823 int new_val = q_vector->itr_val; 3824 int avg_wire_size = 0; 3825 struct igb_adapter *adapter = q_vector->adapter; 3826 unsigned int packets; 3827 3828 /* For non-gigabit speeds, just fix the interrupt rate at 4000 3829 * ints/sec - ITR timer value of 120 ticks. 3830 */ 3831 if (adapter->link_speed != SPEED_1000) { 3832 new_val = IGB_4K_ITR; 3833 goto set_itr_val; 3834 } 3835 3836 packets = q_vector->rx.total_packets; 3837 if (packets) 3838 avg_wire_size = q_vector->rx.total_bytes / packets; 3839 3840 packets = q_vector->tx.total_packets; 3841 if (packets) 3842 avg_wire_size = max_t(u32, avg_wire_size, 3843 q_vector->tx.total_bytes / packets); 3844 3845 /* if avg_wire_size isn't set no work was done */ 3846 if (!avg_wire_size) 3847 goto clear_counts; 3848 3849 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 3850 avg_wire_size += 24; 3851 3852 /* Don't starve jumbo frames */ 3853 avg_wire_size = min(avg_wire_size, 3000); 3854 3855 /* Give a little boost to mid-size frames */ 3856 if ((avg_wire_size > 300) && (avg_wire_size < 1200)) 3857 new_val = avg_wire_size / 3; 3858 else 3859 new_val = avg_wire_size / 2; 3860 3861 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 3862 if (new_val < IGB_20K_ITR && 3863 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 3864 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 3865 new_val = IGB_20K_ITR; 3866 3867set_itr_val: 3868 if (new_val != q_vector->itr_val) { 3869 q_vector->itr_val = new_val; 3870 q_vector->set_itr = 1; 3871 } 3872clear_counts: 3873 q_vector->rx.total_bytes = 0; 3874 q_vector->rx.total_packets = 0; 3875 q_vector->tx.total_bytes = 0; 3876 q_vector->tx.total_packets = 0; 3877} 3878 3879/** 3880 * igb_update_itr - update the dynamic ITR value based on statistics 3881 * Stores a new ITR value based on packets and byte 3882 * counts during the last interrupt. The advantage of per interrupt 3883 * computation is faster updates and more accurate ITR for the current 3884 * traffic pattern. Constants in this function were computed 3885 * based on theoretical maximum wire speed and thresholds were set based 3886 * on testing data as well as attempting to minimize response time 3887 * while increasing bulk throughput. 3888 * this functionality is controlled by the InterruptThrottleRate module 3889 * parameter (see igb_param.c) 3890 * NOTE: These calculations are only valid when operating in a single- 3891 * queue environment. 3892 * @q_vector: pointer to q_vector 3893 * @ring_container: ring info to update the itr for 3894 **/ 3895static void igb_update_itr(struct igb_q_vector *q_vector, 3896 struct igb_ring_container *ring_container) 3897{ 3898 unsigned int packets = ring_container->total_packets; 3899 unsigned int bytes = ring_container->total_bytes; 3900 u8 itrval = ring_container->itr; 3901 3902 /* no packets, exit with status unchanged */ 3903 if (packets == 0) 3904 return; 3905 3906 switch (itrval) { 3907 case lowest_latency: 3908 /* handle TSO and jumbo frames */ 3909 if (bytes/packets > 8000) 3910 itrval = bulk_latency; 3911 else if ((packets < 5) && (bytes > 512)) 3912 itrval = low_latency; 3913 break; 3914 case low_latency: /* 50 usec aka 20000 ints/s */ 3915 if (bytes > 10000) { 3916 /* this if handles the TSO accounting */ 3917 if (bytes/packets > 8000) { 3918 itrval = bulk_latency; 3919 } else if ((packets < 10) || ((bytes/packets) > 1200)) { 3920 itrval = bulk_latency; 3921 } else if ((packets > 35)) { 3922 itrval = lowest_latency; 3923 } 3924 } else if (bytes/packets > 2000) { 3925 itrval = bulk_latency; 3926 } else if (packets <= 2 && bytes < 512) { 3927 itrval = lowest_latency; 3928 } 3929 break; 3930 case bulk_latency: /* 250 usec aka 4000 ints/s */ 3931 if (bytes > 25000) { 3932 if (packets > 35) 3933 itrval = low_latency; 3934 } else if (bytes < 1500) { 3935 itrval = low_latency; 3936 } 3937 break; 3938 } 3939 3940 /* clear work counters since we have the values we need */ 3941 ring_container->total_bytes = 0; 3942 ring_container->total_packets = 0; 3943 3944 /* write updated itr to ring container */ 3945 ring_container->itr = itrval; 3946} 3947 3948static void igb_set_itr(struct igb_q_vector *q_vector) 3949{ 3950 struct igb_adapter *adapter = q_vector->adapter; 3951 u32 new_itr = q_vector->itr_val; 3952 u8 current_itr = 0; 3953 3954 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 3955 if (adapter->link_speed != SPEED_1000) { 3956 current_itr = 0; 3957 new_itr = IGB_4K_ITR; 3958 goto set_itr_now; 3959 } 3960 3961 igb_update_itr(q_vector, &q_vector->tx); 3962 igb_update_itr(q_vector, &q_vector->rx); 3963 3964 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 3965 3966 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 3967 if (current_itr == lowest_latency && 3968 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 3969 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 3970 current_itr = low_latency; 3971 3972 switch (current_itr) { 3973 /* counts and packets in update_itr are dependent on these numbers */ 3974 case lowest_latency: 3975 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ 3976 break; 3977 case low_latency: 3978 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ 3979 break; 3980 case bulk_latency: 3981 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ 3982 break; 3983 default: 3984 break; 3985 } 3986 3987set_itr_now: 3988 if (new_itr != q_vector->itr_val) { 3989 /* this attempts to bias the interrupt rate towards Bulk 3990 * by adding intermediate steps when interrupt rate is 3991 * increasing */ 3992 new_itr = new_itr > q_vector->itr_val ? 3993 max((new_itr * q_vector->itr_val) / 3994 (new_itr + (q_vector->itr_val >> 2)), 3995 new_itr) : 3996 new_itr; 3997 /* Don't write the value here; it resets the adapter's 3998 * internal timer, and causes us to delay far longer than 3999 * we should between interrupts. Instead, we write the ITR 4000 * value at the beginning of the next interrupt so the timing 4001 * ends up being correct. 4002 */ 4003 q_vector->itr_val = new_itr; 4004 q_vector->set_itr = 1; 4005 } 4006} 4007 4008static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, 4009 u32 type_tucmd, u32 mss_l4len_idx) 4010{ 4011 struct e1000_adv_tx_context_desc *context_desc; 4012 u16 i = tx_ring->next_to_use; 4013 4014 context_desc = IGB_TX_CTXTDESC(tx_ring, i); 4015 4016 i++; 4017 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 4018 4019 /* set bits to identify this as an advanced context descriptor */ 4020 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 4021 4022 /* For 82575, context index must be unique per ring. */ 4023 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 4024 mss_l4len_idx |= tx_ring->reg_idx << 4; 4025 4026 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 4027 context_desc->seqnum_seed = 0; 4028 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 4029 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 4030} 4031 4032static int igb_tso(struct igb_ring *tx_ring, 4033 struct igb_tx_buffer *first, 4034 u8 *hdr_len) 4035{ 4036 struct sk_buff *skb = first->skb; 4037 u32 vlan_macip_lens, type_tucmd; 4038 u32 mss_l4len_idx, l4len; 4039 4040 if (!skb_is_gso(skb)) 4041 return 0; 4042 4043 if (skb_header_cloned(skb)) { 4044 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 4045 if (err) 4046 return err; 4047 } 4048 4049 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 4050 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; 4051 4052 if (first->protocol == __constant_htons(ETH_P_IP)) { 4053 struct iphdr *iph = ip_hdr(skb); 4054 iph->tot_len = 0; 4055 iph->check = 0; 4056 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, 4057 iph->daddr, 0, 4058 IPPROTO_TCP, 4059 0); 4060 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; 4061 first->tx_flags |= IGB_TX_FLAGS_TSO | 4062 IGB_TX_FLAGS_CSUM | 4063 IGB_TX_FLAGS_IPV4; 4064 } else if (skb_is_gso_v6(skb)) { 4065 ipv6_hdr(skb)->payload_len = 0; 4066 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 4067 &ipv6_hdr(skb)->daddr, 4068 0, IPPROTO_TCP, 0); 4069 first->tx_flags |= IGB_TX_FLAGS_TSO | 4070 IGB_TX_FLAGS_CSUM; 4071 } 4072 4073 /* compute header lengths */ 4074 l4len = tcp_hdrlen(skb); 4075 *hdr_len = skb_transport_offset(skb) + l4len; 4076 4077 /* update gso size and bytecount with header size */ 4078 first->gso_segs = skb_shinfo(skb)->gso_segs; 4079 first->bytecount += (first->gso_segs - 1) * *hdr_len; 4080 4081 /* MSS L4LEN IDX */ 4082 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; 4083 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; 4084 4085 /* VLAN MACLEN IPLEN */ 4086 vlan_macip_lens = skb_network_header_len(skb); 4087 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; 4088 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; 4089 4090 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); 4091 4092 return 1; 4093} 4094 4095static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) 4096{ 4097 struct sk_buff *skb = first->skb; 4098 u32 vlan_macip_lens = 0; 4099 u32 mss_l4len_idx = 0; 4100 u32 type_tucmd = 0; 4101 4102 if (skb->ip_summed != CHECKSUM_PARTIAL) { 4103 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) 4104 return; 4105 } else { 4106 u8 l4_hdr = 0; 4107 switch (first->protocol) { 4108 case __constant_htons(ETH_P_IP): 4109 vlan_macip_lens |= skb_network_header_len(skb); 4110 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; 4111 l4_hdr = ip_hdr(skb)->protocol; 4112 break; 4113 case __constant_htons(ETH_P_IPV6): 4114 vlan_macip_lens |= skb_network_header_len(skb); 4115 l4_hdr = ipv6_hdr(skb)->nexthdr; 4116 break; 4117 default: 4118 if (unlikely(net_ratelimit())) { 4119 dev_warn(tx_ring->dev, 4120 "partial checksum but proto=%x!\n", 4121 first->protocol); 4122 } 4123 break; 4124 } 4125 4126 switch (l4_hdr) { 4127 case IPPROTO_TCP: 4128 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; 4129 mss_l4len_idx = tcp_hdrlen(skb) << 4130 E1000_ADVTXD_L4LEN_SHIFT; 4131 break; 4132 case IPPROTO_SCTP: 4133 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; 4134 mss_l4len_idx = sizeof(struct sctphdr) << 4135 E1000_ADVTXD_L4LEN_SHIFT; 4136 break; 4137 case IPPROTO_UDP: 4138 mss_l4len_idx = sizeof(struct udphdr) << 4139 E1000_ADVTXD_L4LEN_SHIFT; 4140 break; 4141 default: 4142 if (unlikely(net_ratelimit())) { 4143 dev_warn(tx_ring->dev, 4144 "partial checksum but l4 proto=%x!\n", 4145 l4_hdr); 4146 } 4147 break; 4148 } 4149 4150 /* update TX checksum flag */ 4151 first->tx_flags |= IGB_TX_FLAGS_CSUM; 4152 } 4153 4154 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; 4155 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; 4156 4157 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); 4158} 4159 4160static __le32 igb_tx_cmd_type(u32 tx_flags) 4161{ 4162 /* set type for advanced descriptor with frame checksum insertion */ 4163 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA | 4164 E1000_ADVTXD_DCMD_IFCS | 4165 E1000_ADVTXD_DCMD_DEXT); 4166 4167 /* set HW vlan bit if vlan is present */ 4168 if (tx_flags & IGB_TX_FLAGS_VLAN) 4169 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); 4170 4171 /* set timestamp bit if present */ 4172 if (tx_flags & IGB_TX_FLAGS_TSTAMP) 4173 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); 4174 4175 /* set segmentation bits for TSO */ 4176 if (tx_flags & IGB_TX_FLAGS_TSO) 4177 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE); 4178 4179 return cmd_type; 4180} 4181 4182static void igb_tx_olinfo_status(struct igb_ring *tx_ring, 4183 union e1000_adv_tx_desc *tx_desc, 4184 u32 tx_flags, unsigned int paylen) 4185{ 4186 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; 4187 4188 /* 82575 requires a unique index per ring if any offload is enabled */ 4189 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) && 4190 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 4191 olinfo_status |= tx_ring->reg_idx << 4; 4192 4193 /* insert L4 checksum */ 4194 if (tx_flags & IGB_TX_FLAGS_CSUM) { 4195 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 4196 4197 /* insert IPv4 checksum */ 4198 if (tx_flags & IGB_TX_FLAGS_IPV4) 4199 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 4200 } 4201 4202 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 4203} 4204 4205/* 4206 * The largest size we can write to the descriptor is 65535. In order to 4207 * maintain a power of two alignment we have to limit ourselves to 32K. 4208 */ 4209#define IGB_MAX_TXD_PWR 15 4210#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR) 4211 4212static void igb_tx_map(struct igb_ring *tx_ring, 4213 struct igb_tx_buffer *first, 4214 const u8 hdr_len) 4215{ 4216 struct sk_buff *skb = first->skb; 4217 struct igb_tx_buffer *tx_buffer_info; 4218 union e1000_adv_tx_desc *tx_desc; 4219 dma_addr_t dma; 4220 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; 4221 unsigned int data_len = skb->data_len; 4222 unsigned int size = skb_headlen(skb); 4223 unsigned int paylen = skb->len - hdr_len; 4224 __le32 cmd_type; 4225 u32 tx_flags = first->tx_flags; 4226 u16 i = tx_ring->next_to_use; 4227 4228 tx_desc = IGB_TX_DESC(tx_ring, i); 4229 4230 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen); 4231 cmd_type = igb_tx_cmd_type(tx_flags); 4232 4233 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 4234 if (dma_mapping_error(tx_ring->dev, dma)) 4235 goto dma_error; 4236 4237 /* record length, and DMA address */ 4238 first->length = size; 4239 first->dma = dma; 4240 tx_desc->read.buffer_addr = cpu_to_le64(dma); 4241 4242 for (;;) { 4243 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { 4244 tx_desc->read.cmd_type_len = 4245 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD); 4246 4247 i++; 4248 tx_desc++; 4249 if (i == tx_ring->count) { 4250 tx_desc = IGB_TX_DESC(tx_ring, 0); 4251 i = 0; 4252 } 4253 4254 dma += IGB_MAX_DATA_PER_TXD; 4255 size -= IGB_MAX_DATA_PER_TXD; 4256 4257 tx_desc->read.olinfo_status = 0; 4258 tx_desc->read.buffer_addr = cpu_to_le64(dma); 4259 } 4260 4261 if (likely(!data_len)) 4262 break; 4263 4264 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); 4265 4266 i++; 4267 tx_desc++; 4268 if (i == tx_ring->count) { 4269 tx_desc = IGB_TX_DESC(tx_ring, 0); 4270 i = 0; 4271 } 4272 4273 size = skb_frag_size(frag); 4274 data_len -= size; 4275 4276 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 4277 size, DMA_TO_DEVICE); 4278 if (dma_mapping_error(tx_ring->dev, dma)) 4279 goto dma_error; 4280 4281 tx_buffer_info = &tx_ring->tx_buffer_info[i]; 4282 tx_buffer_info->length = size; 4283 tx_buffer_info->dma = dma; 4284 4285 tx_desc->read.olinfo_status = 0; 4286 tx_desc->read.buffer_addr = cpu_to_le64(dma); 4287 4288 frag++; 4289 } 4290 4291 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 4292 4293 /* write last descriptor with RS and EOP bits */ 4294 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD); 4295 tx_desc->read.cmd_type_len = cmd_type; 4296 4297 /* set the timestamp */ 4298 first->time_stamp = jiffies; 4299 4300 /* 4301 * Force memory writes to complete before letting h/w know there 4302 * are new descriptors to fetch. (Only applicable for weak-ordered 4303 * memory model archs, such as IA-64). 4304 * 4305 * We also need this memory barrier to make certain all of the 4306 * status bits have been updated before next_to_watch is written. 4307 */ 4308 wmb(); 4309 4310 /* set next_to_watch value indicating a packet is present */ 4311 first->next_to_watch = tx_desc; 4312 4313 i++; 4314 if (i == tx_ring->count) 4315 i = 0; 4316 4317 tx_ring->next_to_use = i; 4318 4319 writel(i, tx_ring->tail); 4320 4321 /* we need this if more than one processor can write to our tail 4322 * at a time, it syncronizes IO on IA64/Altix systems */ 4323 mmiowb(); 4324 4325 return; 4326 4327dma_error: 4328 dev_err(tx_ring->dev, "TX DMA map failed\n"); 4329 4330 /* clear dma mappings for failed tx_buffer_info map */ 4331 for (;;) { 4332 tx_buffer_info = &tx_ring->tx_buffer_info[i]; 4333 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); 4334 if (tx_buffer_info == first) 4335 break; 4336 if (i == 0) 4337 i = tx_ring->count; 4338 i--; 4339 } 4340 4341 tx_ring->next_to_use = i; 4342} 4343 4344static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) 4345{ 4346 struct net_device *netdev = tx_ring->netdev; 4347 4348 netif_stop_subqueue(netdev, tx_ring->queue_index); 4349 4350 /* Herbert's original patch had: 4351 * smp_mb__after_netif_stop_queue(); 4352 * but since that doesn't exist yet, just open code it. */ 4353 smp_mb(); 4354 4355 /* We need to check again in a case another CPU has just 4356 * made room available. */ 4357 if (igb_desc_unused(tx_ring) < size) 4358 return -EBUSY; 4359 4360 /* A reprieve! */ 4361 netif_wake_subqueue(netdev, tx_ring->queue_index); 4362 4363 u64_stats_update_begin(&tx_ring->tx_syncp2); 4364 tx_ring->tx_stats.restart_queue2++; 4365 u64_stats_update_end(&tx_ring->tx_syncp2); 4366 4367 return 0; 4368} 4369 4370static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) 4371{ 4372 if (igb_desc_unused(tx_ring) >= size) 4373 return 0; 4374 return __igb_maybe_stop_tx(tx_ring, size); 4375} 4376 4377netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, 4378 struct igb_ring *tx_ring) 4379{ 4380 struct igb_tx_buffer *first; 4381 int tso; 4382 u32 tx_flags = 0; 4383 __be16 protocol = vlan_get_protocol(skb); 4384 u8 hdr_len = 0; 4385 4386 /* need: 1 descriptor per page, 4387 * + 2 desc gap to keep tail from touching head, 4388 * + 1 desc for skb->data, 4389 * + 1 desc for context descriptor, 4390 * otherwise try next time */ 4391 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) { 4392 /* this is a hard error */ 4393 return NETDEV_TX_BUSY; 4394 } 4395 4396 /* record the location of the first descriptor for this packet */ 4397 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 4398 first->skb = skb; 4399 first->bytecount = skb->len; 4400 first->gso_segs = 1; 4401 4402 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 4403 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 4404 tx_flags |= IGB_TX_FLAGS_TSTAMP; 4405 } 4406 4407 if (vlan_tx_tag_present(skb)) { 4408 tx_flags |= IGB_TX_FLAGS_VLAN; 4409 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); 4410 } 4411 4412 /* record initial flags and protocol */ 4413 first->tx_flags = tx_flags; 4414 first->protocol = protocol; 4415 4416 tso = igb_tso(tx_ring, first, &hdr_len); 4417 if (tso < 0) 4418 goto out_drop; 4419 else if (!tso) 4420 igb_tx_csum(tx_ring, first); 4421 4422 igb_tx_map(tx_ring, first, hdr_len); 4423 4424 /* Make sure there is space in the ring for the next send. */ 4425 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4); 4426 4427 return NETDEV_TX_OK; 4428 4429out_drop: 4430 igb_unmap_and_free_tx_resource(tx_ring, first); 4431 4432 return NETDEV_TX_OK; 4433} 4434 4435static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, 4436 struct sk_buff *skb) 4437{ 4438 unsigned int r_idx = skb->queue_mapping; 4439 4440 if (r_idx >= adapter->num_tx_queues) 4441 r_idx = r_idx % adapter->num_tx_queues; 4442 4443 return adapter->tx_ring[r_idx]; 4444} 4445 4446static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, 4447 struct net_device *netdev) 4448{ 4449 struct igb_adapter *adapter = netdev_priv(netdev); 4450 4451 if (test_bit(__IGB_DOWN, &adapter->state)) { 4452 dev_kfree_skb_any(skb); 4453 return NETDEV_TX_OK; 4454 } 4455 4456 if (skb->len <= 0) { 4457 dev_kfree_skb_any(skb); 4458 return NETDEV_TX_OK; 4459 } 4460 4461 /* 4462 * The minimum packet size with TCTL.PSP set is 17 so pad the skb 4463 * in order to meet this minimum size requirement. 4464 */ 4465 if (skb->len < 17) { 4466 if (skb_padto(skb, 17)) 4467 return NETDEV_TX_OK; 4468 skb->len = 17; 4469 } 4470 4471 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); 4472} 4473 4474/** 4475 * igb_tx_timeout - Respond to a Tx Hang 4476 * @netdev: network interface device structure 4477 **/ 4478static void igb_tx_timeout(struct net_device *netdev) 4479{ 4480 struct igb_adapter *adapter = netdev_priv(netdev); 4481 struct e1000_hw *hw = &adapter->hw; 4482 4483 /* Do the reset outside of interrupt context */ 4484 adapter->tx_timeout_count++; 4485 4486 if (hw->mac.type >= e1000_82580) 4487 hw->dev_spec._82575.global_device_reset = true; 4488 4489 schedule_work(&adapter->reset_task); 4490 wr32(E1000_EICS, 4491 (adapter->eims_enable_mask & ~adapter->eims_other)); 4492} 4493 4494static void igb_reset_task(struct work_struct *work) 4495{ 4496 struct igb_adapter *adapter; 4497 adapter = container_of(work, struct igb_adapter, reset_task); 4498 4499 igb_dump(adapter); 4500 netdev_err(adapter->netdev, "Reset adapter\n"); 4501 igb_reinit_locked(adapter); 4502} 4503 4504/** 4505 * igb_get_stats64 - Get System Network Statistics 4506 * @netdev: network interface device structure 4507 * @stats: rtnl_link_stats64 pointer 4508 * 4509 **/ 4510static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, 4511 struct rtnl_link_stats64 *stats) 4512{ 4513 struct igb_adapter *adapter = netdev_priv(netdev); 4514 4515 spin_lock(&adapter->stats64_lock); 4516 igb_update_stats(adapter, &adapter->stats64); 4517 memcpy(stats, &adapter->stats64, sizeof(*stats)); 4518 spin_unlock(&adapter->stats64_lock); 4519 4520 return stats; 4521} 4522 4523/** 4524 * igb_change_mtu - Change the Maximum Transfer Unit 4525 * @netdev: network interface device structure 4526 * @new_mtu: new value for maximum frame size 4527 * 4528 * Returns 0 on success, negative on failure 4529 **/ 4530static int igb_change_mtu(struct net_device *netdev, int new_mtu) 4531{ 4532 struct igb_adapter *adapter = netdev_priv(netdev); 4533 struct pci_dev *pdev = adapter->pdev; 4534 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 4535 4536 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { 4537 dev_err(&pdev->dev, "Invalid MTU setting\n"); 4538 return -EINVAL; 4539 } 4540 4541#define MAX_STD_JUMBO_FRAME_SIZE 9238 4542 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { 4543 dev_err(&pdev->dev, "MTU > 9216 not supported.\n"); 4544 return -EINVAL; 4545 } 4546 4547 while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) 4548 msleep(1); 4549 4550 /* igb_down has a dependency on max_frame_size */ 4551 adapter->max_frame_size = max_frame; 4552 4553 if (netif_running(netdev)) 4554 igb_down(adapter); 4555 4556 dev_info(&pdev->dev, "changing MTU from %d to %d\n", 4557 netdev->mtu, new_mtu); 4558 netdev->mtu = new_mtu; 4559 4560 if (netif_running(netdev)) 4561 igb_up(adapter); 4562 else 4563 igb_reset(adapter); 4564 4565 clear_bit(__IGB_RESETTING, &adapter->state); 4566 4567 return 0; 4568} 4569 4570/** 4571 * igb_update_stats - Update the board statistics counters 4572 * @adapter: board private structure 4573 **/ 4574 4575void igb_update_stats(struct igb_adapter *adapter, 4576 struct rtnl_link_stats64 *net_stats) 4577{ 4578 struct e1000_hw *hw = &adapter->hw; 4579 struct pci_dev *pdev = adapter->pdev; 4580 u32 reg, mpc; 4581 u16 phy_tmp; 4582 int i; 4583 u64 bytes, packets; 4584 unsigned int start; 4585 u64 _bytes, _packets; 4586 4587#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF 4588 4589 /* 4590 * Prevent stats update while adapter is being reset, or if the pci 4591 * connection is down. 4592 */ 4593 if (adapter->link_speed == 0) 4594 return; 4595 if (pci_channel_offline(pdev)) 4596 return; 4597 4598 bytes = 0; 4599 packets = 0; 4600 for (i = 0; i < adapter->num_rx_queues; i++) { 4601 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF; 4602 struct igb_ring *ring = adapter->rx_ring[i]; 4603 4604 ring->rx_stats.drops += rqdpc_tmp; 4605 net_stats->rx_fifo_errors += rqdpc_tmp; 4606 4607 do { 4608 start = u64_stats_fetch_begin_bh(&ring->rx_syncp); 4609 _bytes = ring->rx_stats.bytes; 4610 _packets = ring->rx_stats.packets; 4611 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start)); 4612 bytes += _bytes; 4613 packets += _packets; 4614 } 4615 4616 net_stats->rx_bytes = bytes; 4617 net_stats->rx_packets = packets; 4618 4619 bytes = 0; 4620 packets = 0; 4621 for (i = 0; i < adapter->num_tx_queues; i++) { 4622 struct igb_ring *ring = adapter->tx_ring[i]; 4623 do { 4624 start = u64_stats_fetch_begin_bh(&ring->tx_syncp); 4625 _bytes = ring->tx_stats.bytes; 4626 _packets = ring->tx_stats.packets; 4627 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start)); 4628 bytes += _bytes; 4629 packets += _packets; 4630 } 4631 net_stats->tx_bytes = bytes; 4632 net_stats->tx_packets = packets; 4633 4634 /* read stats registers */ 4635 adapter->stats.crcerrs += rd32(E1000_CRCERRS); 4636 adapter->stats.gprc += rd32(E1000_GPRC); 4637 adapter->stats.gorc += rd32(E1000_GORCL); 4638 rd32(E1000_GORCH); /* clear GORCL */ 4639 adapter->stats.bprc += rd32(E1000_BPRC); 4640 adapter->stats.mprc += rd32(E1000_MPRC); 4641 adapter->stats.roc += rd32(E1000_ROC); 4642 4643 adapter->stats.prc64 += rd32(E1000_PRC64); 4644 adapter->stats.prc127 += rd32(E1000_PRC127); 4645 adapter->stats.prc255 += rd32(E1000_PRC255); 4646 adapter->stats.prc511 += rd32(E1000_PRC511); 4647 adapter->stats.prc1023 += rd32(E1000_PRC1023); 4648 adapter->stats.prc1522 += rd32(E1000_PRC1522); 4649 adapter->stats.symerrs += rd32(E1000_SYMERRS); 4650 adapter->stats.sec += rd32(E1000_SEC); 4651 4652 mpc = rd32(E1000_MPC); 4653 adapter->stats.mpc += mpc; 4654 net_stats->rx_fifo_errors += mpc; 4655 adapter->stats.scc += rd32(E1000_SCC); 4656 adapter->stats.ecol += rd32(E1000_ECOL); 4657 adapter->stats.mcc += rd32(E1000_MCC); 4658 adapter->stats.latecol += rd32(E1000_LATECOL); 4659 adapter->stats.dc += rd32(E1000_DC); 4660 adapter->stats.rlec += rd32(E1000_RLEC); 4661 adapter->stats.xonrxc += rd32(E1000_XONRXC); 4662 adapter->stats.xontxc += rd32(E1000_XONTXC); 4663 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC); 4664 adapter->stats.xofftxc += rd32(E1000_XOFFTXC); 4665 adapter->stats.fcruc += rd32(E1000_FCRUC); 4666 adapter->stats.gptc += rd32(E1000_GPTC); 4667 adapter->stats.gotc += rd32(E1000_GOTCL); 4668 rd32(E1000_GOTCH); /* clear GOTCL */ 4669 adapter->stats.rnbc += rd32(E1000_RNBC); 4670 adapter->stats.ruc += rd32(E1000_RUC); 4671 adapter->stats.rfc += rd32(E1000_RFC); 4672 adapter->stats.rjc += rd32(E1000_RJC); 4673 adapter->stats.tor += rd32(E1000_TORH); 4674 adapter->stats.tot += rd32(E1000_TOTH); 4675 adapter->stats.tpr += rd32(E1000_TPR); 4676 4677 adapter->stats.ptc64 += rd32(E1000_PTC64); 4678 adapter->stats.ptc127 += rd32(E1000_PTC127); 4679 adapter->stats.ptc255 += rd32(E1000_PTC255); 4680 adapter->stats.ptc511 += rd32(E1000_PTC511); 4681 adapter->stats.ptc1023 += rd32(E1000_PTC1023); 4682 adapter->stats.ptc1522 += rd32(E1000_PTC1522); 4683 4684 adapter->stats.mptc += rd32(E1000_MPTC); 4685 adapter->stats.bptc += rd32(E1000_BPTC); 4686 4687 adapter->stats.tpt += rd32(E1000_TPT); 4688 adapter->stats.colc += rd32(E1000_COLC); 4689 4690 adapter->stats.algnerrc += rd32(E1000_ALGNERRC); 4691 /* read internal phy specific stats */ 4692 reg = rd32(E1000_CTRL_EXT); 4693 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { 4694 adapter->stats.rxerrc += rd32(E1000_RXERRC); 4695 adapter->stats.tncrs += rd32(E1000_TNCRS); 4696 } 4697 4698 adapter->stats.tsctc += rd32(E1000_TSCTC); 4699 adapter->stats.tsctfc += rd32(E1000_TSCTFC); 4700 4701 adapter->stats.iac += rd32(E1000_IAC); 4702 adapter->stats.icrxoc += rd32(E1000_ICRXOC); 4703 adapter->stats.icrxptc += rd32(E1000_ICRXPTC); 4704 adapter->stats.icrxatc += rd32(E1000_ICRXATC); 4705 adapter->stats.ictxptc += rd32(E1000_ICTXPTC); 4706 adapter->stats.ictxatc += rd32(E1000_ICTXATC); 4707 adapter->stats.ictxqec += rd32(E1000_ICTXQEC); 4708 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC); 4709 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC); 4710 4711 /* Fill out the OS statistics structure */ 4712 net_stats->multicast = adapter->stats.mprc; 4713 net_stats->collisions = adapter->stats.colc; 4714 4715 /* Rx Errors */ 4716 4717 /* RLEC on some newer hardware can be incorrect so build 4718 * our own version based on RUC and ROC */ 4719 net_stats->rx_errors = adapter->stats.rxerrc + 4720 adapter->stats.crcerrs + adapter->stats.algnerrc + 4721 adapter->stats.ruc + adapter->stats.roc + 4722 adapter->stats.cexterr; 4723 net_stats->rx_length_errors = adapter->stats.ruc + 4724 adapter->stats.roc; 4725 net_stats->rx_crc_errors = adapter->stats.crcerrs; 4726 net_stats->rx_frame_errors = adapter->stats.algnerrc; 4727 net_stats->rx_missed_errors = adapter->stats.mpc; 4728 4729 /* Tx Errors */ 4730 net_stats->tx_errors = adapter->stats.ecol + 4731 adapter->stats.latecol; 4732 net_stats->tx_aborted_errors = adapter->stats.ecol; 4733 net_stats->tx_window_errors = adapter->stats.latecol; 4734 net_stats->tx_carrier_errors = adapter->stats.tncrs; 4735 4736 /* Tx Dropped needs to be maintained elsewhere */ 4737 4738 /* Phy Stats */ 4739 if (hw->phy.media_type == e1000_media_type_copper) { 4740 if ((adapter->link_speed == SPEED_1000) && 4741 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { 4742 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; 4743 adapter->phy_stats.idle_errors += phy_tmp; 4744 } 4745 } 4746 4747 /* Management Stats */ 4748 adapter->stats.mgptc += rd32(E1000_MGTPTC); 4749 adapter->stats.mgprc += rd32(E1000_MGTPRC); 4750 adapter->stats.mgpdc += rd32(E1000_MGTPDC); 4751 4752 /* OS2BMC Stats */ 4753 reg = rd32(E1000_MANC); 4754 if (reg & E1000_MANC_EN_BMC2OS) { 4755 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC); 4756 adapter->stats.o2bspc += rd32(E1000_O2BSPC); 4757 adapter->stats.b2ospc += rd32(E1000_B2OSPC); 4758 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC); 4759 } 4760} 4761 4762static irqreturn_t igb_msix_other(int irq, void *data) 4763{ 4764 struct igb_adapter *adapter = data; 4765 struct e1000_hw *hw = &adapter->hw; 4766 u32 icr = rd32(E1000_ICR); 4767 /* reading ICR causes bit 31 of EICR to be cleared */ 4768 4769 if (icr & E1000_ICR_DRSTA) 4770 schedule_work(&adapter->reset_task); 4771 4772 if (icr & E1000_ICR_DOUTSYNC) { 4773 /* HW is reporting DMA is out of sync */ 4774 adapter->stats.doosync++; 4775 /* The DMA Out of Sync is also indication of a spoof event 4776 * in IOV mode. Check the Wrong VM Behavior register to 4777 * see if it is really a spoof event. */ 4778 igb_check_wvbr(adapter); 4779 } 4780 4781 /* Check for a mailbox event */ 4782 if (icr & E1000_ICR_VMMB) 4783 igb_msg_task(adapter); 4784 4785 if (icr & E1000_ICR_LSC) { 4786 hw->mac.get_link_status = 1; 4787 /* guard against interrupt when we're going down */ 4788 if (!test_bit(__IGB_DOWN, &adapter->state)) 4789 mod_timer(&adapter->watchdog_timer, jiffies + 1); 4790 } 4791 4792 wr32(E1000_EIMS, adapter->eims_other); 4793 4794 return IRQ_HANDLED; 4795} 4796 4797static void igb_write_itr(struct igb_q_vector *q_vector) 4798{ 4799 struct igb_adapter *adapter = q_vector->adapter; 4800 u32 itr_val = q_vector->itr_val & 0x7FFC; 4801 4802 if (!q_vector->set_itr) 4803 return; 4804 4805 if (!itr_val) 4806 itr_val = 0x4; 4807 4808 if (adapter->hw.mac.type == e1000_82575) 4809 itr_val |= itr_val << 16; 4810 else 4811 itr_val |= E1000_EITR_CNT_IGNR; 4812 4813 writel(itr_val, q_vector->itr_register); 4814 q_vector->set_itr = 0; 4815} 4816 4817static irqreturn_t igb_msix_ring(int irq, void *data) 4818{ 4819 struct igb_q_vector *q_vector = data; 4820 4821 /* Write the ITR value calculated from the previous interrupt. */ 4822 igb_write_itr(q_vector); 4823 4824 napi_schedule(&q_vector->napi); 4825 4826 return IRQ_HANDLED; 4827} 4828 4829#ifdef CONFIG_IGB_DCA 4830static void igb_update_dca(struct igb_q_vector *q_vector) 4831{ 4832 struct igb_adapter *adapter = q_vector->adapter; 4833 struct e1000_hw *hw = &adapter->hw; 4834 int cpu = get_cpu(); 4835 4836 if (q_vector->cpu == cpu) 4837 goto out_no_update; 4838 4839 if (q_vector->tx.ring) { 4840 int q = q_vector->tx.ring->reg_idx; 4841 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); 4842 if (hw->mac.type == e1000_82575) { 4843 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; 4844 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); 4845 } else { 4846 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; 4847 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << 4848 E1000_DCA_TXCTRL_CPUID_SHIFT; 4849 } 4850 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; 4851 wr32(E1000_DCA_TXCTRL(q), dca_txctrl); 4852 } 4853 if (q_vector->rx.ring) { 4854 int q = q_vector->rx.ring->reg_idx; 4855 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); 4856 if (hw->mac.type == e1000_82575) { 4857 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; 4858 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); 4859 } else { 4860 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; 4861 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << 4862 E1000_DCA_RXCTRL_CPUID_SHIFT; 4863 } 4864 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; 4865 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; 4866 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN; 4867 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl); 4868 } 4869 q_vector->cpu = cpu; 4870out_no_update: 4871 put_cpu(); 4872} 4873 4874static void igb_setup_dca(struct igb_adapter *adapter) 4875{ 4876 struct e1000_hw *hw = &adapter->hw; 4877 int i; 4878 4879 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED)) 4880 return; 4881 4882 /* Always use CB2 mode, difference is masked in the CB driver. */ 4883 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); 4884 4885 for (i = 0; i < adapter->num_q_vectors; i++) { 4886 adapter->q_vector[i]->cpu = -1; 4887 igb_update_dca(adapter->q_vector[i]); 4888 } 4889} 4890 4891static int __igb_notify_dca(struct device *dev, void *data) 4892{ 4893 struct net_device *netdev = dev_get_drvdata(dev); 4894 struct igb_adapter *adapter = netdev_priv(netdev); 4895 struct pci_dev *pdev = adapter->pdev; 4896 struct e1000_hw *hw = &adapter->hw; 4897 unsigned long event = *(unsigned long *)data; 4898 4899 switch (event) { 4900 case DCA_PROVIDER_ADD: 4901 /* if already enabled, don't do it again */ 4902 if (adapter->flags & IGB_FLAG_DCA_ENABLED) 4903 break; 4904 if (dca_add_requester(dev) == 0) { 4905 adapter->flags |= IGB_FLAG_DCA_ENABLED; 4906 dev_info(&pdev->dev, "DCA enabled\n"); 4907 igb_setup_dca(adapter); 4908 break; 4909 } 4910 /* Fall Through since DCA is disabled. */ 4911 case DCA_PROVIDER_REMOVE: 4912 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 4913 /* without this a class_device is left 4914 * hanging around in the sysfs model */ 4915 dca_remove_requester(dev); 4916 dev_info(&pdev->dev, "DCA disabled\n"); 4917 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 4918 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 4919 } 4920 break; 4921 } 4922 4923 return 0; 4924} 4925 4926static int igb_notify_dca(struct notifier_block *nb, unsigned long event, 4927 void *p) 4928{ 4929 int ret_val; 4930 4931 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, 4932 __igb_notify_dca); 4933 4934 return ret_val ? NOTIFY_BAD : NOTIFY_DONE; 4935} 4936#endif /* CONFIG_IGB_DCA */ 4937 4938#ifdef CONFIG_PCI_IOV 4939static int igb_vf_configure(struct igb_adapter *adapter, int vf) 4940{ 4941 unsigned char mac_addr[ETH_ALEN]; 4942 struct pci_dev *pdev = adapter->pdev; 4943 struct e1000_hw *hw = &adapter->hw; 4944 struct pci_dev *pvfdev; 4945 unsigned int device_id; 4946 u16 thisvf_devfn; 4947 4948 random_ether_addr(mac_addr); 4949 igb_set_vf_mac(adapter, vf, mac_addr); 4950 4951 switch (adapter->hw.mac.type) { 4952 case e1000_82576: 4953 device_id = IGB_82576_VF_DEV_ID; 4954 /* VF Stride for 82576 is 2 */ 4955 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) | 4956 (pdev->devfn & 1); 4957 break; 4958 case e1000_i350: 4959 device_id = IGB_I350_VF_DEV_ID; 4960 /* VF Stride for I350 is 4 */ 4961 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) | 4962 (pdev->devfn & 3); 4963 break; 4964 default: 4965 device_id = 0; 4966 thisvf_devfn = 0; 4967 break; 4968 } 4969 4970 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); 4971 while (pvfdev) { 4972 if (pvfdev->devfn == thisvf_devfn) 4973 break; 4974 pvfdev = pci_get_device(hw->vendor_id, 4975 device_id, pvfdev); 4976 } 4977 4978 if (pvfdev) 4979 adapter->vf_data[vf].vfdev = pvfdev; 4980 else 4981 dev_err(&pdev->dev, 4982 "Couldn't find pci dev ptr for VF %4.4x\n", 4983 thisvf_devfn); 4984 return pvfdev != NULL; 4985} 4986 4987static int igb_find_enabled_vfs(struct igb_adapter *adapter) 4988{ 4989 struct e1000_hw *hw = &adapter->hw; 4990 struct pci_dev *pdev = adapter->pdev; 4991 struct pci_dev *pvfdev; 4992 u16 vf_devfn = 0; 4993 u16 vf_stride; 4994 unsigned int device_id; 4995 int vfs_found = 0; 4996 4997 switch (adapter->hw.mac.type) { 4998 case e1000_82576: 4999 device_id = IGB_82576_VF_DEV_ID; 5000 /* VF Stride for 82576 is 2 */ 5001 vf_stride = 2; 5002 break; 5003 case e1000_i350: 5004 device_id = IGB_I350_VF_DEV_ID; 5005 /* VF Stride for I350 is 4 */ 5006 vf_stride = 4; 5007 break; 5008 default: 5009 device_id = 0; 5010 vf_stride = 0; 5011 break; 5012 } 5013 5014 vf_devfn = pdev->devfn + 0x80; 5015 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); 5016 while (pvfdev) { 5017 if (pvfdev->devfn == vf_devfn) 5018 vfs_found++; 5019 vf_devfn += vf_stride; 5020 pvfdev = pci_get_device(hw->vendor_id, 5021 device_id, pvfdev); 5022 } 5023 5024 return vfs_found; 5025} 5026 5027static int igb_check_vf_assignment(struct igb_adapter *adapter) 5028{ 5029 int i; 5030 for (i = 0; i < adapter->vfs_allocated_count; i++) { 5031 if (adapter->vf_data[i].vfdev) { 5032 if (adapter->vf_data[i].vfdev->dev_flags & 5033 PCI_DEV_FLAGS_ASSIGNED) 5034 return true; 5035 } 5036 } 5037 return false; 5038} 5039 5040#endif 5041static void igb_ping_all_vfs(struct igb_adapter *adapter) 5042{ 5043 struct e1000_hw *hw = &adapter->hw; 5044 u32 ping; 5045 int i; 5046 5047 for (i = 0 ; i < adapter->vfs_allocated_count; i++) { 5048 ping = E1000_PF_CONTROL_MSG; 5049 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS) 5050 ping |= E1000_VT_MSGTYPE_CTS; 5051 igb_write_mbx(hw, &ping, 1, i); 5052 } 5053} 5054 5055static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) 5056{ 5057 struct e1000_hw *hw = &adapter->hw; 5058 u32 vmolr = rd32(E1000_VMOLR(vf)); 5059 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5060 5061 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | 5062 IGB_VF_FLAG_MULTI_PROMISC); 5063 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); 5064 5065 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { 5066 vmolr |= E1000_VMOLR_MPME; 5067 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; 5068 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; 5069 } else { 5070 /* 5071 * if we have hashes and we are clearing a multicast promisc 5072 * flag we need to write the hashes to the MTA as this step 5073 * was previously skipped 5074 */ 5075 if (vf_data->num_vf_mc_hashes > 30) { 5076 vmolr |= E1000_VMOLR_MPME; 5077 } else if (vf_data->num_vf_mc_hashes) { 5078 int j; 5079 vmolr |= E1000_VMOLR_ROMPE; 5080 for (j = 0; j < vf_data->num_vf_mc_hashes; j++) 5081 igb_mta_set(hw, vf_data->vf_mc_hashes[j]); 5082 } 5083 } 5084 5085 wr32(E1000_VMOLR(vf), vmolr); 5086 5087 /* there are flags left unprocessed, likely not supported */ 5088 if (*msgbuf & E1000_VT_MSGINFO_MASK) 5089 return -EINVAL; 5090 5091 return 0; 5092 5093} 5094 5095static int igb_set_vf_multicasts(struct igb_adapter *adapter, 5096 u32 *msgbuf, u32 vf) 5097{ 5098 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; 5099 u16 *hash_list = (u16 *)&msgbuf[1]; 5100 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5101 int i; 5102 5103 /* salt away the number of multicast addresses assigned 5104 * to this VF for later use to restore when the PF multi cast 5105 * list changes 5106 */ 5107 vf_data->num_vf_mc_hashes = n; 5108 5109 /* only up to 30 hash values supported */ 5110 if (n > 30) 5111 n = 30; 5112 5113 /* store the hashes for later use */ 5114 for (i = 0; i < n; i++) 5115 vf_data->vf_mc_hashes[i] = hash_list[i]; 5116 5117 /* Flush and reset the mta with the new values */ 5118 igb_set_rx_mode(adapter->netdev); 5119 5120 return 0; 5121} 5122 5123static void igb_restore_vf_multicasts(struct igb_adapter *adapter) 5124{ 5125 struct e1000_hw *hw = &adapter->hw; 5126 struct vf_data_storage *vf_data; 5127 int i, j; 5128 5129 for (i = 0; i < adapter->vfs_allocated_count; i++) { 5130 u32 vmolr = rd32(E1000_VMOLR(i)); 5131 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); 5132 5133 vf_data = &adapter->vf_data[i]; 5134 5135 if ((vf_data->num_vf_mc_hashes > 30) || 5136 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) { 5137 vmolr |= E1000_VMOLR_MPME; 5138 } else if (vf_data->num_vf_mc_hashes) { 5139 vmolr |= E1000_VMOLR_ROMPE; 5140 for (j = 0; j < vf_data->num_vf_mc_hashes; j++) 5141 igb_mta_set(hw, vf_data->vf_mc_hashes[j]); 5142 } 5143 wr32(E1000_VMOLR(i), vmolr); 5144 } 5145} 5146 5147static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) 5148{ 5149 struct e1000_hw *hw = &adapter->hw; 5150 u32 pool_mask, reg, vid; 5151 int i; 5152 5153 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); 5154 5155 /* Find the vlan filter for this id */ 5156 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5157 reg = rd32(E1000_VLVF(i)); 5158 5159 /* remove the vf from the pool */ 5160 reg &= ~pool_mask; 5161 5162 /* if pool is empty then remove entry from vfta */ 5163 if (!(reg & E1000_VLVF_POOLSEL_MASK) && 5164 (reg & E1000_VLVF_VLANID_ENABLE)) { 5165 reg = 0; 5166 vid = reg & E1000_VLVF_VLANID_MASK; 5167 igb_vfta_set(hw, vid, false); 5168 } 5169 5170 wr32(E1000_VLVF(i), reg); 5171 } 5172 5173 adapter->vf_data[vf].vlans_enabled = 0; 5174} 5175 5176static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) 5177{ 5178 struct e1000_hw *hw = &adapter->hw; 5179 u32 reg, i; 5180 5181 /* The vlvf table only exists on 82576 hardware and newer */ 5182 if (hw->mac.type < e1000_82576) 5183 return -1; 5184 5185 /* we only need to do this if VMDq is enabled */ 5186 if (!adapter->vfs_allocated_count) 5187 return -1; 5188 5189 /* Find the vlan filter for this id */ 5190 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5191 reg = rd32(E1000_VLVF(i)); 5192 if ((reg & E1000_VLVF_VLANID_ENABLE) && 5193 vid == (reg & E1000_VLVF_VLANID_MASK)) 5194 break; 5195 } 5196 5197 if (add) { 5198 if (i == E1000_VLVF_ARRAY_SIZE) { 5199 /* Did not find a matching VLAN ID entry that was 5200 * enabled. Search for a free filter entry, i.e. 5201 * one without the enable bit set 5202 */ 5203 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5204 reg = rd32(E1000_VLVF(i)); 5205 if (!(reg & E1000_VLVF_VLANID_ENABLE)) 5206 break; 5207 } 5208 } 5209 if (i < E1000_VLVF_ARRAY_SIZE) { 5210 /* Found an enabled/available entry */ 5211 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); 5212 5213 /* if !enabled we need to set this up in vfta */ 5214 if (!(reg & E1000_VLVF_VLANID_ENABLE)) { 5215 /* add VID to filter table */ 5216 igb_vfta_set(hw, vid, true); 5217 reg |= E1000_VLVF_VLANID_ENABLE; 5218 } 5219 reg &= ~E1000_VLVF_VLANID_MASK; 5220 reg |= vid; 5221 wr32(E1000_VLVF(i), reg); 5222 5223 /* do not modify RLPML for PF devices */ 5224 if (vf >= adapter->vfs_allocated_count) 5225 return 0; 5226 5227 if (!adapter->vf_data[vf].vlans_enabled) { 5228 u32 size; 5229 reg = rd32(E1000_VMOLR(vf)); 5230 size = reg & E1000_VMOLR_RLPML_MASK; 5231 size += 4; 5232 reg &= ~E1000_VMOLR_RLPML_MASK; 5233 reg |= size; 5234 wr32(E1000_VMOLR(vf), reg); 5235 } 5236 5237 adapter->vf_data[vf].vlans_enabled++; 5238 } 5239 } else { 5240 if (i < E1000_VLVF_ARRAY_SIZE) { 5241 /* remove vf from the pool */ 5242 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); 5243 /* if pool is empty then remove entry from vfta */ 5244 if (!(reg & E1000_VLVF_POOLSEL_MASK)) { 5245 reg = 0; 5246 igb_vfta_set(hw, vid, false); 5247 } 5248 wr32(E1000_VLVF(i), reg); 5249 5250 /* do not modify RLPML for PF devices */ 5251 if (vf >= adapter->vfs_allocated_count) 5252 return 0; 5253 5254 adapter->vf_data[vf].vlans_enabled--; 5255 if (!adapter->vf_data[vf].vlans_enabled) { 5256 u32 size; 5257 reg = rd32(E1000_VMOLR(vf)); 5258 size = reg & E1000_VMOLR_RLPML_MASK; 5259 size -= 4; 5260 reg &= ~E1000_VMOLR_RLPML_MASK; 5261 reg |= size; 5262 wr32(E1000_VMOLR(vf), reg); 5263 } 5264 } 5265 } 5266 return 0; 5267} 5268 5269static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) 5270{ 5271 struct e1000_hw *hw = &adapter->hw; 5272 5273 if (vid) 5274 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT)); 5275 else 5276 wr32(E1000_VMVIR(vf), 0); 5277} 5278 5279static int igb_ndo_set_vf_vlan(struct net_device *netdev, 5280 int vf, u16 vlan, u8 qos) 5281{ 5282 int err = 0; 5283 struct igb_adapter *adapter = netdev_priv(netdev); 5284 5285 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) 5286 return -EINVAL; 5287 if (vlan || qos) { 5288 err = igb_vlvf_set(adapter, vlan, !!vlan, vf); 5289 if (err) 5290 goto out; 5291 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); 5292 igb_set_vmolr(adapter, vf, !vlan); 5293 adapter->vf_data[vf].pf_vlan = vlan; 5294 adapter->vf_data[vf].pf_qos = qos; 5295 dev_info(&adapter->pdev->dev, 5296 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); 5297 if (test_bit(__IGB_DOWN, &adapter->state)) { 5298 dev_warn(&adapter->pdev->dev, 5299 "The VF VLAN has been set," 5300 " but the PF device is not up.\n"); 5301 dev_warn(&adapter->pdev->dev, 5302 "Bring the PF device up before" 5303 " attempting to use the VF device.\n"); 5304 } 5305 } else { 5306 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, 5307 false, vf); 5308 igb_set_vmvir(adapter, vlan, vf); 5309 igb_set_vmolr(adapter, vf, true); 5310 adapter->vf_data[vf].pf_vlan = 0; 5311 adapter->vf_data[vf].pf_qos = 0; 5312 } 5313out: 5314 return err; 5315} 5316 5317static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) 5318{ 5319 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; 5320 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); 5321 5322 return igb_vlvf_set(adapter, vid, add, vf); 5323} 5324 5325static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) 5326{ 5327 /* clear flags - except flag that indicates PF has set the MAC */ 5328 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; 5329 adapter->vf_data[vf].last_nack = jiffies; 5330 5331 /* reset offloads to defaults */ 5332 igb_set_vmolr(adapter, vf, true); 5333 5334 /* reset vlans for device */ 5335 igb_clear_vf_vfta(adapter, vf); 5336 if (adapter->vf_data[vf].pf_vlan) 5337 igb_ndo_set_vf_vlan(adapter->netdev, vf, 5338 adapter->vf_data[vf].pf_vlan, 5339 adapter->vf_data[vf].pf_qos); 5340 else 5341 igb_clear_vf_vfta(adapter, vf); 5342 5343 /* reset multicast table array for vf */ 5344 adapter->vf_data[vf].num_vf_mc_hashes = 0; 5345 5346 /* Flush and reset the mta with the new values */ 5347 igb_set_rx_mode(adapter->netdev); 5348} 5349 5350static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) 5351{ 5352 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; 5353 5354 /* generate a new mac address as we were hotplug removed/added */ 5355 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) 5356 random_ether_addr(vf_mac); 5357 5358 /* process remaining reset events */ 5359 igb_vf_reset(adapter, vf); 5360} 5361 5362static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) 5363{ 5364 struct e1000_hw *hw = &adapter->hw; 5365 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; 5366 int rar_entry = hw->mac.rar_entry_count - (vf + 1); 5367 u32 reg, msgbuf[3]; 5368 u8 *addr = (u8 *)(&msgbuf[1]); 5369 5370 /* process all the same items cleared in a function level reset */ 5371 igb_vf_reset(adapter, vf); 5372 5373 /* set vf mac address */ 5374 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf); 5375 5376 /* enable transmit and receive for vf */ 5377 reg = rd32(E1000_VFTE); 5378 wr32(E1000_VFTE, reg | (1 << vf)); 5379 reg = rd32(E1000_VFRE); 5380 wr32(E1000_VFRE, reg | (1 << vf)); 5381 5382 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; 5383 5384 /* reply to reset with ack and vf mac address */ 5385 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; 5386 memcpy(addr, vf_mac, 6); 5387 igb_write_mbx(hw, msgbuf, 3, vf); 5388} 5389 5390static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) 5391{ 5392 /* 5393 * The VF MAC Address is stored in a packed array of bytes 5394 * starting at the second 32 bit word of the msg array 5395 */ 5396 unsigned char *addr = (char *)&msg[1]; 5397 int err = -1; 5398 5399 if (is_valid_ether_addr(addr)) 5400 err = igb_set_vf_mac(adapter, vf, addr); 5401 5402 return err; 5403} 5404 5405static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) 5406{ 5407 struct e1000_hw *hw = &adapter->hw; 5408 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5409 u32 msg = E1000_VT_MSGTYPE_NACK; 5410 5411 /* if device isn't clear to send it shouldn't be reading either */ 5412 if (!(vf_data->flags & IGB_VF_FLAG_CTS) && 5413 time_after(jiffies, vf_data->last_nack + (2 * HZ))) { 5414 igb_write_mbx(hw, &msg, 1, vf); 5415 vf_data->last_nack = jiffies; 5416 } 5417} 5418 5419static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) 5420{ 5421 struct pci_dev *pdev = adapter->pdev; 5422 u32 msgbuf[E1000_VFMAILBOX_SIZE]; 5423 struct e1000_hw *hw = &adapter->hw; 5424 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5425 s32 retval; 5426 5427 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); 5428 5429 if (retval) { 5430 /* if receive failed revoke VF CTS stats and restart init */ 5431 dev_err(&pdev->dev, "Error receiving message from VF\n"); 5432 vf_data->flags &= ~IGB_VF_FLAG_CTS; 5433 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 5434 return; 5435 goto out; 5436 } 5437 5438 /* this is a message we already processed, do nothing */ 5439 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) 5440 return; 5441 5442 /* 5443 * until the vf completes a reset it should not be 5444 * allowed to start any configuration. 5445 */ 5446 5447 if (msgbuf[0] == E1000_VF_RESET) { 5448 igb_vf_reset_msg(adapter, vf); 5449 return; 5450 } 5451 5452 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { 5453 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 5454 return; 5455 retval = -1; 5456 goto out; 5457 } 5458 5459 switch ((msgbuf[0] & 0xFFFF)) { 5460 case E1000_VF_SET_MAC_ADDR: 5461 retval = -EINVAL; 5462 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) 5463 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); 5464 else 5465 dev_warn(&pdev->dev, 5466 "VF %d attempted to override administratively " 5467 "set MAC address\nReload the VF driver to " 5468 "resume operations\n", vf); 5469 break; 5470 case E1000_VF_SET_PROMISC: 5471 retval = igb_set_vf_promisc(adapter, msgbuf, vf); 5472 break; 5473 case E1000_VF_SET_MULTICAST: 5474 retval = igb_set_vf_multicasts(adapter, msgbuf, vf); 5475 break; 5476 case E1000_VF_SET_LPE: 5477 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); 5478 break; 5479 case E1000_VF_SET_VLAN: 5480 retval = -1; 5481 if (vf_data->pf_vlan) 5482 dev_warn(&pdev->dev, 5483 "VF %d attempted to override administratively " 5484 "set VLAN tag\nReload the VF driver to " 5485 "resume operations\n", vf); 5486 else 5487 retval = igb_set_vf_vlan(adapter, msgbuf, vf); 5488 break; 5489 default: 5490 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); 5491 retval = -1; 5492 break; 5493 } 5494 5495 msgbuf[0] |= E1000_VT_MSGTYPE_CTS; 5496out: 5497 /* notify the VF of the results of what it sent us */ 5498 if (retval) 5499 msgbuf[0] |= E1000_VT_MSGTYPE_NACK; 5500 else 5501 msgbuf[0] |= E1000_VT_MSGTYPE_ACK; 5502 5503 igb_write_mbx(hw, msgbuf, 1, vf); 5504} 5505 5506static void igb_msg_task(struct igb_adapter *adapter) 5507{ 5508 struct e1000_hw *hw = &adapter->hw; 5509 u32 vf; 5510 5511 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { 5512 /* process any reset requests */ 5513 if (!igb_check_for_rst(hw, vf)) 5514 igb_vf_reset_event(adapter, vf); 5515 5516 /* process any messages pending */ 5517 if (!igb_check_for_msg(hw, vf)) 5518 igb_rcv_msg_from_vf(adapter, vf); 5519 5520 /* process any acks */ 5521 if (!igb_check_for_ack(hw, vf)) 5522 igb_rcv_ack_from_vf(adapter, vf); 5523 } 5524} 5525 5526/** 5527 * igb_set_uta - Set unicast filter table address 5528 * @adapter: board private structure 5529 * 5530 * The unicast table address is a register array of 32-bit registers. 5531 * The table is meant to be used in a way similar to how the MTA is used 5532 * however due to certain limitations in the hardware it is necessary to 5533 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous 5534 * enable bit to allow vlan tag stripping when promiscuous mode is enabled 5535 **/ 5536static void igb_set_uta(struct igb_adapter *adapter) 5537{ 5538 struct e1000_hw *hw = &adapter->hw; 5539 int i; 5540 5541 /* The UTA table only exists on 82576 hardware and newer */ 5542 if (hw->mac.type < e1000_82576) 5543 return; 5544 5545 /* we only need to do this if VMDq is enabled */ 5546 if (!adapter->vfs_allocated_count) 5547 return; 5548 5549 for (i = 0; i < hw->mac.uta_reg_count; i++) 5550 array_wr32(E1000_UTA, i, ~0); 5551} 5552 5553/** 5554 * igb_intr_msi - Interrupt Handler 5555 * @irq: interrupt number 5556 * @data: pointer to a network interface device structure 5557 **/ 5558static irqreturn_t igb_intr_msi(int irq, void *data) 5559{ 5560 struct igb_adapter *adapter = data; 5561 struct igb_q_vector *q_vector = adapter->q_vector[0]; 5562 struct e1000_hw *hw = &adapter->hw; 5563 /* read ICR disables interrupts using IAM */ 5564 u32 icr = rd32(E1000_ICR); 5565 5566 igb_write_itr(q_vector); 5567 5568 if (icr & E1000_ICR_DRSTA) 5569 schedule_work(&adapter->reset_task); 5570 5571 if (icr & E1000_ICR_DOUTSYNC) { 5572 /* HW is reporting DMA is out of sync */ 5573 adapter->stats.doosync++; 5574 } 5575 5576 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 5577 hw->mac.get_link_status = 1; 5578 if (!test_bit(__IGB_DOWN, &adapter->state)) 5579 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5580 } 5581 5582 napi_schedule(&q_vector->napi); 5583 5584 return IRQ_HANDLED; 5585} 5586 5587/** 5588 * igb_intr - Legacy Interrupt Handler 5589 * @irq: interrupt number 5590 * @data: pointer to a network interface device structure 5591 **/ 5592static irqreturn_t igb_intr(int irq, void *data) 5593{ 5594 struct igb_adapter *adapter = data; 5595 struct igb_q_vector *q_vector = adapter->q_vector[0]; 5596 struct e1000_hw *hw = &adapter->hw; 5597 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No 5598 * need for the IMC write */ 5599 u32 icr = rd32(E1000_ICR); 5600 5601 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 5602 * not set, then the adapter didn't send an interrupt */ 5603 if (!(icr & E1000_ICR_INT_ASSERTED)) 5604 return IRQ_NONE; 5605 5606 igb_write_itr(q_vector); 5607 5608 if (icr & E1000_ICR_DRSTA) 5609 schedule_work(&adapter->reset_task); 5610 5611 if (icr & E1000_ICR_DOUTSYNC) { 5612 /* HW is reporting DMA is out of sync */ 5613 adapter->stats.doosync++; 5614 } 5615 5616 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 5617 hw->mac.get_link_status = 1; 5618 /* guard against interrupt when we're going down */ 5619 if (!test_bit(__IGB_DOWN, &adapter->state)) 5620 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5621 } 5622 5623 napi_schedule(&q_vector->napi); 5624 5625 return IRQ_HANDLED; 5626} 5627 5628static void igb_ring_irq_enable(struct igb_q_vector *q_vector) 5629{ 5630 struct igb_adapter *adapter = q_vector->adapter; 5631 struct e1000_hw *hw = &adapter->hw; 5632 5633 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 5634 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 5635 if ((adapter->num_q_vectors == 1) && !adapter->vf_data) 5636 igb_set_itr(q_vector); 5637 else 5638 igb_update_ring_itr(q_vector); 5639 } 5640 5641 if (!test_bit(__IGB_DOWN, &adapter->state)) { 5642 if (adapter->msix_entries) 5643 wr32(E1000_EIMS, q_vector->eims_value); 5644 else 5645 igb_irq_enable(adapter); 5646 } 5647} 5648 5649/** 5650 * igb_poll - NAPI Rx polling callback 5651 * @napi: napi polling structure 5652 * @budget: count of how many packets we should handle 5653 **/ 5654static int igb_poll(struct napi_struct *napi, int budget) 5655{ 5656 struct igb_q_vector *q_vector = container_of(napi, 5657 struct igb_q_vector, 5658 napi); 5659 bool clean_complete = true; 5660 5661#ifdef CONFIG_IGB_DCA 5662 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) 5663 igb_update_dca(q_vector); 5664#endif 5665 if (q_vector->tx.ring) 5666 clean_complete = igb_clean_tx_irq(q_vector); 5667 5668 if (q_vector->rx.ring) 5669 clean_complete &= igb_clean_rx_irq(q_vector, budget); 5670 5671 /* If all work not completed, return budget and keep polling */ 5672 if (!clean_complete) 5673 return budget; 5674 5675 /* If not enough Rx work done, exit the polling mode */ 5676 napi_complete(napi); 5677 igb_ring_irq_enable(q_vector); 5678 5679 return 0; 5680} 5681 5682/** 5683 * igb_systim_to_hwtstamp - convert system time value to hw timestamp 5684 * @adapter: board private structure 5685 * @shhwtstamps: timestamp structure to update 5686 * @regval: unsigned 64bit system time value. 5687 * 5688 * We need to convert the system time value stored in the RX/TXSTMP registers 5689 * into a hwtstamp which can be used by the upper level timestamping functions 5690 */ 5691static void igb_systim_to_hwtstamp(struct igb_adapter *adapter, 5692 struct skb_shared_hwtstamps *shhwtstamps, 5693 u64 regval) 5694{ 5695 u64 ns; 5696 5697 /* 5698 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to 5699 * 24 to match clock shift we setup earlier. 5700 */ 5701 if (adapter->hw.mac.type >= e1000_82580) 5702 regval <<= IGB_82580_TSYNC_SHIFT; 5703 5704 ns = timecounter_cyc2time(&adapter->clock, regval); 5705 timecompare_update(&adapter->compare, ns); 5706 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); 5707 shhwtstamps->hwtstamp = ns_to_ktime(ns); 5708 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns); 5709} 5710 5711/** 5712 * igb_tx_hwtstamp - utility function which checks for TX time stamp 5713 * @q_vector: pointer to q_vector containing needed info 5714 * @buffer: pointer to igb_tx_buffer structure 5715 * 5716 * If we were asked to do hardware stamping and such a time stamp is 5717 * available, then it must have been for this skb here because we only 5718 * allow only one such packet into the queue. 5719 */ 5720static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, 5721 struct igb_tx_buffer *buffer_info) 5722{ 5723 struct igb_adapter *adapter = q_vector->adapter; 5724 struct e1000_hw *hw = &adapter->hw; 5725 struct skb_shared_hwtstamps shhwtstamps; 5726 u64 regval; 5727 5728 /* if skb does not support hw timestamp or TX stamp not valid exit */ 5729 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || 5730 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) 5731 return; 5732 5733 regval = rd32(E1000_TXSTMPL); 5734 regval |= (u64)rd32(E1000_TXSTMPH) << 32; 5735 5736 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval); 5737 skb_tstamp_tx(buffer_info->skb, &shhwtstamps); 5738} 5739 5740/** 5741 * igb_clean_tx_irq - Reclaim resources after transmit completes 5742 * @q_vector: pointer to q_vector containing needed info 5743 * returns true if ring is completely cleaned 5744 **/ 5745static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) 5746{ 5747 struct igb_adapter *adapter = q_vector->adapter; 5748 struct igb_ring *tx_ring = q_vector->tx.ring; 5749 struct igb_tx_buffer *tx_buffer; 5750 union e1000_adv_tx_desc *tx_desc, *eop_desc; 5751 unsigned int total_bytes = 0, total_packets = 0; 5752 unsigned int budget = q_vector->tx.work_limit; 5753 unsigned int i = tx_ring->next_to_clean; 5754 5755 if (test_bit(__IGB_DOWN, &adapter->state)) 5756 return true; 5757 5758 tx_buffer = &tx_ring->tx_buffer_info[i]; 5759 tx_desc = IGB_TX_DESC(tx_ring, i); 5760 i -= tx_ring->count; 5761 5762 for (; budget; budget--) { 5763 eop_desc = tx_buffer->next_to_watch; 5764 5765 /* prevent any other reads prior to eop_desc */ 5766 rmb(); 5767 5768 /* if next_to_watch is not set then there is no work pending */ 5769 if (!eop_desc) 5770 break; 5771 5772 /* if DD is not set pending work has not been completed */ 5773 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) 5774 break; 5775 5776 /* clear next_to_watch to prevent false hangs */ 5777 tx_buffer->next_to_watch = NULL; 5778 5779 /* update the statistics for this packet */ 5780 total_bytes += tx_buffer->bytecount; 5781 total_packets += tx_buffer->gso_segs; 5782 5783 /* retrieve hardware timestamp */ 5784 igb_tx_hwtstamp(q_vector, tx_buffer); 5785 5786 /* free the skb */ 5787 dev_kfree_skb_any(tx_buffer->skb); 5788 tx_buffer->skb = NULL; 5789 5790 /* unmap skb header data */ 5791 dma_unmap_single(tx_ring->dev, 5792 tx_buffer->dma, 5793 tx_buffer->length, 5794 DMA_TO_DEVICE); 5795 5796 /* clear last DMA location and unmap remaining buffers */ 5797 while (tx_desc != eop_desc) { 5798 tx_buffer->dma = 0; 5799 5800 tx_buffer++; 5801 tx_desc++; 5802 i++; 5803 if (unlikely(!i)) { 5804 i -= tx_ring->count; 5805 tx_buffer = tx_ring->tx_buffer_info; 5806 tx_desc = IGB_TX_DESC(tx_ring, 0); 5807 } 5808 5809 /* unmap any remaining paged data */ 5810 if (tx_buffer->dma) { 5811 dma_unmap_page(tx_ring->dev, 5812 tx_buffer->dma, 5813 tx_buffer->length, 5814 DMA_TO_DEVICE); 5815 } 5816 } 5817 5818 /* clear last DMA location */ 5819 tx_buffer->dma = 0; 5820 5821 /* move us one more past the eop_desc for start of next pkt */ 5822 tx_buffer++; 5823 tx_desc++; 5824 i++; 5825 if (unlikely(!i)) { 5826 i -= tx_ring->count; 5827 tx_buffer = tx_ring->tx_buffer_info; 5828 tx_desc = IGB_TX_DESC(tx_ring, 0); 5829 } 5830 } 5831 5832 netdev_tx_completed_queue(txring_txq(tx_ring), 5833 total_packets, total_bytes); 5834 i += tx_ring->count; 5835 tx_ring->next_to_clean = i; 5836 u64_stats_update_begin(&tx_ring->tx_syncp); 5837 tx_ring->tx_stats.bytes += total_bytes; 5838 tx_ring->tx_stats.packets += total_packets; 5839 u64_stats_update_end(&tx_ring->tx_syncp); 5840 q_vector->tx.total_bytes += total_bytes; 5841 q_vector->tx.total_packets += total_packets; 5842 5843 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 5844 struct e1000_hw *hw = &adapter->hw; 5845 5846 eop_desc = tx_buffer->next_to_watch; 5847 5848 /* Detect a transmit hang in hardware, this serializes the 5849 * check with the clearing of time_stamp and movement of i */ 5850 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5851 if (eop_desc && 5852 time_after(jiffies, tx_buffer->time_stamp + 5853 (adapter->tx_timeout_factor * HZ)) && 5854 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { 5855 5856 /* detected Tx unit hang */ 5857 dev_err(tx_ring->dev, 5858 "Detected Tx Unit Hang\n" 5859 " Tx Queue <%d>\n" 5860 " TDH <%x>\n" 5861 " TDT <%x>\n" 5862 " next_to_use <%x>\n" 5863 " next_to_clean <%x>\n" 5864 "buffer_info[next_to_clean]\n" 5865 " time_stamp <%lx>\n" 5866 " next_to_watch <%p>\n" 5867 " jiffies <%lx>\n" 5868 " desc.status <%x>\n", 5869 tx_ring->queue_index, 5870 rd32(E1000_TDH(tx_ring->reg_idx)), 5871 readl(tx_ring->tail), 5872 tx_ring->next_to_use, 5873 tx_ring->next_to_clean, 5874 tx_buffer->time_stamp, 5875 eop_desc, 5876 jiffies, 5877 eop_desc->wb.status); 5878 netif_stop_subqueue(tx_ring->netdev, 5879 tx_ring->queue_index); 5880 5881 /* we are about to reset, no point in enabling stuff */ 5882 return true; 5883 } 5884 } 5885 5886 if (unlikely(total_packets && 5887 netif_carrier_ok(tx_ring->netdev) && 5888 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { 5889 /* Make sure that anybody stopping the queue after this 5890 * sees the new next_to_clean. 5891 */ 5892 smp_mb(); 5893 if (__netif_subqueue_stopped(tx_ring->netdev, 5894 tx_ring->queue_index) && 5895 !(test_bit(__IGB_DOWN, &adapter->state))) { 5896 netif_wake_subqueue(tx_ring->netdev, 5897 tx_ring->queue_index); 5898 5899 u64_stats_update_begin(&tx_ring->tx_syncp); 5900 tx_ring->tx_stats.restart_queue++; 5901 u64_stats_update_end(&tx_ring->tx_syncp); 5902 } 5903 } 5904 5905 return !!budget; 5906} 5907 5908static inline void igb_rx_checksum(struct igb_ring *ring, 5909 union e1000_adv_rx_desc *rx_desc, 5910 struct sk_buff *skb) 5911{ 5912 skb_checksum_none_assert(skb); 5913 5914 /* Ignore Checksum bit is set */ 5915 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) 5916 return; 5917 5918 /* Rx checksum disabled via ethtool */ 5919 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 5920 return; 5921 5922 /* TCP/UDP checksum error bit is set */ 5923 if (igb_test_staterr(rx_desc, 5924 E1000_RXDEXT_STATERR_TCPE | 5925 E1000_RXDEXT_STATERR_IPE)) { 5926 /* 5927 * work around errata with sctp packets where the TCPE aka 5928 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 5929 * packets, (aka let the stack check the crc32c) 5930 */ 5931 if (!((skb->len == 60) && 5932 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 5933 u64_stats_update_begin(&ring->rx_syncp); 5934 ring->rx_stats.csum_err++; 5935 u64_stats_update_end(&ring->rx_syncp); 5936 } 5937 /* let the stack verify checksum errors */ 5938 return; 5939 } 5940 /* It must be a TCP or UDP packet with a valid checksum */ 5941 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | 5942 E1000_RXD_STAT_UDPCS)) 5943 skb->ip_summed = CHECKSUM_UNNECESSARY; 5944 5945 dev_dbg(ring->dev, "cksum success: bits %08X\n", 5946 le32_to_cpu(rx_desc->wb.upper.status_error)); 5947} 5948 5949static inline void igb_rx_hash(struct igb_ring *ring, 5950 union e1000_adv_rx_desc *rx_desc, 5951 struct sk_buff *skb) 5952{ 5953 if (ring->netdev->features & NETIF_F_RXHASH) 5954 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); 5955} 5956 5957static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, 5958 union e1000_adv_rx_desc *rx_desc, 5959 struct sk_buff *skb) 5960{ 5961 struct igb_adapter *adapter = q_vector->adapter; 5962 struct e1000_hw *hw = &adapter->hw; 5963 u64 regval; 5964 5965 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | 5966 E1000_RXDADV_STAT_TS)) 5967 return; 5968 5969 /* 5970 * If this bit is set, then the RX registers contain the time stamp. No 5971 * other packet will be time stamped until we read these registers, so 5972 * read the registers to make them available again. Because only one 5973 * packet can be time stamped at a time, we know that the register 5974 * values must belong to this one here and therefore we don't need to 5975 * compare any of the additional attributes stored for it. 5976 * 5977 * If nothing went wrong, then it should have a shared tx_flags that we 5978 * can turn into a skb_shared_hwtstamps. 5979 */ 5980 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { 5981 u32 *stamp = (u32 *)skb->data; 5982 regval = le32_to_cpu(*(stamp + 2)); 5983 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; 5984 skb_pull(skb, IGB_TS_HDR_LEN); 5985 } else { 5986 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) 5987 return; 5988 5989 regval = rd32(E1000_RXSTMPL); 5990 regval |= (u64)rd32(E1000_RXSTMPH) << 32; 5991 } 5992 5993 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); 5994} 5995 5996static void igb_rx_vlan(struct igb_ring *ring, 5997 union e1000_adv_rx_desc *rx_desc, 5998 struct sk_buff *skb) 5999{ 6000 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { 6001 u16 vid; 6002 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && 6003 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags)) 6004 vid = be16_to_cpu(rx_desc->wb.upper.vlan); 6005 else 6006 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 6007 6008 __vlan_hwaccel_put_tag(skb, vid); 6009 } 6010} 6011 6012static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc) 6013{ 6014 /* HW will not DMA in data larger than the given buffer, even if it 6015 * parses the (NFS, of course) header to be larger. In that case, it 6016 * fills the header buffer and spills the rest into the page. 6017 */ 6018 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) & 6019 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; 6020 if (hlen > IGB_RX_HDR_LEN) 6021 hlen = IGB_RX_HDR_LEN; 6022 return hlen; 6023} 6024 6025static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) 6026{ 6027 struct igb_ring *rx_ring = q_vector->rx.ring; 6028 union e1000_adv_rx_desc *rx_desc; 6029 const int current_node = numa_node_id(); 6030 unsigned int total_bytes = 0, total_packets = 0; 6031 u16 cleaned_count = igb_desc_unused(rx_ring); 6032 u16 i = rx_ring->next_to_clean; 6033 6034 rx_desc = IGB_RX_DESC(rx_ring, i); 6035 6036 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { 6037 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 6038 struct sk_buff *skb = buffer_info->skb; 6039 union e1000_adv_rx_desc *next_rxd; 6040 6041 buffer_info->skb = NULL; 6042 prefetch(skb->data); 6043 6044 i++; 6045 if (i == rx_ring->count) 6046 i = 0; 6047 6048 next_rxd = IGB_RX_DESC(rx_ring, i); 6049 prefetch(next_rxd); 6050 6051 /* 6052 * This memory barrier is needed to keep us from reading 6053 * any other fields out of the rx_desc until we know the 6054 * RXD_STAT_DD bit is set 6055 */ 6056 rmb(); 6057 6058 if (!skb_is_nonlinear(skb)) { 6059 __skb_put(skb, igb_get_hlen(rx_desc)); 6060 dma_unmap_single(rx_ring->dev, buffer_info->dma, 6061 IGB_RX_HDR_LEN, 6062 DMA_FROM_DEVICE); 6063 buffer_info->dma = 0; 6064 } 6065 6066 if (rx_desc->wb.upper.length) { 6067 u16 length = le16_to_cpu(rx_desc->wb.upper.length); 6068 6069 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, 6070 buffer_info->page, 6071 buffer_info->page_offset, 6072 length); 6073 6074 skb->len += length; 6075 skb->data_len += length; 6076 skb->truesize += PAGE_SIZE / 2; 6077 6078 if ((page_count(buffer_info->page) != 1) || 6079 (page_to_nid(buffer_info->page) != current_node)) 6080 buffer_info->page = NULL; 6081 else 6082 get_page(buffer_info->page); 6083 6084 dma_unmap_page(rx_ring->dev, buffer_info->page_dma, 6085 PAGE_SIZE / 2, DMA_FROM_DEVICE); 6086 buffer_info->page_dma = 0; 6087 } 6088 6089 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) { 6090 struct igb_rx_buffer *next_buffer; 6091 next_buffer = &rx_ring->rx_buffer_info[i]; 6092 buffer_info->skb = next_buffer->skb; 6093 buffer_info->dma = next_buffer->dma; 6094 next_buffer->skb = skb; 6095 next_buffer->dma = 0; 6096 goto next_desc; 6097 } 6098 6099 if (igb_test_staterr(rx_desc, 6100 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { 6101 dev_kfree_skb_any(skb); 6102 goto next_desc; 6103 } 6104 6105 igb_rx_hwtstamp(q_vector, rx_desc, skb); 6106 igb_rx_hash(rx_ring, rx_desc, skb); 6107 igb_rx_checksum(rx_ring, rx_desc, skb); 6108 igb_rx_vlan(rx_ring, rx_desc, skb); 6109 6110 total_bytes += skb->len; 6111 total_packets++; 6112 6113 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 6114 6115 napi_gro_receive(&q_vector->napi, skb); 6116 6117 budget--; 6118next_desc: 6119 if (!budget) 6120 break; 6121 6122 cleaned_count++; 6123 /* return some buffers to hardware, one at a time is too slow */ 6124 if (cleaned_count >= IGB_RX_BUFFER_WRITE) { 6125 igb_alloc_rx_buffers(rx_ring, cleaned_count); 6126 cleaned_count = 0; 6127 } 6128 6129 /* use prefetched values */ 6130 rx_desc = next_rxd; 6131 } 6132 6133 rx_ring->next_to_clean = i; 6134 u64_stats_update_begin(&rx_ring->rx_syncp); 6135 rx_ring->rx_stats.packets += total_packets; 6136 rx_ring->rx_stats.bytes += total_bytes; 6137 u64_stats_update_end(&rx_ring->rx_syncp); 6138 q_vector->rx.total_packets += total_packets; 6139 q_vector->rx.total_bytes += total_bytes; 6140 6141 if (cleaned_count) 6142 igb_alloc_rx_buffers(rx_ring, cleaned_count); 6143 6144 return !!budget; 6145} 6146 6147static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, 6148 struct igb_rx_buffer *bi) 6149{ 6150 struct sk_buff *skb = bi->skb; 6151 dma_addr_t dma = bi->dma; 6152 6153 if (dma) 6154 return true; 6155 6156 if (likely(!skb)) { 6157 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 6158 IGB_RX_HDR_LEN); 6159 bi->skb = skb; 6160 if (!skb) { 6161 rx_ring->rx_stats.alloc_failed++; 6162 return false; 6163 } 6164 6165 /* initialize skb for ring */ 6166 skb_record_rx_queue(skb, rx_ring->queue_index); 6167 } 6168 6169 dma = dma_map_single(rx_ring->dev, skb->data, 6170 IGB_RX_HDR_LEN, DMA_FROM_DEVICE); 6171 6172 if (dma_mapping_error(rx_ring->dev, dma)) { 6173 rx_ring->rx_stats.alloc_failed++; 6174 return false; 6175 } 6176 6177 bi->dma = dma; 6178 return true; 6179} 6180 6181static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, 6182 struct igb_rx_buffer *bi) 6183{ 6184 struct page *page = bi->page; 6185 dma_addr_t page_dma = bi->page_dma; 6186 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); 6187 6188 if (page_dma) 6189 return true; 6190 6191 if (!page) { 6192 page = alloc_page(GFP_ATOMIC | __GFP_COLD); 6193 bi->page = page; 6194 if (unlikely(!page)) { 6195 rx_ring->rx_stats.alloc_failed++; 6196 return false; 6197 } 6198 } 6199 6200 page_dma = dma_map_page(rx_ring->dev, page, 6201 page_offset, PAGE_SIZE / 2, 6202 DMA_FROM_DEVICE); 6203 6204 if (dma_mapping_error(rx_ring->dev, page_dma)) { 6205 rx_ring->rx_stats.alloc_failed++; 6206 return false; 6207 } 6208 6209 bi->page_dma = page_dma; 6210 bi->page_offset = page_offset; 6211 return true; 6212} 6213 6214/** 6215 * igb_alloc_rx_buffers - Replace used receive buffers; packet split 6216 * @adapter: address of board private structure 6217 **/ 6218void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) 6219{ 6220 union e1000_adv_rx_desc *rx_desc; 6221 struct igb_rx_buffer *bi; 6222 u16 i = rx_ring->next_to_use; 6223 6224 rx_desc = IGB_RX_DESC(rx_ring, i); 6225 bi = &rx_ring->rx_buffer_info[i]; 6226 i -= rx_ring->count; 6227 6228 while (cleaned_count--) { 6229 if (!igb_alloc_mapped_skb(rx_ring, bi)) 6230 break; 6231 6232 /* Refresh the desc even if buffer_addrs didn't change 6233 * because each write-back erases this info. */ 6234 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); 6235 6236 if (!igb_alloc_mapped_page(rx_ring, bi)) 6237 break; 6238 6239 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); 6240 6241 rx_desc++; 6242 bi++; 6243 i++; 6244 if (unlikely(!i)) { 6245 rx_desc = IGB_RX_DESC(rx_ring, 0); 6246 bi = rx_ring->rx_buffer_info; 6247 i -= rx_ring->count; 6248 } 6249 6250 /* clear the hdr_addr for the next_to_use descriptor */ 6251 rx_desc->read.hdr_addr = 0; 6252 } 6253 6254 i += rx_ring->count; 6255 6256 if (rx_ring->next_to_use != i) { 6257 rx_ring->next_to_use = i; 6258 6259 /* Force memory writes to complete before letting h/w 6260 * know there are new descriptors to fetch. (Only 6261 * applicable for weak-ordered memory model archs, 6262 * such as IA-64). */ 6263 wmb(); 6264 writel(i, rx_ring->tail); 6265 } 6266} 6267 6268/** 6269 * igb_mii_ioctl - 6270 * @netdev: 6271 * @ifreq: 6272 * @cmd: 6273 **/ 6274static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6275{ 6276 struct igb_adapter *adapter = netdev_priv(netdev); 6277 struct mii_ioctl_data *data = if_mii(ifr); 6278 6279 if (adapter->hw.phy.media_type != e1000_media_type_copper) 6280 return -EOPNOTSUPP; 6281 6282 switch (cmd) { 6283 case SIOCGMIIPHY: 6284 data->phy_id = adapter->hw.phy.addr; 6285 break; 6286 case SIOCGMIIREG: 6287 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, 6288 &data->val_out)) 6289 return -EIO; 6290 break; 6291 case SIOCSMIIREG: 6292 default: 6293 return -EOPNOTSUPP; 6294 } 6295 return 0; 6296} 6297 6298/** 6299 * igb_hwtstamp_ioctl - control hardware time stamping 6300 * @netdev: 6301 * @ifreq: 6302 * @cmd: 6303 * 6304 * Outgoing time stamping can be enabled and disabled. Play nice and 6305 * disable it when requested, although it shouldn't case any overhead 6306 * when no packet needs it. At most one packet in the queue may be 6307 * marked for time stamping, otherwise it would be impossible to tell 6308 * for sure to which packet the hardware time stamp belongs. 6309 * 6310 * Incoming time stamping has to be configured via the hardware 6311 * filters. Not all combinations are supported, in particular event 6312 * type has to be specified. Matching the kind of event packet is 6313 * not supported, with the exception of "all V2 events regardless of 6314 * level 2 or 4". 6315 * 6316 **/ 6317static int igb_hwtstamp_ioctl(struct net_device *netdev, 6318 struct ifreq *ifr, int cmd) 6319{ 6320 struct igb_adapter *adapter = netdev_priv(netdev); 6321 struct e1000_hw *hw = &adapter->hw; 6322 struct hwtstamp_config config; 6323 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; 6324 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; 6325 u32 tsync_rx_cfg = 0; 6326 bool is_l4 = false; 6327 bool is_l2 = false; 6328 u32 regval; 6329 6330 if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) 6331 return -EFAULT; 6332 6333 /* reserved for future extensions */ 6334 if (config.flags) 6335 return -EINVAL; 6336 6337 switch (config.tx_type) { 6338 case HWTSTAMP_TX_OFF: 6339 tsync_tx_ctl = 0; 6340 case HWTSTAMP_TX_ON: 6341 break; 6342 default: 6343 return -ERANGE; 6344 } 6345 6346 switch (config.rx_filter) { 6347 case HWTSTAMP_FILTER_NONE: 6348 tsync_rx_ctl = 0; 6349 break; 6350 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 6351 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 6352 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 6353 case HWTSTAMP_FILTER_ALL: 6354 /* 6355 * register TSYNCRXCFG must be set, therefore it is not 6356 * possible to time stamp both Sync and Delay_Req messages 6357 * => fall back to time stamping all packets 6358 */ 6359 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; 6360 config.rx_filter = HWTSTAMP_FILTER_ALL; 6361 break; 6362 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 6363 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; 6364 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; 6365 is_l4 = true; 6366 break; 6367 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 6368 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; 6369 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; 6370 is_l4 = true; 6371 break; 6372 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 6373 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 6374 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; 6375 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; 6376 is_l2 = true; 6377 is_l4 = true; 6378 config.rx_filter = HWTSTAMP_FILTER_SOME; 6379 break; 6380 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 6381 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 6382 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; 6383 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; 6384 is_l2 = true; 6385 is_l4 = true; 6386 config.rx_filter = HWTSTAMP_FILTER_SOME; 6387 break; 6388 case HWTSTAMP_FILTER_PTP_V2_EVENT: 6389 case HWTSTAMP_FILTER_PTP_V2_SYNC: 6390 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 6391 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; 6392 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; 6393 is_l2 = true; 6394 is_l4 = true; 6395 break; 6396 default: 6397 return -ERANGE; 6398 } 6399 6400 if (hw->mac.type == e1000_82575) { 6401 if (tsync_rx_ctl | tsync_tx_ctl) 6402 return -EINVAL; 6403 return 0; 6404 } 6405 6406 /* 6407 * Per-packet timestamping only works if all packets are 6408 * timestamped, so enable timestamping in all packets as 6409 * long as one rx filter was configured. 6410 */ 6411 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { 6412 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; 6413 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; 6414 } 6415 6416 /* enable/disable TX */ 6417 regval = rd32(E1000_TSYNCTXCTL); 6418 regval &= ~E1000_TSYNCTXCTL_ENABLED; 6419 regval |= tsync_tx_ctl; 6420 wr32(E1000_TSYNCTXCTL, regval); 6421 6422 /* enable/disable RX */ 6423 regval = rd32(E1000_TSYNCRXCTL); 6424 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); 6425 regval |= tsync_rx_ctl; 6426 wr32(E1000_TSYNCRXCTL, regval); 6427 6428 /* define which PTP packets are time stamped */ 6429 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); 6430 6431 /* define ethertype filter for timestamped packets */ 6432 if (is_l2) 6433 wr32(E1000_ETQF(3), 6434 (E1000_ETQF_FILTER_ENABLE | /* enable filter */ 6435 E1000_ETQF_1588 | /* enable timestamping */ 6436 ETH_P_1588)); /* 1588 eth protocol type */ 6437 else 6438 wr32(E1000_ETQF(3), 0); 6439 6440#define PTP_PORT 319 6441 /* L4 Queue Filter[3]: filter by destination port and protocol */ 6442 if (is_l4) { 6443 u32 ftqf = (IPPROTO_UDP /* UDP */ 6444 | E1000_FTQF_VF_BP /* VF not compared */ 6445 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ 6446 | E1000_FTQF_MASK); /* mask all inputs */ 6447 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ 6448 6449 wr32(E1000_IMIR(3), htons(PTP_PORT)); 6450 wr32(E1000_IMIREXT(3), 6451 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); 6452 if (hw->mac.type == e1000_82576) { 6453 /* enable source port check */ 6454 wr32(E1000_SPQF(3), htons(PTP_PORT)); 6455 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; 6456 } 6457 wr32(E1000_FTQF(3), ftqf); 6458 } else { 6459 wr32(E1000_FTQF(3), E1000_FTQF_MASK); 6460 } 6461 wrfl(); 6462 6463 adapter->hwtstamp_config = config; 6464 6465 /* clear TX/RX time stamp registers, just to be sure */ 6466 regval = rd32(E1000_TXSTMPH); 6467 regval = rd32(E1000_RXSTMPH); 6468 6469 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 6470 -EFAULT : 0; 6471} 6472 6473/** 6474 * igb_ioctl - 6475 * @netdev: 6476 * @ifreq: 6477 * @cmd: 6478 **/ 6479static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6480{ 6481 switch (cmd) { 6482 case SIOCGMIIPHY: 6483 case SIOCGMIIREG: 6484 case SIOCSMIIREG: 6485 return igb_mii_ioctl(netdev, ifr, cmd); 6486 case SIOCSHWTSTAMP: 6487 return igb_hwtstamp_ioctl(netdev, ifr, cmd); 6488 default: 6489 return -EOPNOTSUPP; 6490 } 6491} 6492 6493s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) 6494{ 6495 struct igb_adapter *adapter = hw->back; 6496 u16 cap_offset; 6497 6498 cap_offset = adapter->pdev->pcie_cap; 6499 if (!cap_offset) 6500 return -E1000_ERR_CONFIG; 6501 6502 pci_read_config_word(adapter->pdev, cap_offset + reg, value); 6503 6504 return 0; 6505} 6506 6507s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) 6508{ 6509 struct igb_adapter *adapter = hw->back; 6510 u16 cap_offset; 6511 6512 cap_offset = adapter->pdev->pcie_cap; 6513 if (!cap_offset) 6514 return -E1000_ERR_CONFIG; 6515 6516 pci_write_config_word(adapter->pdev, cap_offset + reg, *value); 6517 6518 return 0; 6519} 6520 6521static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) 6522{ 6523 struct igb_adapter *adapter = netdev_priv(netdev); 6524 struct e1000_hw *hw = &adapter->hw; 6525 u32 ctrl, rctl; 6526 bool enable = !!(features & NETIF_F_HW_VLAN_RX); 6527 6528 if (enable) { 6529 /* enable VLAN tag insert/strip */ 6530 ctrl = rd32(E1000_CTRL); 6531 ctrl |= E1000_CTRL_VME; 6532 wr32(E1000_CTRL, ctrl); 6533 6534 /* Disable CFI check */ 6535 rctl = rd32(E1000_RCTL); 6536 rctl &= ~E1000_RCTL_CFIEN; 6537 wr32(E1000_RCTL, rctl); 6538 } else { 6539 /* disable VLAN tag insert/strip */ 6540 ctrl = rd32(E1000_CTRL); 6541 ctrl &= ~E1000_CTRL_VME; 6542 wr32(E1000_CTRL, ctrl); 6543 } 6544 6545 igb_rlpml_set(adapter); 6546} 6547 6548static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) 6549{ 6550 struct igb_adapter *adapter = netdev_priv(netdev); 6551 struct e1000_hw *hw = &adapter->hw; 6552 int pf_id = adapter->vfs_allocated_count; 6553 6554 /* attempt to add filter to vlvf array */ 6555 igb_vlvf_set(adapter, vid, true, pf_id); 6556 6557 /* add the filter since PF can receive vlans w/o entry in vlvf */ 6558 igb_vfta_set(hw, vid, true); 6559 6560 set_bit(vid, adapter->active_vlans); 6561 6562 return 0; 6563} 6564 6565static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) 6566{ 6567 struct igb_adapter *adapter = netdev_priv(netdev); 6568 struct e1000_hw *hw = &adapter->hw; 6569 int pf_id = adapter->vfs_allocated_count; 6570 s32 err; 6571 6572 /* remove vlan from VLVF table array */ 6573 err = igb_vlvf_set(adapter, vid, false, pf_id); 6574 6575 /* if vid was not present in VLVF just remove it from table */ 6576 if (err) 6577 igb_vfta_set(hw, vid, false); 6578 6579 clear_bit(vid, adapter->active_vlans); 6580 6581 return 0; 6582} 6583 6584static void igb_restore_vlan(struct igb_adapter *adapter) 6585{ 6586 u16 vid; 6587 6588 igb_vlan_mode(adapter->netdev, adapter->netdev->features); 6589 6590 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) 6591 igb_vlan_rx_add_vid(adapter->netdev, vid); 6592} 6593 6594int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) 6595{ 6596 struct pci_dev *pdev = adapter->pdev; 6597 struct e1000_mac_info *mac = &adapter->hw.mac; 6598 6599 mac->autoneg = 0; 6600 6601 /* Make sure dplx is at most 1 bit and lsb of speed is not set 6602 * for the switch() below to work */ 6603 if ((spd & 1) || (dplx & ~1)) 6604 goto err_inval; 6605 6606 /* Fiber NIC's only allow 1000 Gbps Full duplex */ 6607 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) && 6608 spd != SPEED_1000 && 6609 dplx != DUPLEX_FULL) 6610 goto err_inval; 6611 6612 switch (spd + dplx) { 6613 case SPEED_10 + DUPLEX_HALF: 6614 mac->forced_speed_duplex = ADVERTISE_10_HALF; 6615 break; 6616 case SPEED_10 + DUPLEX_FULL: 6617 mac->forced_speed_duplex = ADVERTISE_10_FULL; 6618 break; 6619 case SPEED_100 + DUPLEX_HALF: 6620 mac->forced_speed_duplex = ADVERTISE_100_HALF; 6621 break; 6622 case SPEED_100 + DUPLEX_FULL: 6623 mac->forced_speed_duplex = ADVERTISE_100_FULL; 6624 break; 6625 case SPEED_1000 + DUPLEX_FULL: 6626 mac->autoneg = 1; 6627 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 6628 break; 6629 case SPEED_1000 + DUPLEX_HALF: /* not supported */ 6630 default: 6631 goto err_inval; 6632 } 6633 return 0; 6634 6635err_inval: 6636 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n"); 6637 return -EINVAL; 6638} 6639 6640static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, 6641 bool runtime) 6642{ 6643 struct net_device *netdev = pci_get_drvdata(pdev); 6644 struct igb_adapter *adapter = netdev_priv(netdev); 6645 struct e1000_hw *hw = &adapter->hw; 6646 u32 ctrl, rctl, status; 6647 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; 6648#ifdef CONFIG_PM 6649 int retval = 0; 6650#endif 6651 6652 netif_device_detach(netdev); 6653 6654 if (netif_running(netdev)) 6655 __igb_close(netdev, true); 6656 6657 igb_clear_interrupt_scheme(adapter); 6658 6659#ifdef CONFIG_PM 6660 retval = pci_save_state(pdev); 6661 if (retval) 6662 return retval; 6663#endif 6664 6665 status = rd32(E1000_STATUS); 6666 if (status & E1000_STATUS_LU) 6667 wufc &= ~E1000_WUFC_LNKC; 6668 6669 if (wufc) { 6670 igb_setup_rctl(adapter); 6671 igb_set_rx_mode(netdev); 6672 6673 /* turn on all-multi mode if wake on multicast is enabled */ 6674 if (wufc & E1000_WUFC_MC) { 6675 rctl = rd32(E1000_RCTL); 6676 rctl |= E1000_RCTL_MPE; 6677 wr32(E1000_RCTL, rctl); 6678 } 6679 6680 ctrl = rd32(E1000_CTRL); 6681 /* advertise wake from D3Cold */ 6682 #define E1000_CTRL_ADVD3WUC 0x00100000 6683 /* phy power management enable */ 6684 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 6685 ctrl |= E1000_CTRL_ADVD3WUC; 6686 wr32(E1000_CTRL, ctrl); 6687 6688 /* Allow time for pending master requests to run */ 6689 igb_disable_pcie_master(hw); 6690 6691 wr32(E1000_WUC, E1000_WUC_PME_EN); 6692 wr32(E1000_WUFC, wufc); 6693 } else { 6694 wr32(E1000_WUC, 0); 6695 wr32(E1000_WUFC, 0); 6696 } 6697 6698 *enable_wake = wufc || adapter->en_mng_pt; 6699 if (!*enable_wake) 6700 igb_power_down_link(adapter); 6701 else 6702 igb_power_up_link(adapter); 6703 6704 /* Release control of h/w to f/w. If f/w is AMT enabled, this 6705 * would have already happened in close and is redundant. */ 6706 igb_release_hw_control(adapter); 6707 6708 pci_disable_device(pdev); 6709 6710 return 0; 6711} 6712 6713#ifdef CONFIG_PM 6714#ifdef CONFIG_PM_SLEEP 6715static int igb_suspend(struct device *dev) 6716{ 6717 int retval; 6718 bool wake; 6719 struct pci_dev *pdev = to_pci_dev(dev); 6720 6721 retval = __igb_shutdown(pdev, &wake, 0); 6722 if (retval) 6723 return retval; 6724 6725 if (wake) { 6726 pci_prepare_to_sleep(pdev); 6727 } else { 6728 pci_wake_from_d3(pdev, false); 6729 pci_set_power_state(pdev, PCI_D3hot); 6730 } 6731 6732 return 0; 6733} 6734#endif /* CONFIG_PM_SLEEP */ 6735 6736static int igb_resume(struct device *dev) 6737{ 6738 struct pci_dev *pdev = to_pci_dev(dev); 6739 struct net_device *netdev = pci_get_drvdata(pdev); 6740 struct igb_adapter *adapter = netdev_priv(netdev); 6741 struct e1000_hw *hw = &adapter->hw; 6742 u32 err; 6743 6744 pci_set_power_state(pdev, PCI_D0); 6745 pci_restore_state(pdev); 6746 pci_save_state(pdev); 6747 6748 err = pci_enable_device_mem(pdev); 6749 if (err) { 6750 dev_err(&pdev->dev, 6751 "igb: Cannot enable PCI device from suspend\n"); 6752 return err; 6753 } 6754 pci_set_master(pdev); 6755 6756 pci_enable_wake(pdev, PCI_D3hot, 0); 6757 pci_enable_wake(pdev, PCI_D3cold, 0); 6758 6759 if (!rtnl_is_locked()) { 6760 /* 6761 * shut up ASSERT_RTNL() warning in 6762 * netif_set_real_num_tx/rx_queues. 6763 */ 6764 rtnl_lock(); 6765 err = igb_init_interrupt_scheme(adapter); 6766 rtnl_unlock(); 6767 } else { 6768 err = igb_init_interrupt_scheme(adapter); 6769 } 6770 if (err) { 6771 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 6772 return -ENOMEM; 6773 } 6774 6775 igb_reset(adapter); 6776 6777 /* let the f/w know that the h/w is now under the control of the 6778 * driver. */ 6779 igb_get_hw_control(adapter); 6780 6781 wr32(E1000_WUS, ~0); 6782 6783 if (netdev->flags & IFF_UP) { 6784 err = __igb_open(netdev, true); 6785 if (err) 6786 return err; 6787 } 6788 6789 netif_device_attach(netdev); 6790 return 0; 6791} 6792 6793#ifdef CONFIG_PM_RUNTIME 6794static int igb_runtime_idle(struct device *dev) 6795{ 6796 struct pci_dev *pdev = to_pci_dev(dev); 6797 struct net_device *netdev = pci_get_drvdata(pdev); 6798 struct igb_adapter *adapter = netdev_priv(netdev); 6799 6800 if (!igb_has_link(adapter)) 6801 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 6802 6803 return -EBUSY; 6804} 6805 6806static int igb_runtime_suspend(struct device *dev) 6807{ 6808 struct pci_dev *pdev = to_pci_dev(dev); 6809 int retval; 6810 bool wake; 6811 6812 retval = __igb_shutdown(pdev, &wake, 1); 6813 if (retval) 6814 return retval; 6815 6816 if (wake) { 6817 pci_prepare_to_sleep(pdev); 6818 } else { 6819 pci_wake_from_d3(pdev, false); 6820 pci_set_power_state(pdev, PCI_D3hot); 6821 } 6822 6823 return 0; 6824} 6825 6826static int igb_runtime_resume(struct device *dev) 6827{ 6828 return igb_resume(dev); 6829} 6830#endif /* CONFIG_PM_RUNTIME */ 6831#endif 6832 6833static void igb_shutdown(struct pci_dev *pdev) 6834{ 6835 bool wake; 6836 6837 __igb_shutdown(pdev, &wake, 0); 6838 6839 if (system_state == SYSTEM_POWER_OFF) { 6840 pci_wake_from_d3(pdev, wake); 6841 pci_set_power_state(pdev, PCI_D3hot); 6842 } 6843} 6844 6845#ifdef CONFIG_NET_POLL_CONTROLLER 6846/* 6847 * Polling 'interrupt' - used by things like netconsole to send skbs 6848 * without having to re-enable interrupts. It's not called while 6849 * the interrupt routine is executing. 6850 */ 6851static void igb_netpoll(struct net_device *netdev) 6852{ 6853 struct igb_adapter *adapter = netdev_priv(netdev); 6854 struct e1000_hw *hw = &adapter->hw; 6855 struct igb_q_vector *q_vector; 6856 int i; 6857 6858 for (i = 0; i < adapter->num_q_vectors; i++) { 6859 q_vector = adapter->q_vector[i]; 6860 if (adapter->msix_entries) 6861 wr32(E1000_EIMC, q_vector->eims_value); 6862 else 6863 igb_irq_disable(adapter); 6864 napi_schedule(&q_vector->napi); 6865 } 6866} 6867#endif /* CONFIG_NET_POLL_CONTROLLER */ 6868 6869/** 6870 * igb_io_error_detected - called when PCI error is detected 6871 * @pdev: Pointer to PCI device 6872 * @state: The current pci connection state 6873 * 6874 * This function is called after a PCI bus error affecting 6875 * this device has been detected. 6876 */ 6877static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, 6878 pci_channel_state_t state) 6879{ 6880 struct net_device *netdev = pci_get_drvdata(pdev); 6881 struct igb_adapter *adapter = netdev_priv(netdev); 6882 6883 netif_device_detach(netdev); 6884 6885 if (state == pci_channel_io_perm_failure) 6886 return PCI_ERS_RESULT_DISCONNECT; 6887 6888 if (netif_running(netdev)) 6889 igb_down(adapter); 6890 pci_disable_device(pdev); 6891 6892 /* Request a slot slot reset. */ 6893 return PCI_ERS_RESULT_NEED_RESET; 6894} 6895 6896/** 6897 * igb_io_slot_reset - called after the pci bus has been reset. 6898 * @pdev: Pointer to PCI device 6899 * 6900 * Restart the card from scratch, as if from a cold-boot. Implementation 6901 * resembles the first-half of the igb_resume routine. 6902 */ 6903static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) 6904{ 6905 struct net_device *netdev = pci_get_drvdata(pdev); 6906 struct igb_adapter *adapter = netdev_priv(netdev); 6907 struct e1000_hw *hw = &adapter->hw; 6908 pci_ers_result_t result; 6909 int err; 6910 6911 if (pci_enable_device_mem(pdev)) { 6912 dev_err(&pdev->dev, 6913 "Cannot re-enable PCI device after reset.\n"); 6914 result = PCI_ERS_RESULT_DISCONNECT; 6915 } else { 6916 pci_set_master(pdev); 6917 pci_restore_state(pdev); 6918 pci_save_state(pdev); 6919 6920 pci_enable_wake(pdev, PCI_D3hot, 0); 6921 pci_enable_wake(pdev, PCI_D3cold, 0); 6922 6923 igb_reset(adapter); 6924 wr32(E1000_WUS, ~0); 6925 result = PCI_ERS_RESULT_RECOVERED; 6926 } 6927 6928 err = pci_cleanup_aer_uncorrect_error_status(pdev); 6929 if (err) { 6930 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status " 6931 "failed 0x%0x\n", err); 6932 /* non-fatal, continue */ 6933 } 6934 6935 return result; 6936} 6937 6938/** 6939 * igb_io_resume - called when traffic can start flowing again. 6940 * @pdev: Pointer to PCI device 6941 * 6942 * This callback is called when the error recovery driver tells us that 6943 * its OK to resume normal operation. Implementation resembles the 6944 * second-half of the igb_resume routine. 6945 */ 6946static void igb_io_resume(struct pci_dev *pdev) 6947{ 6948 struct net_device *netdev = pci_get_drvdata(pdev); 6949 struct igb_adapter *adapter = netdev_priv(netdev); 6950 6951 if (netif_running(netdev)) { 6952 if (igb_up(adapter)) { 6953 dev_err(&pdev->dev, "igb_up failed after reset\n"); 6954 return; 6955 } 6956 } 6957 6958 netif_device_attach(netdev); 6959 6960 /* let the f/w know that the h/w is now under the control of the 6961 * driver. */ 6962 igb_get_hw_control(adapter); 6963} 6964 6965static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, 6966 u8 qsel) 6967{ 6968 u32 rar_low, rar_high; 6969 struct e1000_hw *hw = &adapter->hw; 6970 6971 /* HW expects these in little endian so we reverse the byte order 6972 * from network order (big endian) to little endian 6973 */ 6974 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | 6975 ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); 6976 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); 6977 6978 /* Indicate to hardware the Address is Valid. */ 6979 rar_high |= E1000_RAH_AV; 6980 6981 if (hw->mac.type == e1000_82575) 6982 rar_high |= E1000_RAH_POOL_1 * qsel; 6983 else 6984 rar_high |= E1000_RAH_POOL_1 << qsel; 6985 6986 wr32(E1000_RAL(index), rar_low); 6987 wrfl(); 6988 wr32(E1000_RAH(index), rar_high); 6989 wrfl(); 6990} 6991 6992static int igb_set_vf_mac(struct igb_adapter *adapter, 6993 int vf, unsigned char *mac_addr) 6994{ 6995 struct e1000_hw *hw = &adapter->hw; 6996 /* VF MAC addresses start at end of receive addresses and moves 6997 * torwards the first, as a result a collision should not be possible */ 6998 int rar_entry = hw->mac.rar_entry_count - (vf + 1); 6999 7000 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); 7001 7002 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf); 7003 7004 return 0; 7005} 7006 7007static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) 7008{ 7009 struct igb_adapter *adapter = netdev_priv(netdev); 7010 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) 7011 return -EINVAL; 7012 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; 7013 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); 7014 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" 7015 " change effective."); 7016 if (test_bit(__IGB_DOWN, &adapter->state)) { 7017 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," 7018 " but the PF device is not up.\n"); 7019 dev_warn(&adapter->pdev->dev, "Bring the PF device up before" 7020 " attempting to use the VF device.\n"); 7021 } 7022 return igb_set_vf_mac(adapter, vf, mac); 7023} 7024 7025static int igb_link_mbps(int internal_link_speed) 7026{ 7027 switch (internal_link_speed) { 7028 case SPEED_100: 7029 return 100; 7030 case SPEED_1000: 7031 return 1000; 7032 default: 7033 return 0; 7034 } 7035} 7036 7037static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, 7038 int link_speed) 7039{ 7040 int rf_dec, rf_int; 7041 u32 bcnrc_val; 7042 7043 if (tx_rate != 0) { 7044 /* Calculate the rate factor values to set */ 7045 rf_int = link_speed / tx_rate; 7046 rf_dec = (link_speed - (rf_int * tx_rate)); 7047 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; 7048 7049 bcnrc_val = E1000_RTTBCNRC_RS_ENA; 7050 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) & 7051 E1000_RTTBCNRC_RF_INT_MASK); 7052 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); 7053 } else { 7054 bcnrc_val = 0; 7055 } 7056 7057 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */ 7058 wr32(E1000_RTTBCNRC, bcnrc_val); 7059} 7060 7061static void igb_check_vf_rate_limit(struct igb_adapter *adapter) 7062{ 7063 int actual_link_speed, i; 7064 bool reset_rate = false; 7065 7066 /* VF TX rate limit was not set or not supported */ 7067 if ((adapter->vf_rate_link_speed == 0) || 7068 (adapter->hw.mac.type != e1000_82576)) 7069 return; 7070 7071 actual_link_speed = igb_link_mbps(adapter->link_speed); 7072 if (actual_link_speed != adapter->vf_rate_link_speed) { 7073 reset_rate = true; 7074 adapter->vf_rate_link_speed = 0; 7075 dev_info(&adapter->pdev->dev, 7076 "Link speed has been changed. VF Transmit " 7077 "rate is disabled\n"); 7078 } 7079 7080 for (i = 0; i < adapter->vfs_allocated_count; i++) { 7081 if (reset_rate) 7082 adapter->vf_data[i].tx_rate = 0; 7083 7084 igb_set_vf_rate_limit(&adapter->hw, i, 7085 adapter->vf_data[i].tx_rate, 7086 actual_link_speed); 7087 } 7088} 7089 7090static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) 7091{ 7092 struct igb_adapter *adapter = netdev_priv(netdev); 7093 struct e1000_hw *hw = &adapter->hw; 7094 int actual_link_speed; 7095 7096 if (hw->mac.type != e1000_82576) 7097 return -EOPNOTSUPP; 7098 7099 actual_link_speed = igb_link_mbps(adapter->link_speed); 7100 if ((vf >= adapter->vfs_allocated_count) || 7101 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || 7102 (tx_rate < 0) || (tx_rate > actual_link_speed)) 7103 return -EINVAL; 7104 7105 adapter->vf_rate_link_speed = actual_link_speed; 7106 adapter->vf_data[vf].tx_rate = (u16)tx_rate; 7107 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); 7108 7109 return 0; 7110} 7111 7112static int igb_ndo_get_vf_config(struct net_device *netdev, 7113 int vf, struct ifla_vf_info *ivi) 7114{ 7115 struct igb_adapter *adapter = netdev_priv(netdev); 7116 if (vf >= adapter->vfs_allocated_count) 7117 return -EINVAL; 7118 ivi->vf = vf; 7119 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); 7120 ivi->tx_rate = adapter->vf_data[vf].tx_rate; 7121 ivi->vlan = adapter->vf_data[vf].pf_vlan; 7122 ivi->qos = adapter->vf_data[vf].pf_qos; 7123 return 0; 7124} 7125 7126static void igb_vmm_control(struct igb_adapter *adapter) 7127{ 7128 struct e1000_hw *hw = &adapter->hw; 7129 u32 reg; 7130 7131 switch (hw->mac.type) { 7132 case e1000_82575: 7133 default: 7134 /* replication is not supported for 82575 */ 7135 return; 7136 case e1000_82576: 7137 /* notify HW that the MAC is adding vlan tags */ 7138 reg = rd32(E1000_DTXCTL); 7139 reg |= E1000_DTXCTL_VLAN_ADDED; 7140 wr32(E1000_DTXCTL, reg); 7141 case e1000_82580: 7142 /* enable replication vlan tag stripping */ 7143 reg = rd32(E1000_RPLOLR); 7144 reg |= E1000_RPLOLR_STRVLAN; 7145 wr32(E1000_RPLOLR, reg); 7146 case e1000_i350: 7147 /* none of the above registers are supported by i350 */ 7148 break; 7149 } 7150 7151 if (adapter->vfs_allocated_count) { 7152 igb_vmdq_set_loopback_pf(hw, true); 7153 igb_vmdq_set_replication_pf(hw, true); 7154 igb_vmdq_set_anti_spoofing_pf(hw, true, 7155 adapter->vfs_allocated_count); 7156 } else { 7157 igb_vmdq_set_loopback_pf(hw, false); 7158 igb_vmdq_set_replication_pf(hw, false); 7159 } 7160} 7161 7162static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) 7163{ 7164 struct e1000_hw *hw = &adapter->hw; 7165 u32 dmac_thr; 7166 u16 hwm; 7167 7168 if (hw->mac.type > e1000_82580) { 7169 if (adapter->flags & IGB_FLAG_DMAC) { 7170 u32 reg; 7171 7172 /* force threshold to 0. */ 7173 wr32(E1000_DMCTXTH, 0); 7174 7175 /* 7176 * DMA Coalescing high water mark needs to be greater 7177 * than the Rx threshold. Set hwm to PBA - max frame 7178 * size in 16B units, capping it at PBA - 6KB. 7179 */ 7180 hwm = 64 * pba - adapter->max_frame_size / 16; 7181 if (hwm < 64 * (pba - 6)) 7182 hwm = 64 * (pba - 6); 7183 reg = rd32(E1000_FCRTC); 7184 reg &= ~E1000_FCRTC_RTH_COAL_MASK; 7185 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) 7186 & E1000_FCRTC_RTH_COAL_MASK); 7187 wr32(E1000_FCRTC, reg); 7188 7189 /* 7190 * Set the DMA Coalescing Rx threshold to PBA - 2 * max 7191 * frame size, capping it at PBA - 10KB. 7192 */ 7193 dmac_thr = pba - adapter->max_frame_size / 512; 7194 if (dmac_thr < pba - 10) 7195 dmac_thr = pba - 10; 7196 reg = rd32(E1000_DMACR); 7197 reg &= ~E1000_DMACR_DMACTHR_MASK; 7198 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) 7199 & E1000_DMACR_DMACTHR_MASK); 7200 7201 /* transition to L0x or L1 if available..*/ 7202 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); 7203 7204 /* watchdog timer= +-1000 usec in 32usec intervals */ 7205 reg |= (1000 >> 5); 7206 wr32(E1000_DMACR, reg); 7207 7208 /* 7209 * no lower threshold to disable 7210 * coalescing(smart fifb)-UTRESH=0 7211 */ 7212 wr32(E1000_DMCRTRH, 0); 7213 7214 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4); 7215 7216 wr32(E1000_DMCTLX, reg); 7217 7218 /* 7219 * free space in tx packet buffer to wake from 7220 * DMA coal 7221 */ 7222 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - 7223 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); 7224 7225 /* 7226 * make low power state decision controlled 7227 * by DMA coal 7228 */ 7229 reg = rd32(E1000_PCIEMISC); 7230 reg &= ~E1000_PCIEMISC_LX_DECISION; 7231 wr32(E1000_PCIEMISC, reg); 7232 } /* endif adapter->dmac is not disabled */ 7233 } else if (hw->mac.type == e1000_82580) { 7234 u32 reg = rd32(E1000_PCIEMISC); 7235 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); 7236 wr32(E1000_DMACR, 0); 7237 } 7238} 7239 7240/* igb_main.c */ 7241