igb_main.c revision 11ba69e876e1141fa4b11a7c0efb256a8df9ae7d
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 3
#define MIN 2
#define BUILD 10
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
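
/*
 * Editor's note (illustrative, not part of the original source): the
 * double expansion performed by __stringify() turns the numeric macros
 * above into string literals, so with MAJ = 3, MIN = 2 and BUILD = 10
 * DRV_VERSION expands to the single string literal "3.2.10-k".
 */
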
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
		"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
					      pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};


static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};
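
/*
 * Usage note added by the editor (illustrative, not part of the original
 * source): with CONFIG_PCI_IOV enabled, the max_vfs parameter declared
 * above is supplied at load time, e.g. "modprobe igb max_vfs=7", to ask
 * each physical function to provision that many virtual functions.
 */
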
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
356 "trans_start last_rx\n"); 357 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n", 358 netdev->name, 359 netdev->state, 360 netdev->trans_start, 361 netdev->last_rx); 362 } 363 364 /* Print Registers */ 365 dev_info(&adapter->pdev->dev, "Register Dump\n"); 366 printk(KERN_INFO " Register Name Value\n"); 367 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl; 368 reginfo->name; reginfo++) { 369 igb_regdump(hw, reginfo); 370 } 371 372 /* Print TX Ring Summary */ 373 if (!netdev || !netif_running(netdev)) 374 goto exit; 375 376 dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); 377 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]" 378 " leng ntw timestamp\n"); 379 for (n = 0; n < adapter->num_tx_queues; n++) { 380 struct igb_tx_buffer *buffer_info; 381 tx_ring = adapter->tx_ring[n]; 382 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; 383 printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n", 384 n, tx_ring->next_to_use, tx_ring->next_to_clean, 385 (u64)buffer_info->dma, 386 buffer_info->length, 387 buffer_info->next_to_watch, 388 (u64)buffer_info->time_stamp); 389 } 390 391 /* Print TX Rings */ 392 if (!netif_msg_tx_done(adapter)) 393 goto rx_ring_summary; 394 395 dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); 396 397 /* Transmit Descriptor Formats 398 * 399 * Advanced Transmit Descriptor 400 * +--------------------------------------------------------------+ 401 * 0 | Buffer Address [63:0] | 402 * +--------------------------------------------------------------+ 403 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN | 404 * +--------------------------------------------------------------+ 405 * 63 46 45 40 39 38 36 35 32 31 24 15 0 406 */ 407 408 for (n = 0; n < adapter->num_tx_queues; n++) { 409 tx_ring = adapter->tx_ring[n]; 410 printk(KERN_INFO "------------------------------------\n"); 411 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index); 412 printk(KERN_INFO "------------------------------------\n"); 413 printk(KERN_INFO "T [desc] [address 63:0 ] " 414 "[PlPOCIStDDM Ln] [bi->dma ] " 415 "leng ntw timestamp bi->skb\n"); 416 417 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { 418 struct igb_tx_buffer *buffer_info; 419 tx_desc = IGB_TX_DESC(tx_ring, i); 420 buffer_info = &tx_ring->tx_buffer_info[i]; 421 u0 = (struct my_u0 *)tx_desc; 422 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX" 423 " %04X %p %016llX %p", i, 424 le64_to_cpu(u0->a), 425 le64_to_cpu(u0->b), 426 (u64)buffer_info->dma, 427 buffer_info->length, 428 buffer_info->next_to_watch, 429 (u64)buffer_info->time_stamp, 430 buffer_info->skb); 431 if (i == tx_ring->next_to_use && 432 i == tx_ring->next_to_clean) 433 printk(KERN_CONT " NTC/U\n"); 434 else if (i == tx_ring->next_to_use) 435 printk(KERN_CONT " NTU\n"); 436 else if (i == tx_ring->next_to_clean) 437 printk(KERN_CONT " NTC\n"); 438 else 439 printk(KERN_CONT "\n"); 440 441 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) 442 print_hex_dump(KERN_INFO, "", 443 DUMP_PREFIX_ADDRESS, 444 16, 1, phys_to_virt(buffer_info->dma), 445 buffer_info->length, true); 446 } 447 } 448 449 /* Print RX Rings Summary */ 450rx_ring_summary: 451 dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); 452 printk(KERN_INFO "Queue [NTU] [NTC]\n"); 453 for (n = 0; n < adapter->num_rx_queues; n++) { 454 rx_ring = adapter->rx_ring[n]; 455 printk(KERN_INFO " %5d %5X %5X\n", n, 456 rx_ring->next_to_use, rx_ring->next_to_clean); 457 } 458 459 /* Print RX Rings */ 460 if (!netif_msg_rx_status(adapter)) 461 goto exit; 462 463 
	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet   IP     |SPH| HDR_LEN   | RSV|Packet|  RSS   |
	 *   | Checksum Ident  |   |           |    | Type | Type   |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

		}
	}

exit:
	return;
}


/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML. However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type >= e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
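
/*
 * Worked example added by the editor (illustrative, not part of the
 * original source): Q_IDX_82576() uses the low bit of i to pick the queue
 * half (0-7 vs 8-15) and the remaining bits to pick the slot, so ring
 * indices 0, 1, 2, 3 map to hardware queues 0, 8, 1, 9. With 7 VFs
 * (rbase_offset = 7) the PF's first two rings therefore land on queues
 * 7 and 15, the only ones not claimed by a VF queue pair.
 */
static inline void igb_q_idx_82576_example(void)
{
	/* compile-time checks of the mapping described above */
	BUILD_BUG_ON(Q_IDX_82576(0) != 0);
	BUILD_BUG_ON(Q_IDX_82576(1) != 8);
	BUILD_BUG_ON(Q_IDX_82576(2) != 1);
	BUILD_BUG_ON(Q_IDX_82576(3) != 9);
}
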
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
	int orig_node = adapter->node;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

		/* On i350, loopback VLAN packets have the tag byte-swapped. */
		if (adapter->hw.mac.type == e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

		adapter->rx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	igb_cache_ring_register(adapter);

	return 0;

err:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_queues(adapter);

	return -ENOMEM;
}

/**
 * igb_write_ivar - configure ivar for given MSI-X vector
 * @hw: pointer to the HW structure
 * @msix_vector: vector number we are allocating to a given ring
 * @index: row index of IVAR register to write within IVAR table
 * @offset: column offset in IVAR, should be a multiple of 8
 *
 * This function is intended to handle the writing of the IVAR register
 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
 * each containing a cause allocation for an Rx and Tx ring, and a
 * variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
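
/*
 * Worked example added by the editor (illustrative, not part of the
 * original source): on 82576, Rx queue 10 selects IVAR row 10 & 0x7 = 2
 * and byte offset (10 & 0x8) << 1 = 16, while its Tx cause lands at
 * offset 16 + 8 = 24 in the same register. On 82580/i350 the row-major
 * layout puts Rx queue 10 at row 10 >> 1 = 5, offset (10 & 0x1) << 4 = 0.
 */
static inline void igb_ivar_layout_example(void)
{
	/* compile-time checks of the offset math described above */
	BUILD_BUG_ON((10 & 0x7) != 2 || ((10 & 0x8) << 1) != 16);
	BUILD_BUG_ON((10 >> 1) != 5 || ((10 & 0x1) << 4) != 0);
}
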

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
			   E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
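
/*
 * Worked example added by the editor (illustrative, not part of the
 * original source): with rss_queues = 4 and IGB_FLAG_QUEUE_PAIRS set,
 * igb_set_interrupt_capability() reserves 4 q_vectors, each servicing one
 * Rx/Tx ring pair, and asks pci_enable_msix() for 5 entries, the extra
 * vector covering link and other causes. With pairing disabled the same
 * configuration needs 4 + 4 + 1 = 9 vectors.
 */
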

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
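
/*
 * Worked example added by the editor (illustrative, not part of the
 * original source): with 4 Rx and 4 Tx queues and 8 q_vectors,
 * igb_map_ring_to_vector() gives every ring a private vector (vectors
 * 0-3 for Rx, 4-7 for Tx). With only 4 q_vectors the else branch pairs
 * the rings instead, so vector i services both rx_ring[i] and tx_ring[i].
 */
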

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	}

	igb_assign_vector(adapter->q_vector[0], 0);

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++)
			free_irq(adapter->msix_entries[vector++].vector,
				 adapter->q_vector[i]);
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}

void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_disable(&(adapter->q_vector[i]->napi));

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}

void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/*
			 * DMA Coalescing high water mark needs to be higher
			 * than the Rx threshold.  The Rx threshold is
			 * currently pba - 6, so we should use a high water
			 * mark of pba - 4.
			 */
			hwm = (pba - 4) << 10;

			reg = (((pba - 6) << E1000_DMACR_DMACTHR_SHIFT)
			       & E1000_DMACR_DMACTHR_MASK);

			/* transition to L0s or L1 if available */
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = ~1000 usec in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/* no lower threshold to disable coalescing (smart fifo)
			 * - UTRESH = 0 */
			wr32(E1000_DMCRTRH, 0);

			/* set the DMA Coalescing high water mark */
			wr32(E1000_FCRTC, hwm);

			/*
			 * This sets the time to wait before requesting
			 * transition to low power state to the number of
			 * usecs needed to receive one 512 byte frame at
			 * gigabit line rate.
			 */
			reg = rd32(E1000_DMCTLX);
			reg |= IGB_DMCTLX_DCFLUSH_DIS;

			/* Delay 255 usec before entering Lx state. */
			reg |= 0xFF;
			wr32(E1000_DMCTLX, reg);

			/* free space in Tx packet buffer to wake from DMAC */
			wr32(E1000_DMCTXTH,
			     (IGB_MIN_TXPBSIZE -
			      (IGB_TX_BUF_4096 + adapter->max_frame_size))
			     >> 6);

			/* make low power state decision controlled by DMAC */
			reg = rd32(E1000_PCIEMISC);
			reg |= E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* end if IGB_FLAG_DMAC set */
	}
	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}

static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev, u32 features)
{
	u32 changed = netdev->features ^ features;

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}

static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
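
/*
 * Example added by the editor (illustrative, not part of the original
 * source): because igb_fix_features() couples the two VLAN flags, a
 * request such as "ethtool -K eth0 rxvlan off" also clears txvlan before
 * igb_set_features() runs, so the hardware never ends up with Tx VLAN
 * acceleration enabled while Rx VLAN acceleration is off.
 */
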
1849 * The OS initialization, configuring of the adapter private structure, 1850 * and a hardware reset occur. 1851 **/ 1852static int __devinit igb_probe(struct pci_dev *pdev, 1853 const struct pci_device_id *ent) 1854{ 1855 struct net_device *netdev; 1856 struct igb_adapter *adapter; 1857 struct e1000_hw *hw; 1858 u16 eeprom_data = 0; 1859 s32 ret_val; 1860 static int global_quad_port_a; /* global quad port a indication */ 1861 const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; 1862 unsigned long mmio_start, mmio_len; 1863 int err, pci_using_dac; 1864 u16 eeprom_apme_mask = IGB_EEPROM_APME; 1865 u8 part_str[E1000_PBANUM_LENGTH]; 1866 1867 /* Catch broken hardware that put the wrong VF device ID in 1868 * the PCIe SR-IOV capability. 1869 */ 1870 if (pdev->is_virtfn) { 1871 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", 1872 pci_name(pdev), pdev->vendor, pdev->device); 1873 return -EINVAL; 1874 } 1875 1876 err = pci_enable_device_mem(pdev); 1877 if (err) 1878 return err; 1879 1880 pci_using_dac = 0; 1881 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); 1882 if (!err) { 1883 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); 1884 if (!err) 1885 pci_using_dac = 1; 1886 } else { 1887 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); 1888 if (err) { 1889 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); 1890 if (err) { 1891 dev_err(&pdev->dev, "No usable DMA " 1892 "configuration, aborting\n"); 1893 goto err_dma; 1894 } 1895 } 1896 } 1897 1898 err = pci_request_selected_regions(pdev, pci_select_bars(pdev, 1899 IORESOURCE_MEM), 1900 igb_driver_name); 1901 if (err) 1902 goto err_pci_reg; 1903 1904 pci_enable_pcie_error_reporting(pdev); 1905 1906 pci_set_master(pdev); 1907 pci_save_state(pdev); 1908 1909 err = -ENOMEM; 1910 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), 1911 IGB_MAX_TX_QUEUES); 1912 if (!netdev) 1913 goto err_alloc_etherdev; 1914 1915 SET_NETDEV_DEV(netdev, &pdev->dev); 1916 1917 pci_set_drvdata(pdev, netdev); 1918 adapter = netdev_priv(netdev); 1919 adapter->netdev = netdev; 1920 adapter->pdev = pdev; 1921 hw = &adapter->hw; 1922 hw->back = adapter; 1923 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE; 1924 1925 mmio_start = pci_resource_start(pdev, 0); 1926 mmio_len = pci_resource_len(pdev, 0); 1927 1928 err = -EIO; 1929 hw->hw_addr = ioremap(mmio_start, mmio_len); 1930 if (!hw->hw_addr) 1931 goto err_ioremap; 1932 1933 netdev->netdev_ops = &igb_netdev_ops; 1934 igb_set_ethtool_ops(netdev); 1935 netdev->watchdog_timeo = 5 * HZ; 1936 1937 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); 1938 1939 netdev->mem_start = mmio_start; 1940 netdev->mem_end = mmio_start + mmio_len; 1941 1942 /* PCI config space info */ 1943 hw->vendor_id = pdev->vendor; 1944 hw->device_id = pdev->device; 1945 hw->revision_id = pdev->revision; 1946 hw->subsystem_vendor_id = pdev->subsystem_vendor; 1947 hw->subsystem_device_id = pdev->subsystem_device; 1948 1949 /* Copy the default MAC, PHY and NVM function pointers */ 1950 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 1951 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 1952 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); 1953 /* Initialize skew-specific constants */ 1954 err = ei->get_invariants(hw); 1955 if (err) 1956 goto err_sw_init; 1957 1958 /* setup the private structure */ 1959 err = igb_sw_init(adapter); 1960 if (err) 1961 goto err_sw_init; 1962 1963 igb_get_bus_info_pcie(hw); 1964 1965 hw->phy.autoneg_wait_to_complete = false; 1966 1967 /* Copper options 
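 * (defaults applied below: automatic MDI/MDI-X crossover, polarity correction left enabled, and hardware-default master/slave resolution)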
*/ 1968 if (hw->phy.media_type == e1000_media_type_copper) { 1969 hw->phy.mdix = AUTO_ALL_MODES; 1970 hw->phy.disable_polarity_correction = false; 1971 hw->phy.ms_type = e1000_ms_hw_default; 1972 } 1973 1974 if (igb_check_reset_block(hw)) 1975 dev_info(&pdev->dev, 1976 "PHY reset is blocked due to SOL/IDER session.\n"); 1977 1978 /* 1979 * features is initialized to 0 at allocation; it might have bits 1980 * set by igb_sw_init, so we should use an OR instead of an 1981 * assignment. 1982 */ 1983 netdev->features |= NETIF_F_SG | 1984 NETIF_F_IP_CSUM | 1985 NETIF_F_IPV6_CSUM | 1986 NETIF_F_TSO | 1987 NETIF_F_TSO6 | 1988 NETIF_F_RXHASH | 1989 NETIF_F_RXCSUM | 1990 NETIF_F_HW_VLAN_RX | 1991 NETIF_F_HW_VLAN_TX; 1992 1993 /* copy netdev features into list of user selectable features */ 1994 netdev->hw_features |= netdev->features; 1995 1996 /* set this bit last since it cannot be part of hw_features */ 1997 netdev->features |= NETIF_F_HW_VLAN_FILTER; 1998 1999 netdev->vlan_features |= NETIF_F_TSO | 2000 NETIF_F_TSO6 | 2001 NETIF_F_IP_CSUM | 2002 NETIF_F_IPV6_CSUM | 2003 NETIF_F_SG; 2004 2005 if (pci_using_dac) { 2006 netdev->features |= NETIF_F_HIGHDMA; 2007 netdev->vlan_features |= NETIF_F_HIGHDMA; 2008 } 2009 2010 if (hw->mac.type >= e1000_82576) { 2011 netdev->hw_features |= NETIF_F_SCTP_CSUM; 2012 netdev->features |= NETIF_F_SCTP_CSUM; 2013 } 2014 2015 netdev->priv_flags |= IFF_UNICAST_FLT; 2016 2017 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); 2018 2019 /* before reading the NVM, reset the controller to put the device in a 2020 * known good starting state */ 2021 hw->mac.ops.reset_hw(hw); 2022 2023 /* make sure the NVM is good */ 2024 if (hw->nvm.ops.validate(hw) < 0) { 2025 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 2026 err = -EIO; 2027 goto err_eeprom; 2028 } 2029 2030 /* copy the MAC address out of the NVM */ 2031 if (hw->mac.ops.read_mac_addr(hw)) 2032 dev_err(&pdev->dev, "NVM Read Error\n"); 2033 2034 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); 2035 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); 2036 2037 if (!is_valid_ether_addr(netdev->perm_addr)) { 2038 dev_err(&pdev->dev, "Invalid MAC Address\n"); 2039 err = -EIO; 2040 goto err_eeprom; 2041 } 2042 2043 setup_timer(&adapter->watchdog_timer, igb_watchdog, 2044 (unsigned long) adapter); 2045 setup_timer(&adapter->phy_info_timer, igb_update_phy_info, 2046 (unsigned long) adapter); 2047 2048 INIT_WORK(&adapter->reset_task, igb_reset_task); 2049 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); 2050 2051 /* Initialize link properties that are user-changeable */ 2052 adapter->fc_autoneg = true; 2053 hw->mac.autoneg = true; 2054 hw->phy.autoneg_advertised = 0x2f; 2055 2056 hw->fc.requested_mode = e1000_fc_default; 2057 hw->fc.current_mode = e1000_fc_default; 2058 2059 igb_validate_mdi_setting(hw); 2060 2061 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM, 2062 * enable the ACPI Magic Packet filter 2063 */ 2064 2065 if (hw->bus.func == 0) 2066 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 2067 else if (hw->mac.type >= e1000_82580) 2068 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + 2069 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, 2070 &eeprom_data); 2071 else if (hw->bus.func == 1) 2072 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 2073 2074 if (eeprom_data & eeprom_apme_mask) 2075 adapter->eeprom_wol |= E1000_WUFC_MAG; 2076 2077 /* now that we have the eeprom settings, apply the special cases where 2078 * the eeprom may be wrong or
the board simply won't support wake on 2079 * lan on a particular port */ 2080 switch (pdev->device) { 2081 case E1000_DEV_ID_82575GB_QUAD_COPPER: 2082 adapter->eeprom_wol = 0; 2083 break; 2084 case E1000_DEV_ID_82575EB_FIBER_SERDES: 2085 case E1000_DEV_ID_82576_FIBER: 2086 case E1000_DEV_ID_82576_SERDES: 2087 /* Wake events only supported on port A for dual fiber 2088 * regardless of eeprom setting */ 2089 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) 2090 adapter->eeprom_wol = 0; 2091 break; 2092 case E1000_DEV_ID_82576_QUAD_COPPER: 2093 case E1000_DEV_ID_82576_QUAD_COPPER_ET2: 2094 /* if quad port adapter, disable WoL on all but port A */ 2095 if (global_quad_port_a != 0) 2096 adapter->eeprom_wol = 0; 2097 else 2098 adapter->flags |= IGB_FLAG_QUAD_PORT_A; 2099 /* Reset for multiple quad port adapters */ 2100 if (++global_quad_port_a == 4) 2101 global_quad_port_a = 0; 2102 break; 2103 } 2104 2105 /* initialize the wol settings based on the eeprom settings */ 2106 adapter->wol = adapter->eeprom_wol; 2107 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); 2108 2109 /* reset the hardware with the new settings */ 2110 igb_reset(adapter); 2111 2112 /* let the f/w know that the h/w is now under the control of the 2113 * driver. */ 2114 igb_get_hw_control(adapter); 2115 2116 strcpy(netdev->name, "eth%d"); 2117 err = register_netdev(netdev); 2118 if (err) 2119 goto err_register; 2120 2121 /* carrier off reporting is important to ethtool even BEFORE open */ 2122 netif_carrier_off(netdev); 2123 2124#ifdef CONFIG_IGB_DCA 2125 if (dca_add_requester(&pdev->dev) == 0) { 2126 adapter->flags |= IGB_FLAG_DCA_ENABLED; 2127 dev_info(&pdev->dev, "DCA enabled\n"); 2128 igb_setup_dca(adapter); 2129 } 2130 2131#endif 2132 /* do hw tstamp init after resetting */ 2133 igb_init_hw_timer(adapter); 2134 2135 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); 2136 /* print bus type/speed/width info */ 2137 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", 2138 netdev->name, 2139 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : 2140 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : 2141 "unknown"), 2142 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : 2143 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : 2144 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : 2145 "unknown"), 2146 netdev->dev_addr); 2147 2148 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); 2149 if (ret_val) 2150 strcpy(part_str, "Unknown"); 2151 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); 2152 dev_info(&pdev->dev, 2153 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", 2154 adapter->msix_entries ? "MSI-X" : 2155 (adapter->flags & IGB_FLAG_HAS_MSI) ? 
"MSI" : "legacy", 2156 adapter->num_rx_queues, adapter->num_tx_queues); 2157 switch (hw->mac.type) { 2158 case e1000_i350: 2159 igb_set_eee_i350(hw); 2160 break; 2161 default: 2162 break; 2163 } 2164 return 0; 2165 2166err_register: 2167 igb_release_hw_control(adapter); 2168err_eeprom: 2169 if (!igb_check_reset_block(hw)) 2170 igb_reset_phy(hw); 2171 2172 if (hw->flash_address) 2173 iounmap(hw->flash_address); 2174err_sw_init: 2175 igb_clear_interrupt_scheme(adapter); 2176 iounmap(hw->hw_addr); 2177err_ioremap: 2178 free_netdev(netdev); 2179err_alloc_etherdev: 2180 pci_release_selected_regions(pdev, 2181 pci_select_bars(pdev, IORESOURCE_MEM)); 2182err_pci_reg: 2183err_dma: 2184 pci_disable_device(pdev); 2185 return err; 2186} 2187 2188/** 2189 * igb_remove - Device Removal Routine 2190 * @pdev: PCI device information struct 2191 * 2192 * igb_remove is called by the PCI subsystem to alert the driver 2193 * that it should release a PCI device. The could be caused by a 2194 * Hot-Plug event, or because the driver is going to be removed from 2195 * memory. 2196 **/ 2197static void __devexit igb_remove(struct pci_dev *pdev) 2198{ 2199 struct net_device *netdev = pci_get_drvdata(pdev); 2200 struct igb_adapter *adapter = netdev_priv(netdev); 2201 struct e1000_hw *hw = &adapter->hw; 2202 2203 /* 2204 * The watchdog timer may be rescheduled, so explicitly 2205 * disable watchdog from being rescheduled. 2206 */ 2207 set_bit(__IGB_DOWN, &adapter->state); 2208 del_timer_sync(&adapter->watchdog_timer); 2209 del_timer_sync(&adapter->phy_info_timer); 2210 2211 cancel_work_sync(&adapter->reset_task); 2212 cancel_work_sync(&adapter->watchdog_task); 2213 2214#ifdef CONFIG_IGB_DCA 2215 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 2216 dev_info(&pdev->dev, "DCA disabled\n"); 2217 dca_remove_requester(&pdev->dev); 2218 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 2219 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 2220 } 2221#endif 2222 2223 /* Release control of h/w to f/w. If f/w is AMT enabled, this 2224 * would have already happened in close and is redundant. */ 2225 igb_release_hw_control(adapter); 2226 2227 unregister_netdev(netdev); 2228 2229 igb_clear_interrupt_scheme(adapter); 2230 2231#ifdef CONFIG_PCI_IOV 2232 /* reclaim resources allocated to VFs */ 2233 if (adapter->vf_data) { 2234 /* disable iov and allow time for transactions to clear */ 2235 pci_disable_sriov(pdev); 2236 msleep(500); 2237 2238 kfree(adapter->vf_data); 2239 adapter->vf_data = NULL; 2240 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 2241 wrfl(); 2242 msleep(100); 2243 dev_info(&pdev->dev, "IOV Disabled\n"); 2244 } 2245#endif 2246 2247 iounmap(hw->hw_addr); 2248 if (hw->flash_address) 2249 iounmap(hw->flash_address); 2250 pci_release_selected_regions(pdev, 2251 pci_select_bars(pdev, IORESOURCE_MEM)); 2252 2253 free_netdev(netdev); 2254 2255 pci_disable_pcie_error_reporting(pdev); 2256 2257 pci_disable_device(pdev); 2258} 2259 2260/** 2261 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space 2262 * @adapter: board private structure to initialize 2263 * 2264 * This function initializes the vf specific data storage and then attempts to 2265 * allocate the VFs. The reason for ordering it this way is because it is much 2266 * mor expensive time wise to disable SR-IOV than it is to allocate and free 2267 * the memory for the VFs. 
2268 **/ 2269static void __devinit igb_probe_vfs(struct igb_adapter * adapter) 2270{ 2271#ifdef CONFIG_PCI_IOV 2272 struct pci_dev *pdev = adapter->pdev; 2273 2274 if (adapter->vfs_allocated_count) { 2275 adapter->vf_data = kcalloc(adapter->vfs_allocated_count, 2276 sizeof(struct vf_data_storage), 2277 GFP_KERNEL); 2278 /* if allocation failed then we do not support SR-IOV */ 2279 if (!adapter->vf_data) { 2280 adapter->vfs_allocated_count = 0; 2281 dev_err(&pdev->dev, "Unable to allocate memory for VF " 2282 "Data Storage\n"); 2283 } 2284 } 2285 2286 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) { 2287 kfree(adapter->vf_data); 2288 adapter->vf_data = NULL; 2289#endif /* CONFIG_PCI_IOV */ 2290 adapter->vfs_allocated_count = 0; 2291#ifdef CONFIG_PCI_IOV 2292 } else { 2293 unsigned char mac_addr[ETH_ALEN]; 2294 int i; 2295 dev_info(&pdev->dev, "%d vfs allocated\n", 2296 adapter->vfs_allocated_count); 2297 for (i = 0; i < adapter->vfs_allocated_count; i++) { 2298 random_ether_addr(mac_addr); 2299 igb_set_vf_mac(adapter, i, mac_addr); 2300 } 2301 /* DMA Coalescing is not supported in IOV mode. */ 2302 if (adapter->flags & IGB_FLAG_DMAC) 2303 adapter->flags &= ~IGB_FLAG_DMAC; 2304 } 2305#endif /* CONFIG_PCI_IOV */ 2306} 2307 2308 2309/** 2310 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp 2311 * @adapter: board private structure to initialize 2312 * 2313 * igb_init_hw_timer initializes the function pointer and values for the hw 2314 * timer found in hardware. 2315 **/ 2316static void igb_init_hw_timer(struct igb_adapter *adapter) 2317{ 2318 struct e1000_hw *hw = &adapter->hw; 2319 2320 switch (hw->mac.type) { 2321 case e1000_i350: 2322 case e1000_82580: 2323 memset(&adapter->cycles, 0, sizeof(adapter->cycles)); 2324 adapter->cycles.read = igb_read_clock; 2325 adapter->cycles.mask = CLOCKSOURCE_MASK(64); 2326 adapter->cycles.mult = 1; 2327 /* 2328 * The 82580 timesync updates the system timer every 8ns by 8ns 2329 * and the value cannot be shifted. Instead we need to shift 2330 * the registers to generate a 64bit timer value. As a result 2331 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by 2332 * 24 in order to generate a larger value for synchronization. 2333 */ 2334 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT; 2335 /* disable system timer temporarily by setting bit 31 */ 2336 wr32(E1000_TSAUXC, 0x80000000); 2337 wrfl(); 2338 2339 /* Set registers so that rollover occurs soon to test this. */ 2340 wr32(E1000_SYSTIMR, 0x00000000); 2341 wr32(E1000_SYSTIML, 0x80000000); 2342 wr32(E1000_SYSTIMH, 0x000000FF); 2343 wrfl(); 2344 2345 /* enable system timer by clearing bit 31 */ 2346 wr32(E1000_TSAUXC, 0x0); 2347 wrfl(); 2348 2349 timecounter_init(&adapter->clock, 2350 &adapter->cycles, 2351 ktime_to_ns(ktime_get_real())); 2352 /* 2353 * Synchronize our NIC clock against system wall clock. NIC 2354 * time stamp reading requires ~3us per sample, each sample 2355 * was pretty stable even under load => only require 10 2356 * samples for each offset comparison. 2357 */ 2358 memset(&adapter->compare, 0, sizeof(adapter->compare)); 2359 adapter->compare.source = &adapter->clock; 2360 adapter->compare.target = ktime_get_real; 2361 adapter->compare.num_samples = 10; 2362 timecompare_update(&adapter->compare, 0); 2363 break; 2364 case e1000_82576: 2365 /* 2366 * Initialize hardware timer: we keep it running just in case 2367 * that some program needs it later on. 
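 * (Hedged reading of the code below: the TIMINCA write encodes a 16 ns clock period and places the increment value 16 at IGB_82576_TSYNC_SHIFT, matching the cyclecounter shift configured just before it.)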
2368 */ 2369 memset(&adapter->cycles, 0, sizeof(adapter->cycles)); 2370 adapter->cycles.read = igb_read_clock; 2371 adapter->cycles.mask = CLOCKSOURCE_MASK(64); 2372 adapter->cycles.mult = 1; 2373 /** 2374 * Scale the NIC clock cycle by a large factor so that 2375 * relatively small clock corrections can be added or 2376 * subtracted at each clock tick. The drawbacks of a large 2377 * factor are a) that the clock register overflows more quickly 2378 * (not such a big deal) and b) that the increment per tick has 2379 * to fit into 24 bits. As a result we need to use a shift of 2380 * 19 so we can fit a value of 16 into the TIMINCA register. 2381 */ 2382 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT; 2383 wr32(E1000_TIMINCA, 2384 (1 << E1000_TIMINCA_16NS_SHIFT) | 2385 (16 << IGB_82576_TSYNC_SHIFT)); 2386 2387 /* Set registers so that rollover occurs soon to test this. */ 2388 wr32(E1000_SYSTIML, 0x00000000); 2389 wr32(E1000_SYSTIMH, 0xFF800000); 2390 wrfl(); 2391 2392 timecounter_init(&adapter->clock, 2393 &adapter->cycles, 2394 ktime_to_ns(ktime_get_real())); 2395 /* 2396 * Synchronize our NIC clock against system wall clock. NIC 2397 * time stamp reading requires ~3us per sample, each sample 2398 * was pretty stable even under load => only require 10 2399 * samples for each offset comparison. 2400 */ 2401 memset(&adapter->compare, 0, sizeof(adapter->compare)); 2402 adapter->compare.source = &adapter->clock; 2403 adapter->compare.target = ktime_get_real; 2404 adapter->compare.num_samples = 10; 2405 timecompare_update(&adapter->compare, 0); 2406 break; 2407 case e1000_82575: 2408 /* 82575 does not support timesync */ 2409 default: 2410 break; 2411 } 2412 2413} 2414 2415/** 2416 * igb_sw_init - Initialize general software structures (struct igb_adapter) 2417 * @adapter: board private structure to initialize 2418 * 2419 * igb_sw_init initializes the Adapter private data structure. 2420 * Fields are initialized based on PCI device information and 2421 * OS network device settings (MTU size). 
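 * For example, with the default 1500-byte MTU the body below computes max_frame_size = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) = 1522 bytes.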
2422 **/ 2423static int __devinit igb_sw_init(struct igb_adapter *adapter) 2424{ 2425 struct e1000_hw *hw = &adapter->hw; 2426 struct net_device *netdev = adapter->netdev; 2427 struct pci_dev *pdev = adapter->pdev; 2428 2429 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 2430 2431 /* set default ring sizes */ 2432 adapter->tx_ring_count = IGB_DEFAULT_TXD; 2433 adapter->rx_ring_count = IGB_DEFAULT_RXD; 2434 2435 /* set default ITR values */ 2436 adapter->rx_itr_setting = IGB_DEFAULT_ITR; 2437 adapter->tx_itr_setting = IGB_DEFAULT_ITR; 2438 2439 /* set default work limits */ 2440 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; 2441 2442 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 2443 VLAN_HLEN; 2444 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 2445 2446 adapter->node = -1; 2447 2448 spin_lock_init(&adapter->stats64_lock); 2449#ifdef CONFIG_PCI_IOV 2450 switch (hw->mac.type) { 2451 case e1000_82576: 2452 case e1000_i350: 2453 if (max_vfs > 7) { 2454 dev_warn(&pdev->dev, 2455 "Maximum of 7 VFs per PF, using max\n"); 2456 adapter->vfs_allocated_count = 7; 2457 } else 2458 adapter->vfs_allocated_count = max_vfs; 2459 break; 2460 default: 2461 break; 2462 } 2463#endif /* CONFIG_PCI_IOV */ 2464 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus()); 2465 /* i350 cannot do RSS and SR-IOV at the same time */ 2466 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count) 2467 adapter->rss_queues = 1; 2468 2469 /* 2470 * if rss_queues > 4 or vfs are going to be allocated with rss_queues 2471 * then we should combine the queues into a queue pair in order to 2472 * conserve interrupts due to limited supply 2473 */ 2474 if ((adapter->rss_queues > 4) || 2475 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6))) 2476 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 2477 2478 /* This call may decrease the number of queues */ 2479 if (igb_init_interrupt_scheme(adapter)) { 2480 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 2481 return -ENOMEM; 2482 } 2483 2484 igb_probe_vfs(adapter); 2485 2486 /* Explicitly disable IRQ since the NIC can be in any state. */ 2487 igb_irq_disable(adapter); 2488 2489 if (hw->mac.type == e1000_i350) 2490 adapter->flags &= ~IGB_FLAG_DMAC; 2491 2492 set_bit(__IGB_DOWN, &adapter->state); 2493 return 0; 2494} 2495 2496/** 2497 * igb_open - Called when a network interface is made active 2498 * @netdev: network interface device structure 2499 * 2500 * Returns 0 on success, negative value on failure 2501 * 2502 * The open entry point is called when a network interface is made 2503 * active by the system (IFF_UP). At this point all resources needed 2504 * for transmit and receive operations are allocated, the interrupt 2505 * handler is registered with the OS, the watchdog timer is started, 2506 * and the stack is notified that the interface is ready. 
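 * (On failure the error labels below unwind in reverse order: a failed IRQ request releases hardware control and powers down the link before the Rx and then Tx resources are freed.)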
2507 **/ 2508static int igb_open(struct net_device *netdev) 2509{ 2510 struct igb_adapter *adapter = netdev_priv(netdev); 2511 struct e1000_hw *hw = &adapter->hw; 2512 int err; 2513 int i; 2514 2515 /* disallow open during test */ 2516 if (test_bit(__IGB_TESTING, &adapter->state)) 2517 return -EBUSY; 2518 2519 netif_carrier_off(netdev); 2520 2521 /* allocate transmit descriptors */ 2522 err = igb_setup_all_tx_resources(adapter); 2523 if (err) 2524 goto err_setup_tx; 2525 2526 /* allocate receive descriptors */ 2527 err = igb_setup_all_rx_resources(adapter); 2528 if (err) 2529 goto err_setup_rx; 2530 2531 igb_power_up_link(adapter); 2532 2533 /* before we allocate an interrupt, we must be ready to handle it. 2534 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt 2535 * as soon as we call pci_request_irq, so we have to setup our 2536 * clean_rx handler before we do so. */ 2537 igb_configure(adapter); 2538 2539 err = igb_request_irq(adapter); 2540 if (err) 2541 goto err_req_irq; 2542 2543 /* From here on the code is the same as igb_up() */ 2544 clear_bit(__IGB_DOWN, &adapter->state); 2545 2546 for (i = 0; i < adapter->num_q_vectors; i++) 2547 napi_enable(&(adapter->q_vector[i]->napi)); 2548 2549 /* Clear any pending interrupts. */ 2550 rd32(E1000_ICR); 2551 2552 igb_irq_enable(adapter); 2553 2554 /* notify VFs that reset has been completed */ 2555 if (adapter->vfs_allocated_count) { 2556 u32 reg_data = rd32(E1000_CTRL_EXT); 2557 reg_data |= E1000_CTRL_EXT_PFRSTD; 2558 wr32(E1000_CTRL_EXT, reg_data); 2559 } 2560 2561 netif_tx_start_all_queues(netdev); 2562 2563 /* start the watchdog. */ 2564 hw->mac.get_link_status = 1; 2565 schedule_work(&adapter->watchdog_task); 2566 2567 return 0; 2568 2569err_req_irq: 2570 igb_release_hw_control(adapter); 2571 igb_power_down_link(adapter); 2572 igb_free_all_rx_resources(adapter); 2573err_setup_rx: 2574 igb_free_all_tx_resources(adapter); 2575err_setup_tx: 2576 igb_reset(adapter); 2577 2578 return err; 2579} 2580 2581/** 2582 * igb_close - Disables a network interface 2583 * @netdev: network interface device structure 2584 * 2585 * Returns 0, this is not allowed to fail 2586 * 2587 * The close entry point is called when an interface is de-activated 2588 * by the OS. The hardware is still under the driver's control, but 2589 * needs to be disabled. A global MAC reset is issued to stop the 2590 * hardware, and all transmit and receive resources are freed. 
2591 **/ 2592static int igb_close(struct net_device *netdev) 2593{ 2594 struct igb_adapter *adapter = netdev_priv(netdev); 2595 2596 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); 2597 igb_down(adapter); 2598 2599 igb_free_irq(adapter); 2600 2601 igb_free_all_tx_resources(adapter); 2602 igb_free_all_rx_resources(adapter); 2603 2604 return 0; 2605} 2606 2607/** 2608 * igb_setup_tx_resources - allocate Tx resources (Descriptors) 2609 * @tx_ring: tx descriptor ring (for a specific queue) to setup 2610 * 2611 * Return 0 on success, negative on failure 2612 **/ 2613int igb_setup_tx_resources(struct igb_ring *tx_ring) 2614{ 2615 struct device *dev = tx_ring->dev; 2616 int orig_node = dev_to_node(dev); 2617 int size; 2618 2619 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 2620 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); 2621 if (!tx_ring->tx_buffer_info) 2622 tx_ring->tx_buffer_info = vzalloc(size); 2623 if (!tx_ring->tx_buffer_info) 2624 goto err; 2625 2626 /* round up to nearest 4K */ 2627 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); 2628 tx_ring->size = ALIGN(tx_ring->size, 4096); 2629 2630 set_dev_node(dev, tx_ring->numa_node); 2631 tx_ring->desc = dma_alloc_coherent(dev, 2632 tx_ring->size, 2633 &tx_ring->dma, 2634 GFP_KERNEL); 2635 set_dev_node(dev, orig_node); 2636 if (!tx_ring->desc) 2637 tx_ring->desc = dma_alloc_coherent(dev, 2638 tx_ring->size, 2639 &tx_ring->dma, 2640 GFP_KERNEL); 2641 2642 if (!tx_ring->desc) 2643 goto err; 2644 2645 tx_ring->next_to_use = 0; 2646 tx_ring->next_to_clean = 0; 2647 2648 return 0; 2649 2650err: 2651 vfree(tx_ring->tx_buffer_info); 2652 dev_err(dev, 2653 "Unable to allocate memory for the transmit descriptor ring\n"); 2654 return -ENOMEM; 2655} 2656 2657/** 2658 * igb_setup_all_tx_resources - wrapper to allocate Tx resources 2659 * (Descriptors) for all queues 2660 * @adapter: board private structure 2661 * 2662 * Return 0 on success, negative on failure 2663 **/ 2664static int igb_setup_all_tx_resources(struct igb_adapter *adapter) 2665{ 2666 struct pci_dev *pdev = adapter->pdev; 2667 int i, err = 0; 2668 2669 for (i = 0; i < adapter->num_tx_queues; i++) { 2670 err = igb_setup_tx_resources(adapter->tx_ring[i]); 2671 if (err) { 2672 dev_err(&pdev->dev, 2673 "Allocation for Tx Queue %u failed\n", i); 2674 for (i--; i >= 0; i--) 2675 igb_free_tx_resources(adapter->tx_ring[i]); 2676 break; 2677 } 2678 } 2679 2680 return err; 2681} 2682 2683/** 2684 * igb_setup_tctl - configure the transmit control registers 2685 * @adapter: Board private structure 2686 **/ 2687void igb_setup_tctl(struct igb_adapter *adapter) 2688{ 2689 struct e1000_hw *hw = &adapter->hw; 2690 u32 tctl; 2691 2692 /* disable queue 0 which is enabled by default on 82575 and 82576 */ 2693 wr32(E1000_TXDCTL(0), 0); 2694 2695 /* Program the Transmit Control Register */ 2696 tctl = rd32(E1000_TCTL); 2697 tctl &= ~E1000_TCTL_CT; 2698 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | 2699 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); 2700 2701 igb_config_collision_dist(hw); 2702 2703 /* Enable transmits */ 2704 tctl |= E1000_TCTL_EN; 2705 2706 wr32(E1000_TCTL, tctl); 2707} 2708 2709/** 2710 * igb_configure_tx_ring - Configure transmit ring after Reset 2711 * @adapter: board private structure 2712 * @ring: tx ring to configure 2713 * 2714 * Configure a transmit ring after a reset. 
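 * (Sizing sketch, assuming the driver default of 256 descriptors: the TDLEN write below becomes 256 * sizeof(union e1000_adv_tx_desc) = 256 * 16 = 4096 bytes.)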
2715 **/ 2716void igb_configure_tx_ring(struct igb_adapter *adapter, 2717 struct igb_ring *ring) 2718{ 2719 struct e1000_hw *hw = &adapter->hw; 2720 u32 txdctl = 0; 2721 u64 tdba = ring->dma; 2722 int reg_idx = ring->reg_idx; 2723 2724 /* disable the queue */ 2725 wr32(E1000_TXDCTL(reg_idx), 0); 2726 wrfl(); 2727 mdelay(10); 2728 2729 wr32(E1000_TDLEN(reg_idx), 2730 ring->count * sizeof(union e1000_adv_tx_desc)); 2731 wr32(E1000_TDBAL(reg_idx), 2732 tdba & 0x00000000ffffffffULL); 2733 wr32(E1000_TDBAH(reg_idx), tdba >> 32); 2734 2735 ring->tail = hw->hw_addr + E1000_TDT(reg_idx); 2736 wr32(E1000_TDH(reg_idx), 0); 2737 writel(0, ring->tail); 2738 2739 txdctl |= IGB_TX_PTHRESH; 2740 txdctl |= IGB_TX_HTHRESH << 8; 2741 txdctl |= IGB_TX_WTHRESH << 16; 2742 2743 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2744 wr32(E1000_TXDCTL(reg_idx), txdctl); 2745} 2746 2747/** 2748 * igb_configure_tx - Configure transmit Unit after Reset 2749 * @adapter: board private structure 2750 * 2751 * Configure the Tx unit of the MAC after a reset. 2752 **/ 2753static void igb_configure_tx(struct igb_adapter *adapter) 2754{ 2755 int i; 2756 2757 for (i = 0; i < adapter->num_tx_queues; i++) 2758 igb_configure_tx_ring(adapter, adapter->tx_ring[i]); 2759} 2760 2761/** 2762 * igb_setup_rx_resources - allocate Rx resources (Descriptors) 2763 * @rx_ring: rx descriptor ring (for a specific queue) to setup 2764 * 2765 * Returns 0 on success, negative on failure 2766 **/ 2767int igb_setup_rx_resources(struct igb_ring *rx_ring) 2768{ 2769 struct device *dev = rx_ring->dev; 2770 int orig_node = dev_to_node(dev); 2771 int size, desc_len; 2772 2773 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 2774 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); 2775 if (!rx_ring->rx_buffer_info) 2776 rx_ring->rx_buffer_info = vzalloc(size); 2777 if (!rx_ring->rx_buffer_info) 2778 goto err; 2779 2780 desc_len = sizeof(union e1000_adv_rx_desc); 2781 2782 /* Round up to nearest 4K */ 2783 rx_ring->size = rx_ring->count * desc_len; 2784 rx_ring->size = ALIGN(rx_ring->size, 4096); 2785 2786 set_dev_node(dev, rx_ring->numa_node); 2787 rx_ring->desc = dma_alloc_coherent(dev, 2788 rx_ring->size, 2789 &rx_ring->dma, 2790 GFP_KERNEL); 2791 set_dev_node(dev, orig_node); 2792 if (!rx_ring->desc) 2793 rx_ring->desc = dma_alloc_coherent(dev, 2794 rx_ring->size, 2795 &rx_ring->dma, 2796 GFP_KERNEL); 2797 2798 if (!rx_ring->desc) 2799 goto err; 2800 2801 rx_ring->next_to_clean = 0; 2802 rx_ring->next_to_use = 0; 2803 2804 return 0; 2805 2806err: 2807 vfree(rx_ring->rx_buffer_info); 2808 rx_ring->rx_buffer_info = NULL; 2809 dev_err(dev, "Unable to allocate memory for the receive descriptor" 2810 " ring\n"); 2811 return -ENOMEM; 2812} 2813 2814/** 2815 * igb_setup_all_rx_resources - wrapper to allocate Rx resources 2816 * (Descriptors) for all queues 2817 * @adapter: board private structure 2818 * 2819 * Return 0 on success, negative on failure 2820 **/ 2821static int igb_setup_all_rx_resources(struct igb_adapter *adapter) 2822{ 2823 struct pci_dev *pdev = adapter->pdev; 2824 int i, err = 0; 2825 2826 for (i = 0; i < adapter->num_rx_queues; i++) { 2827 err = igb_setup_rx_resources(adapter->rx_ring[i]); 2828 if (err) { 2829 dev_err(&pdev->dev, 2830 "Allocation for Rx Queue %u failed\n", i); 2831 for (i--; i >= 0; i--) 2832 igb_free_rx_resources(adapter->rx_ring[i]); 2833 break; 2834 } 2835 } 2836 2837 return err; 2838} 2839 2840/** 2841 * igb_setup_mrqc - configure the multiple receive queue control registers 2842 * @adapter: Board private 
structure 2843 **/ 2844static void igb_setup_mrqc(struct igb_adapter *adapter) 2845{ 2846 struct e1000_hw *hw = &adapter->hw; 2847 u32 mrqc, rxcsum; 2848 u32 j, num_rx_queues, shift = 0, shift2 = 0; 2849 union e1000_reta { 2850 u32 dword; 2851 u8 bytes[4]; 2852 } reta; 2853 static const u8 rsshash[40] = { 2854 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 2855 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 2856 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 2857 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; 2858 2859 /* Fill out hash function seeds */ 2860 for (j = 0; j < 10; j++) { 2861 u32 rsskey = rsshash[(j * 4)]; 2862 rsskey |= rsshash[(j * 4) + 1] << 8; 2863 rsskey |= rsshash[(j * 4) + 2] << 16; 2864 rsskey |= rsshash[(j * 4) + 3] << 24; 2865 array_wr32(E1000_RSSRK(0), j, rsskey); 2866 } 2867 2868 num_rx_queues = adapter->rss_queues; 2869 2870 if (adapter->vfs_allocated_count) { 2871 /* 82575 and 82576 supports 2 RSS queues for VMDq */ 2872 switch (hw->mac.type) { 2873 case e1000_i350: 2874 case e1000_82580: 2875 num_rx_queues = 1; 2876 shift = 0; 2877 break; 2878 case e1000_82576: 2879 shift = 3; 2880 num_rx_queues = 2; 2881 break; 2882 case e1000_82575: 2883 shift = 2; 2884 shift2 = 6; 2885 default: 2886 break; 2887 } 2888 } else { 2889 if (hw->mac.type == e1000_82575) 2890 shift = 6; 2891 } 2892 2893 for (j = 0; j < (32 * 4); j++) { 2894 reta.bytes[j & 3] = (j % num_rx_queues) << shift; 2895 if (shift2) 2896 reta.bytes[j & 3] |= num_rx_queues << shift2; 2897 if ((j & 3) == 3) 2898 wr32(E1000_RETA(j >> 2), reta.dword); 2899 } 2900 2901 /* 2902 * Disable raw packet checksumming so that RSS hash is placed in 2903 * descriptor on writeback. No need to enable TCP/UDP/IP checksum 2904 * offloads as they are enabled by default 2905 */ 2906 rxcsum = rd32(E1000_RXCSUM); 2907 rxcsum |= E1000_RXCSUM_PCSD; 2908 2909 if (adapter->hw.mac.type >= e1000_82576) 2910 /* Enable Receive Checksum Offload for SCTP */ 2911 rxcsum |= E1000_RXCSUM_CRCOFL; 2912 2913 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 2914 wr32(E1000_RXCSUM, rxcsum); 2915 2916 /* If VMDq is enabled then we set the appropriate mode for that, else 2917 * we default to RSS so that an RSS hash is calculated per packet even 2918 * if we are only using one queue */ 2919 if (adapter->vfs_allocated_count) { 2920 if (hw->mac.type > e1000_82575) { 2921 /* Set the default pool for the PF's first queue */ 2922 u32 vtctl = rd32(E1000_VT_CTL); 2923 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | 2924 E1000_VT_CTL_DISABLE_DEF_POOL); 2925 vtctl |= adapter->vfs_allocated_count << 2926 E1000_VT_CTL_DEFAULT_POOL_SHIFT; 2927 wr32(E1000_VT_CTL, vtctl); 2928 } 2929 if (adapter->rss_queues > 1) 2930 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q; 2931 else 2932 mrqc = E1000_MRQC_ENABLE_VMDQ; 2933 } else { 2934 mrqc = E1000_MRQC_ENABLE_RSS_4Q; 2935 } 2936 igb_vmm_control(adapter); 2937 2938 /* 2939 * Generate RSS hash based on TCP port numbers and/or 2940 * IPv4/v6 src and dst addresses since UDP cannot be 2941 * hashed reliably due to IP fragmentation 2942 */ 2943 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | 2944 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2945 E1000_MRQC_RSS_FIELD_IPV6 | 2946 E1000_MRQC_RSS_FIELD_IPV6_TCP | 2947 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; 2948 2949 wr32(E1000_MRQC, mrqc); 2950} 2951 2952/** 2953 * igb_setup_rctl - configure the receive control registers 2954 * @adapter: Board private structure 2955 **/ 2956void igb_setup_rctl(struct igb_adapter *adapter) 2957{ 2958 struct e1000_hw *hw = 
&adapter->hw; 2959 u32 rctl; 2960 2961 rctl = rd32(E1000_RCTL); 2962 2963 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2964 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); 2965 2966 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | 2967 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2968 2969 /* 2970 * enable stripping of CRC. It's unlikely this will break BMC 2971 * redirection as it did with e1000. Newer features require 2972 * that the HW strips the CRC. 2973 */ 2974 rctl |= E1000_RCTL_SECRC; 2975 2976 /* disable store bad packets and clear size bits. */ 2977 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); 2978 2979 /* enable LPE to prevent packets larger than max_frame_size */ 2980 rctl |= E1000_RCTL_LPE; 2981 2982 /* disable queue 0 to prevent tail write w/o re-config */ 2983 wr32(E1000_RXDCTL(0), 0); 2984 2985 /* Attention!!! For SR-IOV PF driver operations you must enable 2986 * queue drop for all VF and PF queues to prevent head of line blocking 2987 * if an un-trusted VF does not provide descriptors to hardware. 2988 */ 2989 if (adapter->vfs_allocated_count) { 2990 /* set all queue drop enable bits */ 2991 wr32(E1000_QDE, ALL_QUEUES); 2992 } 2993 2994 wr32(E1000_RCTL, rctl); 2995} 2996 2997static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, 2998 int vfn) 2999{ 3000 struct e1000_hw *hw = &adapter->hw; 3001 u32 vmolr; 3002 3003 /* if it isn't the PF check to see if VFs are enabled and 3004 * increase the size to support vlan tags */ 3005 if (vfn < adapter->vfs_allocated_count && 3006 adapter->vf_data[vfn].vlans_enabled) 3007 size += VLAN_TAG_SIZE; 3008 3009 vmolr = rd32(E1000_VMOLR(vfn)); 3010 vmolr &= ~E1000_VMOLR_RLPML_MASK; 3011 vmolr |= size | E1000_VMOLR_LPE; 3012 wr32(E1000_VMOLR(vfn), vmolr); 3013 3014 return 0; 3015} 3016 3017/** 3018 * igb_rlpml_set - set maximum receive packet size 3019 * @adapter: board private structure 3020 * 3021 * Configure maximum receivable packet size. 3022 **/ 3023static void igb_rlpml_set(struct igb_adapter *adapter) 3024{ 3025 u32 max_frame_size = adapter->max_frame_size; 3026 struct e1000_hw *hw = &adapter->hw; 3027 u16 pf_id = adapter->vfs_allocated_count; 3028 3029 if (pf_id) { 3030 igb_set_vf_rlpml(adapter, max_frame_size, pf_id); 3031 /* 3032 * If we're in VMDQ or SR-IOV mode, then set global RLPML 3033 * to our max jumbo frame size, in case we need to enable 3034 * jumbo frames on one of the rings later. 3035 * This will not pass over-length frames into the default 3036 * queue because it's gated by the VMOLR.RLPML. 
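 * (Assumption: MAX_JUMBO_FRAME_SIZE is the adapter-wide jumbo limit; each pool remains clamped by its own VMOLR.RLPML value as set in igb_set_vf_rlpml() above.)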
3037 */ 3038 max_frame_size = MAX_JUMBO_FRAME_SIZE; 3039 } 3040 3041 wr32(E1000_RLPML, max_frame_size); 3042} 3043 3044static inline void igb_set_vmolr(struct igb_adapter *adapter, 3045 int vfn, bool aupe) 3046{ 3047 struct e1000_hw *hw = &adapter->hw; 3048 u32 vmolr; 3049 3050 /* 3051 * This register exists only on 82576 and newer so if we are older then 3052 * we should exit and do nothing 3053 */ 3054 if (hw->mac.type < e1000_82576) 3055 return; 3056 3057 vmolr = rd32(E1000_VMOLR(vfn)); 3058 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ 3059 if (aupe) 3060 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ 3061 else 3062 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ 3063 3064 /* clear all bits that might not be set */ 3065 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE); 3066 3067 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) 3068 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ 3069 /* 3070 * for VMDq only allow the VFs and pool 0 to accept broadcast and 3071 * multicast packets 3072 */ 3073 if (vfn <= adapter->vfs_allocated_count) 3074 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ 3075 3076 wr32(E1000_VMOLR(vfn), vmolr); 3077} 3078 3079/** 3080 * igb_configure_rx_ring - Configure a receive ring after Reset 3081 * @adapter: board private structure 3082 * @ring: receive ring to be configured 3083 * 3084 * Configure the Rx unit of the MAC after a reset. 3085 **/ 3086void igb_configure_rx_ring(struct igb_adapter *adapter, 3087 struct igb_ring *ring) 3088{ 3089 struct e1000_hw *hw = &adapter->hw; 3090 u64 rdba = ring->dma; 3091 int reg_idx = ring->reg_idx; 3092 u32 srrctl = 0, rxdctl = 0; 3093 3094 /* disable the queue */ 3095 wr32(E1000_RXDCTL(reg_idx), 0); 3096 3097 /* Set DMA base address registers */ 3098 wr32(E1000_RDBAL(reg_idx), 3099 rdba & 0x00000000ffffffffULL); 3100 wr32(E1000_RDBAH(reg_idx), rdba >> 32); 3101 wr32(E1000_RDLEN(reg_idx), 3102 ring->count * sizeof(union e1000_adv_rx_desc)); 3103 3104 /* initialize head and tail */ 3105 ring->tail = hw->hw_addr + E1000_RDT(reg_idx); 3106 wr32(E1000_RDH(reg_idx), 0); 3107 writel(0, ring->tail); 3108 3109 /* set descriptor configuration */ 3110 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 3111#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384 3112 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 3113#else 3114 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; 3115#endif 3116 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 3117 if (hw->mac.type >= e1000_82580) 3118 srrctl |= E1000_SRRCTL_TIMESTAMP; 3119 /* Only set Drop Enable if we are supporting multiple queues */ 3120 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) 3121 srrctl |= E1000_SRRCTL_DROP_EN; 3122 3123 wr32(E1000_SRRCTL(reg_idx), srrctl); 3124 3125 /* set filtering for VMDQ pools */ 3126 igb_set_vmolr(adapter, reg_idx & 0x7, true); 3127 3128 rxdctl |= IGB_RX_PTHRESH; 3129 rxdctl |= IGB_RX_HTHRESH << 8; 3130 rxdctl |= IGB_RX_WTHRESH << 16; 3131 3132 /* enable receive descriptor fetching */ 3133 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 3134 wr32(E1000_RXDCTL(reg_idx), rxdctl); 3135} 3136 3137/** 3138 * igb_configure_rx - Configure receive Unit after Reset 3139 * @adapter: board private structure 3140 * 3141 * Configure the Rx unit of the MAC after a reset. 
3142 **/ 3143static void igb_configure_rx(struct igb_adapter *adapter) 3144{ 3145 int i; 3146 3147 /* set UTA to appropriate mode */ 3148 igb_set_uta(adapter); 3149 3150 /* set the correct pool for the PF default MAC address in entry 0 */ 3151 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, 3152 adapter->vfs_allocated_count); 3153 3154 /* Setup the HW Rx Head and Tail Descriptor Pointers and 3155 * the Base and Length of the Rx Descriptor Ring */ 3156 for (i = 0; i < adapter->num_rx_queues; i++) 3157 igb_configure_rx_ring(adapter, adapter->rx_ring[i]); 3158} 3159 3160/** 3161 * igb_free_tx_resources - Free Tx Resources per Queue 3162 * @tx_ring: Tx descriptor ring for a specific queue 3163 * 3164 * Free all transmit software resources 3165 **/ 3166void igb_free_tx_resources(struct igb_ring *tx_ring) 3167{ 3168 igb_clean_tx_ring(tx_ring); 3169 3170 vfree(tx_ring->tx_buffer_info); 3171 tx_ring->tx_buffer_info = NULL; 3172 3173 /* if not set, then don't free */ 3174 if (!tx_ring->desc) 3175 return; 3176 3177 dma_free_coherent(tx_ring->dev, tx_ring->size, 3178 tx_ring->desc, tx_ring->dma); 3179 3180 tx_ring->desc = NULL; 3181} 3182 3183/** 3184 * igb_free_all_tx_resources - Free Tx Resources for All Queues 3185 * @adapter: board private structure 3186 * 3187 * Free all transmit software resources 3188 **/ 3189static void igb_free_all_tx_resources(struct igb_adapter *adapter) 3190{ 3191 int i; 3192 3193 for (i = 0; i < adapter->num_tx_queues; i++) 3194 igb_free_tx_resources(adapter->tx_ring[i]); 3195} 3196 3197void igb_unmap_and_free_tx_resource(struct igb_ring *ring, 3198 struct igb_tx_buffer *tx_buffer) 3199{ 3200 if (tx_buffer->skb) { 3201 dev_kfree_skb_any(tx_buffer->skb); 3202 if (tx_buffer->dma) 3203 dma_unmap_single(ring->dev, 3204 tx_buffer->dma, 3205 tx_buffer->length, 3206 DMA_TO_DEVICE); 3207 } else if (tx_buffer->dma) { 3208 dma_unmap_page(ring->dev, 3209 tx_buffer->dma, 3210 tx_buffer->length, 3211 DMA_TO_DEVICE); 3212 } 3213 tx_buffer->next_to_watch = NULL; 3214 tx_buffer->skb = NULL; 3215 tx_buffer->dma = 0; 3216 /* buffer_info must be completely set up in the transmit path */ 3217} 3218 3219/** 3220 * igb_clean_tx_ring - Free Tx Buffers 3221 * @tx_ring: ring to be cleaned 3222 **/ 3223static void igb_clean_tx_ring(struct igb_ring *tx_ring) 3224{ 3225 struct igb_tx_buffer *buffer_info; 3226 unsigned long size; 3227 u16 i; 3228 3229 if (!tx_ring->tx_buffer_info) 3230 return; 3231 /* Free all the Tx ring sk_buffs */ 3232 3233 for (i = 0; i < tx_ring->count; i++) { 3234 buffer_info = &tx_ring->tx_buffer_info[i]; 3235 igb_unmap_and_free_tx_resource(tx_ring, buffer_info); 3236 } 3237 3238 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 3239 memset(tx_ring->tx_buffer_info, 0, size); 3240 3241 /* Zero out the descriptor ring */ 3242 memset(tx_ring->desc, 0, tx_ring->size); 3243 3244 tx_ring->next_to_use = 0; 3245 tx_ring->next_to_clean = 0; 3246} 3247 3248/** 3249 * igb_clean_all_tx_rings - Free Tx Buffers for all queues 3250 * @adapter: board private structure 3251 **/ 3252static void igb_clean_all_tx_rings(struct igb_adapter *adapter) 3253{ 3254 int i; 3255 3256 for (i = 0; i < adapter->num_tx_queues; i++) 3257 igb_clean_tx_ring(adapter->tx_ring[i]); 3258} 3259 3260/** 3261 * igb_free_rx_resources - Free Rx Resources 3262 * @rx_ring: ring to clean the resources from 3263 * 3264 * Free all receive software resources 3265 **/ 3266void igb_free_rx_resources(struct igb_ring *rx_ring) 3267{ 3268 igb_clean_rx_ring(rx_ring); 3269 3270 vfree(rx_ring->rx_buffer_info); 3271 
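/* clear the stale pointer so any repeated free is a harmless no-op */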
rx_ring->rx_buffer_info = NULL; 3272 3273 /* if not set, then don't free */ 3274 if (!rx_ring->desc) 3275 return; 3276 3277 dma_free_coherent(rx_ring->dev, rx_ring->size, 3278 rx_ring->desc, rx_ring->dma); 3279 3280 rx_ring->desc = NULL; 3281} 3282 3283/** 3284 * igb_free_all_rx_resources - Free Rx Resources for All Queues 3285 * @adapter: board private structure 3286 * 3287 * Free all receive software resources 3288 **/ 3289static void igb_free_all_rx_resources(struct igb_adapter *adapter) 3290{ 3291 int i; 3292 3293 for (i = 0; i < adapter->num_rx_queues; i++) 3294 igb_free_rx_resources(adapter->rx_ring[i]); 3295} 3296 3297/** 3298 * igb_clean_rx_ring - Free Rx Buffers per Queue 3299 * @rx_ring: ring to free buffers from 3300 **/ 3301static void igb_clean_rx_ring(struct igb_ring *rx_ring) 3302{ 3303 unsigned long size; 3304 u16 i; 3305 3306 if (!rx_ring->rx_buffer_info) 3307 return; 3308 3309 /* Free all the Rx ring sk_buffs */ 3310 for (i = 0; i < rx_ring->count; i++) { 3311 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 3312 if (buffer_info->dma) { 3313 dma_unmap_single(rx_ring->dev, 3314 buffer_info->dma, 3315 IGB_RX_HDR_LEN, 3316 DMA_FROM_DEVICE); 3317 buffer_info->dma = 0; 3318 } 3319 3320 if (buffer_info->skb) { 3321 dev_kfree_skb(buffer_info->skb); 3322 buffer_info->skb = NULL; 3323 } 3324 if (buffer_info->page_dma) { 3325 dma_unmap_page(rx_ring->dev, 3326 buffer_info->page_dma, 3327 PAGE_SIZE / 2, 3328 DMA_FROM_DEVICE); 3329 buffer_info->page_dma = 0; 3330 } 3331 if (buffer_info->page) { 3332 put_page(buffer_info->page); 3333 buffer_info->page = NULL; 3334 buffer_info->page_offset = 0; 3335 } 3336 } 3337 3338 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 3339 memset(rx_ring->rx_buffer_info, 0, size); 3340 3341 /* Zero out the descriptor ring */ 3342 memset(rx_ring->desc, 0, rx_ring->size); 3343 3344 rx_ring->next_to_clean = 0; 3345 rx_ring->next_to_use = 0; 3346} 3347 3348/** 3349 * igb_clean_all_rx_rings - Free Rx Buffers for all queues 3350 * @adapter: board private structure 3351 **/ 3352static void igb_clean_all_rx_rings(struct igb_adapter *adapter) 3353{ 3354 int i; 3355 3356 for (i = 0; i < adapter->num_rx_queues; i++) 3357 igb_clean_rx_ring(adapter->rx_ring[i]); 3358} 3359 3360/** 3361 * igb_set_mac - Change the Ethernet Address of the NIC 3362 * @netdev: network interface device structure 3363 * @p: pointer to an address structure 3364 * 3365 * Returns 0 on success, negative on failure 3366 **/ 3367static int igb_set_mac(struct net_device *netdev, void *p) 3368{ 3369 struct igb_adapter *adapter = netdev_priv(netdev); 3370 struct e1000_hw *hw = &adapter->hw; 3371 struct sockaddr *addr = p; 3372 3373 if (!is_valid_ether_addr(addr->sa_data)) 3374 return -EADDRNOTAVAIL; 3375 3376 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 3377 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 3378 3379 /* set the correct pool for the new PF MAC address in entry 0 */ 3380 igb_rar_set_qsel(adapter, hw->mac.addr, 0, 3381 adapter->vfs_allocated_count); 3382 3383 return 0; 3384} 3385 3386/** 3387 * igb_write_mc_addr_list - write multicast addresses to MTA 3388 * @netdev: network interface device structure 3389 * 3390 * Writes multicast address list to the MTA hash table. 
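 * (Note: the MTA is a hash-based filter, so distinct multicast addresses can alias to the same bucket and filtering is not exact.)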
3391 * Returns: -ENOMEM on failure 3392 * 0 on no addresses written 3393 * X on writing X addresses to MTA 3394 **/ 3395static int igb_write_mc_addr_list(struct net_device *netdev) 3396{ 3397 struct igb_adapter *adapter = netdev_priv(netdev); 3398 struct e1000_hw *hw = &adapter->hw; 3399 struct netdev_hw_addr *ha; 3400 u8 *mta_list; 3401 int i; 3402 3403 if (netdev_mc_empty(netdev)) { 3404 /* nothing to program, so clear mc list */ 3405 igb_update_mc_addr_list(hw, NULL, 0); 3406 igb_restore_vf_multicasts(adapter); 3407 return 0; 3408 } 3409 3410 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); 3411 if (!mta_list) 3412 return -ENOMEM; 3413 3414 /* The shared function expects a packed array of only addresses. */ 3415 i = 0; 3416 netdev_for_each_mc_addr(ha, netdev) 3417 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 3418 3419 igb_update_mc_addr_list(hw, mta_list, i); 3420 kfree(mta_list); 3421 3422 return netdev_mc_count(netdev); 3423} 3424 3425/** 3426 * igb_write_uc_addr_list - write unicast addresses to RAR table 3427 * @netdev: network interface device structure 3428 * 3429 * Writes unicast address list to the RAR table. 3430 * Returns: -ENOMEM on failure/insufficient address space 3431 * 0 on no addresses written 3432 * X on writing X addresses to the RAR table 3433 **/ 3434static int igb_write_uc_addr_list(struct net_device *netdev) 3435{ 3436 struct igb_adapter *adapter = netdev_priv(netdev); 3437 struct e1000_hw *hw = &adapter->hw; 3438 unsigned int vfn = adapter->vfs_allocated_count; 3439 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1); 3440 int count = 0; 3441 3442 /* return ENOMEM indicating insufficient memory for addresses */ 3443 if (netdev_uc_count(netdev) > rar_entries) 3444 return -ENOMEM; 3445 3446 if (!netdev_uc_empty(netdev) && rar_entries) { 3447 struct netdev_hw_addr *ha; 3448 3449 netdev_for_each_uc_addr(ha, netdev) { 3450 if (!rar_entries) 3451 break; 3452 igb_rar_set_qsel(adapter, ha->addr, 3453 rar_entries--, 3454 vfn); 3455 count++; 3456 } 3457 } 3458 /* write the addresses in reverse order to avoid write combining */ 3459 for (; rar_entries > 0 ; rar_entries--) { 3460 wr32(E1000_RAH(rar_entries), 0); 3461 wr32(E1000_RAL(rar_entries), 0); 3462 } 3463 wrfl(); 3464 3465 return count; 3466} 3467 3468/** 3469 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 3470 * @netdev: network interface device structure 3471 * 3472 * The set_rx_mode entry point is called whenever the unicast or multicast 3473 * address lists or the network interface flags are updated. This routine is 3474 * responsible for configuring the hardware for proper unicast, multicast, 3475 * promiscuous mode, and all-multi behavior. 
3476 **/ 3477static void igb_set_rx_mode(struct net_device *netdev) 3478{ 3479 struct igb_adapter *adapter = netdev_priv(netdev); 3480 struct e1000_hw *hw = &adapter->hw; 3481 unsigned int vfn = adapter->vfs_allocated_count; 3482 u32 rctl, vmolr = 0; 3483 int count; 3484 3485 /* Check for Promiscuous and All Multicast modes */ 3486 rctl = rd32(E1000_RCTL); 3487 3488 /* clear the effected bits */ 3489 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); 3490 3491 if (netdev->flags & IFF_PROMISC) { 3492 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 3493 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); 3494 } else { 3495 if (netdev->flags & IFF_ALLMULTI) { 3496 rctl |= E1000_RCTL_MPE; 3497 vmolr |= E1000_VMOLR_MPME; 3498 } else { 3499 /* 3500 * Write addresses to the MTA, if the attempt fails 3501 * then we should just turn on promiscuous mode so 3502 * that we can at least receive multicast traffic 3503 */ 3504 count = igb_write_mc_addr_list(netdev); 3505 if (count < 0) { 3506 rctl |= E1000_RCTL_MPE; 3507 vmolr |= E1000_VMOLR_MPME; 3508 } else if (count) { 3509 vmolr |= E1000_VMOLR_ROMPE; 3510 } 3511 } 3512 /* 3513 * Write addresses to available RAR registers, if there is not 3514 * sufficient space to store all the addresses then enable 3515 * unicast promiscuous mode 3516 */ 3517 count = igb_write_uc_addr_list(netdev); 3518 if (count < 0) { 3519 rctl |= E1000_RCTL_UPE; 3520 vmolr |= E1000_VMOLR_ROPE; 3521 } 3522 rctl |= E1000_RCTL_VFE; 3523 } 3524 wr32(E1000_RCTL, rctl); 3525 3526 /* 3527 * In order to support SR-IOV and eventually VMDq it is necessary to set 3528 * the VMOLR to enable the appropriate modes. Without this workaround 3529 * we will have issues with VLAN tag stripping not being done for frames 3530 * that are only arriving because we are the default pool 3531 */ 3532 if (hw->mac.type < e1000_82576) 3533 return; 3534 3535 vmolr |= rd32(E1000_VMOLR(vfn)) & 3536 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); 3537 wr32(E1000_VMOLR(vfn), vmolr); 3538 igb_restore_vf_multicasts(adapter); 3539} 3540 3541static void igb_check_wvbr(struct igb_adapter *adapter) 3542{ 3543 struct e1000_hw *hw = &adapter->hw; 3544 u32 wvbr = 0; 3545 3546 switch (hw->mac.type) { 3547 case e1000_82576: 3548 case e1000_i350: 3549 if (!(wvbr = rd32(E1000_WVBR))) 3550 return; 3551 break; 3552 default: 3553 break; 3554 } 3555 3556 adapter->wvbr |= wvbr; 3557} 3558 3559#define IGB_STAGGERED_QUEUE_OFFSET 8 3560 3561static void igb_spoof_check(struct igb_adapter *adapter) 3562{ 3563 int j; 3564 3565 if (!adapter->wvbr) 3566 return; 3567 3568 for(j = 0; j < adapter->vfs_allocated_count; j++) { 3569 if (adapter->wvbr & (1 << j) || 3570 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { 3571 dev_warn(&adapter->pdev->dev, 3572 "Spoof event(s) detected on VF %d\n", j); 3573 adapter->wvbr &= 3574 ~((1 << j) | 3575 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); 3576 } 3577 } 3578} 3579 3580/* Need to wait a few seconds after link up to get diagnostic information from 3581 * the phy */ 3582static void igb_update_phy_info(unsigned long data) 3583{ 3584 struct igb_adapter *adapter = (struct igb_adapter *) data; 3585 igb_get_phy_info(&adapter->hw); 3586} 3587 3588/** 3589 * igb_has_link - check shared code for link and determine up/down 3590 * @adapter: pointer to driver private info 3591 **/ 3592bool igb_has_link(struct igb_adapter *adapter) 3593{ 3594 struct e1000_hw *hw = &adapter->hw; 3595 bool link_active = false; 3596 s32 ret_val = 0; 3597 3598 /* get_link_status is set on LSC (link status) 
interrupt or 3599 * rx sequence error interrupt. get_link_status will stay 3600 * false until the e1000_check_for_link establishes link 3601 * for copper adapters ONLY 3602 */ 3603 switch (hw->phy.media_type) { 3604 case e1000_media_type_copper: 3605 if (hw->mac.get_link_status) { 3606 ret_val = hw->mac.ops.check_for_link(hw); 3607 link_active = !hw->mac.get_link_status; 3608 } else { 3609 link_active = true; 3610 } 3611 break; 3612 case e1000_media_type_internal_serdes: 3613 ret_val = hw->mac.ops.check_for_link(hw); 3614 link_active = hw->mac.serdes_has_link; 3615 break; 3616 default: 3617 case e1000_media_type_unknown: 3618 break; 3619 } 3620 3621 return link_active; 3622} 3623 3624static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) 3625{ 3626 bool ret = false; 3627 u32 ctrl_ext, thstat; 3628 3629 /* check for thermal sensor event on i350, copper only */ 3630 if (hw->mac.type == e1000_i350) { 3631 thstat = rd32(E1000_THSTAT); 3632 ctrl_ext = rd32(E1000_CTRL_EXT); 3633 3634 if ((hw->phy.media_type == e1000_media_type_copper) && 3635 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) { 3636 ret = !!(thstat & event); 3637 } 3638 } 3639 3640 return ret; 3641} 3642 3643/** 3644 * igb_watchdog - Timer Call-back 3645 * @data: pointer to adapter cast into an unsigned long 3646 **/ 3647static void igb_watchdog(unsigned long data) 3648{ 3649 struct igb_adapter *adapter = (struct igb_adapter *)data; 3650 /* Do the rest outside of interrupt context */ 3651 schedule_work(&adapter->watchdog_task); 3652} 3653 3654static void igb_watchdog_task(struct work_struct *work) 3655{ 3656 struct igb_adapter *adapter = container_of(work, 3657 struct igb_adapter, 3658 watchdog_task); 3659 struct e1000_hw *hw = &adapter->hw; 3660 struct net_device *netdev = adapter->netdev; 3661 u32 link; 3662 int i; 3663 3664 link = igb_has_link(adapter); 3665 if (link) { 3666 if (!netif_carrier_ok(netdev)) { 3667 u32 ctrl; 3668 hw->mac.ops.get_speed_and_duplex(hw, 3669 &adapter->link_speed, 3670 &adapter->link_duplex); 3671 3672 ctrl = rd32(E1000_CTRL); 3673 /* Links status message must follow this format */ 3674 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, " 3675 "Flow Control: %s\n", 3676 netdev->name, 3677 adapter->link_speed, 3678 adapter->link_duplex == FULL_DUPLEX ? 3679 "Full Duplex" : "Half Duplex", 3680 ((ctrl & E1000_CTRL_TFCE) && 3681 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" : 3682 ((ctrl & E1000_CTRL_RFCE) ? "RX" : 3683 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None"))); 3684 3685 /* check for thermal sensor event */ 3686 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) { 3687 printk(KERN_INFO "igb: %s The network adapter " 3688 "link speed was downshifted " 3689 "because it overheated.\n", 3690 netdev->name); 3691 } 3692 3693 /* adjust timeout factor according to speed/duplex */ 3694 adapter->tx_timeout_factor = 1; 3695 switch (adapter->link_speed) { 3696 case SPEED_10: 3697 adapter->tx_timeout_factor = 14; 3698 break; 3699 case SPEED_100: 3700 /* maybe add some timeout factor ? 
*/ 3701 break; 3702 } 3703 3704 netif_carrier_on(netdev); 3705 3706 igb_ping_all_vfs(adapter); 3707 igb_check_vf_rate_limit(adapter); 3708 3709 /* link state has changed, schedule phy info update */ 3710 if (!test_bit(__IGB_DOWN, &adapter->state)) 3711 mod_timer(&adapter->phy_info_timer, 3712 round_jiffies(jiffies + 2 * HZ)); 3713 } 3714 } else { 3715 if (netif_carrier_ok(netdev)) { 3716 adapter->link_speed = 0; 3717 adapter->link_duplex = 0; 3718 3719 /* check for thermal sensor event */ 3720 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) { 3721 printk(KERN_ERR "igb: %s The network adapter " 3722 "was stopped because it " 3723 "overheated.\n", 3724 netdev->name); 3725 } 3726 3727 /* Links status message must follow this format */ 3728 printk(KERN_INFO "igb: %s NIC Link is Down\n", 3729 netdev->name); 3730 netif_carrier_off(netdev); 3731 3732 igb_ping_all_vfs(adapter); 3733 3734 /* link state has changed, schedule phy info update */ 3735 if (!test_bit(__IGB_DOWN, &adapter->state)) 3736 mod_timer(&adapter->phy_info_timer, 3737 round_jiffies(jiffies + 2 * HZ)); 3738 } 3739 } 3740 3741 spin_lock(&adapter->stats64_lock); 3742 igb_update_stats(adapter, &adapter->stats64); 3743 spin_unlock(&adapter->stats64_lock); 3744 3745 for (i = 0; i < adapter->num_tx_queues; i++) { 3746 struct igb_ring *tx_ring = adapter->tx_ring[i]; 3747 if (!netif_carrier_ok(netdev)) { 3748 /* We've lost link, so the controller stops DMA, 3749 * but we've got queued Tx work that's never going 3750 * to get done, so reset controller to flush Tx. 3751 * (Do the reset outside of interrupt context). */ 3752 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { 3753 adapter->tx_timeout_count++; 3754 schedule_work(&adapter->reset_task); 3755 /* return immediately since reset is imminent */ 3756 return; 3757 } 3758 } 3759 3760 /* Force detection of hung controller every watchdog period */ 3761 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3762 } 3763 3764 /* Cause software interrupt to ensure rx ring is cleaned */ 3765 if (adapter->msix_entries) { 3766 u32 eics = 0; 3767 for (i = 0; i < adapter->num_q_vectors; i++) 3768 eics |= adapter->q_vector[i]->eims_value; 3769 wr32(E1000_EICS, eics); 3770 } else { 3771 wr32(E1000_ICS, E1000_ICS_RXDMT0); 3772 } 3773 3774 igb_spoof_check(adapter); 3775 3776 /* Reset the timer */ 3777 if (!test_bit(__IGB_DOWN, &adapter->state)) 3778 mod_timer(&adapter->watchdog_timer, 3779 round_jiffies(jiffies + 2 * HZ)); 3780} 3781 3782enum latency_range { 3783 lowest_latency = 0, 3784 low_latency = 1, 3785 bulk_latency = 2, 3786 latency_invalid = 255 3787}; 3788 3789/** 3790 * igb_update_ring_itr - update the dynamic ITR value based on packet size 3791 * 3792 * Stores a new ITR value based strictly on packet size. This 3793 * algorithm is less sophisticated than that used in igb_update_itr, 3794 * due to the difficulty of synchronizing statistics across multiple 3795 * receive rings. The divisors and thresholds used by this function 3796 * were determined based on theoretical maximum wire speed and testing 3797 * data, in order to minimize response time while increasing bulk 3798 * throughput. 3799 * This functionality is controlled by the InterruptThrottleRate module 3800 * parameter (see igb_param.c) 3801 * NOTE: This function is called only when operating in a multiqueue 3802 * receive environment.
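 * (Worked example, illustrative only: 1024-byte frames give avg_wire_size = 1024 + 24 = 1048, which lands in the 300..1200 mid-size band, so new_val = 1048 / 3 = 349.)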
3803 * @q_vector: pointer to q_vector 3804 **/ 3805static void igb_update_ring_itr(struct igb_q_vector *q_vector) 3806{ 3807 int new_val = q_vector->itr_val; 3808 int avg_wire_size = 0; 3809 struct igb_adapter *adapter = q_vector->adapter; 3810 unsigned int packets; 3811 3812 /* For non-gigabit speeds, just fix the interrupt rate at 4000 3813 * ints/sec - ITR timer value of 120 ticks. 3814 */ 3815 if (adapter->link_speed != SPEED_1000) { 3816 new_val = IGB_4K_ITR; 3817 goto set_itr_val; 3818 } 3819 3820 packets = q_vector->rx.total_packets; 3821 if (packets) 3822 avg_wire_size = q_vector->rx.total_bytes / packets; 3823 3824 packets = q_vector->tx.total_packets; 3825 if (packets) 3826 avg_wire_size = max_t(u32, avg_wire_size, 3827 q_vector->tx.total_bytes / packets); 3828 3829 /* if avg_wire_size isn't set no work was done */ 3830 if (!avg_wire_size) 3831 goto clear_counts; 3832 3833 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 3834 avg_wire_size += 24; 3835 3836 /* Don't starve jumbo frames */ 3837 avg_wire_size = min(avg_wire_size, 3000); 3838 3839 /* Give a little boost to mid-size frames */ 3840 if ((avg_wire_size > 300) && (avg_wire_size < 1200)) 3841 new_val = avg_wire_size / 3; 3842 else 3843 new_val = avg_wire_size / 2; 3844 3845 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 3846 if (new_val < IGB_20K_ITR && 3847 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 3848 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 3849 new_val = IGB_20K_ITR; 3850 3851set_itr_val: 3852 if (new_val != q_vector->itr_val) { 3853 q_vector->itr_val = new_val; 3854 q_vector->set_itr = 1; 3855 } 3856clear_counts: 3857 q_vector->rx.total_bytes = 0; 3858 q_vector->rx.total_packets = 0; 3859 q_vector->tx.total_bytes = 0; 3860 q_vector->tx.total_packets = 0; 3861} 3862 3863/** 3864 * igb_update_itr - update the dynamic ITR value based on statistics 3865 * Stores a new ITR value based on packets and byte 3866 * counts during the last interrupt. The advantage of per interrupt 3867 * computation is faster updates and more accurate ITR for the current 3868 * traffic pattern. Constants in this function were computed 3869 * based on theoretical maximum wire speed and thresholds were set based 3870 * on testing data as well as attempting to minimize response time 3871 * while increasing bulk throughput. 3872 * this functionality is controlled by the InterruptThrottleRate module 3873 * parameter (see igb_param.c) 3874 * NOTE: These calculations are only valid when operating in a single- 3875 * queue environment. 
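* For instance, a polling window that saw 40 packets totalling 60000
* bytes while in low_latency takes the bytes > 10000 branch below;
* bytes/packets = 1500 exceeds the 1200 cutoff, so the ring container
* is moved to bulk_latency for the next window.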
3876 * @q_vector: pointer to q_vector 3877 * @ring_container: ring info to update the itr for 3878 **/ 3879static void igb_update_itr(struct igb_q_vector *q_vector, 3880 struct igb_ring_container *ring_container) 3881{ 3882 unsigned int packets = ring_container->total_packets; 3883 unsigned int bytes = ring_container->total_bytes; 3884 u8 itrval = ring_container->itr; 3885 3886 /* no packets, exit with status unchanged */ 3887 if (packets == 0) 3888 return; 3889 3890 switch (itrval) { 3891 case lowest_latency: 3892 /* handle TSO and jumbo frames */ 3893 if (bytes/packets > 8000) 3894 itrval = bulk_latency; 3895 else if ((packets < 5) && (bytes > 512)) 3896 itrval = low_latency; 3897 break; 3898 case low_latency: /* 50 usec aka 20000 ints/s */ 3899 if (bytes > 10000) { 3900 /* this if handles the TSO accounting */ 3901 if (bytes/packets > 8000) { 3902 itrval = bulk_latency; 3903 } else if ((packets < 10) || ((bytes/packets) > 1200)) { 3904 itrval = bulk_latency; 3905 } else if ((packets > 35)) { 3906 itrval = lowest_latency; 3907 } 3908 } else if (bytes/packets > 2000) { 3909 itrval = bulk_latency; 3910 } else if (packets <= 2 && bytes < 512) { 3911 itrval = lowest_latency; 3912 } 3913 break; 3914 case bulk_latency: /* 250 usec aka 4000 ints/s */ 3915 if (bytes > 25000) { 3916 if (packets > 35) 3917 itrval = low_latency; 3918 } else if (bytes < 1500) { 3919 itrval = low_latency; 3920 } 3921 break; 3922 } 3923 3924 /* clear work counters since we have the values we need */ 3925 ring_container->total_bytes = 0; 3926 ring_container->total_packets = 0; 3927 3928 /* write updated itr to ring container */ 3929 ring_container->itr = itrval; 3930} 3931 3932static void igb_set_itr(struct igb_q_vector *q_vector) 3933{ 3934 struct igb_adapter *adapter = q_vector->adapter; 3935 u32 new_itr = q_vector->itr_val; 3936 u8 current_itr = 0; 3937 3938 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 3939 if (adapter->link_speed != SPEED_1000) { 3940 current_itr = 0; 3941 new_itr = IGB_4K_ITR; 3942 goto set_itr_now; 3943 } 3944 3945 igb_update_itr(q_vector, &q_vector->tx); 3946 igb_update_itr(q_vector, &q_vector->rx); 3947 3948 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 3949 3950 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 3951 if (current_itr == lowest_latency && 3952 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 3953 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 3954 current_itr = low_latency; 3955 3956 switch (current_itr) { 3957 /* counts and packets in update_itr are dependent on these numbers */ 3958 case lowest_latency: 3959 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ 3960 break; 3961 case low_latency: 3962 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ 3963 break; 3964 case bulk_latency: 3965 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ 3966 break; 3967 default: 3968 break; 3969 } 3970 3971set_itr_now: 3972 if (new_itr != q_vector->itr_val) { 3973 /* this attempts to bias the interrupt rate towards Bulk 3974 * by adding intermediate steps when interrupt rate is 3975 * increasing */ 3976 new_itr = new_itr > q_vector->itr_val ? 3977 max((new_itr * q_vector->itr_val) / 3978 (new_itr + (q_vector->itr_val >> 2)), 3979 new_itr) : 3980 new_itr; 3981 /* Don't write the value here; it resets the adapter's 3982 * internal timer, and causes us to delay far longer than 3983 * we should between interrupts. Instead, we write the ITR 3984 * value at the beginning of the next interrupt so the timing 3985 * ends up being correct. 
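* The deferred write is performed by igb_write_itr(), which runs at
* the top of the next interrupt, pushes itr_val out to the vector's
* EITR register and clears set_itr again.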
3986 */ 3987 q_vector->itr_val = new_itr; 3988 q_vector->set_itr = 1; 3989 } 3990} 3991 3992void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, 3993 u32 type_tucmd, u32 mss_l4len_idx) 3994{ 3995 struct e1000_adv_tx_context_desc *context_desc; 3996 u16 i = tx_ring->next_to_use; 3997 3998 context_desc = IGB_TX_CTXTDESC(tx_ring, i); 3999 4000 i++; 4001 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 4002 4003 /* set bits to identify this as an advanced context descriptor */ 4004 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 4005 4006 /* For 82575, context index must be unique per ring. */ 4007 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 4008 mss_l4len_idx |= tx_ring->reg_idx << 4; 4009 4010 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 4011 context_desc->seqnum_seed = 0; 4012 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 4013 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 4014} 4015 4016static int igb_tso(struct igb_ring *tx_ring, 4017 struct igb_tx_buffer *first, 4018 u8 *hdr_len) 4019{ 4020 struct sk_buff *skb = first->skb; 4021 u32 vlan_macip_lens, type_tucmd; 4022 u32 mss_l4len_idx, l4len; 4023 4024 if (!skb_is_gso(skb)) 4025 return 0; 4026 4027 if (skb_header_cloned(skb)) { 4028 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 4029 if (err) 4030 return err; 4031 } 4032 4033 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 4034 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; 4035 4036 if (first->protocol == __constant_htons(ETH_P_IP)) { 4037 struct iphdr *iph = ip_hdr(skb); 4038 iph->tot_len = 0; 4039 iph->check = 0; 4040 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, 4041 iph->daddr, 0, 4042 IPPROTO_TCP, 4043 0); 4044 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; 4045 first->tx_flags |= IGB_TX_FLAGS_TSO | 4046 IGB_TX_FLAGS_CSUM | 4047 IGB_TX_FLAGS_IPV4; 4048 } else if (skb_is_gso_v6(skb)) { 4049 ipv6_hdr(skb)->payload_len = 0; 4050 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 4051 &ipv6_hdr(skb)->daddr, 4052 0, IPPROTO_TCP, 0); 4053 first->tx_flags |= IGB_TX_FLAGS_TSO | 4054 IGB_TX_FLAGS_CSUM; 4055 } 4056 4057 /* compute header lengths */ 4058 l4len = tcp_hdrlen(skb); 4059 *hdr_len = skb_transport_offset(skb) + l4len; 4060 4061 /* update gso size and bytecount with header size */ 4062 first->gso_segs = skb_shinfo(skb)->gso_segs; 4063 first->bytecount += (first->gso_segs - 1) * *hdr_len; 4064 4065 /* MSS L4LEN IDX */ 4066 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; 4067 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; 4068 4069 /* VLAN MACLEN IPLEN */ 4070 vlan_macip_lens = skb_network_header_len(skb); 4071 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; 4072 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; 4073 4074 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); 4075 4076 return 1; 4077} 4078 4079static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) 4080{ 4081 struct sk_buff *skb = first->skb; 4082 u32 vlan_macip_lens = 0; 4083 u32 mss_l4len_idx = 0; 4084 u32 type_tucmd = 0; 4085 4086 if (skb->ip_summed != CHECKSUM_PARTIAL) { 4087 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) 4088 return; 4089 } else { 4090 u8 l4_hdr = 0; 4091 switch (first->protocol) { 4092 case __constant_htons(ETH_P_IP): 4093 vlan_macip_lens |= skb_network_header_len(skb); 4094 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; 4095 l4_hdr = ip_hdr(skb)->protocol; 4096 break; 4097 case __constant_htons(ETH_P_IPV6): 4098 vlan_macip_lens 
|= skb_network_header_len(skb); 4099 l4_hdr = ipv6_hdr(skb)->nexthdr; 4100 break; 4101 default: 4102 if (unlikely(net_ratelimit())) { 4103 dev_warn(tx_ring->dev, 4104 "partial checksum but proto=%x!\n", 4105 first->protocol); 4106 } 4107 break; 4108 } 4109 4110 switch (l4_hdr) { 4111 case IPPROTO_TCP: 4112 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; 4113 mss_l4len_idx = tcp_hdrlen(skb) << 4114 E1000_ADVTXD_L4LEN_SHIFT; 4115 break; 4116 case IPPROTO_SCTP: 4117 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; 4118 mss_l4len_idx = sizeof(struct sctphdr) << 4119 E1000_ADVTXD_L4LEN_SHIFT; 4120 break; 4121 case IPPROTO_UDP: 4122 mss_l4len_idx = sizeof(struct udphdr) << 4123 E1000_ADVTXD_L4LEN_SHIFT; 4124 break; 4125 default: 4126 if (unlikely(net_ratelimit())) { 4127 dev_warn(tx_ring->dev, 4128 "partial checksum but l4 proto=%x!\n", 4129 l4_hdr); 4130 } 4131 break; 4132 } 4133 4134 /* update TX checksum flag */ 4135 first->tx_flags |= IGB_TX_FLAGS_CSUM; 4136 } 4137 4138 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; 4139 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; 4140 4141 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); 4142} 4143 4144static __le32 igb_tx_cmd_type(u32 tx_flags) 4145{ 4146 /* set type for advanced descriptor with frame checksum insertion */ 4147 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA | 4148 E1000_ADVTXD_DCMD_IFCS | 4149 E1000_ADVTXD_DCMD_DEXT); 4150 4151 /* set HW vlan bit if vlan is present */ 4152 if (tx_flags & IGB_TX_FLAGS_VLAN) 4153 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); 4154 4155 /* set timestamp bit if present */ 4156 if (tx_flags & IGB_TX_FLAGS_TSTAMP) 4157 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); 4158 4159 /* set segmentation bits for TSO */ 4160 if (tx_flags & IGB_TX_FLAGS_TSO) 4161 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE); 4162 4163 return cmd_type; 4164} 4165 4166static void igb_tx_olinfo_status(struct igb_ring *tx_ring, 4167 union e1000_adv_tx_desc *tx_desc, 4168 u32 tx_flags, unsigned int paylen) 4169{ 4170 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; 4171 4172 /* 82575 requires a unique index per ring if any offload is enabled */ 4173 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) && 4174 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 4175 olinfo_status |= tx_ring->reg_idx << 4; 4176 4177 /* insert L4 checksum */ 4178 if (tx_flags & IGB_TX_FLAGS_CSUM) { 4179 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 4180 4181 /* insert IPv4 checksum */ 4182 if (tx_flags & IGB_TX_FLAGS_IPV4) 4183 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 4184 } 4185 4186 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 4187} 4188 4189/* 4190 * The largest size we can write to the descriptor is 65535. In order to 4191 * maintain a power of two alignment we have to limit ourselves to 32K. 
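* In other words IGB_MAX_DATA_PER_TXD below is 1 << 15 = 32768 bytes,
* so a maximally sized 64KB buffer is simply split across two
* descriptors by the inner loop of igb_tx_map().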
4192 */ 4193#define IGB_MAX_TXD_PWR 15 4194#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR) 4195 4196static void igb_tx_map(struct igb_ring *tx_ring, 4197 struct igb_tx_buffer *first, 4198 const u8 hdr_len) 4199{ 4200 struct sk_buff *skb = first->skb; 4201 struct igb_tx_buffer *tx_buffer_info; 4202 union e1000_adv_tx_desc *tx_desc; 4203 dma_addr_t dma; 4204 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; 4205 unsigned int data_len = skb->data_len; 4206 unsigned int size = skb_headlen(skb); 4207 unsigned int paylen = skb->len - hdr_len; 4208 __le32 cmd_type; 4209 u32 tx_flags = first->tx_flags; 4210 u16 i = tx_ring->next_to_use; 4211 4212 tx_desc = IGB_TX_DESC(tx_ring, i); 4213 4214 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen); 4215 cmd_type = igb_tx_cmd_type(tx_flags); 4216 4217 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 4218 if (dma_mapping_error(tx_ring->dev, dma)) 4219 goto dma_error; 4220 4221 /* record length, and DMA address */ 4222 first->length = size; 4223 first->dma = dma; 4224 tx_desc->read.buffer_addr = cpu_to_le64(dma); 4225 4226 for (;;) { 4227 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { 4228 tx_desc->read.cmd_type_len = 4229 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD); 4230 4231 i++; 4232 tx_desc++; 4233 if (i == tx_ring->count) { 4234 tx_desc = IGB_TX_DESC(tx_ring, 0); 4235 i = 0; 4236 } 4237 4238 dma += IGB_MAX_DATA_PER_TXD; 4239 size -= IGB_MAX_DATA_PER_TXD; 4240 4241 tx_desc->read.olinfo_status = 0; 4242 tx_desc->read.buffer_addr = cpu_to_le64(dma); 4243 } 4244 4245 if (likely(!data_len)) 4246 break; 4247 4248 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); 4249 4250 i++; 4251 tx_desc++; 4252 if (i == tx_ring->count) { 4253 tx_desc = IGB_TX_DESC(tx_ring, 0); 4254 i = 0; 4255 } 4256 4257 size = frag->size; 4258 data_len -= size; 4259 4260 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 4261 size, DMA_TO_DEVICE); 4262 if (dma_mapping_error(tx_ring->dev, dma)) 4263 goto dma_error; 4264 4265 tx_buffer_info = &tx_ring->tx_buffer_info[i]; 4266 tx_buffer_info->length = size; 4267 tx_buffer_info->dma = dma; 4268 4269 tx_desc->read.olinfo_status = 0; 4270 tx_desc->read.buffer_addr = cpu_to_le64(dma); 4271 4272 frag++; 4273 } 4274 4275 /* write last descriptor with RS and EOP bits */ 4276 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD); 4277 tx_desc->read.cmd_type_len = cmd_type; 4278 4279 /* set the timestamp */ 4280 first->time_stamp = jiffies; 4281 4282 /* 4283 * Force memory writes to complete before letting h/w know there 4284 * are new descriptors to fetch. (Only applicable for weak-ordered 4285 * memory model archs, such as IA-64). 4286 * 4287 * We also need this memory barrier to make certain all of the 4288 * status bits have been updated before next_to_watch is written. 
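*
* The read side pairs with this in igb_clean_tx_irq(), which loads
* next_to_watch and then issues rmb() before inspecting the
* descriptor status bits.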
*/ 4290 wmb(); 4291 4292 /* set next_to_watch value indicating a packet is present */ 4293 first->next_to_watch = tx_desc; 4294 4295 i++; 4296 if (i == tx_ring->count) 4297 i = 0; 4298 4299 tx_ring->next_to_use = i; 4300 4301 writel(i, tx_ring->tail); 4302 4303 /* we need this if more than one processor can write to our tail 4304 * at a time, it synchronizes IO on IA64/Altix systems */ 4305 mmiowb(); 4306 4307 return; 4308 4309dma_error: 4310 dev_err(tx_ring->dev, "TX DMA map failed\n"); 4311 4312 /* clear dma mappings for failed tx_buffer_info map */ 4313 for (;;) { 4314 tx_buffer_info = &tx_ring->tx_buffer_info[i]; 4315 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); 4316 if (tx_buffer_info == first) 4317 break; 4318 if (i == 0) 4319 i = tx_ring->count; 4320 i--; 4321 } 4322 4323 tx_ring->next_to_use = i; 4324} 4325 4326static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) 4327{ 4328 struct net_device *netdev = tx_ring->netdev; 4329 4330 netif_stop_subqueue(netdev, tx_ring->queue_index); 4331 4332 /* Herbert's original patch had: 4333 * smp_mb__after_netif_stop_queue(); 4334 * but since that doesn't exist yet, just open code it. */ 4335 smp_mb(); 4336 4337 /* We need to check again in case another CPU has just 4338 * made room available. */ 4339 if (igb_desc_unused(tx_ring) < size) 4340 return -EBUSY; 4341 4342 /* A reprieve! */ 4343 netif_wake_subqueue(netdev, tx_ring->queue_index); 4344 4345 u64_stats_update_begin(&tx_ring->tx_syncp2); 4346 tx_ring->tx_stats.restart_queue2++; 4347 u64_stats_update_end(&tx_ring->tx_syncp2); 4348 4349 return 0; 4350} 4351 4352static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) 4353{ 4354 if (igb_desc_unused(tx_ring) >= size) 4355 return 0; 4356 return __igb_maybe_stop_tx(tx_ring, size); 4357} 4358 4359netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, 4360 struct igb_ring *tx_ring) 4361{ 4362 struct igb_tx_buffer *first; 4363 int tso; 4364 u32 tx_flags = 0; 4365 __be16 protocol = vlan_get_protocol(skb); 4366 u8 hdr_len = 0; 4367 4368 /* need: 1 descriptor per page, 4369 * + 2 desc gap to keep tail from touching head, 4370 * + 1 desc for skb->data, 4371 * + 1 desc for context descriptor, 4372 * otherwise try next time */ 4373 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) { 4374 /* this is a hard error */ 4375 return NETDEV_TX_BUSY; 4376 } 4377 4378 /* record the location of the first descriptor for this packet */ 4379 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 4380 first->skb = skb; 4381 first->bytecount = skb->len; 4382 first->gso_segs = 1; 4383 4384 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 4385 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 4386 tx_flags |= IGB_TX_FLAGS_TSTAMP; 4387 } 4388 4389 if (vlan_tx_tag_present(skb)) { 4390 tx_flags |= IGB_TX_FLAGS_VLAN; 4391 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); 4392 } 4393 4394 /* record initial flags and protocol */ 4395 first->tx_flags = tx_flags; 4396 first->protocol = protocol; 4397 4398 tso = igb_tso(tx_ring, first, &hdr_len); 4399 if (tso < 0) 4400 goto out_drop; 4401 else if (!tso) 4402 igb_tx_csum(tx_ring, first); 4403 4404 igb_tx_map(tx_ring, first, hdr_len); 4405 4406 /* Make sure there is space in the ring for the next send.
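* We reserve MAX_SKB_FRAGS + 4 descriptors to mirror the worst case
* of the check at the top of this function: one descriptor per
* fragment, one for skb->data, one for a context descriptor and a
* two descriptor gap.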
*/ 4407 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4); 4408 4409 return NETDEV_TX_OK; 4410 4411out_drop: 4412 igb_unmap_and_free_tx_resource(tx_ring, first); 4413 4414 return NETDEV_TX_OK; 4415} 4416 4417static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, 4418 struct sk_buff *skb) 4419{ 4420 unsigned int r_idx = skb->queue_mapping; 4421 4422 if (r_idx >= adapter->num_tx_queues) 4423 r_idx = r_idx % adapter->num_tx_queues; 4424 4425 return adapter->tx_ring[r_idx]; 4426} 4427 4428static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, 4429 struct net_device *netdev) 4430{ 4431 struct igb_adapter *adapter = netdev_priv(netdev); 4432 4433 if (test_bit(__IGB_DOWN, &adapter->state)) { 4434 dev_kfree_skb_any(skb); 4435 return NETDEV_TX_OK; 4436 } 4437 4438 if (skb->len <= 0) { 4439 dev_kfree_skb_any(skb); 4440 return NETDEV_TX_OK; 4441 } 4442 4443 /* 4444 * The minimum packet size with TCTL.PSP set is 17 so pad the skb 4445 * in order to meet this minimum size requirement. 4446 */ 4447 if (skb->len < 17) { 4448 if (skb_padto(skb, 17)) 4449 return NETDEV_TX_OK; 4450 skb->len = 17; 4451 } 4452 4453 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); 4454} 4455 4456/** 4457 * igb_tx_timeout - Respond to a Tx Hang 4458 * @netdev: network interface device structure 4459 **/ 4460static void igb_tx_timeout(struct net_device *netdev) 4461{ 4462 struct igb_adapter *adapter = netdev_priv(netdev); 4463 struct e1000_hw *hw = &adapter->hw; 4464 4465 /* Do the reset outside of interrupt context */ 4466 adapter->tx_timeout_count++; 4467 4468 if (hw->mac.type >= e1000_82580) 4469 hw->dev_spec._82575.global_device_reset = true; 4470 4471 schedule_work(&adapter->reset_task); 4472 wr32(E1000_EICS, 4473 (adapter->eims_enable_mask & ~adapter->eims_other)); 4474} 4475 4476static void igb_reset_task(struct work_struct *work) 4477{ 4478 struct igb_adapter *adapter; 4479 adapter = container_of(work, struct igb_adapter, reset_task); 4480 4481 igb_dump(adapter); 4482 netdev_err(adapter->netdev, "Reset adapter\n"); 4483 igb_reinit_locked(adapter); 4484} 4485 4486/** 4487 * igb_get_stats64 - Get System Network Statistics 4488 * @netdev: network interface device structure 4489 * @stats: rtnl_link_stats64 pointer 4490 * 4491 **/ 4492static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, 4493 struct rtnl_link_stats64 *stats) 4494{ 4495 struct igb_adapter *adapter = netdev_priv(netdev); 4496 4497 spin_lock(&adapter->stats64_lock); 4498 igb_update_stats(adapter, &adapter->stats64); 4499 memcpy(stats, &adapter->stats64, sizeof(*stats)); 4500 spin_unlock(&adapter->stats64_lock); 4501 4502 return stats; 4503} 4504 4505/** 4506 * igb_change_mtu - Change the Maximum Transfer Unit 4507 * @netdev: network interface device structure 4508 * @new_mtu: new value for maximum frame size 4509 * 4510 * Returns 0 on success, negative on failure 4511 **/ 4512static int igb_change_mtu(struct net_device *netdev, int new_mtu) 4513{ 4514 struct igb_adapter *adapter = netdev_priv(netdev); 4515 struct pci_dev *pdev = adapter->pdev; 4516 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 4517 4518 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { 4519 dev_err(&pdev->dev, "Invalid MTU setting\n"); 4520 return -EINVAL; 4521 } 4522 4523#define MAX_STD_JUMBO_FRAME_SIZE 9238 4524 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { 4525 dev_err(&pdev->dev, "MTU > 9216 not supported.\n"); 4526 return -EINVAL; 4527 } 4528 4529 while (test_and_set_bit(__IGB_RESETTING, 
&adapter->state)) 4530 msleep(1); 4531 4532 /* igb_down has a dependency on max_frame_size */ 4533 adapter->max_frame_size = max_frame; 4534 4535 if (netif_running(netdev)) 4536 igb_down(adapter); 4537 4538 dev_info(&pdev->dev, "changing MTU from %d to %d\n", 4539 netdev->mtu, new_mtu); 4540 netdev->mtu = new_mtu; 4541 4542 if (netif_running(netdev)) 4543 igb_up(adapter); 4544 else 4545 igb_reset(adapter); 4546 4547 clear_bit(__IGB_RESETTING, &adapter->state); 4548 4549 return 0; 4550} 4551 4552/** 4553 * igb_update_stats - Update the board statistics counters 4554 * @adapter: board private structure 4555 **/ 4556 4557void igb_update_stats(struct igb_adapter *adapter, 4558 struct rtnl_link_stats64 *net_stats) 4559{ 4560 struct e1000_hw *hw = &adapter->hw; 4561 struct pci_dev *pdev = adapter->pdev; 4562 u32 reg, mpc; 4563 u16 phy_tmp; 4564 int i; 4565 u64 bytes, packets; 4566 unsigned int start; 4567 u64 _bytes, _packets; 4568 4569#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF 4570 4571 /* 4572 * Prevent stats update while adapter is being reset, or if the pci 4573 * connection is down. 4574 */ 4575 if (adapter->link_speed == 0) 4576 return; 4577 if (pci_channel_offline(pdev)) 4578 return; 4579 4580 bytes = 0; 4581 packets = 0; 4582 for (i = 0; i < adapter->num_rx_queues; i++) { 4583 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF; 4584 struct igb_ring *ring = adapter->rx_ring[i]; 4585 4586 ring->rx_stats.drops += rqdpc_tmp; 4587 net_stats->rx_fifo_errors += rqdpc_tmp; 4588 4589 do { 4590 start = u64_stats_fetch_begin_bh(&ring->rx_syncp); 4591 _bytes = ring->rx_stats.bytes; 4592 _packets = ring->rx_stats.packets; 4593 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start)); 4594 bytes += _bytes; 4595 packets += _packets; 4596 } 4597 4598 net_stats->rx_bytes = bytes; 4599 net_stats->rx_packets = packets; 4600 4601 bytes = 0; 4602 packets = 0; 4603 for (i = 0; i < adapter->num_tx_queues; i++) { 4604 struct igb_ring *ring = adapter->tx_ring[i]; 4605 do { 4606 start = u64_stats_fetch_begin_bh(&ring->tx_syncp); 4607 _bytes = ring->tx_stats.bytes; 4608 _packets = ring->tx_stats.packets; 4609 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start)); 4610 bytes += _bytes; 4611 packets += _packets; 4612 } 4613 net_stats->tx_bytes = bytes; 4614 net_stats->tx_packets = packets; 4615 4616 /* read stats registers */ 4617 adapter->stats.crcerrs += rd32(E1000_CRCERRS); 4618 adapter->stats.gprc += rd32(E1000_GPRC); 4619 adapter->stats.gorc += rd32(E1000_GORCL); 4620 rd32(E1000_GORCH); /* clear GORCL */ 4621 adapter->stats.bprc += rd32(E1000_BPRC); 4622 adapter->stats.mprc += rd32(E1000_MPRC); 4623 adapter->stats.roc += rd32(E1000_ROC); 4624 4625 adapter->stats.prc64 += rd32(E1000_PRC64); 4626 adapter->stats.prc127 += rd32(E1000_PRC127); 4627 adapter->stats.prc255 += rd32(E1000_PRC255); 4628 adapter->stats.prc511 += rd32(E1000_PRC511); 4629 adapter->stats.prc1023 += rd32(E1000_PRC1023); 4630 adapter->stats.prc1522 += rd32(E1000_PRC1522); 4631 adapter->stats.symerrs += rd32(E1000_SYMERRS); 4632 adapter->stats.sec += rd32(E1000_SEC); 4633 4634 mpc = rd32(E1000_MPC); 4635 adapter->stats.mpc += mpc; 4636 net_stats->rx_fifo_errors += mpc; 4637 adapter->stats.scc += rd32(E1000_SCC); 4638 adapter->stats.ecol += rd32(E1000_ECOL); 4639 adapter->stats.mcc += rd32(E1000_MCC); 4640 adapter->stats.latecol += rd32(E1000_LATECOL); 4641 adapter->stats.dc += rd32(E1000_DC); 4642 adapter->stats.rlec += rd32(E1000_RLEC); 4643 adapter->stats.xonrxc += rd32(E1000_XONRXC); 4644 adapter->stats.xontxc += rd32(E1000_XONTXC); 4645 
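/* like the counters above, the statistics registers that follow are
 * clear-on-read, so each += accumulates only the delta seen since the
 * previous watchdog pass */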
adapter->stats.xoffrxc += rd32(E1000_XOFFRXC); 4646 adapter->stats.xofftxc += rd32(E1000_XOFFTXC); 4647 adapter->stats.fcruc += rd32(E1000_FCRUC); 4648 adapter->stats.gptc += rd32(E1000_GPTC); 4649 adapter->stats.gotc += rd32(E1000_GOTCL); 4650 rd32(E1000_GOTCH); /* clear GOTCL */ 4651 adapter->stats.rnbc += rd32(E1000_RNBC); 4652 adapter->stats.ruc += rd32(E1000_RUC); 4653 adapter->stats.rfc += rd32(E1000_RFC); 4654 adapter->stats.rjc += rd32(E1000_RJC); 4655 adapter->stats.tor += rd32(E1000_TORH); 4656 adapter->stats.tot += rd32(E1000_TOTH); 4657 adapter->stats.tpr += rd32(E1000_TPR); 4658 4659 adapter->stats.ptc64 += rd32(E1000_PTC64); 4660 adapter->stats.ptc127 += rd32(E1000_PTC127); 4661 adapter->stats.ptc255 += rd32(E1000_PTC255); 4662 adapter->stats.ptc511 += rd32(E1000_PTC511); 4663 adapter->stats.ptc1023 += rd32(E1000_PTC1023); 4664 adapter->stats.ptc1522 += rd32(E1000_PTC1522); 4665 4666 adapter->stats.mptc += rd32(E1000_MPTC); 4667 adapter->stats.bptc += rd32(E1000_BPTC); 4668 4669 adapter->stats.tpt += rd32(E1000_TPT); 4670 adapter->stats.colc += rd32(E1000_COLC); 4671 4672 adapter->stats.algnerrc += rd32(E1000_ALGNERRC); 4673 /* read internal phy specific stats */ 4674 reg = rd32(E1000_CTRL_EXT); 4675 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { 4676 adapter->stats.rxerrc += rd32(E1000_RXERRC); 4677 adapter->stats.tncrs += rd32(E1000_TNCRS); 4678 } 4679 4680 adapter->stats.tsctc += rd32(E1000_TSCTC); 4681 adapter->stats.tsctfc += rd32(E1000_TSCTFC); 4682 4683 adapter->stats.iac += rd32(E1000_IAC); 4684 adapter->stats.icrxoc += rd32(E1000_ICRXOC); 4685 adapter->stats.icrxptc += rd32(E1000_ICRXPTC); 4686 adapter->stats.icrxatc += rd32(E1000_ICRXATC); 4687 adapter->stats.ictxptc += rd32(E1000_ICTXPTC); 4688 adapter->stats.ictxatc += rd32(E1000_ICTXATC); 4689 adapter->stats.ictxqec += rd32(E1000_ICTXQEC); 4690 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC); 4691 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC); 4692 4693 /* Fill out the OS statistics structure */ 4694 net_stats->multicast = adapter->stats.mprc; 4695 net_stats->collisions = adapter->stats.colc; 4696 4697 /* Rx Errors */ 4698 4699 /* RLEC on some newer hardware can be incorrect so build 4700 * our own version based on RUC and ROC */ 4701 net_stats->rx_errors = adapter->stats.rxerrc + 4702 adapter->stats.crcerrs + adapter->stats.algnerrc + 4703 adapter->stats.ruc + adapter->stats.roc + 4704 adapter->stats.cexterr; 4705 net_stats->rx_length_errors = adapter->stats.ruc + 4706 adapter->stats.roc; 4707 net_stats->rx_crc_errors = adapter->stats.crcerrs; 4708 net_stats->rx_frame_errors = adapter->stats.algnerrc; 4709 net_stats->rx_missed_errors = adapter->stats.mpc; 4710 4711 /* Tx Errors */ 4712 net_stats->tx_errors = adapter->stats.ecol + 4713 adapter->stats.latecol; 4714 net_stats->tx_aborted_errors = adapter->stats.ecol; 4715 net_stats->tx_window_errors = adapter->stats.latecol; 4716 net_stats->tx_carrier_errors = adapter->stats.tncrs; 4717 4718 /* Tx Dropped needs to be maintained elsewhere */ 4719 4720 /* Phy Stats */ 4721 if (hw->phy.media_type == e1000_media_type_copper) { 4722 if ((adapter->link_speed == SPEED_1000) && 4723 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { 4724 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; 4725 adapter->phy_stats.idle_errors += phy_tmp; 4726 } 4727 } 4728 4729 /* Management Stats */ 4730 adapter->stats.mgptc += rd32(E1000_MGTPTC); 4731 adapter->stats.mgprc += rd32(E1000_MGTPRC); 4732 adapter->stats.mgpdc += rd32(E1000_MGTPDC); 4733 4734 /* OS2BMC Stats */ 4735 reg = 
rd32(E1000_MANC); 4736 if (reg & E1000_MANC_EN_BMC2OS) { 4737 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC); 4738 adapter->stats.o2bspc += rd32(E1000_O2BSPC); 4739 adapter->stats.b2ospc += rd32(E1000_B2OSPC); 4740 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC); 4741 } 4742} 4743 4744static irqreturn_t igb_msix_other(int irq, void *data) 4745{ 4746 struct igb_adapter *adapter = data; 4747 struct e1000_hw *hw = &adapter->hw; 4748 u32 icr = rd32(E1000_ICR); 4749 /* reading ICR causes bit 31 of EICR to be cleared */ 4750 4751 if (icr & E1000_ICR_DRSTA) 4752 schedule_work(&adapter->reset_task); 4753 4754 if (icr & E1000_ICR_DOUTSYNC) { 4755 /* HW is reporting DMA is out of sync */ 4756 adapter->stats.doosync++; 4757 /* The DMA Out of Sync is also an indication of a spoof event 4758 * in IOV mode. Check the Wrong VM Behavior register to 4759 * see if it is really a spoof event. */ 4760 igb_check_wvbr(adapter); 4761 } 4762 4763 /* Check for a mailbox event */ 4764 if (icr & E1000_ICR_VMMB) 4765 igb_msg_task(adapter); 4766 4767 if (icr & E1000_ICR_LSC) { 4768 hw->mac.get_link_status = 1; 4769 /* guard against interrupt when we're going down */ 4770 if (!test_bit(__IGB_DOWN, &adapter->state)) 4771 mod_timer(&adapter->watchdog_timer, jiffies + 1); 4772 } 4773 4774 wr32(E1000_EIMS, adapter->eims_other); 4775 4776 return IRQ_HANDLED; 4777} 4778 4779static void igb_write_itr(struct igb_q_vector *q_vector) 4780{ 4781 struct igb_adapter *adapter = q_vector->adapter; 4782 u32 itr_val = q_vector->itr_val & 0x7FFC; 4783 4784 if (!q_vector->set_itr) 4785 return; 4786 4787 if (!itr_val) 4788 itr_val = 0x4; 4789 4790 if (adapter->hw.mac.type == e1000_82575) 4791 itr_val |= itr_val << 16; 4792 else 4793 itr_val |= E1000_EITR_CNT_IGNR; 4794 4795 writel(itr_val, q_vector->itr_register); 4796 q_vector->set_itr = 0; 4797} 4798 4799static irqreturn_t igb_msix_ring(int irq, void *data) 4800{ 4801 struct igb_q_vector *q_vector = data; 4802 4803 /* Write the ITR value calculated from the previous interrupt.
*/ 4804 igb_write_itr(q_vector); 4805 4806 napi_schedule(&q_vector->napi); 4807 4808 return IRQ_HANDLED; 4809} 4810 4811#ifdef CONFIG_IGB_DCA 4812static void igb_update_dca(struct igb_q_vector *q_vector) 4813{ 4814 struct igb_adapter *adapter = q_vector->adapter; 4815 struct e1000_hw *hw = &adapter->hw; 4816 int cpu = get_cpu(); 4817 4818 if (q_vector->cpu == cpu) 4819 goto out_no_update; 4820 4821 if (q_vector->tx.ring) { 4822 int q = q_vector->tx.ring->reg_idx; 4823 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); 4824 if (hw->mac.type == e1000_82575) { 4825 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; 4826 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); 4827 } else { 4828 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; 4829 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << 4830 E1000_DCA_TXCTRL_CPUID_SHIFT; 4831 } 4832 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; 4833 wr32(E1000_DCA_TXCTRL(q), dca_txctrl); 4834 } 4835 if (q_vector->rx.ring) { 4836 int q = q_vector->rx.ring->reg_idx; 4837 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); 4838 if (hw->mac.type == e1000_82575) { 4839 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; 4840 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); 4841 } else { 4842 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; 4843 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << 4844 E1000_DCA_RXCTRL_CPUID_SHIFT; 4845 } 4846 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; 4847 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; 4848 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN; 4849 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl); 4850 } 4851 q_vector->cpu = cpu; 4852out_no_update: 4853 put_cpu(); 4854} 4855 4856static void igb_setup_dca(struct igb_adapter *adapter) 4857{ 4858 struct e1000_hw *hw = &adapter->hw; 4859 int i; 4860 4861 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED)) 4862 return; 4863 4864 /* Always use CB2 mode, difference is masked in the CB driver. */ 4865 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); 4866 4867 for (i = 0; i < adapter->num_q_vectors; i++) { 4868 adapter->q_vector[i]->cpu = -1; 4869 igb_update_dca(adapter->q_vector[i]); 4870 } 4871} 4872 4873static int __igb_notify_dca(struct device *dev, void *data) 4874{ 4875 struct net_device *netdev = dev_get_drvdata(dev); 4876 struct igb_adapter *adapter = netdev_priv(netdev); 4877 struct pci_dev *pdev = adapter->pdev; 4878 struct e1000_hw *hw = &adapter->hw; 4879 unsigned long event = *(unsigned long *)data; 4880 4881 switch (event) { 4882 case DCA_PROVIDER_ADD: 4883 /* if already enabled, don't do it again */ 4884 if (adapter->flags & IGB_FLAG_DCA_ENABLED) 4885 break; 4886 if (dca_add_requester(dev) == 0) { 4887 adapter->flags |= IGB_FLAG_DCA_ENABLED; 4888 dev_info(&pdev->dev, "DCA enabled\n"); 4889 igb_setup_dca(adapter); 4890 break; 4891 } 4892 /* Fall Through since DCA is disabled. */ 4893 case DCA_PROVIDER_REMOVE: 4894 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 4895 /* without this a class_device is left 4896 * hanging around in the sysfs model */ 4897 dca_remove_requester(dev); 4898 dev_info(&pdev->dev, "DCA disabled\n"); 4899 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 4900 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 4901 } 4902 break; 4903 } 4904 4905 return 0; 4906} 4907 4908static int igb_notify_dca(struct notifier_block *nb, unsigned long event, 4909 void *p) 4910{ 4911 int ret_val; 4912 4913 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, 4914 __igb_notify_dca); 4915 4916 return ret_val ? 
NOTIFY_BAD : NOTIFY_DONE; 4917} 4918#endif /* CONFIG_IGB_DCA */ 4919 4920static void igb_ping_all_vfs(struct igb_adapter *adapter) 4921{ 4922 struct e1000_hw *hw = &adapter->hw; 4923 u32 ping; 4924 int i; 4925 4926 for (i = 0 ; i < adapter->vfs_allocated_count; i++) { 4927 ping = E1000_PF_CONTROL_MSG; 4928 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS) 4929 ping |= E1000_VT_MSGTYPE_CTS; 4930 igb_write_mbx(hw, &ping, 1, i); 4931 } 4932} 4933 4934static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) 4935{ 4936 struct e1000_hw *hw = &adapter->hw; 4937 u32 vmolr = rd32(E1000_VMOLR(vf)); 4938 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 4939 4940 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | 4941 IGB_VF_FLAG_MULTI_PROMISC); 4942 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); 4943 4944 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { 4945 vmolr |= E1000_VMOLR_MPME; 4946 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; 4947 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; 4948 } else { 4949 /* 4950 * if we have hashes and we are clearing a multicast promisc 4951 * flag we need to write the hashes to the MTA as this step 4952 * was previously skipped 4953 */ 4954 if (vf_data->num_vf_mc_hashes > 30) { 4955 vmolr |= E1000_VMOLR_MPME; 4956 } else if (vf_data->num_vf_mc_hashes) { 4957 int j; 4958 vmolr |= E1000_VMOLR_ROMPE; 4959 for (j = 0; j < vf_data->num_vf_mc_hashes; j++) 4960 igb_mta_set(hw, vf_data->vf_mc_hashes[j]); 4961 } 4962 } 4963 4964 wr32(E1000_VMOLR(vf), vmolr); 4965 4966 /* there are flags left unprocessed, likely not supported */ 4967 if (*msgbuf & E1000_VT_MSGINFO_MASK) 4968 return -EINVAL; 4969 4970 return 0; 4971 4972} 4973 4974static int igb_set_vf_multicasts(struct igb_adapter *adapter, 4975 u32 *msgbuf, u32 vf) 4976{ 4977 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; 4978 u16 *hash_list = (u16 *)&msgbuf[1]; 4979 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 4980 int i; 4981 4982 /* salt away the number of multicast addresses assigned 4983 * to this VF for later use to restore when the PF multi cast 4984 * list changes 4985 */ 4986 vf_data->num_vf_mc_hashes = n; 4987 4988 /* only up to 30 hash values supported */ 4989 if (n > 30) 4990 n = 30; 4991 4992 /* store the hashes for later use */ 4993 for (i = 0; i < n; i++) 4994 vf_data->vf_mc_hashes[i] = hash_list[i]; 4995 4996 /* Flush and reset the mta with the new values */ 4997 igb_set_rx_mode(adapter->netdev); 4998 4999 return 0; 5000} 5001 5002static void igb_restore_vf_multicasts(struct igb_adapter *adapter) 5003{ 5004 struct e1000_hw *hw = &adapter->hw; 5005 struct vf_data_storage *vf_data; 5006 int i, j; 5007 5008 for (i = 0; i < adapter->vfs_allocated_count; i++) { 5009 u32 vmolr = rd32(E1000_VMOLR(i)); 5010 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); 5011 5012 vf_data = &adapter->vf_data[i]; 5013 5014 if ((vf_data->num_vf_mc_hashes > 30) || 5015 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) { 5016 vmolr |= E1000_VMOLR_MPME; 5017 } else if (vf_data->num_vf_mc_hashes) { 5018 vmolr |= E1000_VMOLR_ROMPE; 5019 for (j = 0; j < vf_data->num_vf_mc_hashes; j++) 5020 igb_mta_set(hw, vf_data->vf_mc_hashes[j]); 5021 } 5022 wr32(E1000_VMOLR(i), vmolr); 5023 } 5024} 5025 5026static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) 5027{ 5028 struct e1000_hw *hw = &adapter->hw; 5029 u32 pool_mask, reg, vid; 5030 int i; 5031 5032 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); 5033 5034 /* Find the vlan filter for this id */ 5035 for (i 
= 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5036 reg = rd32(E1000_VLVF(i)); 5037 5038 /* remove the vf from the pool */ 5039 reg &= ~pool_mask; 5040 5041 /* if pool is empty then remove entry from vfta */ 5042 if (!(reg & E1000_VLVF_POOLSEL_MASK) && 5043 (reg & E1000_VLVF_VLANID_ENABLE)) { 5044 vid = reg & E1000_VLVF_VLANID_MASK; 5045 igb_vfta_set(hw, vid, false); 5046 reg = 0; 5047 } 5048 5049 wr32(E1000_VLVF(i), reg); 5050 } 5051 5052 adapter->vf_data[vf].vlans_enabled = 0; 5053} 5054 5055static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) 5056{ 5057 struct e1000_hw *hw = &adapter->hw; 5058 u32 reg, i; 5059 5060 /* The vlvf table only exists on 82576 hardware and newer */ 5061 if (hw->mac.type < e1000_82576) 5062 return -1; 5063 5064 /* we only need to do this if VMDq is enabled */ 5065 if (!adapter->vfs_allocated_count) 5066 return -1; 5067 5068 /* Find the vlan filter for this id */ 5069 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5070 reg = rd32(E1000_VLVF(i)); 5071 if ((reg & E1000_VLVF_VLANID_ENABLE) && 5072 vid == (reg & E1000_VLVF_VLANID_MASK)) 5073 break; 5074 } 5075 5076 if (add) { 5077 if (i == E1000_VLVF_ARRAY_SIZE) { 5078 /* Did not find a matching VLAN ID entry that was 5079 * enabled. Search for a free filter entry, i.e. 5080 * one without the enable bit set 5081 */ 5082 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5083 reg = rd32(E1000_VLVF(i)); 5084 if (!(reg & E1000_VLVF_VLANID_ENABLE)) 5085 break; 5086 } 5087 } 5088 if (i < E1000_VLVF_ARRAY_SIZE) { 5089 /* Found an enabled/available entry */ 5090 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); 5091 5092 /* if !enabled we need to set this up in vfta */ 5093 if (!(reg & E1000_VLVF_VLANID_ENABLE)) { 5094 /* add VID to filter table */ 5095 igb_vfta_set(hw, vid, true); 5096 reg |= E1000_VLVF_VLANID_ENABLE; 5097 } 5098 reg &= ~E1000_VLVF_VLANID_MASK; 5099 reg |= vid; 5100 wr32(E1000_VLVF(i), reg); 5101 5102 /* do not modify RLPML for PF devices */ 5103 if (vf >= adapter->vfs_allocated_count) 5104 return 0; 5105 5106 if (!adapter->vf_data[vf].vlans_enabled) { 5107 u32 size; 5108 reg = rd32(E1000_VMOLR(vf)); 5109 size = reg & E1000_VMOLR_RLPML_MASK; 5110 size += 4; 5111 reg &= ~E1000_VMOLR_RLPML_MASK; 5112 reg |= size; 5113 wr32(E1000_VMOLR(vf), reg); 5114 } 5115 5116 adapter->vf_data[vf].vlans_enabled++; 5117 } 5118 } else { 5119 if (i < E1000_VLVF_ARRAY_SIZE) { 5120 /* remove vf from the pool */ 5121 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); 5122 /* if pool is empty then remove entry from vfta */ 5123 if (!(reg & E1000_VLVF_POOLSEL_MASK)) { 5124 reg = 0; 5125 igb_vfta_set(hw, vid, false); 5126 } 5127 wr32(E1000_VLVF(i), reg); 5128 5129 /* do not modify RLPML for PF devices */ 5130 if (vf >= adapter->vfs_allocated_count) 5131 return 0; 5132 5133 adapter->vf_data[vf].vlans_enabled--; 5134 if (!adapter->vf_data[vf].vlans_enabled) { 5135 u32 size; 5136 reg = rd32(E1000_VMOLR(vf)); 5137 size = reg & E1000_VMOLR_RLPML_MASK; 5138 size -= 4; 5139 reg &= ~E1000_VMOLR_RLPML_MASK; 5140 reg |= size; 5141 wr32(E1000_VMOLR(vf), reg); 5142 } 5143 } 5144 } 5145 return 0; 5146} 5147 5148static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) 5149{ 5150 struct e1000_hw *hw = &adapter->hw; 5151 5152 if (vid) 5153 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT)); 5154 else 5155 wr32(E1000_VMVIR(vf), 0); 5156} 5157 5158static int igb_ndo_set_vf_vlan(struct net_device *netdev, 5159 int vf, u16 vlan, u8 qos) 5160{ 5161 int err = 0; 5162 struct igb_adapter *adapter = netdev_priv(netdev); 5163 5164
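/* Entry point for the IFLA_VF_VLAN netlink attribute; reached, for
 * example, with an iproute2 command along the lines of (illustrative,
 * assuming the PF is eth0):
 * ip link set eth0 vf 0 vlan 100 qos 3
 * and cleared again by setting vlan 0. */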
if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) 5165 return -EINVAL; 5166 if (vlan || qos) { 5167 err = igb_vlvf_set(adapter, vlan, !!vlan, vf); 5168 if (err) 5169 goto out; 5170 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); 5171 igb_set_vmolr(adapter, vf, !vlan); 5172 adapter->vf_data[vf].pf_vlan = vlan; 5173 adapter->vf_data[vf].pf_qos = qos; 5174 dev_info(&adapter->pdev->dev, 5175 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); 5176 if (test_bit(__IGB_DOWN, &adapter->state)) { 5177 dev_warn(&adapter->pdev->dev, 5178 "The VF VLAN has been set," 5179 " but the PF device is not up.\n"); 5180 dev_warn(&adapter->pdev->dev, 5181 "Bring the PF device up before" 5182 " attempting to use the VF device.\n"); 5183 } 5184 } else { 5185 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, 5186 false, vf); 5187 igb_set_vmvir(adapter, vlan, vf); 5188 igb_set_vmolr(adapter, vf, true); 5189 adapter->vf_data[vf].pf_vlan = 0; 5190 adapter->vf_data[vf].pf_qos = 0; 5191 } 5192out: 5193 return err; 5194} 5195 5196static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) 5197{ 5198 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; 5199 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); 5200 5201 return igb_vlvf_set(adapter, vid, add, vf); 5202} 5203 5204static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) 5205{ 5206 /* clear flags - except flag that indicates PF has set the MAC */ 5207 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; 5208 adapter->vf_data[vf].last_nack = jiffies; 5209 5210 /* reset offloads to defaults */ 5211 igb_set_vmolr(adapter, vf, true); 5212 5213 /* reset vlans for device */ 5214 igb_clear_vf_vfta(adapter, vf); 5215 if (adapter->vf_data[vf].pf_vlan) 5216 igb_ndo_set_vf_vlan(adapter->netdev, vf, 5217 adapter->vf_data[vf].pf_vlan, 5218 adapter->vf_data[vf].pf_qos); 5219 else 5220 igb_clear_vf_vfta(adapter, vf); 5221 5222 /* reset multicast table array for vf */ 5223 adapter->vf_data[vf].num_vf_mc_hashes = 0; 5224 5225 /* Flush and reset the mta with the new values */ 5226 igb_set_rx_mode(adapter->netdev); 5227} 5228 5229static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) 5230{ 5231 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; 5232 5233 /* generate a new mac address as we were hotplug removed/added */ 5234 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) 5235 random_ether_addr(vf_mac); 5236 5237 /* process remaining reset events */ 5238 igb_vf_reset(adapter, vf); 5239} 5240 5241static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) 5242{ 5243 struct e1000_hw *hw = &adapter->hw; 5244 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; 5245 int rar_entry = hw->mac.rar_entry_count - (vf + 1); 5246 u32 reg, msgbuf[3]; 5247 u8 *addr = (u8 *)(&msgbuf[1]); 5248 5249 /* process all the same items cleared in a function level reset */ 5250 igb_vf_reset(adapter, vf); 5251 5252 /* set vf mac address */ 5253 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf); 5254 5255 /* enable transmit and receive for vf */ 5256 reg = rd32(E1000_VFTE); 5257 wr32(E1000_VFTE, reg | (1 << vf)); 5258 reg = rd32(E1000_VFRE); 5259 wr32(E1000_VFRE, reg | (1 << vf)); 5260 5261 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; 5262 5263 /* reply to reset with ack and vf mac address */ 5264 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; 5265 memcpy(addr, vf_mac, 6); 5266 igb_write_mbx(hw, msgbuf, 3, vf); 5267} 5268 5269static int igb_set_vf_mac_addr(struct 
igb_adapter *adapter, u32 *msg, int vf) 5270{ 5271 /* 5272 * The VF MAC Address is stored in a packed array of bytes 5273 * starting at the second 32 bit word of the msg array 5274 */ 5275 unsigned char *addr = (char *)&msg[1]; 5276 int err = -1; 5277 5278 if (is_valid_ether_addr(addr)) 5279 err = igb_set_vf_mac(adapter, vf, addr); 5280 5281 return err; 5282} 5283 5284static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) 5285{ 5286 struct e1000_hw *hw = &adapter->hw; 5287 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5288 u32 msg = E1000_VT_MSGTYPE_NACK; 5289 5290 /* if device isn't clear to send it shouldn't be reading either */ 5291 if (!(vf_data->flags & IGB_VF_FLAG_CTS) && 5292 time_after(jiffies, vf_data->last_nack + (2 * HZ))) { 5293 igb_write_mbx(hw, &msg, 1, vf); 5294 vf_data->last_nack = jiffies; 5295 } 5296} 5297 5298static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) 5299{ 5300 struct pci_dev *pdev = adapter->pdev; 5301 u32 msgbuf[E1000_VFMAILBOX_SIZE]; 5302 struct e1000_hw *hw = &adapter->hw; 5303 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5304 s32 retval; 5305 5306 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); 5307 5308 if (retval) { 5309 /* if receive failed revoke VF CTS stats and restart init */ 5310 dev_err(&pdev->dev, "Error receiving message from VF\n"); 5311 vf_data->flags &= ~IGB_VF_FLAG_CTS; 5312 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 5313 return; 5314 goto out; 5315 } 5316 5317 /* this is a message we already processed, do nothing */ 5318 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) 5319 return; 5320 5321 /* 5322 * until the vf completes a reset it should not be 5323 * allowed to start any configuration. 5324 */ 5325 5326 if (msgbuf[0] == E1000_VF_RESET) { 5327 igb_vf_reset_msg(adapter, vf); 5328 return; 5329 } 5330 5331 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { 5332 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 5333 return; 5334 retval = -1; 5335 goto out; 5336 } 5337 5338 switch ((msgbuf[0] & 0xFFFF)) { 5339 case E1000_VF_SET_MAC_ADDR: 5340 retval = -EINVAL; 5341 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) 5342 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); 5343 else 5344 dev_warn(&pdev->dev, 5345 "VF %d attempted to override administratively " 5346 "set MAC address\nReload the VF driver to " 5347 "resume operations\n", vf); 5348 break; 5349 case E1000_VF_SET_PROMISC: 5350 retval = igb_set_vf_promisc(adapter, msgbuf, vf); 5351 break; 5352 case E1000_VF_SET_MULTICAST: 5353 retval = igb_set_vf_multicasts(adapter, msgbuf, vf); 5354 break; 5355 case E1000_VF_SET_LPE: 5356 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); 5357 break; 5358 case E1000_VF_SET_VLAN: 5359 retval = -1; 5360 if (vf_data->pf_vlan) 5361 dev_warn(&pdev->dev, 5362 "VF %d attempted to override administratively " 5363 "set VLAN tag\nReload the VF driver to " 5364 "resume operations\n", vf); 5365 else 5366 retval = igb_set_vf_vlan(adapter, msgbuf, vf); 5367 break; 5368 default: 5369 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); 5370 retval = -1; 5371 break; 5372 } 5373 5374 msgbuf[0] |= E1000_VT_MSGTYPE_CTS; 5375out: 5376 /* notify the VF of the results of what it sent us */ 5377 if (retval) 5378 msgbuf[0] |= E1000_VT_MSGTYPE_NACK; 5379 else 5380 msgbuf[0] |= E1000_VT_MSGTYPE_ACK; 5381 5382 igb_write_mbx(hw, msgbuf, 1, vf); 5383} 5384 5385static void igb_msg_task(struct igb_adapter *adapter) 5386{ 5387 struct e1000_hw *hw = &adapter->hw; 5388 u32 vf; 5389 5390 
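/* called from igb_msix_other() when E1000_ICR_VMMB indicates mailbox
 * activity: walk every VF mailbox and service the three event types
 * (reset request, pending message, ack) the hardware can latch */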
for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { 5391 /* process any reset requests */ 5392 if (!igb_check_for_rst(hw, vf)) 5393 igb_vf_reset_event(adapter, vf); 5394 5395 /* process any messages pending */ 5396 if (!igb_check_for_msg(hw, vf)) 5397 igb_rcv_msg_from_vf(adapter, vf); 5398 5399 /* process any acks */ 5400 if (!igb_check_for_ack(hw, vf)) 5401 igb_rcv_ack_from_vf(adapter, vf); 5402 } 5403} 5404 5405/** 5406 * igb_set_uta - Set unicast filter table address 5407 * @adapter: board private structure 5408 * 5409 * The unicast table address is a register array of 32-bit registers. 5410 * The table is meant to be used in a way similar to how the MTA is used 5411 * however due to certain limitations in the hardware it is necessary to 5412 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous 5413 * enable bit to allow vlan tag stripping when promiscuous mode is enabled 5414 **/ 5415static void igb_set_uta(struct igb_adapter *adapter) 5416{ 5417 struct e1000_hw *hw = &adapter->hw; 5418 int i; 5419 5420 /* The UTA table only exists on 82576 hardware and newer */ 5421 if (hw->mac.type < e1000_82576) 5422 return; 5423 5424 /* we only need to do this if VMDq is enabled */ 5425 if (!adapter->vfs_allocated_count) 5426 return; 5427 5428 for (i = 0; i < hw->mac.uta_reg_count; i++) 5429 array_wr32(E1000_UTA, i, ~0); 5430} 5431 5432/** 5433 * igb_intr_msi - Interrupt Handler 5434 * @irq: interrupt number 5435 * @data: pointer to a network interface device structure 5436 **/ 5437static irqreturn_t igb_intr_msi(int irq, void *data) 5438{ 5439 struct igb_adapter *adapter = data; 5440 struct igb_q_vector *q_vector = adapter->q_vector[0]; 5441 struct e1000_hw *hw = &adapter->hw; 5442 /* read ICR disables interrupts using IAM */ 5443 u32 icr = rd32(E1000_ICR); 5444 5445 igb_write_itr(q_vector); 5446 5447 if (icr & E1000_ICR_DRSTA) 5448 schedule_work(&adapter->reset_task); 5449 5450 if (icr & E1000_ICR_DOUTSYNC) { 5451 /* HW is reporting DMA is out of sync */ 5452 adapter->stats.doosync++; 5453 } 5454 5455 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 5456 hw->mac.get_link_status = 1; 5457 if (!test_bit(__IGB_DOWN, &adapter->state)) 5458 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5459 } 5460 5461 napi_schedule(&q_vector->napi); 5462 5463 return IRQ_HANDLED; 5464} 5465 5466/** 5467 * igb_intr - Legacy Interrupt Handler 5468 * @irq: interrupt number 5469 * @data: pointer to a network interface device structure 5470 **/ 5471static irqreturn_t igb_intr(int irq, void *data) 5472{ 5473 struct igb_adapter *adapter = data; 5474 struct igb_q_vector *q_vector = adapter->q_vector[0]; 5475 struct e1000_hw *hw = &adapter->hw; 5476 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No 5477 * need for the IMC write */ 5478 u32 icr = rd32(E1000_ICR); 5479 5480 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 5481 * not set, then the adapter didn't send an interrupt */ 5482 if (!(icr & E1000_ICR_INT_ASSERTED)) 5483 return IRQ_NONE; 5484 5485 igb_write_itr(q_vector); 5486 5487 if (icr & E1000_ICR_DRSTA) 5488 schedule_work(&adapter->reset_task); 5489 5490 if (icr & E1000_ICR_DOUTSYNC) { 5491 /* HW is reporting DMA is out of sync */ 5492 adapter->stats.doosync++; 5493 } 5494 5495 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 5496 hw->mac.get_link_status = 1; 5497 /* guard against interrupt when we're going down */ 5498 if (!test_bit(__IGB_DOWN, &adapter->state)) 5499 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5500 } 5501 5502 napi_schedule(&q_vector->napi); 5503 5504 return IRQ_HANDLED; 5505} 5506 5507void igb_ring_irq_enable(struct igb_q_vector *q_vector) 5508{ 5509 struct igb_adapter *adapter = q_vector->adapter; 5510 struct e1000_hw *hw = &adapter->hw; 5511 5512 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 5513 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 5514 if ((adapter->num_q_vectors == 1) && !adapter->vf_data) 5515 igb_set_itr(q_vector); 5516 else 5517 igb_update_ring_itr(q_vector); 5518 } 5519 5520 if (!test_bit(__IGB_DOWN, &adapter->state)) { 5521 if (adapter->msix_entries) 5522 wr32(E1000_EIMS, q_vector->eims_value); 5523 else 5524 igb_irq_enable(adapter); 5525 } 5526} 5527 5528/** 5529 * igb_poll - NAPI Rx polling callback 5530 * @napi: napi polling structure 5531 * @budget: count of how many packets we should handle 5532 **/ 5533static int igb_poll(struct napi_struct *napi, int budget) 5534{ 5535 struct igb_q_vector *q_vector = container_of(napi, 5536 struct igb_q_vector, 5537 napi); 5538 bool clean_complete = true; 5539 5540#ifdef CONFIG_IGB_DCA 5541 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) 5542 igb_update_dca(q_vector); 5543#endif 5544 if (q_vector->tx.ring) 5545 clean_complete = igb_clean_tx_irq(q_vector); 5546 5547 if (q_vector->rx.ring) 5548 clean_complete &= igb_clean_rx_irq(q_vector, budget); 5549 5550 /* If all work not completed, return budget and keep polling */ 5551 if (!clean_complete) 5552 return budget; 5553 5554 /* If not enough Rx work done, exit the polling mode */ 5555 napi_complete(napi); 5556 igb_ring_irq_enable(q_vector); 5557 5558 return 0; 5559} 5560 5561/** 5562 * igb_systim_to_hwtstamp - convert system time value to hw timestamp 5563 * @adapter: board private structure 5564 * @shhwtstamps: timestamp structure to update 5565 * @regval: unsigned 64bit system time value. 5566 * 5567 * We need to convert the system time value stored in the RX/TXSTMP registers 5568 * into a hwtstamp which can be used by the upper level timestamping functions 5569 */ 5570static void igb_systim_to_hwtstamp(struct igb_adapter *adapter, 5571 struct skb_shared_hwtstamps *shhwtstamps, 5572 u64 regval) 5573{ 5574 u64 ns; 5575 5576 /* 5577 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to 5578 * 24 to match clock shift we setup earlier. 
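* The shift lines the raw value up with the cyclecounter shift chosen
* for this MAC type in igb_init_hw_timer(), so timecounter_cyc2time()
* below can treat every part uniformly.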
5579 */ 5580 if (adapter->hw.mac.type >= e1000_82580) 5581 regval <<= IGB_82580_TSYNC_SHIFT; 5582 5583 ns = timecounter_cyc2time(&adapter->clock, regval); 5584 timecompare_update(&adapter->compare, ns); 5585 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); 5586 shhwtstamps->hwtstamp = ns_to_ktime(ns); 5587 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns); 5588} 5589 5590/** 5591 * igb_tx_hwtstamp - utility function which checks for TX time stamp 5592 * @q_vector: pointer to q_vector containing needed info 5593 * @buffer_info: pointer to igb_tx_buffer structure 5594 * 5595 * If we were asked to do hardware stamping and such a time stamp is 5596 * available, then it must have been for this skb here because we 5597 * allow only one such packet into the queue. 5598 */ 5599static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, 5600 struct igb_tx_buffer *buffer_info) 5601{ 5602 struct igb_adapter *adapter = q_vector->adapter; 5603 struct e1000_hw *hw = &adapter->hw; 5604 struct skb_shared_hwtstamps shhwtstamps; 5605 u64 regval; 5606 5607 /* if skb does not support hw timestamp or TX stamp not valid exit */ 5608 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || 5609 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) 5610 return; 5611 5612 regval = rd32(E1000_TXSTMPL); 5613 regval |= (u64)rd32(E1000_TXSTMPH) << 32; 5614 5615 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval); 5616 skb_tstamp_tx(buffer_info->skb, &shhwtstamps); 5617} 5618 5619/** 5620 * igb_clean_tx_irq - Reclaim resources after transmit completes 5621 * @q_vector: pointer to q_vector containing needed info 5622 * returns true if ring is completely cleaned 5623 **/ 5624static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) 5625{ 5626 struct igb_adapter *adapter = q_vector->adapter; 5627 struct igb_ring *tx_ring = q_vector->tx.ring; 5628 struct igb_tx_buffer *tx_buffer; 5629 union e1000_adv_tx_desc *tx_desc, *eop_desc; 5630 unsigned int total_bytes = 0, total_packets = 0; 5631 unsigned int budget = q_vector->tx.work_limit; 5632 unsigned int i = tx_ring->next_to_clean; 5633 5634 if (test_bit(__IGB_DOWN, &adapter->state)) 5635 return true; 5636 5637 tx_buffer = &tx_ring->tx_buffer_info[i]; 5638 tx_desc = IGB_TX_DESC(tx_ring, i); 5639 i -= tx_ring->count; 5640 5641 for (; budget; budget--) { 5642 eop_desc = tx_buffer->next_to_watch; 5643 5644 /* prevent any other reads prior to eop_desc */ 5645 rmb(); 5646 5647 /* if next_to_watch is not set then there is no work pending */ 5648 if (!eop_desc) 5649 break; 5650 5651 /* if DD is not set pending work has not been completed */ 5652 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) 5653 break; 5654 5655 /* clear next_to_watch to prevent false hangs */ 5656 tx_buffer->next_to_watch = NULL; 5657 5658 /* update the statistics for this packet */ 5659 total_bytes += tx_buffer->bytecount; 5660 total_packets += tx_buffer->gso_segs; 5661 5662 /* retrieve hardware timestamp */ 5663 igb_tx_hwtstamp(q_vector, tx_buffer); 5664 5665 /* free the skb */ 5666 dev_kfree_skb_any(tx_buffer->skb); 5667 tx_buffer->skb = NULL; 5668 5669 /* unmap skb header data */ 5670 dma_unmap_single(tx_ring->dev, 5671 tx_buffer->dma, 5672 tx_buffer->length, 5673 DMA_TO_DEVICE); 5674 5675 /* clear last DMA location and unmap remaining buffers */ 5676 while (tx_desc != eop_desc) { 5677 tx_buffer->dma = 0; 5678 5679 tx_buffer++; 5680 tx_desc++; 5681 i++; 5682 if (unlikely(!i)) { 5683 i -= tx_ring->count; 5684 tx_buffer = tx_ring->tx_buffer_info; 5685 tx_desc =
IGB_TX_DESC(tx_ring, 0); 5686 } 5687 5688 /* unmap any remaining paged data */ 5689 if (tx_buffer->dma) { 5690 dma_unmap_page(tx_ring->dev, 5691 tx_buffer->dma, 5692 tx_buffer->length, 5693 DMA_TO_DEVICE); 5694 } 5695 } 5696 5697 /* clear last DMA location */ 5698 tx_buffer->dma = 0; 5699 5700 /* move us one more past the eop_desc for start of next pkt */ 5701 tx_buffer++; 5702 tx_desc++; 5703 i++; 5704 if (unlikely(!i)) { 5705 i -= tx_ring->count; 5706 tx_buffer = tx_ring->tx_buffer_info; 5707 tx_desc = IGB_TX_DESC(tx_ring, 0); 5708 } 5709 } 5710 5711 i += tx_ring->count; 5712 tx_ring->next_to_clean = i; 5713 u64_stats_update_begin(&tx_ring->tx_syncp); 5714 tx_ring->tx_stats.bytes += total_bytes; 5715 tx_ring->tx_stats.packets += total_packets; 5716 u64_stats_update_end(&tx_ring->tx_syncp); 5717 q_vector->tx.total_bytes += total_bytes; 5718 q_vector->tx.total_packets += total_packets; 5719 5720 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 5721 struct e1000_hw *hw = &adapter->hw; 5722 5723 eop_desc = tx_buffer->next_to_watch; 5724 5725 /* Detect a transmit hang in hardware, this serializes the 5726 * check with the clearing of time_stamp and movement of i */ 5727 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5728 if (eop_desc && 5729 time_after(jiffies, tx_buffer->time_stamp + 5730 (adapter->tx_timeout_factor * HZ)) && 5731 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { 5732 5733 /* detected Tx unit hang */ 5734 dev_err(tx_ring->dev, 5735 "Detected Tx Unit Hang\n" 5736 " Tx Queue <%d>\n" 5737 " TDH <%x>\n" 5738 " TDT <%x>\n" 5739 " next_to_use <%x>\n" 5740 " next_to_clean <%x>\n" 5741 "buffer_info[next_to_clean]\n" 5742 " time_stamp <%lx>\n" 5743 " next_to_watch <%p>\n" 5744 " jiffies <%lx>\n" 5745 " desc.status <%x>\n", 5746 tx_ring->queue_index, 5747 rd32(E1000_TDH(tx_ring->reg_idx)), 5748 readl(tx_ring->tail), 5749 tx_ring->next_to_use, 5750 tx_ring->next_to_clean, 5751 tx_buffer->time_stamp, 5752 eop_desc, 5753 jiffies, 5754 eop_desc->wb.status); 5755 netif_stop_subqueue(tx_ring->netdev, 5756 tx_ring->queue_index); 5757 5758 /* we are about to reset, no point in enabling stuff */ 5759 return true; 5760 } 5761 } 5762 5763 if (unlikely(total_packets && 5764 netif_carrier_ok(tx_ring->netdev) && 5765 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { 5766 /* Make sure that anybody stopping the queue after this 5767 * sees the new next_to_clean. 
5768 */ 5769 smp_mb(); 5770 if (__netif_subqueue_stopped(tx_ring->netdev, 5771 tx_ring->queue_index) && 5772 !(test_bit(__IGB_DOWN, &adapter->state))) { 5773 netif_wake_subqueue(tx_ring->netdev, 5774 tx_ring->queue_index); 5775 5776 u64_stats_update_begin(&tx_ring->tx_syncp); 5777 tx_ring->tx_stats.restart_queue++; 5778 u64_stats_update_end(&tx_ring->tx_syncp); 5779 } 5780 } 5781 5782 return !!budget; 5783} 5784 5785static inline void igb_rx_checksum(struct igb_ring *ring, 5786 union e1000_adv_rx_desc *rx_desc, 5787 struct sk_buff *skb) 5788{ 5789 skb_checksum_none_assert(skb); 5790 5791 /* Ignore Checksum bit is set */ 5792 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) 5793 return; 5794 5795 /* Rx checksum disabled via ethtool */ 5796 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 5797 return; 5798 5799 /* TCP/UDP checksum error bit is set */ 5800 if (igb_test_staterr(rx_desc, 5801 E1000_RXDEXT_STATERR_TCPE | 5802 E1000_RXDEXT_STATERR_IPE)) { 5803 /* 5804 * work around errata with sctp packets where the TCPE aka 5805 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 5806 * packets, (aka let the stack check the crc32c) 5807 */ 5808 if (!((skb->len == 60) && 5809 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 5810 u64_stats_update_begin(&ring->rx_syncp); 5811 ring->rx_stats.csum_err++; 5812 u64_stats_update_end(&ring->rx_syncp); 5813 } 5814 /* let the stack verify checksum errors */ 5815 return; 5816 } 5817 /* It must be a TCP or UDP packet with a valid checksum */ 5818 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | 5819 E1000_RXD_STAT_UDPCS)) 5820 skb->ip_summed = CHECKSUM_UNNECESSARY; 5821 5822 dev_dbg(ring->dev, "cksum success: bits %08X\n", 5823 le32_to_cpu(rx_desc->wb.upper.status_error)); 5824} 5825 5826static inline void igb_rx_hash(struct igb_ring *ring, 5827 union e1000_adv_rx_desc *rx_desc, 5828 struct sk_buff *skb) 5829{ 5830 if (ring->netdev->features & NETIF_F_RXHASH) 5831 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); 5832} 5833 5834static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, 5835 union e1000_adv_rx_desc *rx_desc, 5836 struct sk_buff *skb) 5837{ 5838 struct igb_adapter *adapter = q_vector->adapter; 5839 struct e1000_hw *hw = &adapter->hw; 5840 u64 regval; 5841 5842 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | 5843 E1000_RXDADV_STAT_TS)) 5844 return; 5845 5846 /* 5847 * If this bit is set, then the RX registers contain the time stamp. No 5848 * other packet will be time stamped until we read these registers, so 5849 * read the registers to make them available again. Because only one 5850 * packet can be time stamped at a time, we know that the register 5851 * values must belong to this one here and therefore we don't need to 5852 * compare any of the additional attributes stored for it. 5853 * 5854 * If nothing went wrong, then it should have a shared tx_flags that we 5855 * can turn into a skb_shared_hwtstamps. 
5856	 */
5857	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5858		u32 *stamp = (u32 *)skb->data;
5859		regval = le32_to_cpu(*(stamp + 2));
5860		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5861		skb_pull(skb, IGB_TS_HDR_LEN);
5862	} else {
5863		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5864			return;
5865
5866		regval = rd32(E1000_RXSTMPL);
5867		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5868	}
5869
5870	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5871}
5872
5873static void igb_rx_vlan(struct igb_ring *ring,
5874			union e1000_adv_rx_desc *rx_desc,
5875			struct sk_buff *skb)
5876{
5877	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5878		u16 vid;
5879		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5880		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5881			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5882		else
5883			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5884
5885		__vlan_hwaccel_put_tag(skb, vid);
5886	}
5887}
5888
5889static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5890{
5891	/* HW will not DMA in data larger than the given buffer, even if it
5892	 * parses the (NFS, of course) header to be larger.  In that case, it
5893	 * fills the header buffer and spills the rest into the page.
5894	 */
5895	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5896		    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5897	if (hlen > IGB_RX_HDR_LEN)
5898		hlen = IGB_RX_HDR_LEN;
5899	return hlen;
5900}
5901
5902static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5903{
5904	struct igb_ring *rx_ring = q_vector->rx.ring;
5905	union e1000_adv_rx_desc *rx_desc;
5906	const int current_node = numa_node_id();
5907	unsigned int total_bytes = 0, total_packets = 0;
5908	u16 cleaned_count = igb_desc_unused(rx_ring);
5909	u16 i = rx_ring->next_to_clean;
5910
5911	rx_desc = IGB_RX_DESC(rx_ring, i);
5912
5913	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5914		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5915		struct sk_buff *skb = buffer_info->skb;
5916		union e1000_adv_rx_desc *next_rxd;
5917
5918		buffer_info->skb = NULL;
5919		prefetch(skb->data);
5920
5921		i++;
5922		if (i == rx_ring->count)
5923			i = 0;
5924
5925		next_rxd = IGB_RX_DESC(rx_ring, i);
5926		prefetch(next_rxd);
5927
5928		/*
5929		 * This memory barrier is needed to keep us from reading
5930		 * any other fields out of the rx_desc until we know the
5931		 * RXD_STAT_DD bit is set
5932		 */
5933		rmb();
5934
5935		if (!skb_is_nonlinear(skb)) {
5936			__skb_put(skb, igb_get_hlen(rx_desc));
5937			dma_unmap_single(rx_ring->dev, buffer_info->dma,
5938					 IGB_RX_HDR_LEN,
5939					 DMA_FROM_DEVICE);
5940			buffer_info->dma = 0;
5941		}
5942
5943		if (rx_desc->wb.upper.length) {
5944			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5945
5946			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5947					   buffer_info->page,
5948					   buffer_info->page_offset,
5949					   length);
5950
5951			skb->len += length;
5952			skb->data_len += length;
5953			skb->truesize += PAGE_SIZE / 2;
5954
5955			if ((page_count(buffer_info->page) != 1) ||
5956			    (page_to_nid(buffer_info->page) != current_node))
5957				buffer_info->page = NULL;
5958			else
5959				get_page(buffer_info->page);
5960
5961			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5962				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5963			buffer_info->page_dma = 0;
5964		}
5965
5966		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
5967			struct igb_rx_buffer *next_buffer;
5968			next_buffer = &rx_ring->rx_buffer_info[i];
5969			buffer_info->skb = next_buffer->skb;
5970			buffer_info->dma = next_buffer->dma;
5971			next_buffer->skb = skb;
5972			next_buffer->dma = 0;
5973			goto next_desc;
5974		}
5975
5976		if (igb_test_staterr(rx_desc,
5977				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
5978			dev_kfree_skb_any(skb);
5979			goto next_desc;
5980		}
5981
5982		igb_rx_hwtstamp(q_vector, rx_desc, skb);
5983		igb_rx_hash(rx_ring, rx_desc, skb);
5984		igb_rx_checksum(rx_ring, rx_desc, skb);
5985		igb_rx_vlan(rx_ring, rx_desc, skb);
5986
5987		total_bytes += skb->len;
5988		total_packets++;
5989
5990		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5991
5992		napi_gro_receive(&q_vector->napi, skb);
5993
5994		budget--;
5995next_desc:
5996		if (!budget)
5997			break;
5998
5999		cleaned_count++;
6000		/* return some buffers to hardware, one at a time is too slow */
6001		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6002			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6003			cleaned_count = 0;
6004		}
6005
6006		/* use prefetched values */
6007		rx_desc = next_rxd;
6008	}
6009
6010	rx_ring->next_to_clean = i;
6011	u64_stats_update_begin(&rx_ring->rx_syncp);
6012	rx_ring->rx_stats.packets += total_packets;
6013	rx_ring->rx_stats.bytes += total_bytes;
6014	u64_stats_update_end(&rx_ring->rx_syncp);
6015	q_vector->rx.total_packets += total_packets;
6016	q_vector->rx.total_bytes += total_bytes;
6017
6018	if (cleaned_count)
6019		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6020
6021	return !!budget;
6022}
6023
6024static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6025				 struct igb_rx_buffer *bi)
6026{
6027	struct sk_buff *skb = bi->skb;
6028	dma_addr_t dma = bi->dma;
6029
6030	if (dma)
6031		return true;
6032
6033	if (likely(!skb)) {
6034		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6035						IGB_RX_HDR_LEN);
6036		bi->skb = skb;
6037		if (!skb) {
6038			rx_ring->rx_stats.alloc_failed++;
6039			return false;
6040		}
6041
6042		/* initialize skb for ring */
6043		skb_record_rx_queue(skb, rx_ring->queue_index);
6044	}
6045
6046	dma = dma_map_single(rx_ring->dev, skb->data,
6047			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6048
6049	if (dma_mapping_error(rx_ring->dev, dma)) {
6050		rx_ring->rx_stats.alloc_failed++;
6051		return false;
6052	}
6053
6054	bi->dma = dma;
6055	return true;
6056}
6057
6058static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6059				  struct igb_rx_buffer *bi)
6060{
6061	struct page *page = bi->page;
6062	dma_addr_t page_dma = bi->page_dma;
6063	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6064
6065	if (page_dma)
6066		return true;
6067
6068	if (!page) {
6069		page = netdev_alloc_page(rx_ring->netdev);
6070		bi->page = page;
6071		if (unlikely(!page)) {
6072			rx_ring->rx_stats.alloc_failed++;
6073			return false;
6074		}
6075	}
6076
6077	page_dma = dma_map_page(rx_ring->dev, page,
6078				page_offset, PAGE_SIZE / 2,
6079				DMA_FROM_DEVICE);
6080
6081	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6082		rx_ring->rx_stats.alloc_failed++;
6083		return false;
6084	}
6085
6086	bi->page_dma = page_dma;
6087	bi->page_offset = page_offset;
6088	return true;
6089}
6090
6091/**
6092 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6093 * @rx_ring: ring to place buffers on; @cleaned_count: number to replace
6094 **/
6095void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6096{
6097	union e1000_adv_rx_desc *rx_desc;
6098	struct igb_rx_buffer *bi;
6099	u16 i = rx_ring->next_to_use;
6100
6101	rx_desc = IGB_RX_DESC(rx_ring, i);
6102	bi = &rx_ring->rx_buffer_info[i];
6103	i -= rx_ring->count;
6104
6105	while (cleaned_count--) {
6106		if (!igb_alloc_mapped_skb(rx_ring, bi))
6107			break;
6108
6109		/* Refresh the desc even if buffer_addrs didn't change
6110		 * because each write-back erases this info. */
6111		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6112
6113		if (!igb_alloc_mapped_page(rx_ring, bi))
6114			break;
6115
6116		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6117
6118		rx_desc++;
6119		bi++;
6120		i++;
6121		if (unlikely(!i)) {
6122			rx_desc = IGB_RX_DESC(rx_ring, 0);
6123			bi = rx_ring->rx_buffer_info;
6124			i -= rx_ring->count;
6125		}
6126
6127		/* clear the hdr_addr for the next_to_use descriptor */
6128		rx_desc->read.hdr_addr = 0;
6129	}
6130
6131	i += rx_ring->count;
6132
6133	if (rx_ring->next_to_use != i) {
6134		rx_ring->next_to_use = i;
6135
6136		/* Force memory writes to complete before letting h/w
6137		 * know there are new descriptors to fetch.  (Only
6138		 * applicable for weak-ordered memory model archs,
6139		 * such as IA-64). */
6140		wmb();
6141		writel(i, rx_ring->tail);
6142	}
6143}
6144
6145/**
6146 * igb_mii_ioctl - handle SIOCxMIIxxx ioctls for PHY register access
6147 * @netdev: network interface device structure
6148 * @ifr: interface request structure carrying the MII data
6149 * @cmd: ioctl command
6150 **/
6151static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6152{
6153	struct igb_adapter *adapter = netdev_priv(netdev);
6154	struct mii_ioctl_data *data = if_mii(ifr);
6155
6156	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6157		return -EOPNOTSUPP;
6158
6159	switch (cmd) {
6160	case SIOCGMIIPHY:
6161		data->phy_id = adapter->hw.phy.addr;
6162		break;
6163	case SIOCGMIIREG:
6164		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6165				     &data->val_out))
6166			return -EIO;
6167		break;
6168	case SIOCSMIIREG:
6169	default:
6170		return -EOPNOTSUPP;
6171	}
6172	return 0;
6173}
6174
6175/**
6176 * igb_hwtstamp_ioctl - control hardware time stamping
6177 * @netdev: network interface device structure
6178 * @ifr: interface request carrying a struct hwtstamp_config
6179 * @cmd: ioctl command (SIOCSHWTSTAMP)
6180 *
6181 * Outgoing time stamping can be enabled and disabled. Play nice and
6182 * disable it when requested, although it shouldn't cause any overhead
6183 * when no packet needs it. At most one packet in the queue may be
6184 * marked for time stamping, otherwise it would be impossible to tell
6185 * for sure to which packet the hardware time stamp belongs.
6186 *
6187 * Incoming time stamping has to be configured via the hardware
6188 * filters. Not all combinations are supported, in particular event
6189 * type has to be specified. Matching the kind of event packet is
6190 * not supported, with the exception of "all V2 events regardless of
6191 * level 2 or 4".
6192 * 6193 **/ 6194static int igb_hwtstamp_ioctl(struct net_device *netdev, 6195 struct ifreq *ifr, int cmd) 6196{ 6197 struct igb_adapter *adapter = netdev_priv(netdev); 6198 struct e1000_hw *hw = &adapter->hw; 6199 struct hwtstamp_config config; 6200 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; 6201 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; 6202 u32 tsync_rx_cfg = 0; 6203 bool is_l4 = false; 6204 bool is_l2 = false; 6205 u32 regval; 6206 6207 if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) 6208 return -EFAULT; 6209 6210 /* reserved for future extensions */ 6211 if (config.flags) 6212 return -EINVAL; 6213 6214 switch (config.tx_type) { 6215 case HWTSTAMP_TX_OFF: 6216 tsync_tx_ctl = 0; 6217 case HWTSTAMP_TX_ON: 6218 break; 6219 default: 6220 return -ERANGE; 6221 } 6222 6223 switch (config.rx_filter) { 6224 case HWTSTAMP_FILTER_NONE: 6225 tsync_rx_ctl = 0; 6226 break; 6227 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 6228 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 6229 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 6230 case HWTSTAMP_FILTER_ALL: 6231 /* 6232 * register TSYNCRXCFG must be set, therefore it is not 6233 * possible to time stamp both Sync and Delay_Req messages 6234 * => fall back to time stamping all packets 6235 */ 6236 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; 6237 config.rx_filter = HWTSTAMP_FILTER_ALL; 6238 break; 6239 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 6240 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; 6241 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; 6242 is_l4 = true; 6243 break; 6244 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 6245 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; 6246 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; 6247 is_l4 = true; 6248 break; 6249 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 6250 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 6251 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; 6252 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; 6253 is_l2 = true; 6254 is_l4 = true; 6255 config.rx_filter = HWTSTAMP_FILTER_SOME; 6256 break; 6257 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 6258 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 6259 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; 6260 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; 6261 is_l2 = true; 6262 is_l4 = true; 6263 config.rx_filter = HWTSTAMP_FILTER_SOME; 6264 break; 6265 case HWTSTAMP_FILTER_PTP_V2_EVENT: 6266 case HWTSTAMP_FILTER_PTP_V2_SYNC: 6267 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 6268 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; 6269 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; 6270 is_l2 = true; 6271 is_l4 = true; 6272 break; 6273 default: 6274 return -ERANGE; 6275 } 6276 6277 if (hw->mac.type == e1000_82575) { 6278 if (tsync_rx_ctl | tsync_tx_ctl) 6279 return -EINVAL; 6280 return 0; 6281 } 6282 6283 /* 6284 * Per-packet timestamping only works if all packets are 6285 * timestamped, so enable timestamping in all packets as 6286 * long as one rx filter was configured. 
6287	 */
6288	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6289		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6290		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6291	}
6292
6293	/* enable/disable TX */
6294	regval = rd32(E1000_TSYNCTXCTL);
6295	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6296	regval |= tsync_tx_ctl;
6297	wr32(E1000_TSYNCTXCTL, regval);
6298
6299	/* enable/disable RX */
6300	regval = rd32(E1000_TSYNCRXCTL);
6301	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6302	regval |= tsync_rx_ctl;
6303	wr32(E1000_TSYNCRXCTL, regval);
6304
6305	/* define which PTP packets are time stamped */
6306	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6307
6308	/* define ethertype filter for timestamped packets */
6309	if (is_l2)
6310		wr32(E1000_ETQF(3),
6311		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6312		      E1000_ETQF_1588 | /* enable timestamping */
6313		      ETH_P_1588));     /* 1588 eth protocol type */
6314	else
6315		wr32(E1000_ETQF(3), 0);
6316
6317#define PTP_PORT 319
6318	/* L4 Queue Filter[3]: filter by destination port and protocol */
6319	if (is_l4) {
6320		u32 ftqf = (IPPROTO_UDP /* UDP */
6321			| E1000_FTQF_VF_BP /* VF not compared */
6322			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6323			| E1000_FTQF_MASK); /* mask all inputs */
6324		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6325
6326		wr32(E1000_IMIR(3), htons(PTP_PORT));
6327		wr32(E1000_IMIREXT(3),
6328		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6329		if (hw->mac.type == e1000_82576) {
6330			/* enable source port check */
6331			wr32(E1000_SPQF(3), htons(PTP_PORT));
6332			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6333		}
6334		wr32(E1000_FTQF(3), ftqf);
6335	} else {
6336		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6337	}
6338	wrfl();
6339
6340	adapter->hwtstamp_config = config;
6341
6342	/* clear TX/RX time stamp registers, just to be sure */
6343	regval = rd32(E1000_TXSTMPH);
6344	regval = rd32(E1000_RXSTMPH);
6345
6346	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6347		-EFAULT : 0;
6348}
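/*
 * [Editorial aside, not part of this revision.] igb_hwtstamp_ioctl() above is
 * reached from user space through SIOCSHWTSTAMP. A minimal sketch of a caller
 * follows; the interface name "eth0" and the chosen tx_type/rx_filter values
 * are assumptions for illustration only.
 */
#if 0	/* illustrative user-space code, excluded from the kernel build */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

static int enable_hw_timestamping(const char *ifname)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;
	int fd, err;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.flags = 0;				/* must be zero, see the check above */
	cfg.tx_type = HWTSTAMP_TX_ON;		/* time stamp outgoing packets */
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;

	/* the driver may widen the filter; the choice in effect comes back in cfg */
	err = ioctl(fd, SIOCSHWTSTAMP, &ifr);
	if (!err)
		printf("rx_filter in effect: %d\n", cfg.rx_filter);

	close(fd);
	return err;
}
#endif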
6349
6350/**
6351 * igb_ioctl - dispatch ioctl requests to the handlers above
6352 * @netdev: network interface device structure
6353 * @ifr: interface request structure
6354 * @cmd: ioctl command
6355 **/
6356static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6357{
6358	switch (cmd) {
6359	case SIOCGMIIPHY:
6360	case SIOCGMIIREG:
6361	case SIOCSMIIREG:
6362		return igb_mii_ioctl(netdev, ifr, cmd);
6363	case SIOCSHWTSTAMP:
6364		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6365	default:
6366		return -EOPNOTSUPP;
6367	}
6368}
6369
6370s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6371{
6372	struct igb_adapter *adapter = hw->back;
6373	u16 cap_offset;
6374
6375	cap_offset = adapter->pdev->pcie_cap;
6376	if (!cap_offset)
6377		return -E1000_ERR_CONFIG;
6378
6379	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6380
6381	return 0;
6382}
6383
6384s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6385{
6386	struct igb_adapter *adapter = hw->back;
6387	u16 cap_offset;
6388
6389	cap_offset = adapter->pdev->pcie_cap;
6390	if (!cap_offset)
6391		return -E1000_ERR_CONFIG;
6392
6393	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6394
6395	return 0;
6396}
6397
6398static void igb_vlan_mode(struct net_device *netdev, u32 features)
6399{
6400	struct igb_adapter *adapter = netdev_priv(netdev);
6401	struct e1000_hw *hw = &adapter->hw;
6402	u32 ctrl, rctl;
6403	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6404
6405	if (enable) {
6406		/* enable VLAN tag insert/strip */
6407		ctrl = rd32(E1000_CTRL);
6408		ctrl |= E1000_CTRL_VME;
6409		wr32(E1000_CTRL, ctrl);
6410
6411		/* Disable CFI check */
6412		rctl = rd32(E1000_RCTL);
6413		rctl &= ~E1000_RCTL_CFIEN;
6414		wr32(E1000_RCTL, rctl);
6415	} else {
6416		/* disable VLAN tag insert/strip */
6417		ctrl = rd32(E1000_CTRL);
6418		ctrl &= ~E1000_CTRL_VME;
6419		wr32(E1000_CTRL, ctrl);
6420	}
6421
6422	igb_rlpml_set(adapter);
6423}
6424
6425static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6426{
6427	struct igb_adapter *adapter = netdev_priv(netdev);
6428	struct e1000_hw *hw = &adapter->hw;
6429	int pf_id = adapter->vfs_allocated_count;
6430
6431	/* attempt to add filter to vlvf array */
6432	igb_vlvf_set(adapter, vid, true, pf_id);
6433
6434	/* add the filter since PF can receive vlans w/o entry in vlvf */
6435	igb_vfta_set(hw, vid, true);
6436
6437	set_bit(vid, adapter->active_vlans);
6438}
6439
6440static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6441{
6442	struct igb_adapter *adapter = netdev_priv(netdev);
6443	struct e1000_hw *hw = &adapter->hw;
6444	int pf_id = adapter->vfs_allocated_count;
6445	s32 err;
6446
6447	/* remove vlan from VLVF table array */
6448	err = igb_vlvf_set(adapter, vid, false, pf_id);
6449
6450	/* if vid was not present in VLVF just remove it from table */
6451	if (err)
6452		igb_vfta_set(hw, vid, false);
6453
6454	clear_bit(vid, adapter->active_vlans);
6455}
6456
6457static void igb_restore_vlan(struct igb_adapter *adapter)
6458{
6459	u16 vid;
6460
6461	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6462
6463	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6464		igb_vlan_rx_add_vid(adapter->netdev, vid);
6465}
6466
6467int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6468{
6469	struct pci_dev *pdev = adapter->pdev;
6470	struct e1000_mac_info *mac = &adapter->hw.mac;
6471
6472	mac->autoneg = 0;
6473
6474	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6475	 * for the switch() below to work */
6476	if ((spd & 1) || (dplx & ~1))
6477		goto err_inval;
6478
6479	/* Fiber NICs only allow 1000 Mbps full duplex */
6480	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6481	    (spd != SPEED_1000 ||
6482	     dplx != DUPLEX_FULL))
6483		goto err_inval;
6484
6485	switch (spd + dplx) {
6486	case SPEED_10 + DUPLEX_HALF:
6487		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6488		break;
6489	case SPEED_10 + DUPLEX_FULL:
6490		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6491		break;
6492	case SPEED_100 + DUPLEX_HALF:
6493		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6494		break;
6495	case SPEED_100 + DUPLEX_FULL:
6496		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6497		break;
6498	case SPEED_1000 + DUPLEX_FULL:
6499		mac->autoneg = 1;
6500		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6501		break;
6502	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6503	default:
6504		goto err_inval;
6505	}
6506	return 0;
6507
6508err_inval:
6509	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6510	return -EINVAL;
6511}
6512
6513static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6514{
6515	struct net_device *netdev = pci_get_drvdata(pdev);
6516	struct igb_adapter *adapter = netdev_priv(netdev);
6517	struct e1000_hw *hw = &adapter->hw;
6518	u32 ctrl, rctl, status;
6519	u32 wufc = adapter->wol;
6520#ifdef CONFIG_PM
6521	int retval = 0;
6522#endif
6523
6524	netif_device_detach(netdev);
6525
6526	if (netif_running(netdev))
6527		igb_close(netdev);
6528
6529	igb_clear_interrupt_scheme(adapter);
6530
6531#ifdef CONFIG_PM
6532	retval = pci_save_state(pdev);
6533	if (retval)
6534		return retval;
6535#endif
6536
6537	status = rd32(E1000_STATUS);
6538	if (status & E1000_STATUS_LU)
6539		wufc &= ~E1000_WUFC_LNKC;
6540
6541	if (wufc) {
6542		igb_setup_rctl(adapter);
6543		igb_set_rx_mode(netdev);
6544
6545		/* turn on all-multi mode if wake on multicast is enabled */
6546		if (wufc & E1000_WUFC_MC) {
6547			rctl = rd32(E1000_RCTL);
6548			rctl |= E1000_RCTL_MPE;
6549			wr32(E1000_RCTL, rctl);
6550		}
6551
6552		ctrl = rd32(E1000_CTRL);
6553		/* advertise wake from D3Cold */
6554		#define E1000_CTRL_ADVD3WUC 0x00100000
6555		/* phy power management enable */
6556		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6557		ctrl |= E1000_CTRL_ADVD3WUC;
6558		wr32(E1000_CTRL, ctrl);
6559
6560		/* Allow time for pending master requests to run */
6561		igb_disable_pcie_master(hw);
6562
6563		wr32(E1000_WUC, E1000_WUC_PME_EN);
6564		wr32(E1000_WUFC, wufc);
6565	} else {
6566		wr32(E1000_WUC, 0);
6567		wr32(E1000_WUFC, 0);
6568	}
6569
6570	*enable_wake = wufc || adapter->en_mng_pt;
6571	if (!*enable_wake)
6572		igb_power_down_link(adapter);
6573	else
6574		igb_power_up_link(adapter);
6575
6576	/* Release control of h/w to f/w. If f/w is AMT enabled, this
6577	 * would have already happened in close and is redundant. */
6578	igb_release_hw_control(adapter);
6579
6580	pci_disable_device(pdev);
6581
6582	return 0;
6583}
6584
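/*
 * [Editorial aside, not part of this revision.] The wufc value consumed by
 * __igb_shutdown() above is derived from adapter->wol, which user space
 * normally configures through the ethtool ETHTOOL_SWOL ioctl. A minimal
 * sketch follows; the interface name "eth0" is an assumption.
 */
#if 0	/* illustrative user-space code, excluded from the kernel build */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int enable_magic_packet_wol(const char *ifname)
{
	struct ethtool_wolinfo wol;
	struct ifreq ifr;
	int fd, err;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	memset(&wol, 0, sizeof(wol));
	wol.cmd = ETHTOOL_SWOL;
	wol.wolopts = WAKE_MAGIC;	/* wake on magic packet only */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&wol;

	err = ioctl(fd, SIOCETHTOOL, &ifr);
	close(fd);
	return err;
}
#endif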
6585#ifdef CONFIG_PM
6586static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6587{
6588	int retval;
6589	bool wake;
6590
6591	retval = __igb_shutdown(pdev, &wake);
6592	if (retval)
6593		return retval;
6594
6595	if (wake) {
6596		pci_prepare_to_sleep(pdev);
6597	} else {
6598		pci_wake_from_d3(pdev, false);
6599		pci_set_power_state(pdev, PCI_D3hot);
6600	}
6601
6602	return 0;
6603}
6604
6605static int igb_resume(struct pci_dev *pdev)
6606{
6607	struct net_device *netdev = pci_get_drvdata(pdev);
6608	struct igb_adapter *adapter = netdev_priv(netdev);
6609	struct e1000_hw *hw = &adapter->hw;
6610	u32 err;
6611
6612	pci_set_power_state(pdev, PCI_D0);
6613	pci_restore_state(pdev);
6614	pci_save_state(pdev);
6615
6616	err = pci_enable_device_mem(pdev);
6617	if (err) {
6618		dev_err(&pdev->dev,
6619			"igb: Cannot enable PCI device from suspend\n");
6620		return err;
6621	}
6622	pci_set_master(pdev);
6623
6624	pci_enable_wake(pdev, PCI_D3hot, 0);
6625	pci_enable_wake(pdev, PCI_D3cold, 0);
6626
6627	if (igb_init_interrupt_scheme(adapter)) {
6628		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6629		return -ENOMEM;
6630	}
6631
6632	igb_reset(adapter);
6633
6634	/* let the f/w know that the h/w is now under the control of the
6635	 * driver. */
6636	igb_get_hw_control(adapter);
6637
6638	wr32(E1000_WUS, ~0);
6639
6640	if (netif_running(netdev)) {
6641		err = igb_open(netdev);
6642		if (err)
6643			return err;
6644	}
6645
6646	netif_device_attach(netdev);
6647
6648	return 0;
6649}
6650#endif
6651
6652static void igb_shutdown(struct pci_dev *pdev)
6653{
6654	bool wake;
6655
6656	__igb_shutdown(pdev, &wake);
6657
6658	if (system_state == SYSTEM_POWER_OFF) {
6659		pci_wake_from_d3(pdev, wake);
6660		pci_set_power_state(pdev, PCI_D3hot);
6661	}
6662}
6663
6664#ifdef CONFIG_NET_POLL_CONTROLLER
6665/*
6666 * Polling 'interrupt' - used by things like netconsole to send skbs
6667 * without having to re-enable interrupts. It's not called while
6668 * the interrupt routine is executing.
6669 */
6670static void igb_netpoll(struct net_device *netdev)
6671{
6672	struct igb_adapter *adapter = netdev_priv(netdev);
6673	struct e1000_hw *hw = &adapter->hw;
6674	struct igb_q_vector *q_vector;
6675	int i;
6676
6677	for (i = 0; i < adapter->num_q_vectors; i++) {
6678		q_vector = adapter->q_vector[i];
6679		if (adapter->msix_entries)
6680			wr32(E1000_EIMC, q_vector->eims_value);
6681		else
6682			igb_irq_disable(adapter);
6683		napi_schedule(&q_vector->napi);
6684	}
6685}
6686#endif /* CONFIG_NET_POLL_CONTROLLER */
6687
6688/**
6689 * igb_io_error_detected - called when PCI error is detected
6690 * @pdev: Pointer to PCI device
6691 * @state: The current pci connection state
6692 *
6693 * This function is called after a PCI bus error affecting
6694 * this device has been detected.
6695 */
6696static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6697					      pci_channel_state_t state)
6698{
6699	struct net_device *netdev = pci_get_drvdata(pdev);
6700	struct igb_adapter *adapter = netdev_priv(netdev);
6701
6702	netif_device_detach(netdev);
6703
6704	if (state == pci_channel_io_perm_failure)
6705		return PCI_ERS_RESULT_DISCONNECT;
6706
6707	if (netif_running(netdev))
6708		igb_down(adapter);
6709	pci_disable_device(pdev);
6710
6711	/* Request a slot reset. */
6712	return PCI_ERS_RESULT_NEED_RESET;
6713}
6714
6715/**
6716 * igb_io_slot_reset - called after the pci bus has been reset.
6717 * @pdev: Pointer to PCI device
6718 *
6719 * Restart the card from scratch, as if from a cold-boot. Implementation
6720 * resembles the first-half of the igb_resume routine.
6721 */
6722static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6723{
6724	struct net_device *netdev = pci_get_drvdata(pdev);
6725	struct igb_adapter *adapter = netdev_priv(netdev);
6726	struct e1000_hw *hw = &adapter->hw;
6727	pci_ers_result_t result;
6728	int err;
6729
6730	if (pci_enable_device_mem(pdev)) {
6731		dev_err(&pdev->dev,
6732			"Cannot re-enable PCI device after reset.\n");
6733		result = PCI_ERS_RESULT_DISCONNECT;
6734	} else {
6735		pci_set_master(pdev);
6736		pci_restore_state(pdev);
6737		pci_save_state(pdev);
6738
6739		pci_enable_wake(pdev, PCI_D3hot, 0);
6740		pci_enable_wake(pdev, PCI_D3cold, 0);
6741
6742		igb_reset(adapter);
6743		wr32(E1000_WUS, ~0);
6744		result = PCI_ERS_RESULT_RECOVERED;
6745	}
6746
6747	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6748	if (err) {
6749		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6750			"failed 0x%0x\n", err);
6751		/* non-fatal, continue */
6752	}
6753
6754	return result;
6755}
6756
6757/**
6758 * igb_io_resume - called when traffic can start flowing again.
6759 * @pdev: Pointer to PCI device
6760 *
6761 * This callback is called when the error recovery driver tells us that
6762 * it's OK to resume normal operation. Implementation resembles the
6763 * second-half of the igb_resume routine.
6764 */
6765static void igb_io_resume(struct pci_dev *pdev)
6766{
6767	struct net_device *netdev = pci_get_drvdata(pdev);
6768	struct igb_adapter *adapter = netdev_priv(netdev);
6769
6770	if (netif_running(netdev)) {
6771		if (igb_up(adapter)) {
6772			dev_err(&pdev->dev, "igb_up failed after reset\n");
6773			return;
6774		}
6775	}
6776
6777	netif_device_attach(netdev);
6778
6779	/* let the f/w know that the h/w is now under the control of the
6780	 * driver. */
6781	igb_get_hw_control(adapter);
6782}
6783
6784static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6785			     u8 qsel)
6786{
6787	u32 rar_low, rar_high;
6788	struct e1000_hw *hw = &adapter->hw;
6789
6790	/* HW expects these in little endian so we reverse the byte order
6791	 * from network order (big endian) to little endian
6792	 */
6793	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6794		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6795	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6796
6797	/* Indicate to hardware the Address is Valid. */
6798	rar_high |= E1000_RAH_AV;
6799
6800	if (hw->mac.type == e1000_82575)
6801		rar_high |= E1000_RAH_POOL_1 * qsel;
6802	else
6803		rar_high |= E1000_RAH_POOL_1 << qsel;
6804
6805	wr32(E1000_RAL(index), rar_low);
6806	wrfl();
6807	wr32(E1000_RAH(index), rar_high);
6808	wrfl();
6809}
6810
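/*
 * [Editorial aside, not part of this revision.] A worked example of the byte
 * packing done by igb_rar_set_qsel() above, for the sample (made-up) address
 * 00:1b:21:aa:bb:cc:
 *
 *   rar_low  = 0x00 | 0x1b << 8 | 0x21 << 16 | 0xaa << 24 = 0xaa211b00
 *   rar_high = 0xbb | 0xcc << 8                           = 0x0000ccbb
 *
 * before E1000_RAH_AV and the pool selection bits are OR'd into rar_high.
 */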
6811static int igb_set_vf_mac(struct igb_adapter *adapter,
6812			  int vf, unsigned char *mac_addr)
6813{
6814	struct e1000_hw *hw = &adapter->hw;
6815	/* VF MAC addresses start at the end of the receive addresses and
6816	 * move towards the first, so a collision should not be possible */
6817	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6818
6819	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6820
6821	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6822
6823	return 0;
6824}
6825
6826static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6827{
6828	struct igb_adapter *adapter = netdev_priv(netdev);
6829	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6830		return -EINVAL;
6831	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6832	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6833	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6834		 " change effective.\n");
6835	if (test_bit(__IGB_DOWN, &adapter->state)) {
6836		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6837			 " but the PF device is not up.\n");
6838		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6839			 " attempting to use the VF device.\n");
6840	}
6841	return igb_set_vf_mac(adapter, vf, mac);
6842}
6843
6844static int igb_link_mbps(int internal_link_speed)
6845{
6846	switch (internal_link_speed) {
6847	case SPEED_100:
6848		return 100;
6849	case SPEED_1000:
6850		return 1000;
6851	default:
6852		return 0;
6853	}
6854}
6855
6856static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6857				  int link_speed)
6858{
6859	int rf_dec, rf_int;
6860	u32 bcnrc_val;
6861
6862	if (tx_rate != 0) {
6863		/* Calculate the rate factor values to set */
6864		rf_int = link_speed / tx_rate;
6865		rf_dec = (link_speed - (rf_int * tx_rate));
6866		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6867
6868		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6869		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6870		               E1000_RTTBCNRC_RF_INT_MASK);
6871		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6872	} else {
6873		bcnrc_val = 0;
6874	}
6875
6876	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6877	wr32(E1000_RTTBCNRC, bcnrc_val);
6878}
6879
6880static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6881{
6882	int actual_link_speed, i;
6883	bool reset_rate = false;
6884
6885	/* VF TX rate limit was not set or not supported */
6886	if ((adapter->vf_rate_link_speed == 0) ||
6887	    (adapter->hw.mac.type != e1000_82576))
6888		return;
6889
6890	actual_link_speed = igb_link_mbps(adapter->link_speed);
6891	if (actual_link_speed != adapter->vf_rate_link_speed) {
6892		reset_rate = true;
6893		adapter->vf_rate_link_speed = 0;
6894		dev_info(&adapter->pdev->dev,
6895			 "Link speed has been changed. VF Transmit "
6896			 "rate is disabled\n");
6897	}
6898
6899	for (i = 0; i < adapter->vfs_allocated_count; i++) {
6900		if (reset_rate)
6901			adapter->vf_data[i].tx_rate = 0;
6902
6903		igb_set_vf_rate_limit(&adapter->hw, i,
6904				      adapter->vf_data[i].tx_rate,
6905				      actual_link_speed);
6906	}
6907}
6908
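/*
 * [Editorial aside, not part of this revision.] Worked example for the
 * rate-factor arithmetic in igb_set_vf_rate_limit() above, assuming the
 * 14-bit fractional field implied by E1000_RTTBCNRC_RF_INT_SHIFT:
 *
 *   link_speed = 1000 Mbps, tx_rate = 300 Mbps
 *   rf_int = 1000 / 300                    = 3
 *   rf_dec = (1000 - 3 * 300) * 2^14 / 300 = 5461
 *
 * The programmed factor is 3 + 5461/16384 ~= 3.333, so the hardware throttles
 * the VF queue to roughly link_speed / factor = 300 Mbps.
 */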
6909static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6910{
6911	struct igb_adapter *adapter = netdev_priv(netdev);
6912	struct e1000_hw *hw = &adapter->hw;
6913	int actual_link_speed;
6914
6915	if (hw->mac.type != e1000_82576)
6916		return -EOPNOTSUPP;
6917
6918	actual_link_speed = igb_link_mbps(adapter->link_speed);
6919	if ((vf >= adapter->vfs_allocated_count) ||
6920	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6921	    (tx_rate < 0) || (tx_rate > actual_link_speed))
6922		return -EINVAL;
6923
6924	adapter->vf_rate_link_speed = actual_link_speed;
6925	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6926	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6927
6928	return 0;
6929}
6930
6931static int igb_ndo_get_vf_config(struct net_device *netdev,
6932				 int vf, struct ifla_vf_info *ivi)
6933{
6934	struct igb_adapter *adapter = netdev_priv(netdev);
6935	if (vf >= adapter->vfs_allocated_count)
6936		return -EINVAL;
6937	ivi->vf = vf;
6938	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6939	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6940	ivi->vlan = adapter->vf_data[vf].pf_vlan;
6941	ivi->qos = adapter->vf_data[vf].pf_qos;
6942	return 0;
6943}
6944
6945static void igb_vmm_control(struct igb_adapter *adapter)
6946{
6947	struct e1000_hw *hw = &adapter->hw;
6948	u32 reg;
6949
6950	switch (hw->mac.type) {
6951	case e1000_82575:
6952	default:
6953		/* replication is not supported for 82575 */
6954		return;
6955	case e1000_82576:
6956		/* notify HW that the MAC is adding vlan tags */
6957		reg = rd32(E1000_DTXCTL);
6958		reg |= E1000_DTXCTL_VLAN_ADDED;
6959		wr32(E1000_DTXCTL, reg); /* fall through */
6960	case e1000_82580:
6961		/* enable replication vlan tag stripping */
6962		reg = rd32(E1000_RPLOLR);
6963		reg |= E1000_RPLOLR_STRVLAN;
6964		wr32(E1000_RPLOLR, reg); /* fall through */
6965	case e1000_i350:
6966		/* none of the above registers are supported by i350 */
6967		break;
6968	}
6969
6970	if (adapter->vfs_allocated_count) {
6971		igb_vmdq_set_loopback_pf(hw, true);
6972		igb_vmdq_set_replication_pf(hw, true);
6973		igb_vmdq_set_anti_spoofing_pf(hw, true,
6974		                              adapter->vfs_allocated_count);
6975	} else {
6976		igb_vmdq_set_loopback_pf(hw, false);
6977		igb_vmdq_set_replication_pf(hw, false);
6978	}
6979}
6980
6981/* igb_main.c */