cxgb4vf_main.c revision 01789349ee52e4a3faf376f1485303d9723c4f1f
1/* 2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet 3 * driver for Linux. 4 * 5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/module.h> 37#include <linux/moduleparam.h> 38#include <linux/init.h> 39#include <linux/pci.h> 40#include <linux/dma-mapping.h> 41#include <linux/netdevice.h> 42#include <linux/etherdevice.h> 43#include <linux/debugfs.h> 44#include <linux/ethtool.h> 45 46#include "t4vf_common.h" 47#include "t4vf_defs.h" 48 49#include "../cxgb4/t4_regs.h" 50#include "../cxgb4/t4_msg.h" 51 52/* 53 * Generic information about the driver. 54 */ 55#define DRV_VERSION "1.0.0" 56#define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver" 57 58/* 59 * Module Parameters. 60 * ================== 61 */ 62 63/* 64 * Default ethtool "message level" for adapters. 65 */ 66#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ 67 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ 68 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) 69 70static int dflt_msg_enable = DFLT_MSG_ENABLE; 71 72module_param(dflt_msg_enable, int, 0644); 73MODULE_PARM_DESC(dflt_msg_enable, 74 "default adapter ethtool message level bitmap"); 75 76/* 77 * The driver uses the best interrupt scheme available on a platform in the 78 * order MSI-X then MSI. This parameter determines which of these schemes the 79 * driver may consider as follows: 80 * 81 * msi = 2: choose from among MSI-X and MSI 82 * msi = 1: only consider MSI interrupts 83 * 84 * Note that unlike the Physical Function driver, this Virtual Function driver 85 * does _not_ support legacy INTx interrupts (this limitation is mandated by 86 * the PCI-E SR-IOV standard). 87 */ 88#define MSI_MSIX 2 89#define MSI_MSI 1 90#define MSI_DEFAULT MSI_MSIX 91 92static int msi = MSI_DEFAULT; 93 94module_param(msi, int, 0644); 95MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI"); 96 97/* 98 * Fundamental constants. 99 * ====================== 100 */ 101 102enum { 103 MAX_TXQ_ENTRIES = 16384, 104 MAX_RSPQ_ENTRIES = 16384, 105 MAX_RX_BUFFERS = 16384, 106 107 MIN_TXQ_ENTRIES = 32, 108 MIN_RSPQ_ENTRIES = 128, 109 MIN_FL_ENTRIES = 16, 110 111 /* 112 * For purposes of manipulating the Free List size we need to 113 * recognize that Free Lists are actually Egress Queues (the host 114 * produces free buffers which the hardware consumes), Egress Queues 115 * indices are all in units of Egress Context Units bytes, and free 116 * list entries are 64-bit PCI DMA addresses. And since the state of 117 * the Producer Index == the Consumer Index implies an EMPTY list, we 118 * always have at least one Egress Unit's worth of Free List entries 119 * unused. See sge.c for more details ... 120 */ 121 EQ_UNIT = SGE_EQ_IDXSIZE, 122 FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64), 123 MIN_FL_RESID = FL_PER_EQ_UNIT, 124}; 125 126/* 127 * Global driver state. 128 * ==================== 129 */ 130 131static struct dentry *cxgb4vf_debugfs_root; 132 133/* 134 * OS "Callback" functions. 135 * ======================== 136 */ 137 138/* 139 * The link status has changed on the indicated "port" (Virtual Interface). 140 */ 141void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok) 142{ 143 struct net_device *dev = adapter->port[pidx]; 144 145 /* 146 * If the port is disabled or the current recorded "link up" 147 * status matches the new status, just return. 148 */ 149 if (!netif_running(dev) || link_ok == netif_carrier_ok(dev)) 150 return; 151 152 /* 153 * Tell the OS that the link status has changed and print a short 154 * informative message on the console about the event. 155 */ 156 if (link_ok) { 157 const char *s; 158 const char *fc; 159 const struct port_info *pi = netdev_priv(dev); 160 161 netif_carrier_on(dev); 162 163 switch (pi->link_cfg.speed) { 164 case SPEED_10000: 165 s = "10Gbps"; 166 break; 167 168 case SPEED_1000: 169 s = "1000Mbps"; 170 break; 171 172 case SPEED_100: 173 s = "100Mbps"; 174 break; 175 176 default: 177 s = "unknown"; 178 break; 179 } 180 181 switch (pi->link_cfg.fc) { 182 case PAUSE_RX: 183 fc = "RX"; 184 break; 185 186 case PAUSE_TX: 187 fc = "TX"; 188 break; 189 190 case PAUSE_RX|PAUSE_TX: 191 fc = "RX/TX"; 192 break; 193 194 default: 195 fc = "no"; 196 break; 197 } 198 199 printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n", 200 dev->name, s, fc); 201 } else { 202 netif_carrier_off(dev); 203 printk(KERN_INFO "%s: link down\n", dev->name); 204 } 205} 206 207/* 208 * Net device operations. 209 * ====================== 210 */ 211 212 213 214 215/* 216 * Perform the MAC and PHY actions needed to enable a "port" (Virtual 217 * Interface). 218 */ 219static int link_start(struct net_device *dev) 220{ 221 int ret; 222 struct port_info *pi = netdev_priv(dev); 223 224 /* 225 * We do not set address filters and promiscuity here, the stack does 226 * that step explicitly. Enable vlan accel. 227 */ 228 ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1, 229 true); 230 if (ret == 0) { 231 ret = t4vf_change_mac(pi->adapter, pi->viid, 232 pi->xact_addr_filt, dev->dev_addr, true); 233 if (ret >= 0) { 234 pi->xact_addr_filt = ret; 235 ret = 0; 236 } 237 } 238 239 /* 240 * We don't need to actually "start the link" itself since the 241 * firmware will do that for us when the first Virtual Interface 242 * is enabled on a port. 243 */ 244 if (ret == 0) 245 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true); 246 return ret; 247} 248 249/* 250 * Name the MSI-X interrupts. 251 */ 252static void name_msix_vecs(struct adapter *adapter) 253{ 254 int namelen = sizeof(adapter->msix_info[0].desc) - 1; 255 int pidx; 256 257 /* 258 * Firmware events. 259 */ 260 snprintf(adapter->msix_info[MSIX_FW].desc, namelen, 261 "%s-FWeventq", adapter->name); 262 adapter->msix_info[MSIX_FW].desc[namelen] = 0; 263 264 /* 265 * Ethernet queues. 266 */ 267 for_each_port(adapter, pidx) { 268 struct net_device *dev = adapter->port[pidx]; 269 const struct port_info *pi = netdev_priv(dev); 270 int qs, msi; 271 272 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) { 273 snprintf(adapter->msix_info[msi].desc, namelen, 274 "%s-%d", dev->name, qs); 275 adapter->msix_info[msi].desc[namelen] = 0; 276 } 277 } 278} 279 280/* 281 * Request all of our MSI-X resources. 282 */ 283static int request_msix_queue_irqs(struct adapter *adapter) 284{ 285 struct sge *s = &adapter->sge; 286 int rxq, msi, err; 287 288 /* 289 * Firmware events. 290 */ 291 err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix, 292 0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq); 293 if (err) 294 return err; 295 296 /* 297 * Ethernet queues. 298 */ 299 msi = MSIX_IQFLINT; 300 for_each_ethrxq(s, rxq) { 301 err = request_irq(adapter->msix_info[msi].vec, 302 t4vf_sge_intr_msix, 0, 303 adapter->msix_info[msi].desc, 304 &s->ethrxq[rxq].rspq); 305 if (err) 306 goto err_free_irqs; 307 msi++; 308 } 309 return 0; 310 311err_free_irqs: 312 while (--rxq >= 0) 313 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq); 314 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq); 315 return err; 316} 317 318/* 319 * Free our MSI-X resources. 320 */ 321static void free_msix_queue_irqs(struct adapter *adapter) 322{ 323 struct sge *s = &adapter->sge; 324 int rxq, msi; 325 326 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq); 327 msi = MSIX_IQFLINT; 328 for_each_ethrxq(s, rxq) 329 free_irq(adapter->msix_info[msi++].vec, 330 &s->ethrxq[rxq].rspq); 331} 332 333/* 334 * Turn on NAPI and start up interrupts on a response queue. 335 */ 336static void qenable(struct sge_rspq *rspq) 337{ 338 napi_enable(&rspq->napi); 339 340 /* 341 * 0-increment the Going To Sleep register to start the timer and 342 * enable interrupts. 343 */ 344 t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, 345 CIDXINC(0) | 346 SEINTARM(rspq->intr_params) | 347 INGRESSQID(rspq->cntxt_id)); 348} 349 350/* 351 * Enable NAPI scheduling and interrupt generation for all Receive Queues. 352 */ 353static void enable_rx(struct adapter *adapter) 354{ 355 int rxq; 356 struct sge *s = &adapter->sge; 357 358 for_each_ethrxq(s, rxq) 359 qenable(&s->ethrxq[rxq].rspq); 360 qenable(&s->fw_evtq); 361 362 /* 363 * The interrupt queue doesn't use NAPI so we do the 0-increment of 364 * its Going To Sleep register here to get it started. 365 */ 366 if (adapter->flags & USING_MSI) 367 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, 368 CIDXINC(0) | 369 SEINTARM(s->intrq.intr_params) | 370 INGRESSQID(s->intrq.cntxt_id)); 371 372} 373 374/* 375 * Wait until all NAPI handlers are descheduled. 376 */ 377static void quiesce_rx(struct adapter *adapter) 378{ 379 struct sge *s = &adapter->sge; 380 int rxq; 381 382 for_each_ethrxq(s, rxq) 383 napi_disable(&s->ethrxq[rxq].rspq.napi); 384 napi_disable(&s->fw_evtq.napi); 385} 386 387/* 388 * Response queue handler for the firmware event queue. 389 */ 390static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp, 391 const struct pkt_gl *gl) 392{ 393 /* 394 * Extract response opcode and get pointer to CPL message body. 395 */ 396 struct adapter *adapter = rspq->adapter; 397 u8 opcode = ((const struct rss_header *)rsp)->opcode; 398 void *cpl = (void *)(rsp + 1); 399 400 switch (opcode) { 401 case CPL_FW6_MSG: { 402 /* 403 * We've received an asynchronous message from the firmware. 404 */ 405 const struct cpl_fw6_msg *fw_msg = cpl; 406 if (fw_msg->type == FW6_TYPE_CMD_RPL) 407 t4vf_handle_fw_rpl(adapter, fw_msg->data); 408 break; 409 } 410 411 case CPL_SGE_EGR_UPDATE: { 412 /* 413 * We've received an Egress Queue Status Update message. We 414 * get these, if the SGE is configured to send these when the 415 * firmware passes certain points in processing our TX 416 * Ethernet Queue or if we make an explicit request for one. 417 * We use these updates to determine when we may need to 418 * restart a TX Ethernet Queue which was stopped for lack of 419 * free TX Queue Descriptors ... 420 */ 421 const struct cpl_sge_egr_update *p = (void *)cpl; 422 unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid)); 423 struct sge *s = &adapter->sge; 424 struct sge_txq *tq; 425 struct sge_eth_txq *txq; 426 unsigned int eq_idx; 427 428 /* 429 * Perform sanity checking on the Queue ID to make sure it 430 * really refers to one of our TX Ethernet Egress Queues which 431 * is active and matches the queue's ID. None of these error 432 * conditions should ever happen so we may want to either make 433 * them fatal and/or conditionalized under DEBUG. 434 */ 435 eq_idx = EQ_IDX(s, qid); 436 if (unlikely(eq_idx >= MAX_EGRQ)) { 437 dev_err(adapter->pdev_dev, 438 "Egress Update QID %d out of range\n", qid); 439 break; 440 } 441 tq = s->egr_map[eq_idx]; 442 if (unlikely(tq == NULL)) { 443 dev_err(adapter->pdev_dev, 444 "Egress Update QID %d TXQ=NULL\n", qid); 445 break; 446 } 447 txq = container_of(tq, struct sge_eth_txq, q); 448 if (unlikely(tq->abs_id != qid)) { 449 dev_err(adapter->pdev_dev, 450 "Egress Update QID %d refers to TXQ %d\n", 451 qid, tq->abs_id); 452 break; 453 } 454 455 /* 456 * Restart a stopped TX Queue which has less than half of its 457 * TX ring in use ... 458 */ 459 txq->q.restarts++; 460 netif_tx_wake_queue(txq->txq); 461 break; 462 } 463 464 default: 465 dev_err(adapter->pdev_dev, 466 "unexpected CPL %#x on FW event queue\n", opcode); 467 } 468 469 return 0; 470} 471 472/* 473 * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues 474 * to use and initializes them. We support multiple "Queue Sets" per port if 475 * we have MSI-X, otherwise just one queue set per port. 476 */ 477static int setup_sge_queues(struct adapter *adapter) 478{ 479 struct sge *s = &adapter->sge; 480 int err, pidx, msix; 481 482 /* 483 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error 484 * state. 485 */ 486 bitmap_zero(s->starving_fl, MAX_EGRQ); 487 488 /* 489 * If we're using MSI interrupt mode we need to set up a "forwarded 490 * interrupt" queue which we'll set up with our MSI vector. The rest 491 * of the ingress queues will be set up to forward their interrupts to 492 * this queue ... This must be first since t4vf_sge_alloc_rxq() uses 493 * the intrq's queue ID as the interrupt forwarding queue for the 494 * subsequent calls ... 495 */ 496 if (adapter->flags & USING_MSI) { 497 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false, 498 adapter->port[0], 0, NULL, NULL); 499 if (err) 500 goto err_free_queues; 501 } 502 503 /* 504 * Allocate our ingress queue for asynchronous firmware messages. 505 */ 506 err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0], 507 MSIX_FW, NULL, fwevtq_handler); 508 if (err) 509 goto err_free_queues; 510 511 /* 512 * Allocate each "port"'s initial Queue Sets. These can be changed 513 * later on ... up to the point where any interface on the adapter is 514 * brought up at which point lots of things get nailed down 515 * permanently ... 516 */ 517 msix = MSIX_IQFLINT; 518 for_each_port(adapter, pidx) { 519 struct net_device *dev = adapter->port[pidx]; 520 struct port_info *pi = netdev_priv(dev); 521 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; 522 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; 523 int qs; 524 525 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 526 err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false, 527 dev, msix++, 528 &rxq->fl, t4vf_ethrx_handler); 529 if (err) 530 goto err_free_queues; 531 532 err = t4vf_sge_alloc_eth_txq(adapter, txq, dev, 533 netdev_get_tx_queue(dev, qs), 534 s->fw_evtq.cntxt_id); 535 if (err) 536 goto err_free_queues; 537 538 rxq->rspq.idx = qs; 539 memset(&rxq->stats, 0, sizeof(rxq->stats)); 540 } 541 } 542 543 /* 544 * Create the reverse mappings for the queues. 545 */ 546 s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id; 547 s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id; 548 IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq; 549 for_each_port(adapter, pidx) { 550 struct net_device *dev = adapter->port[pidx]; 551 struct port_info *pi = netdev_priv(dev); 552 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; 553 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; 554 int qs; 555 556 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 557 IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq; 558 EQ_MAP(s, txq->q.abs_id) = &txq->q; 559 560 /* 561 * The FW_IQ_CMD doesn't return the Absolute Queue IDs 562 * for Free Lists but since all of the Egress Queues 563 * (including Free Lists) have Relative Queue IDs 564 * which are computed as Absolute - Base Queue ID, we 565 * can synthesize the Absolute Queue IDs for the Free 566 * Lists. This is useful for debugging purposes when 567 * we want to dump Queue Contexts via the PF Driver. 568 */ 569 rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base; 570 EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl; 571 } 572 } 573 return 0; 574 575err_free_queues: 576 t4vf_free_sge_resources(adapter); 577 return err; 578} 579 580/* 581 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive 582 * queues. We configure the RSS CPU lookup table to distribute to the number 583 * of HW receive queues, and the response queue lookup table to narrow that 584 * down to the response queues actually configured for each "port" (Virtual 585 * Interface). We always configure the RSS mapping for all ports since the 586 * mapping table has plenty of entries. 587 */ 588static int setup_rss(struct adapter *adapter) 589{ 590 int pidx; 591 592 for_each_port(adapter, pidx) { 593 struct port_info *pi = adap2pinfo(adapter, pidx); 594 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; 595 u16 rss[MAX_PORT_QSETS]; 596 int qs, err; 597 598 for (qs = 0; qs < pi->nqsets; qs++) 599 rss[qs] = rxq[qs].rspq.abs_id; 600 601 err = t4vf_config_rss_range(adapter, pi->viid, 602 0, pi->rss_size, rss, pi->nqsets); 603 if (err) 604 return err; 605 606 /* 607 * Perform Global RSS Mode-specific initialization. 608 */ 609 switch (adapter->params.rss.mode) { 610 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: 611 /* 612 * If Tunnel All Lookup isn't specified in the global 613 * RSS Configuration, then we need to specify a 614 * default Ingress Queue for any ingress packets which 615 * aren't hashed. We'll use our first ingress queue 616 * ... 617 */ 618 if (!adapter->params.rss.u.basicvirtual.tnlalllookup) { 619 union rss_vi_config config; 620 err = t4vf_read_rss_vi_config(adapter, 621 pi->viid, 622 &config); 623 if (err) 624 return err; 625 config.basicvirtual.defaultq = 626 rxq[0].rspq.abs_id; 627 err = t4vf_write_rss_vi_config(adapter, 628 pi->viid, 629 &config); 630 if (err) 631 return err; 632 } 633 break; 634 } 635 } 636 637 return 0; 638} 639 640/* 641 * Bring the adapter up. Called whenever we go from no "ports" open to having 642 * one open. This function performs the actions necessary to make an adapter 643 * operational, such as completing the initialization of HW modules, and 644 * enabling interrupts. Must be called with the rtnl lock held. (Note that 645 * this is called "cxgb_up" in the PF Driver.) 646 */ 647static int adapter_up(struct adapter *adapter) 648{ 649 int err; 650 651 /* 652 * If this is the first time we've been called, perform basic 653 * adapter setup. Once we've done this, many of our adapter 654 * parameters can no longer be changed ... 655 */ 656 if ((adapter->flags & FULL_INIT_DONE) == 0) { 657 err = setup_sge_queues(adapter); 658 if (err) 659 return err; 660 err = setup_rss(adapter); 661 if (err) { 662 t4vf_free_sge_resources(adapter); 663 return err; 664 } 665 666 if (adapter->flags & USING_MSIX) 667 name_msix_vecs(adapter); 668 adapter->flags |= FULL_INIT_DONE; 669 } 670 671 /* 672 * Acquire our interrupt resources. We only support MSI-X and MSI. 673 */ 674 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); 675 if (adapter->flags & USING_MSIX) 676 err = request_msix_queue_irqs(adapter); 677 else 678 err = request_irq(adapter->pdev->irq, 679 t4vf_intr_handler(adapter), 0, 680 adapter->name, adapter); 681 if (err) { 682 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n", 683 err); 684 return err; 685 } 686 687 /* 688 * Enable NAPI ingress processing and return success. 689 */ 690 enable_rx(adapter); 691 t4vf_sge_start(adapter); 692 return 0; 693} 694 695/* 696 * Bring the adapter down. Called whenever the last "port" (Virtual 697 * Interface) closed. (Note that this routine is called "cxgb_down" in the PF 698 * Driver.) 699 */ 700static void adapter_down(struct adapter *adapter) 701{ 702 /* 703 * Free interrupt resources. 704 */ 705 if (adapter->flags & USING_MSIX) 706 free_msix_queue_irqs(adapter); 707 else 708 free_irq(adapter->pdev->irq, adapter); 709 710 /* 711 * Wait for NAPI handlers to finish. 712 */ 713 quiesce_rx(adapter); 714} 715 716/* 717 * Start up a net device. 718 */ 719static int cxgb4vf_open(struct net_device *dev) 720{ 721 int err; 722 struct port_info *pi = netdev_priv(dev); 723 struct adapter *adapter = pi->adapter; 724 725 /* 726 * If this is the first interface that we're opening on the "adapter", 727 * bring the "adapter" up now. 728 */ 729 if (adapter->open_device_map == 0) { 730 err = adapter_up(adapter); 731 if (err) 732 return err; 733 } 734 735 /* 736 * Note that this interface is up and start everything up ... 737 */ 738 netif_set_real_num_tx_queues(dev, pi->nqsets); 739 err = netif_set_real_num_rx_queues(dev, pi->nqsets); 740 if (err) 741 goto err_unwind; 742 err = link_start(dev); 743 if (err) 744 goto err_unwind; 745 746 netif_tx_start_all_queues(dev); 747 set_bit(pi->port_id, &adapter->open_device_map); 748 return 0; 749 750err_unwind: 751 if (adapter->open_device_map == 0) 752 adapter_down(adapter); 753 return err; 754} 755 756/* 757 * Shut down a net device. This routine is called "cxgb_close" in the PF 758 * Driver ... 759 */ 760static int cxgb4vf_stop(struct net_device *dev) 761{ 762 struct port_info *pi = netdev_priv(dev); 763 struct adapter *adapter = pi->adapter; 764 765 netif_tx_stop_all_queues(dev); 766 netif_carrier_off(dev); 767 t4vf_enable_vi(adapter, pi->viid, false, false); 768 pi->link_cfg.link_ok = 0; 769 770 clear_bit(pi->port_id, &adapter->open_device_map); 771 if (adapter->open_device_map == 0) 772 adapter_down(adapter); 773 return 0; 774} 775 776/* 777 * Translate our basic statistics into the standard "ifconfig" statistics. 778 */ 779static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev) 780{ 781 struct t4vf_port_stats stats; 782 struct port_info *pi = netdev2pinfo(dev); 783 struct adapter *adapter = pi->adapter; 784 struct net_device_stats *ns = &dev->stats; 785 int err; 786 787 spin_lock(&adapter->stats_lock); 788 err = t4vf_get_port_stats(adapter, pi->pidx, &stats); 789 spin_unlock(&adapter->stats_lock); 790 791 memset(ns, 0, sizeof(*ns)); 792 if (err) 793 return ns; 794 795 ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes + 796 stats.tx_ucast_bytes + stats.tx_offload_bytes); 797 ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames + 798 stats.tx_ucast_frames + stats.tx_offload_frames); 799 ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes + 800 stats.rx_ucast_bytes); 801 ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames + 802 stats.rx_ucast_frames); 803 ns->multicast = stats.rx_mcast_frames; 804 ns->tx_errors = stats.tx_drop_frames; 805 ns->rx_errors = stats.rx_err_frames; 806 807 return ns; 808} 809 810/* 811 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting 812 * at a specified offset within the list, into an array of addrss pointers and 813 * return the number collected. 814 */ 815static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev, 816 const u8 **addr, 817 unsigned int offset, 818 unsigned int maxaddrs) 819{ 820 unsigned int index = 0; 821 unsigned int naddr = 0; 822 const struct netdev_hw_addr *ha; 823 824 for_each_dev_addr(dev, ha) 825 if (index++ >= offset) { 826 addr[naddr++] = ha->addr; 827 if (naddr >= maxaddrs) 828 break; 829 } 830 return naddr; 831} 832 833/* 834 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting 835 * at a specified offset within the list, into an array of addrss pointers and 836 * return the number collected. 837 */ 838static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev, 839 const u8 **addr, 840 unsigned int offset, 841 unsigned int maxaddrs) 842{ 843 unsigned int index = 0; 844 unsigned int naddr = 0; 845 const struct netdev_hw_addr *ha; 846 847 netdev_for_each_mc_addr(ha, dev) 848 if (index++ >= offset) { 849 addr[naddr++] = ha->addr; 850 if (naddr >= maxaddrs) 851 break; 852 } 853 return naddr; 854} 855 856/* 857 * Configure the exact and hash address filters to handle a port's multicast 858 * and secondary unicast MAC addresses. 859 */ 860static int set_addr_filters(const struct net_device *dev, bool sleep) 861{ 862 u64 mhash = 0; 863 u64 uhash = 0; 864 bool free = true; 865 unsigned int offset, naddr; 866 const u8 *addr[7]; 867 int ret; 868 const struct port_info *pi = netdev_priv(dev); 869 870 /* first do the secondary unicast addresses */ 871 for (offset = 0; ; offset += naddr) { 872 naddr = collect_netdev_uc_list_addrs(dev, addr, offset, 873 ARRAY_SIZE(addr)); 874 if (naddr == 0) 875 break; 876 877 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, 878 naddr, addr, NULL, &uhash, sleep); 879 if (ret < 0) 880 return ret; 881 882 free = false; 883 } 884 885 /* next set up the multicast addresses */ 886 for (offset = 0; ; offset += naddr) { 887 naddr = collect_netdev_mc_list_addrs(dev, addr, offset, 888 ARRAY_SIZE(addr)); 889 if (naddr == 0) 890 break; 891 892 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, 893 naddr, addr, NULL, &mhash, sleep); 894 if (ret < 0) 895 return ret; 896 free = false; 897 } 898 899 return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0, 900 uhash | mhash, sleep); 901} 902 903/* 904 * Set RX properties of a port, such as promiscruity, address filters, and MTU. 905 * If @mtu is -1 it is left unchanged. 906 */ 907static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) 908{ 909 int ret; 910 struct port_info *pi = netdev_priv(dev); 911 912 ret = set_addr_filters(dev, sleep_ok); 913 if (ret == 0) 914 ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1, 915 (dev->flags & IFF_PROMISC) != 0, 916 (dev->flags & IFF_ALLMULTI) != 0, 917 1, -1, sleep_ok); 918 return ret; 919} 920 921/* 922 * Set the current receive modes on the device. 923 */ 924static void cxgb4vf_set_rxmode(struct net_device *dev) 925{ 926 /* unfortunately we can't return errors to the stack */ 927 set_rxmode(dev, -1, false); 928} 929 930/* 931 * Find the entry in the interrupt holdoff timer value array which comes 932 * closest to the specified interrupt holdoff value. 933 */ 934static int closest_timer(const struct sge *s, int us) 935{ 936 int i, timer_idx = 0, min_delta = INT_MAX; 937 938 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) { 939 int delta = us - s->timer_val[i]; 940 if (delta < 0) 941 delta = -delta; 942 if (delta < min_delta) { 943 min_delta = delta; 944 timer_idx = i; 945 } 946 } 947 return timer_idx; 948} 949 950static int closest_thres(const struct sge *s, int thres) 951{ 952 int i, delta, pktcnt_idx = 0, min_delta = INT_MAX; 953 954 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) { 955 delta = thres - s->counter_val[i]; 956 if (delta < 0) 957 delta = -delta; 958 if (delta < min_delta) { 959 min_delta = delta; 960 pktcnt_idx = i; 961 } 962 } 963 return pktcnt_idx; 964} 965 966/* 967 * Return a queue's interrupt hold-off time in us. 0 means no timer. 968 */ 969static unsigned int qtimer_val(const struct adapter *adapter, 970 const struct sge_rspq *rspq) 971{ 972 unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params); 973 974 return timer_idx < SGE_NTIMERS 975 ? adapter->sge.timer_val[timer_idx] 976 : 0; 977} 978 979/** 980 * set_rxq_intr_params - set a queue's interrupt holdoff parameters 981 * @adapter: the adapter 982 * @rspq: the RX response queue 983 * @us: the hold-off time in us, or 0 to disable timer 984 * @cnt: the hold-off packet count, or 0 to disable counter 985 * 986 * Sets an RX response queue's interrupt hold-off time and packet count. 987 * At least one of the two needs to be enabled for the queue to generate 988 * interrupts. 989 */ 990static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq, 991 unsigned int us, unsigned int cnt) 992{ 993 unsigned int timer_idx; 994 995 /* 996 * If both the interrupt holdoff timer and count are specified as 997 * zero, default to a holdoff count of 1 ... 998 */ 999 if ((us | cnt) == 0) 1000 cnt = 1; 1001 1002 /* 1003 * If an interrupt holdoff count has been specified, then find the 1004 * closest configured holdoff count and use that. If the response 1005 * queue has already been created, then update its queue context 1006 * parameters ... 1007 */ 1008 if (cnt) { 1009 int err; 1010 u32 v, pktcnt_idx; 1011 1012 pktcnt_idx = closest_thres(&adapter->sge, cnt); 1013 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) { 1014 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 1015 FW_PARAMS_PARAM_X( 1016 FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) | 1017 FW_PARAMS_PARAM_YZ(rspq->cntxt_id); 1018 err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx); 1019 if (err) 1020 return err; 1021 } 1022 rspq->pktcnt_idx = pktcnt_idx; 1023 } 1024 1025 /* 1026 * Compute the closest holdoff timer index from the supplied holdoff 1027 * timer value. 1028 */ 1029 timer_idx = (us == 0 1030 ? SGE_TIMER_RSTRT_CNTR 1031 : closest_timer(&adapter->sge, us)); 1032 1033 /* 1034 * Update the response queue's interrupt coalescing parameters and 1035 * return success. 1036 */ 1037 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | 1038 (cnt > 0 ? QINTR_CNT_EN : 0)); 1039 return 0; 1040} 1041 1042/* 1043 * Return a version number to identify the type of adapter. The scheme is: 1044 * - bits 0..9: chip version 1045 * - bits 10..15: chip revision 1046 */ 1047static inline unsigned int mk_adap_vers(const struct adapter *adapter) 1048{ 1049 /* 1050 * Chip version 4, revision 0x3f (cxgb4vf). 1051 */ 1052 return 4 | (0x3f << 10); 1053} 1054 1055/* 1056 * Execute the specified ioctl command. 1057 */ 1058static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 1059{ 1060 int ret = 0; 1061 1062 switch (cmd) { 1063 /* 1064 * The VF Driver doesn't have access to any of the other 1065 * common Ethernet device ioctl()'s (like reading/writing 1066 * PHY registers, etc. 1067 */ 1068 1069 default: 1070 ret = -EOPNOTSUPP; 1071 break; 1072 } 1073 return ret; 1074} 1075 1076/* 1077 * Change the device's MTU. 1078 */ 1079static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu) 1080{ 1081 int ret; 1082 struct port_info *pi = netdev_priv(dev); 1083 1084 /* accommodate SACK */ 1085 if (new_mtu < 81) 1086 return -EINVAL; 1087 1088 ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu, 1089 -1, -1, -1, -1, true); 1090 if (!ret) 1091 dev->mtu = new_mtu; 1092 return ret; 1093} 1094 1095static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features) 1096{ 1097 /* 1098 * Since there is no support for separate rx/tx vlan accel 1099 * enable/disable make sure tx flag is always in same state as rx. 1100 */ 1101 if (features & NETIF_F_HW_VLAN_RX) 1102 features |= NETIF_F_HW_VLAN_TX; 1103 else 1104 features &= ~NETIF_F_HW_VLAN_TX; 1105 1106 return features; 1107} 1108 1109static int cxgb4vf_set_features(struct net_device *dev, u32 features) 1110{ 1111 struct port_info *pi = netdev_priv(dev); 1112 u32 changed = dev->features ^ features; 1113 1114 if (changed & NETIF_F_HW_VLAN_RX) 1115 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, 1116 features & NETIF_F_HW_VLAN_TX, 0); 1117 1118 return 0; 1119} 1120 1121/* 1122 * Change the devices MAC address. 1123 */ 1124static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr) 1125{ 1126 int ret; 1127 struct sockaddr *addr = _addr; 1128 struct port_info *pi = netdev_priv(dev); 1129 1130 if (!is_valid_ether_addr(addr->sa_data)) 1131 return -EINVAL; 1132 1133 ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt, 1134 addr->sa_data, true); 1135 if (ret < 0) 1136 return ret; 1137 1138 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); 1139 pi->xact_addr_filt = ret; 1140 return 0; 1141} 1142 1143#ifdef CONFIG_NET_POLL_CONTROLLER 1144/* 1145 * Poll all of our receive queues. This is called outside of normal interrupt 1146 * context. 1147 */ 1148static void cxgb4vf_poll_controller(struct net_device *dev) 1149{ 1150 struct port_info *pi = netdev_priv(dev); 1151 struct adapter *adapter = pi->adapter; 1152 1153 if (adapter->flags & USING_MSIX) { 1154 struct sge_eth_rxq *rxq; 1155 int nqsets; 1156 1157 rxq = &adapter->sge.ethrxq[pi->first_qset]; 1158 for (nqsets = pi->nqsets; nqsets; nqsets--) { 1159 t4vf_sge_intr_msix(0, &rxq->rspq); 1160 rxq++; 1161 } 1162 } else 1163 t4vf_intr_handler(adapter)(0, adapter); 1164} 1165#endif 1166 1167/* 1168 * Ethtool operations. 1169 * =================== 1170 * 1171 * Note that we don't support any ethtool operations which change the physical 1172 * state of the port to which we're linked. 1173 */ 1174 1175/* 1176 * Return current port link settings. 1177 */ 1178static int cxgb4vf_get_settings(struct net_device *dev, 1179 struct ethtool_cmd *cmd) 1180{ 1181 const struct port_info *pi = netdev_priv(dev); 1182 1183 cmd->supported = pi->link_cfg.supported; 1184 cmd->advertising = pi->link_cfg.advertising; 1185 ethtool_cmd_speed_set(cmd, 1186 netif_carrier_ok(dev) ? pi->link_cfg.speed : -1); 1187 cmd->duplex = DUPLEX_FULL; 1188 1189 cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE; 1190 cmd->phy_address = pi->port_id; 1191 cmd->transceiver = XCVR_EXTERNAL; 1192 cmd->autoneg = pi->link_cfg.autoneg; 1193 cmd->maxtxpkt = 0; 1194 cmd->maxrxpkt = 0; 1195 return 0; 1196} 1197 1198/* 1199 * Return our driver information. 1200 */ 1201static void cxgb4vf_get_drvinfo(struct net_device *dev, 1202 struct ethtool_drvinfo *drvinfo) 1203{ 1204 struct adapter *adapter = netdev2adap(dev); 1205 1206 strcpy(drvinfo->driver, KBUILD_MODNAME); 1207 strcpy(drvinfo->version, DRV_VERSION); 1208 strcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent))); 1209 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), 1210 "%u.%u.%u.%u, TP %u.%u.%u.%u", 1211 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev), 1212 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev), 1213 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev), 1214 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev), 1215 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev), 1216 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev), 1217 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev), 1218 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev)); 1219} 1220 1221/* 1222 * Return current adapter message level. 1223 */ 1224static u32 cxgb4vf_get_msglevel(struct net_device *dev) 1225{ 1226 return netdev2adap(dev)->msg_enable; 1227} 1228 1229/* 1230 * Set current adapter message level. 1231 */ 1232static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel) 1233{ 1234 netdev2adap(dev)->msg_enable = msglevel; 1235} 1236 1237/* 1238 * Return the device's current Queue Set ring size parameters along with the 1239 * allowed maximum values. Since ethtool doesn't understand the concept of 1240 * multi-queue devices, we just return the current values associated with the 1241 * first Queue Set. 1242 */ 1243static void cxgb4vf_get_ringparam(struct net_device *dev, 1244 struct ethtool_ringparam *rp) 1245{ 1246 const struct port_info *pi = netdev_priv(dev); 1247 const struct sge *s = &pi->adapter->sge; 1248 1249 rp->rx_max_pending = MAX_RX_BUFFERS; 1250 rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES; 1251 rp->rx_jumbo_max_pending = 0; 1252 rp->tx_max_pending = MAX_TXQ_ENTRIES; 1253 1254 rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID; 1255 rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size; 1256 rp->rx_jumbo_pending = 0; 1257 rp->tx_pending = s->ethtxq[pi->first_qset].q.size; 1258} 1259 1260/* 1261 * Set the Queue Set ring size parameters for the device. Again, since 1262 * ethtool doesn't allow for the concept of multiple queues per device, we'll 1263 * apply these new values across all of the Queue Sets associated with the 1264 * device -- after vetting them of course! 1265 */ 1266static int cxgb4vf_set_ringparam(struct net_device *dev, 1267 struct ethtool_ringparam *rp) 1268{ 1269 const struct port_info *pi = netdev_priv(dev); 1270 struct adapter *adapter = pi->adapter; 1271 struct sge *s = &adapter->sge; 1272 int qs; 1273 1274 if (rp->rx_pending > MAX_RX_BUFFERS || 1275 rp->rx_jumbo_pending || 1276 rp->tx_pending > MAX_TXQ_ENTRIES || 1277 rp->rx_mini_pending > MAX_RSPQ_ENTRIES || 1278 rp->rx_mini_pending < MIN_RSPQ_ENTRIES || 1279 rp->rx_pending < MIN_FL_ENTRIES || 1280 rp->tx_pending < MIN_TXQ_ENTRIES) 1281 return -EINVAL; 1282 1283 if (adapter->flags & FULL_INIT_DONE) 1284 return -EBUSY; 1285 1286 for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) { 1287 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID; 1288 s->ethrxq[qs].rspq.size = rp->rx_mini_pending; 1289 s->ethtxq[qs].q.size = rp->tx_pending; 1290 } 1291 return 0; 1292} 1293 1294/* 1295 * Return the interrupt holdoff timer and count for the first Queue Set on the 1296 * device. Our extension ioctl() (the cxgbtool interface) allows the 1297 * interrupt holdoff timer to be read on all of the device's Queue Sets. 1298 */ 1299static int cxgb4vf_get_coalesce(struct net_device *dev, 1300 struct ethtool_coalesce *coalesce) 1301{ 1302 const struct port_info *pi = netdev_priv(dev); 1303 const struct adapter *adapter = pi->adapter; 1304 const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq; 1305 1306 coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq); 1307 coalesce->rx_max_coalesced_frames = 1308 ((rspq->intr_params & QINTR_CNT_EN) 1309 ? adapter->sge.counter_val[rspq->pktcnt_idx] 1310 : 0); 1311 return 0; 1312} 1313 1314/* 1315 * Set the RX interrupt holdoff timer and count for the first Queue Set on the 1316 * interface. Our extension ioctl() (the cxgbtool interface) allows us to set 1317 * the interrupt holdoff timer on any of the device's Queue Sets. 1318 */ 1319static int cxgb4vf_set_coalesce(struct net_device *dev, 1320 struct ethtool_coalesce *coalesce) 1321{ 1322 const struct port_info *pi = netdev_priv(dev); 1323 struct adapter *adapter = pi->adapter; 1324 1325 return set_rxq_intr_params(adapter, 1326 &adapter->sge.ethrxq[pi->first_qset].rspq, 1327 coalesce->rx_coalesce_usecs, 1328 coalesce->rx_max_coalesced_frames); 1329} 1330 1331/* 1332 * Report current port link pause parameter settings. 1333 */ 1334static void cxgb4vf_get_pauseparam(struct net_device *dev, 1335 struct ethtool_pauseparam *pauseparam) 1336{ 1337 struct port_info *pi = netdev_priv(dev); 1338 1339 pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0; 1340 pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0; 1341 pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0; 1342} 1343 1344/* 1345 * Identify the port by blinking the port's LED. 1346 */ 1347static int cxgb4vf_phys_id(struct net_device *dev, 1348 enum ethtool_phys_id_state state) 1349{ 1350 unsigned int val; 1351 struct port_info *pi = netdev_priv(dev); 1352 1353 if (state == ETHTOOL_ID_ACTIVE) 1354 val = 0xffff; 1355 else if (state == ETHTOOL_ID_INACTIVE) 1356 val = 0; 1357 else 1358 return -EINVAL; 1359 1360 return t4vf_identify_port(pi->adapter, pi->viid, val); 1361} 1362 1363/* 1364 * Port stats maintained per queue of the port. 1365 */ 1366struct queue_port_stats { 1367 u64 tso; 1368 u64 tx_csum; 1369 u64 rx_csum; 1370 u64 vlan_ex; 1371 u64 vlan_ins; 1372 u64 lro_pkts; 1373 u64 lro_merged; 1374}; 1375 1376/* 1377 * Strings for the ETH_SS_STATS statistics set ("ethtool -S"). Note that 1378 * these need to match the order of statistics returned by 1379 * t4vf_get_port_stats(). 1380 */ 1381static const char stats_strings[][ETH_GSTRING_LEN] = { 1382 /* 1383 * These must match the layout of the t4vf_port_stats structure. 1384 */ 1385 "TxBroadcastBytes ", 1386 "TxBroadcastFrames ", 1387 "TxMulticastBytes ", 1388 "TxMulticastFrames ", 1389 "TxUnicastBytes ", 1390 "TxUnicastFrames ", 1391 "TxDroppedFrames ", 1392 "TxOffloadBytes ", 1393 "TxOffloadFrames ", 1394 "RxBroadcastBytes ", 1395 "RxBroadcastFrames ", 1396 "RxMulticastBytes ", 1397 "RxMulticastFrames ", 1398 "RxUnicastBytes ", 1399 "RxUnicastFrames ", 1400 "RxErrorFrames ", 1401 1402 /* 1403 * These are accumulated per-queue statistics and must match the 1404 * order of the fields in the queue_port_stats structure. 1405 */ 1406 "TSO ", 1407 "TxCsumOffload ", 1408 "RxCsumGood ", 1409 "VLANextractions ", 1410 "VLANinsertions ", 1411 "GROPackets ", 1412 "GROMerged ", 1413}; 1414 1415/* 1416 * Return the number of statistics in the specified statistics set. 1417 */ 1418static int cxgb4vf_get_sset_count(struct net_device *dev, int sset) 1419{ 1420 switch (sset) { 1421 case ETH_SS_STATS: 1422 return ARRAY_SIZE(stats_strings); 1423 default: 1424 return -EOPNOTSUPP; 1425 } 1426 /*NOTREACHED*/ 1427} 1428 1429/* 1430 * Return the strings for the specified statistics set. 1431 */ 1432static void cxgb4vf_get_strings(struct net_device *dev, 1433 u32 sset, 1434 u8 *data) 1435{ 1436 switch (sset) { 1437 case ETH_SS_STATS: 1438 memcpy(data, stats_strings, sizeof(stats_strings)); 1439 break; 1440 } 1441} 1442 1443/* 1444 * Small utility routine to accumulate queue statistics across the queues of 1445 * a "port". 1446 */ 1447static void collect_sge_port_stats(const struct adapter *adapter, 1448 const struct port_info *pi, 1449 struct queue_port_stats *stats) 1450{ 1451 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset]; 1452 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; 1453 int qs; 1454 1455 memset(stats, 0, sizeof(*stats)); 1456 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 1457 stats->tso += txq->tso; 1458 stats->tx_csum += txq->tx_cso; 1459 stats->rx_csum += rxq->stats.rx_cso; 1460 stats->vlan_ex += rxq->stats.vlan_ex; 1461 stats->vlan_ins += txq->vlan_ins; 1462 stats->lro_pkts += rxq->stats.lro_pkts; 1463 stats->lro_merged += rxq->stats.lro_merged; 1464 } 1465} 1466 1467/* 1468 * Return the ETH_SS_STATS statistics set. 1469 */ 1470static void cxgb4vf_get_ethtool_stats(struct net_device *dev, 1471 struct ethtool_stats *stats, 1472 u64 *data) 1473{ 1474 struct port_info *pi = netdev2pinfo(dev); 1475 struct adapter *adapter = pi->adapter; 1476 int err = t4vf_get_port_stats(adapter, pi->pidx, 1477 (struct t4vf_port_stats *)data); 1478 if (err) 1479 memset(data, 0, sizeof(struct t4vf_port_stats)); 1480 1481 data += sizeof(struct t4vf_port_stats) / sizeof(u64); 1482 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data); 1483} 1484 1485/* 1486 * Return the size of our register map. 1487 */ 1488static int cxgb4vf_get_regs_len(struct net_device *dev) 1489{ 1490 return T4VF_REGMAP_SIZE; 1491} 1492 1493/* 1494 * Dump a block of registers, start to end inclusive, into a buffer. 1495 */ 1496static void reg_block_dump(struct adapter *adapter, void *regbuf, 1497 unsigned int start, unsigned int end) 1498{ 1499 u32 *bp = regbuf + start - T4VF_REGMAP_START; 1500 1501 for ( ; start <= end; start += sizeof(u32)) { 1502 /* 1503 * Avoid reading the Mailbox Control register since that 1504 * can trigger a Mailbox Ownership Arbitration cycle and 1505 * interfere with communication with the firmware. 1506 */ 1507 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL) 1508 *bp++ = 0xffff; 1509 else 1510 *bp++ = t4_read_reg(adapter, start); 1511 } 1512} 1513 1514/* 1515 * Copy our entire register map into the provided buffer. 1516 */ 1517static void cxgb4vf_get_regs(struct net_device *dev, 1518 struct ethtool_regs *regs, 1519 void *regbuf) 1520{ 1521 struct adapter *adapter = netdev2adap(dev); 1522 1523 regs->version = mk_adap_vers(adapter); 1524 1525 /* 1526 * Fill in register buffer with our register map. 1527 */ 1528 memset(regbuf, 0, T4VF_REGMAP_SIZE); 1529 1530 reg_block_dump(adapter, regbuf, 1531 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST, 1532 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST); 1533 reg_block_dump(adapter, regbuf, 1534 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST, 1535 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST); 1536 reg_block_dump(adapter, regbuf, 1537 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST, 1538 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST); 1539 reg_block_dump(adapter, regbuf, 1540 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST, 1541 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST); 1542 1543 reg_block_dump(adapter, regbuf, 1544 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST, 1545 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST); 1546} 1547 1548/* 1549 * Report current Wake On LAN settings. 1550 */ 1551static void cxgb4vf_get_wol(struct net_device *dev, 1552 struct ethtool_wolinfo *wol) 1553{ 1554 wol->supported = 0; 1555 wol->wolopts = 0; 1556 memset(&wol->sopass, 0, sizeof(wol->sopass)); 1557} 1558 1559/* 1560 * TCP Segmentation Offload flags which we support. 1561 */ 1562#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) 1563 1564static struct ethtool_ops cxgb4vf_ethtool_ops = { 1565 .get_settings = cxgb4vf_get_settings, 1566 .get_drvinfo = cxgb4vf_get_drvinfo, 1567 .get_msglevel = cxgb4vf_get_msglevel, 1568 .set_msglevel = cxgb4vf_set_msglevel, 1569 .get_ringparam = cxgb4vf_get_ringparam, 1570 .set_ringparam = cxgb4vf_set_ringparam, 1571 .get_coalesce = cxgb4vf_get_coalesce, 1572 .set_coalesce = cxgb4vf_set_coalesce, 1573 .get_pauseparam = cxgb4vf_get_pauseparam, 1574 .get_link = ethtool_op_get_link, 1575 .get_strings = cxgb4vf_get_strings, 1576 .set_phys_id = cxgb4vf_phys_id, 1577 .get_sset_count = cxgb4vf_get_sset_count, 1578 .get_ethtool_stats = cxgb4vf_get_ethtool_stats, 1579 .get_regs_len = cxgb4vf_get_regs_len, 1580 .get_regs = cxgb4vf_get_regs, 1581 .get_wol = cxgb4vf_get_wol, 1582}; 1583 1584/* 1585 * /sys/kernel/debug/cxgb4vf support code and data. 1586 * ================================================ 1587 */ 1588 1589/* 1590 * Show SGE Queue Set information. We display QPL Queues Sets per line. 1591 */ 1592#define QPL 4 1593 1594static int sge_qinfo_show(struct seq_file *seq, void *v) 1595{ 1596 struct adapter *adapter = seq->private; 1597 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL); 1598 int qs, r = (uintptr_t)v - 1; 1599 1600 if (r) 1601 seq_putc(seq, '\n'); 1602 1603 #define S3(fmt_spec, s, v) \ 1604 do {\ 1605 seq_printf(seq, "%-12s", s); \ 1606 for (qs = 0; qs < n; ++qs) \ 1607 seq_printf(seq, " %16" fmt_spec, v); \ 1608 seq_putc(seq, '\n'); \ 1609 } while (0) 1610 #define S(s, v) S3("s", s, v) 1611 #define T(s, v) S3("u", s, txq[qs].v) 1612 #define R(s, v) S3("u", s, rxq[qs].v) 1613 1614 if (r < eth_entries) { 1615 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL]; 1616 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL]; 1617 int n = min(QPL, adapter->sge.ethqsets - QPL * r); 1618 1619 S("QType:", "Ethernet"); 1620 S("Interface:", 1621 (rxq[qs].rspq.netdev 1622 ? rxq[qs].rspq.netdev->name 1623 : "N/A")); 1624 S3("d", "Port:", 1625 (rxq[qs].rspq.netdev 1626 ? ((struct port_info *) 1627 netdev_priv(rxq[qs].rspq.netdev))->port_id 1628 : -1)); 1629 T("TxQ ID:", q.abs_id); 1630 T("TxQ size:", q.size); 1631 T("TxQ inuse:", q.in_use); 1632 T("TxQ PIdx:", q.pidx); 1633 T("TxQ CIdx:", q.cidx); 1634 R("RspQ ID:", rspq.abs_id); 1635 R("RspQ size:", rspq.size); 1636 R("RspQE size:", rspq.iqe_len); 1637 S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq)); 1638 S3("u", "Intr pktcnt:", 1639 adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]); 1640 R("RspQ CIdx:", rspq.cidx); 1641 R("RspQ Gen:", rspq.gen); 1642 R("FL ID:", fl.abs_id); 1643 R("FL size:", fl.size - MIN_FL_RESID); 1644 R("FL avail:", fl.avail); 1645 R("FL PIdx:", fl.pidx); 1646 R("FL CIdx:", fl.cidx); 1647 return 0; 1648 } 1649 1650 r -= eth_entries; 1651 if (r == 0) { 1652 const struct sge_rspq *evtq = &adapter->sge.fw_evtq; 1653 1654 seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue"); 1655 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id); 1656 seq_printf(seq, "%-12s %16u\n", "Intr delay:", 1657 qtimer_val(adapter, evtq)); 1658 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", 1659 adapter->sge.counter_val[evtq->pktcnt_idx]); 1660 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx); 1661 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen); 1662 } else if (r == 1) { 1663 const struct sge_rspq *intrq = &adapter->sge.intrq; 1664 1665 seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue"); 1666 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id); 1667 seq_printf(seq, "%-12s %16u\n", "Intr delay:", 1668 qtimer_val(adapter, intrq)); 1669 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", 1670 adapter->sge.counter_val[intrq->pktcnt_idx]); 1671 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx); 1672 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen); 1673 } 1674 1675 #undef R 1676 #undef T 1677 #undef S 1678 #undef S3 1679 1680 return 0; 1681} 1682 1683/* 1684 * Return the number of "entries" in our "file". We group the multi-Queue 1685 * sections with QPL Queue Sets per "entry". The sections of the output are: 1686 * 1687 * Ethernet RX/TX Queue Sets 1688 * Firmware Event Queue 1689 * Forwarded Interrupt Queue (if in MSI mode) 1690 */ 1691static int sge_queue_entries(const struct adapter *adapter) 1692{ 1693 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 + 1694 ((adapter->flags & USING_MSI) != 0); 1695} 1696 1697static void *sge_queue_start(struct seq_file *seq, loff_t *pos) 1698{ 1699 int entries = sge_queue_entries(seq->private); 1700 1701 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1702} 1703 1704static void sge_queue_stop(struct seq_file *seq, void *v) 1705{ 1706} 1707 1708static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos) 1709{ 1710 int entries = sge_queue_entries(seq->private); 1711 1712 ++*pos; 1713 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1714} 1715 1716static const struct seq_operations sge_qinfo_seq_ops = { 1717 .start = sge_queue_start, 1718 .next = sge_queue_next, 1719 .stop = sge_queue_stop, 1720 .show = sge_qinfo_show 1721}; 1722 1723static int sge_qinfo_open(struct inode *inode, struct file *file) 1724{ 1725 int res = seq_open(file, &sge_qinfo_seq_ops); 1726 1727 if (!res) { 1728 struct seq_file *seq = file->private_data; 1729 seq->private = inode->i_private; 1730 } 1731 return res; 1732} 1733 1734static const struct file_operations sge_qinfo_debugfs_fops = { 1735 .owner = THIS_MODULE, 1736 .open = sge_qinfo_open, 1737 .read = seq_read, 1738 .llseek = seq_lseek, 1739 .release = seq_release, 1740}; 1741 1742/* 1743 * Show SGE Queue Set statistics. We display QPL Queues Sets per line. 1744 */ 1745#define QPL 4 1746 1747static int sge_qstats_show(struct seq_file *seq, void *v) 1748{ 1749 struct adapter *adapter = seq->private; 1750 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL); 1751 int qs, r = (uintptr_t)v - 1; 1752 1753 if (r) 1754 seq_putc(seq, '\n'); 1755 1756 #define S3(fmt, s, v) \ 1757 do { \ 1758 seq_printf(seq, "%-16s", s); \ 1759 for (qs = 0; qs < n; ++qs) \ 1760 seq_printf(seq, " %8" fmt, v); \ 1761 seq_putc(seq, '\n'); \ 1762 } while (0) 1763 #define S(s, v) S3("s", s, v) 1764 1765 #define T3(fmt, s, v) S3(fmt, s, txq[qs].v) 1766 #define T(s, v) T3("lu", s, v) 1767 1768 #define R3(fmt, s, v) S3(fmt, s, rxq[qs].v) 1769 #define R(s, v) R3("lu", s, v) 1770 1771 if (r < eth_entries) { 1772 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL]; 1773 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL]; 1774 int n = min(QPL, adapter->sge.ethqsets - QPL * r); 1775 1776 S("QType:", "Ethernet"); 1777 S("Interface:", 1778 (rxq[qs].rspq.netdev 1779 ? rxq[qs].rspq.netdev->name 1780 : "N/A")); 1781 R3("u", "RspQNullInts:", rspq.unhandled_irqs); 1782 R("RxPackets:", stats.pkts); 1783 R("RxCSO:", stats.rx_cso); 1784 R("VLANxtract:", stats.vlan_ex); 1785 R("LROmerged:", stats.lro_merged); 1786 R("LROpackets:", stats.lro_pkts); 1787 R("RxDrops:", stats.rx_drops); 1788 T("TSO:", tso); 1789 T("TxCSO:", tx_cso); 1790 T("VLANins:", vlan_ins); 1791 T("TxQFull:", q.stops); 1792 T("TxQRestarts:", q.restarts); 1793 T("TxMapErr:", mapping_err); 1794 R("FLAllocErr:", fl.alloc_failed); 1795 R("FLLrgAlcErr:", fl.large_alloc_failed); 1796 R("FLStarving:", fl.starving); 1797 return 0; 1798 } 1799 1800 r -= eth_entries; 1801 if (r == 0) { 1802 const struct sge_rspq *evtq = &adapter->sge.fw_evtq; 1803 1804 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue"); 1805 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:", 1806 evtq->unhandled_irqs); 1807 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx); 1808 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen); 1809 } else if (r == 1) { 1810 const struct sge_rspq *intrq = &adapter->sge.intrq; 1811 1812 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue"); 1813 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:", 1814 intrq->unhandled_irqs); 1815 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx); 1816 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen); 1817 } 1818 1819 #undef R 1820 #undef T 1821 #undef S 1822 #undef R3 1823 #undef T3 1824 #undef S3 1825 1826 return 0; 1827} 1828 1829/* 1830 * Return the number of "entries" in our "file". We group the multi-Queue 1831 * sections with QPL Queue Sets per "entry". The sections of the output are: 1832 * 1833 * Ethernet RX/TX Queue Sets 1834 * Firmware Event Queue 1835 * Forwarded Interrupt Queue (if in MSI mode) 1836 */ 1837static int sge_qstats_entries(const struct adapter *adapter) 1838{ 1839 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 + 1840 ((adapter->flags & USING_MSI) != 0); 1841} 1842 1843static void *sge_qstats_start(struct seq_file *seq, loff_t *pos) 1844{ 1845 int entries = sge_qstats_entries(seq->private); 1846 1847 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1848} 1849 1850static void sge_qstats_stop(struct seq_file *seq, void *v) 1851{ 1852} 1853 1854static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos) 1855{ 1856 int entries = sge_qstats_entries(seq->private); 1857 1858 (*pos)++; 1859 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1860} 1861 1862static const struct seq_operations sge_qstats_seq_ops = { 1863 .start = sge_qstats_start, 1864 .next = sge_qstats_next, 1865 .stop = sge_qstats_stop, 1866 .show = sge_qstats_show 1867}; 1868 1869static int sge_qstats_open(struct inode *inode, struct file *file) 1870{ 1871 int res = seq_open(file, &sge_qstats_seq_ops); 1872 1873 if (res == 0) { 1874 struct seq_file *seq = file->private_data; 1875 seq->private = inode->i_private; 1876 } 1877 return res; 1878} 1879 1880static const struct file_operations sge_qstats_proc_fops = { 1881 .owner = THIS_MODULE, 1882 .open = sge_qstats_open, 1883 .read = seq_read, 1884 .llseek = seq_lseek, 1885 .release = seq_release, 1886}; 1887 1888/* 1889 * Show PCI-E SR-IOV Virtual Function Resource Limits. 1890 */ 1891static int resources_show(struct seq_file *seq, void *v) 1892{ 1893 struct adapter *adapter = seq->private; 1894 struct vf_resources *vfres = &adapter->params.vfres; 1895 1896 #define S(desc, fmt, var) \ 1897 seq_printf(seq, "%-60s " fmt "\n", \ 1898 desc " (" #var "):", vfres->var) 1899 1900 S("Virtual Interfaces", "%d", nvi); 1901 S("Egress Queues", "%d", neq); 1902 S("Ethernet Control", "%d", nethctrl); 1903 S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint); 1904 S("Ingress Queues", "%d", niq); 1905 S("Traffic Class", "%d", tc); 1906 S("Port Access Rights Mask", "%#x", pmask); 1907 S("MAC Address Filters", "%d", nexactf); 1908 S("Firmware Command Read Capabilities", "%#x", r_caps); 1909 S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps); 1910 1911 #undef S 1912 1913 return 0; 1914} 1915 1916static int resources_open(struct inode *inode, struct file *file) 1917{ 1918 return single_open(file, resources_show, inode->i_private); 1919} 1920 1921static const struct file_operations resources_proc_fops = { 1922 .owner = THIS_MODULE, 1923 .open = resources_open, 1924 .read = seq_read, 1925 .llseek = seq_lseek, 1926 .release = single_release, 1927}; 1928 1929/* 1930 * Show Virtual Interfaces. 1931 */ 1932static int interfaces_show(struct seq_file *seq, void *v) 1933{ 1934 if (v == SEQ_START_TOKEN) { 1935 seq_puts(seq, "Interface Port VIID\n"); 1936 } else { 1937 struct adapter *adapter = seq->private; 1938 int pidx = (uintptr_t)v - 2; 1939 struct net_device *dev = adapter->port[pidx]; 1940 struct port_info *pi = netdev_priv(dev); 1941 1942 seq_printf(seq, "%9s %4d %#5x\n", 1943 dev->name, pi->port_id, pi->viid); 1944 } 1945 return 0; 1946} 1947 1948static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos) 1949{ 1950 return pos <= adapter->params.nports 1951 ? (void *)(uintptr_t)(pos + 1) 1952 : NULL; 1953} 1954 1955static void *interfaces_start(struct seq_file *seq, loff_t *pos) 1956{ 1957 return *pos 1958 ? interfaces_get_idx(seq->private, *pos) 1959 : SEQ_START_TOKEN; 1960} 1961 1962static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos) 1963{ 1964 (*pos)++; 1965 return interfaces_get_idx(seq->private, *pos); 1966} 1967 1968static void interfaces_stop(struct seq_file *seq, void *v) 1969{ 1970} 1971 1972static const struct seq_operations interfaces_seq_ops = { 1973 .start = interfaces_start, 1974 .next = interfaces_next, 1975 .stop = interfaces_stop, 1976 .show = interfaces_show 1977}; 1978 1979static int interfaces_open(struct inode *inode, struct file *file) 1980{ 1981 int res = seq_open(file, &interfaces_seq_ops); 1982 1983 if (res == 0) { 1984 struct seq_file *seq = file->private_data; 1985 seq->private = inode->i_private; 1986 } 1987 return res; 1988} 1989 1990static const struct file_operations interfaces_proc_fops = { 1991 .owner = THIS_MODULE, 1992 .open = interfaces_open, 1993 .read = seq_read, 1994 .llseek = seq_lseek, 1995 .release = seq_release, 1996}; 1997 1998/* 1999 * /sys/kernel/debugfs/cxgb4vf/ files list. 2000 */ 2001struct cxgb4vf_debugfs_entry { 2002 const char *name; /* name of debugfs node */ 2003 mode_t mode; /* file system mode */ 2004 const struct file_operations *fops; 2005}; 2006 2007static struct cxgb4vf_debugfs_entry debugfs_files[] = { 2008 { "sge_qinfo", S_IRUGO, &sge_qinfo_debugfs_fops }, 2009 { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops }, 2010 { "resources", S_IRUGO, &resources_proc_fops }, 2011 { "interfaces", S_IRUGO, &interfaces_proc_fops }, 2012}; 2013 2014/* 2015 * Module and device initialization and cleanup code. 2016 * ================================================== 2017 */ 2018 2019/* 2020 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes. We assume that the 2021 * directory (debugfs_root) has already been set up. 2022 */ 2023static int __devinit setup_debugfs(struct adapter *adapter) 2024{ 2025 int i; 2026 2027 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root)); 2028 2029 /* 2030 * Debugfs support is best effort. 2031 */ 2032 for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) 2033 (void)debugfs_create_file(debugfs_files[i].name, 2034 debugfs_files[i].mode, 2035 adapter->debugfs_root, 2036 (void *)adapter, 2037 debugfs_files[i].fops); 2038 2039 return 0; 2040} 2041 2042/* 2043 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above. We leave 2044 * it to our caller to tear down the directory (debugfs_root). 2045 */ 2046static void cleanup_debugfs(struct adapter *adapter) 2047{ 2048 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root)); 2049 2050 /* 2051 * Unlike our sister routine cleanup_proc(), we don't need to remove 2052 * individual entries because a call will be made to 2053 * debugfs_remove_recursive(). We just need to clean up any ancillary 2054 * persistent state. 2055 */ 2056 /* nothing to do */ 2057} 2058 2059/* 2060 * Perform early "adapter" initialization. This is where we discover what 2061 * adapter parameters we're going to be using and initialize basic adapter 2062 * hardware support. 2063 */ 2064static int __devinit adap_init0(struct adapter *adapter) 2065{ 2066 struct vf_resources *vfres = &adapter->params.vfres; 2067 struct sge_params *sge_params = &adapter->params.sge; 2068 struct sge *s = &adapter->sge; 2069 unsigned int ethqsets; 2070 int err; 2071 2072 /* 2073 * Wait for the device to become ready before proceeding ... 2074 */ 2075 err = t4vf_wait_dev_ready(adapter); 2076 if (err) { 2077 dev_err(adapter->pdev_dev, "device didn't become ready:" 2078 " err=%d\n", err); 2079 return err; 2080 } 2081 2082 /* 2083 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux 2084 * 2.6.31 and later we can't call pci_reset_function() in order to 2085 * issue an FLR because of a self- deadlock on the device semaphore. 2086 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the 2087 * cases where they're needed -- for instance, some versions of KVM 2088 * fail to reset "Assigned Devices" when the VM reboots. Therefore we 2089 * use the firmware based reset in order to reset any per function 2090 * state. 2091 */ 2092 err = t4vf_fw_reset(adapter); 2093 if (err < 0) { 2094 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err); 2095 return err; 2096 } 2097 2098 /* 2099 * Grab basic operational parameters. These will predominantly have 2100 * been set up by the Physical Function Driver or will be hard coded 2101 * into the adapter. We just have to live with them ... Note that 2102 * we _must_ get our VPD parameters before our SGE parameters because 2103 * we need to know the adapter's core clock from the VPD in order to 2104 * properly decode the SGE Timer Values. 2105 */ 2106 err = t4vf_get_dev_params(adapter); 2107 if (err) { 2108 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2109 " device parameters: err=%d\n", err); 2110 return err; 2111 } 2112 err = t4vf_get_vpd_params(adapter); 2113 if (err) { 2114 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2115 " VPD parameters: err=%d\n", err); 2116 return err; 2117 } 2118 err = t4vf_get_sge_params(adapter); 2119 if (err) { 2120 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2121 " SGE parameters: err=%d\n", err); 2122 return err; 2123 } 2124 err = t4vf_get_rss_glb_config(adapter); 2125 if (err) { 2126 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2127 " RSS parameters: err=%d\n", err); 2128 return err; 2129 } 2130 if (adapter->params.rss.mode != 2131 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) { 2132 dev_err(adapter->pdev_dev, "unable to operate with global RSS" 2133 " mode %d\n", adapter->params.rss.mode); 2134 return -EINVAL; 2135 } 2136 err = t4vf_sge_init(adapter); 2137 if (err) { 2138 dev_err(adapter->pdev_dev, "unable to use adapter parameters:" 2139 " err=%d\n", err); 2140 return err; 2141 } 2142 2143 /* 2144 * Retrieve our RX interrupt holdoff timer values and counter 2145 * threshold values from the SGE parameters. 2146 */ 2147 s->timer_val[0] = core_ticks_to_us(adapter, 2148 TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1)); 2149 s->timer_val[1] = core_ticks_to_us(adapter, 2150 TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1)); 2151 s->timer_val[2] = core_ticks_to_us(adapter, 2152 TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3)); 2153 s->timer_val[3] = core_ticks_to_us(adapter, 2154 TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3)); 2155 s->timer_val[4] = core_ticks_to_us(adapter, 2156 TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5)); 2157 s->timer_val[5] = core_ticks_to_us(adapter, 2158 TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5)); 2159 2160 s->counter_val[0] = 2161 THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold); 2162 s->counter_val[1] = 2163 THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold); 2164 s->counter_val[2] = 2165 THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold); 2166 s->counter_val[3] = 2167 THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold); 2168 2169 /* 2170 * Grab our Virtual Interface resource allocation, extract the 2171 * features that we're interested in and do a bit of sanity testing on 2172 * what we discover. 2173 */ 2174 err = t4vf_get_vfres(adapter); 2175 if (err) { 2176 dev_err(adapter->pdev_dev, "unable to get virtual interface" 2177 " resources: err=%d\n", err); 2178 return err; 2179 } 2180 2181 /* 2182 * The number of "ports" which we support is equal to the number of 2183 * Virtual Interfaces with which we've been provisioned. 2184 */ 2185 adapter->params.nports = vfres->nvi; 2186 if (adapter->params.nports > MAX_NPORTS) { 2187 dev_warn(adapter->pdev_dev, "only using %d of %d allowed" 2188 " virtual interfaces\n", MAX_NPORTS, 2189 adapter->params.nports); 2190 adapter->params.nports = MAX_NPORTS; 2191 } 2192 2193 /* 2194 * We need to reserve a number of the ingress queues with Free List 2195 * and Interrupt capabilities for special interrupt purposes (like 2196 * asynchronous firmware messages, or forwarded interrupts if we're 2197 * using MSI). The rest of the FL/Intr-capable ingress queues will be 2198 * matched up one-for-one with Ethernet/Control egress queues in order 2199 * to form "Queue Sets" which will be aportioned between the "ports". 2200 * For each Queue Set, we'll need the ability to allocate two Egress 2201 * Contexts -- one for the Ingress Queue Free List and one for the TX 2202 * Ethernet Queue. 2203 */ 2204 ethqsets = vfres->niqflint - INGQ_EXTRAS; 2205 if (vfres->nethctrl != ethqsets) { 2206 dev_warn(adapter->pdev_dev, "unequal number of [available]" 2207 " ingress/egress queues (%d/%d); using minimum for" 2208 " number of Queue Sets\n", ethqsets, vfres->nethctrl); 2209 ethqsets = min(vfres->nethctrl, ethqsets); 2210 } 2211 if (vfres->neq < ethqsets*2) { 2212 dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)" 2213 " to support Queue Sets (%d); reducing allowed Queue" 2214 " Sets\n", vfres->neq, ethqsets); 2215 ethqsets = vfres->neq/2; 2216 } 2217 if (ethqsets > MAX_ETH_QSETS) { 2218 dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue" 2219 " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets); 2220 ethqsets = MAX_ETH_QSETS; 2221 } 2222 if (vfres->niq != 0 || vfres->neq > ethqsets*2) { 2223 dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)" 2224 " ignored\n", vfres->niq, vfres->neq - ethqsets*2); 2225 } 2226 adapter->sge.max_ethqsets = ethqsets; 2227 2228 /* 2229 * Check for various parameter sanity issues. Most checks simply 2230 * result in us using fewer resources than our provissioning but we 2231 * do need at least one "port" with which to work ... 2232 */ 2233 if (adapter->sge.max_ethqsets < adapter->params.nports) { 2234 dev_warn(adapter->pdev_dev, "only using %d of %d available" 2235 " virtual interfaces (too few Queue Sets)\n", 2236 adapter->sge.max_ethqsets, adapter->params.nports); 2237 adapter->params.nports = adapter->sge.max_ethqsets; 2238 } 2239 if (adapter->params.nports == 0) { 2240 dev_err(adapter->pdev_dev, "no virtual interfaces configured/" 2241 "usable!\n"); 2242 return -EINVAL; 2243 } 2244 return 0; 2245} 2246 2247static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx, 2248 u8 pkt_cnt_idx, unsigned int size, 2249 unsigned int iqe_size) 2250{ 2251 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | 2252 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0)); 2253 rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS 2254 ? pkt_cnt_idx 2255 : 0); 2256 rspq->iqe_len = iqe_size; 2257 rspq->size = size; 2258} 2259 2260/* 2261 * Perform default configuration of DMA queues depending on the number and 2262 * type of ports we found and the number of available CPUs. Most settings can 2263 * be modified by the admin via ethtool and cxgbtool prior to the adapter 2264 * being brought up for the first time. 2265 */ 2266static void __devinit cfg_queues(struct adapter *adapter) 2267{ 2268 struct sge *s = &adapter->sge; 2269 int q10g, n10g, qidx, pidx, qs; 2270 size_t iqe_size; 2271 2272 /* 2273 * We should not be called till we know how many Queue Sets we can 2274 * support. In particular, this means that we need to know what kind 2275 * of interrupts we'll be using ... 2276 */ 2277 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); 2278 2279 /* 2280 * Count the number of 10GbE Virtual Interfaces that we have. 2281 */ 2282 n10g = 0; 2283 for_each_port(adapter, pidx) 2284 n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg); 2285 2286 /* 2287 * We default to 1 queue per non-10G port and up to # of cores queues 2288 * per 10G port. 2289 */ 2290 if (n10g == 0) 2291 q10g = 0; 2292 else { 2293 int n1g = (adapter->params.nports - n10g); 2294 q10g = (adapter->sge.max_ethqsets - n1g) / n10g; 2295 if (q10g > num_online_cpus()) 2296 q10g = num_online_cpus(); 2297 } 2298 2299 /* 2300 * Allocate the "Queue Sets" to the various Virtual Interfaces. 2301 * The layout will be established in setup_sge_queues() when the 2302 * adapter is brough up for the first time. 2303 */ 2304 qidx = 0; 2305 for_each_port(adapter, pidx) { 2306 struct port_info *pi = adap2pinfo(adapter, pidx); 2307 2308 pi->first_qset = qidx; 2309 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1; 2310 qidx += pi->nqsets; 2311 } 2312 s->ethqsets = qidx; 2313 2314 /* 2315 * The Ingress Queue Entry Size for our various Response Queues needs 2316 * to be big enough to accommodate the largest message we can receive 2317 * from the chip/firmware; which is 64 bytes ... 2318 */ 2319 iqe_size = 64; 2320 2321 /* 2322 * Set up default Queue Set parameters ... Start off with the 2323 * shortest interrupt holdoff timer. 2324 */ 2325 for (qs = 0; qs < s->max_ethqsets; qs++) { 2326 struct sge_eth_rxq *rxq = &s->ethrxq[qs]; 2327 struct sge_eth_txq *txq = &s->ethtxq[qs]; 2328 2329 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size); 2330 rxq->fl.size = 72; 2331 txq->q.size = 1024; 2332 } 2333 2334 /* 2335 * The firmware event queue is used for link state changes and 2336 * notifications of TX DMA completions. 2337 */ 2338 init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size); 2339 2340 /* 2341 * The forwarded interrupt queue is used when we're in MSI interrupt 2342 * mode. In this mode all interrupts associated with RX queues will 2343 * be forwarded to a single queue which we'll associate with our MSI 2344 * interrupt vector. The messages dropped in the forwarded interrupt 2345 * queue will indicate which ingress queue needs servicing ... This 2346 * queue needs to be large enough to accommodate all of the ingress 2347 * queues which are forwarding their interrupt (+1 to prevent the PIDX 2348 * from equalling the CIDX if every ingress queue has an outstanding 2349 * interrupt). The queue doesn't need to be any larger because no 2350 * ingress queue will ever have more than one outstanding interrupt at 2351 * any time ... 2352 */ 2353 init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1, 2354 iqe_size); 2355} 2356 2357/* 2358 * Reduce the number of Ethernet queues across all ports to at most n. 2359 * n provides at least one queue per port. 2360 */ 2361static void __devinit reduce_ethqs(struct adapter *adapter, int n) 2362{ 2363 int i; 2364 struct port_info *pi; 2365 2366 /* 2367 * While we have too many active Ether Queue Sets, interate across the 2368 * "ports" and reduce their individual Queue Set allocations. 2369 */ 2370 BUG_ON(n < adapter->params.nports); 2371 while (n < adapter->sge.ethqsets) 2372 for_each_port(adapter, i) { 2373 pi = adap2pinfo(adapter, i); 2374 if (pi->nqsets > 1) { 2375 pi->nqsets--; 2376 adapter->sge.ethqsets--; 2377 if (adapter->sge.ethqsets <= n) 2378 break; 2379 } 2380 } 2381 2382 /* 2383 * Reassign the starting Queue Sets for each of the "ports" ... 2384 */ 2385 n = 0; 2386 for_each_port(adapter, i) { 2387 pi = adap2pinfo(adapter, i); 2388 pi->first_qset = n; 2389 n += pi->nqsets; 2390 } 2391} 2392 2393/* 2394 * We need to grab enough MSI-X vectors to cover our interrupt needs. Ideally 2395 * we get a separate MSI-X vector for every "Queue Set" plus any extras we 2396 * need. Minimally we need one for every Virtual Interface plus those needed 2397 * for our "extras". Note that this process may lower the maximum number of 2398 * allowed Queue Sets ... 2399 */ 2400static int __devinit enable_msix(struct adapter *adapter) 2401{ 2402 int i, err, want, need; 2403 struct msix_entry entries[MSIX_ENTRIES]; 2404 struct sge *s = &adapter->sge; 2405 2406 for (i = 0; i < MSIX_ENTRIES; ++i) 2407 entries[i].entry = i; 2408 2409 /* 2410 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets" 2411 * plus those needed for our "extras" (for example, the firmware 2412 * message queue). We _need_ at least one "Queue Set" per Virtual 2413 * Interface plus those needed for our "extras". So now we get to see 2414 * if the song is right ... 2415 */ 2416 want = s->max_ethqsets + MSIX_EXTRAS; 2417 need = adapter->params.nports + MSIX_EXTRAS; 2418 while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need) 2419 want = err; 2420 2421 if (err == 0) { 2422 int nqsets = want - MSIX_EXTRAS; 2423 if (nqsets < s->max_ethqsets) { 2424 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors" 2425 " for %d Queue Sets\n", nqsets); 2426 s->max_ethqsets = nqsets; 2427 if (nqsets < s->ethqsets) 2428 reduce_ethqs(adapter, nqsets); 2429 } 2430 for (i = 0; i < want; ++i) 2431 adapter->msix_info[i].vec = entries[i].vector; 2432 } else if (err > 0) { 2433 pci_disable_msix(adapter->pdev); 2434 dev_info(adapter->pdev_dev, "only %d MSI-X vectors left," 2435 " not using MSI-X\n", err); 2436 } 2437 return err; 2438} 2439 2440static const struct net_device_ops cxgb4vf_netdev_ops = { 2441 .ndo_open = cxgb4vf_open, 2442 .ndo_stop = cxgb4vf_stop, 2443 .ndo_start_xmit = t4vf_eth_xmit, 2444 .ndo_get_stats = cxgb4vf_get_stats, 2445 .ndo_set_rx_mode = cxgb4vf_set_rxmode, 2446 .ndo_set_mac_address = cxgb4vf_set_mac_addr, 2447 .ndo_validate_addr = eth_validate_addr, 2448 .ndo_do_ioctl = cxgb4vf_do_ioctl, 2449 .ndo_change_mtu = cxgb4vf_change_mtu, 2450 .ndo_fix_features = cxgb4vf_fix_features, 2451 .ndo_set_features = cxgb4vf_set_features, 2452#ifdef CONFIG_NET_POLL_CONTROLLER 2453 .ndo_poll_controller = cxgb4vf_poll_controller, 2454#endif 2455}; 2456 2457/* 2458 * "Probe" a device: initialize a device and construct all kernel and driver 2459 * state needed to manage the device. This routine is called "init_one" in 2460 * the PF Driver ... 2461 */ 2462static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev, 2463 const struct pci_device_id *ent) 2464{ 2465 static int version_printed; 2466 2467 int pci_using_dac; 2468 int err, pidx; 2469 unsigned int pmask; 2470 struct adapter *adapter; 2471 struct port_info *pi; 2472 struct net_device *netdev; 2473 2474 /* 2475 * Print our driver banner the first time we're called to initialize a 2476 * device. 2477 */ 2478 if (version_printed == 0) { 2479 printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); 2480 version_printed = 1; 2481 } 2482 2483 /* 2484 * Initialize generic PCI device state. 2485 */ 2486 err = pci_enable_device(pdev); 2487 if (err) { 2488 dev_err(&pdev->dev, "cannot enable PCI device\n"); 2489 return err; 2490 } 2491 2492 /* 2493 * Reserve PCI resources for the device. If we can't get them some 2494 * other driver may have already claimed the device ... 2495 */ 2496 err = pci_request_regions(pdev, KBUILD_MODNAME); 2497 if (err) { 2498 dev_err(&pdev->dev, "cannot obtain PCI resources\n"); 2499 goto err_disable_device; 2500 } 2501 2502 /* 2503 * Set up our DMA mask: try for 64-bit address masking first and 2504 * fall back to 32-bit if we can't get 64 bits ... 2505 */ 2506 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 2507 if (err == 0) { 2508 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 2509 if (err) { 2510 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for" 2511 " coherent allocations\n"); 2512 goto err_release_regions; 2513 } 2514 pci_using_dac = 1; 2515 } else { 2516 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 2517 if (err != 0) { 2518 dev_err(&pdev->dev, "no usable DMA configuration\n"); 2519 goto err_release_regions; 2520 } 2521 pci_using_dac = 0; 2522 } 2523 2524 /* 2525 * Enable bus mastering for the device ... 2526 */ 2527 pci_set_master(pdev); 2528 2529 /* 2530 * Allocate our adapter data structure and attach it to the device. 2531 */ 2532 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); 2533 if (!adapter) { 2534 err = -ENOMEM; 2535 goto err_release_regions; 2536 } 2537 pci_set_drvdata(pdev, adapter); 2538 adapter->pdev = pdev; 2539 adapter->pdev_dev = &pdev->dev; 2540 2541 /* 2542 * Initialize SMP data synchronization resources. 2543 */ 2544 spin_lock_init(&adapter->stats_lock); 2545 2546 /* 2547 * Map our I/O registers in BAR0. 2548 */ 2549 adapter->regs = pci_ioremap_bar(pdev, 0); 2550 if (!adapter->regs) { 2551 dev_err(&pdev->dev, "cannot map device registers\n"); 2552 err = -ENOMEM; 2553 goto err_free_adapter; 2554 } 2555 2556 /* 2557 * Initialize adapter level features. 2558 */ 2559 adapter->name = pci_name(pdev); 2560 adapter->msg_enable = dflt_msg_enable; 2561 err = adap_init0(adapter); 2562 if (err) 2563 goto err_unmap_bar; 2564 2565 /* 2566 * Allocate our "adapter ports" and stitch everything together. 2567 */ 2568 pmask = adapter->params.vfres.pmask; 2569 for_each_port(adapter, pidx) { 2570 int port_id, viid; 2571 2572 /* 2573 * We simplistically allocate our virtual interfaces 2574 * sequentially across the port numbers to which we have 2575 * access rights. This should be configurable in some manner 2576 * ... 2577 */ 2578 if (pmask == 0) 2579 break; 2580 port_id = ffs(pmask) - 1; 2581 pmask &= ~(1 << port_id); 2582 viid = t4vf_alloc_vi(adapter, port_id); 2583 if (viid < 0) { 2584 dev_err(&pdev->dev, "cannot allocate VI for port %d:" 2585 " err=%d\n", port_id, viid); 2586 err = viid; 2587 goto err_free_dev; 2588 } 2589 2590 /* 2591 * Allocate our network device and stitch things together. 2592 */ 2593 netdev = alloc_etherdev_mq(sizeof(struct port_info), 2594 MAX_PORT_QSETS); 2595 if (netdev == NULL) { 2596 dev_err(&pdev->dev, "cannot allocate netdev for" 2597 " port %d\n", port_id); 2598 t4vf_free_vi(adapter, viid); 2599 err = -ENOMEM; 2600 goto err_free_dev; 2601 } 2602 adapter->port[pidx] = netdev; 2603 SET_NETDEV_DEV(netdev, &pdev->dev); 2604 pi = netdev_priv(netdev); 2605 pi->adapter = adapter; 2606 pi->pidx = pidx; 2607 pi->port_id = port_id; 2608 pi->viid = viid; 2609 2610 /* 2611 * Initialize the starting state of our "port" and register 2612 * it. 2613 */ 2614 pi->xact_addr_filt = -1; 2615 netif_carrier_off(netdev); 2616 netdev->irq = pdev->irq; 2617 2618 netdev->hw_features = NETIF_F_SG | TSO_FLAGS | 2619 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 2620 NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM; 2621 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS | 2622 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 2623 NETIF_F_HIGHDMA; 2624 netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX; 2625 if (pci_using_dac) 2626 netdev->features |= NETIF_F_HIGHDMA; 2627 2628 netdev->priv_flags |= IFF_UNICAST_FLT; 2629 2630 netdev->netdev_ops = &cxgb4vf_netdev_ops; 2631 SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops); 2632 2633 /* 2634 * Initialize the hardware/software state for the port. 2635 */ 2636 err = t4vf_port_init(adapter, pidx); 2637 if (err) { 2638 dev_err(&pdev->dev, "cannot initialize port %d\n", 2639 pidx); 2640 goto err_free_dev; 2641 } 2642 } 2643 2644 /* 2645 * The "card" is now ready to go. If any errors occur during device 2646 * registration we do not fail the whole "card" but rather proceed 2647 * only with the ports we manage to register successfully. However we 2648 * must register at least one net device. 2649 */ 2650 for_each_port(adapter, pidx) { 2651 netdev = adapter->port[pidx]; 2652 if (netdev == NULL) 2653 continue; 2654 2655 err = register_netdev(netdev); 2656 if (err) { 2657 dev_warn(&pdev->dev, "cannot register net device %s," 2658 " skipping\n", netdev->name); 2659 continue; 2660 } 2661 2662 set_bit(pidx, &adapter->registered_device_map); 2663 } 2664 if (adapter->registered_device_map == 0) { 2665 dev_err(&pdev->dev, "could not register any net devices\n"); 2666 goto err_free_dev; 2667 } 2668 2669 /* 2670 * Set up our debugfs entries. 2671 */ 2672 if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) { 2673 adapter->debugfs_root = 2674 debugfs_create_dir(pci_name(pdev), 2675 cxgb4vf_debugfs_root); 2676 if (IS_ERR_OR_NULL(adapter->debugfs_root)) 2677 dev_warn(&pdev->dev, "could not create debugfs" 2678 " directory"); 2679 else 2680 setup_debugfs(adapter); 2681 } 2682 2683 /* 2684 * See what interrupts we'll be using. If we've been configured to 2685 * use MSI-X interrupts, try to enable them but fall back to using 2686 * MSI interrupts if we can't enable MSI-X interrupts. If we can't 2687 * get MSI interrupts we bail with the error. 2688 */ 2689 if (msi == MSI_MSIX && enable_msix(adapter) == 0) 2690 adapter->flags |= USING_MSIX; 2691 else { 2692 err = pci_enable_msi(pdev); 2693 if (err) { 2694 dev_err(&pdev->dev, "Unable to allocate %s interrupts;" 2695 " err=%d\n", 2696 msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err); 2697 goto err_free_debugfs; 2698 } 2699 adapter->flags |= USING_MSI; 2700 } 2701 2702 /* 2703 * Now that we know how many "ports" we have and what their types are, 2704 * and how many Queue Sets we can support, we can configure our queue 2705 * resources. 2706 */ 2707 cfg_queues(adapter); 2708 2709 /* 2710 * Print a short notice on the existence and configuration of the new 2711 * VF network device ... 2712 */ 2713 for_each_port(adapter, pidx) { 2714 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n", 2715 adapter->port[pidx]->name, 2716 (adapter->flags & USING_MSIX) ? "MSI-X" : 2717 (adapter->flags & USING_MSI) ? "MSI" : ""); 2718 } 2719 2720 /* 2721 * Return success! 2722 */ 2723 return 0; 2724 2725 /* 2726 * Error recovery and exit code. Unwind state that's been created 2727 * so far and return the error. 2728 */ 2729 2730err_free_debugfs: 2731 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) { 2732 cleanup_debugfs(adapter); 2733 debugfs_remove_recursive(adapter->debugfs_root); 2734 } 2735 2736err_free_dev: 2737 for_each_port(adapter, pidx) { 2738 netdev = adapter->port[pidx]; 2739 if (netdev == NULL) 2740 continue; 2741 pi = netdev_priv(netdev); 2742 t4vf_free_vi(adapter, pi->viid); 2743 if (test_bit(pidx, &adapter->registered_device_map)) 2744 unregister_netdev(netdev); 2745 free_netdev(netdev); 2746 } 2747 2748err_unmap_bar: 2749 iounmap(adapter->regs); 2750 2751err_free_adapter: 2752 kfree(adapter); 2753 pci_set_drvdata(pdev, NULL); 2754 2755err_release_regions: 2756 pci_release_regions(pdev); 2757 pci_set_drvdata(pdev, NULL); 2758 pci_clear_master(pdev); 2759 2760err_disable_device: 2761 pci_disable_device(pdev); 2762 2763 return err; 2764} 2765 2766/* 2767 * "Remove" a device: tear down all kernel and driver state created in the 2768 * "probe" routine and quiesce the device (disable interrupts, etc.). (Note 2769 * that this is called "remove_one" in the PF Driver.) 2770 */ 2771static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev) 2772{ 2773 struct adapter *adapter = pci_get_drvdata(pdev); 2774 2775 /* 2776 * Tear down driver state associated with device. 2777 */ 2778 if (adapter) { 2779 int pidx; 2780 2781 /* 2782 * Stop all of our activity. Unregister network port, 2783 * disable interrupts, etc. 2784 */ 2785 for_each_port(adapter, pidx) 2786 if (test_bit(pidx, &adapter->registered_device_map)) 2787 unregister_netdev(adapter->port[pidx]); 2788 t4vf_sge_stop(adapter); 2789 if (adapter->flags & USING_MSIX) { 2790 pci_disable_msix(adapter->pdev); 2791 adapter->flags &= ~USING_MSIX; 2792 } else if (adapter->flags & USING_MSI) { 2793 pci_disable_msi(adapter->pdev); 2794 adapter->flags &= ~USING_MSI; 2795 } 2796 2797 /* 2798 * Tear down our debugfs entries. 2799 */ 2800 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) { 2801 cleanup_debugfs(adapter); 2802 debugfs_remove_recursive(adapter->debugfs_root); 2803 } 2804 2805 /* 2806 * Free all of the various resources which we've acquired ... 2807 */ 2808 t4vf_free_sge_resources(adapter); 2809 for_each_port(adapter, pidx) { 2810 struct net_device *netdev = adapter->port[pidx]; 2811 struct port_info *pi; 2812 2813 if (netdev == NULL) 2814 continue; 2815 2816 pi = netdev_priv(netdev); 2817 t4vf_free_vi(adapter, pi->viid); 2818 free_netdev(netdev); 2819 } 2820 iounmap(adapter->regs); 2821 kfree(adapter); 2822 pci_set_drvdata(pdev, NULL); 2823 } 2824 2825 /* 2826 * Disable the device and release its PCI resources. 2827 */ 2828 pci_disable_device(pdev); 2829 pci_clear_master(pdev); 2830 pci_release_regions(pdev); 2831} 2832 2833/* 2834 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt 2835 * delivery. 2836 */ 2837static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev) 2838{ 2839 struct adapter *adapter; 2840 int pidx; 2841 2842 adapter = pci_get_drvdata(pdev); 2843 if (!adapter) 2844 return; 2845 2846 /* 2847 * Disable all Virtual Interfaces. This will shut down the 2848 * delivery of all ingress packets into the chip for these 2849 * Virtual Interfaces. 2850 */ 2851 for_each_port(adapter, pidx) { 2852 struct net_device *netdev; 2853 struct port_info *pi; 2854 2855 if (!test_bit(pidx, &adapter->registered_device_map)) 2856 continue; 2857 2858 netdev = adapter->port[pidx]; 2859 if (!netdev) 2860 continue; 2861 2862 pi = netdev_priv(netdev); 2863 t4vf_enable_vi(adapter, pi->viid, false, false); 2864 } 2865 2866 /* 2867 * Free up all Queues which will prevent further DMA and 2868 * Interrupts allowing various internal pathways to drain. 2869 */ 2870 t4vf_free_sge_resources(adapter); 2871} 2872 2873/* 2874 * PCI Device registration data structures. 2875 */ 2876#define CH_DEVICE(devid, idx) \ 2877 { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx } 2878 2879static struct pci_device_id cxgb4vf_pci_tbl[] = { 2880 CH_DEVICE(0xb000, 0), /* PE10K FPGA */ 2881 CH_DEVICE(0x4800, 0), /* T440-dbg */ 2882 CH_DEVICE(0x4801, 0), /* T420-cr */ 2883 CH_DEVICE(0x4802, 0), /* T422-cr */ 2884 CH_DEVICE(0x4803, 0), /* T440-cr */ 2885 CH_DEVICE(0x4804, 0), /* T420-bch */ 2886 CH_DEVICE(0x4805, 0), /* T440-bch */ 2887 CH_DEVICE(0x4806, 0), /* T460-ch */ 2888 CH_DEVICE(0x4807, 0), /* T420-so */ 2889 CH_DEVICE(0x4808, 0), /* T420-cx */ 2890 CH_DEVICE(0x4809, 0), /* T420-bt */ 2891 CH_DEVICE(0x480a, 0), /* T404-bt */ 2892 { 0, } 2893}; 2894 2895MODULE_DESCRIPTION(DRV_DESC); 2896MODULE_AUTHOR("Chelsio Communications"); 2897MODULE_LICENSE("Dual BSD/GPL"); 2898MODULE_VERSION(DRV_VERSION); 2899MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl); 2900 2901static struct pci_driver cxgb4vf_driver = { 2902 .name = KBUILD_MODNAME, 2903 .id_table = cxgb4vf_pci_tbl, 2904 .probe = cxgb4vf_pci_probe, 2905 .remove = __devexit_p(cxgb4vf_pci_remove), 2906 .shutdown = __devexit_p(cxgb4vf_pci_shutdown), 2907}; 2908 2909/* 2910 * Initialize global driver state. 2911 */ 2912static int __init cxgb4vf_module_init(void) 2913{ 2914 int ret; 2915 2916 /* 2917 * Vet our module parameters. 2918 */ 2919 if (msi != MSI_MSIX && msi != MSI_MSI) { 2920 printk(KERN_WARNING KBUILD_MODNAME 2921 ": bad module parameter msi=%d; must be %d" 2922 " (MSI-X or MSI) or %d (MSI)\n", 2923 msi, MSI_MSIX, MSI_MSI); 2924 return -EINVAL; 2925 } 2926 2927 /* Debugfs support is optional, just warn if this fails */ 2928 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); 2929 if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) 2930 printk(KERN_WARNING KBUILD_MODNAME ": could not create" 2931 " debugfs entry, continuing\n"); 2932 2933 ret = pci_register_driver(&cxgb4vf_driver); 2934 if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) 2935 debugfs_remove(cxgb4vf_debugfs_root); 2936 return ret; 2937} 2938 2939/* 2940 * Tear down global driver state. 2941 */ 2942static void __exit cxgb4vf_module_exit(void) 2943{ 2944 pci_unregister_driver(&cxgb4vf_driver); 2945 debugfs_remove(cxgb4vf_debugfs_root); 2946} 2947 2948module_init(cxgb4vf_module_init); 2949module_exit(cxgb4vf_module_exit); 2950