mthca_srq.c revision 2fa5e2ebbe2d81f741ba7bed9e07dc38cc734625
1/* 2 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ 33 */ 34 35#include <linux/slab.h> 36#include <linux/string.h> 37 38#include "mthca_dev.h" 39#include "mthca_cmd.h" 40#include "mthca_memfree.h" 41#include "mthca_wqe.h" 42 43enum { 44 MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE 45}; 46 47struct mthca_tavor_srq_context { 48 __be64 wqe_base_ds; /* low 6 bits is descriptor size */ 49 __be32 state_pd; 50 __be32 lkey; 51 __be32 uar; 52 __be32 wqe_cnt; 53 u32 reserved[2]; 54}; 55 56struct mthca_arbel_srq_context { 57 __be32 state_logsize_srqn; 58 __be32 lkey; 59 __be32 db_index; 60 __be32 logstride_usrpage; 61 __be64 wqe_base; 62 __be32 eq_pd; 63 __be16 limit_watermark; 64 __be16 wqe_cnt; 65 u16 reserved1; 66 __be16 wqe_counter; 67 u32 reserved2[3]; 68}; 69 70static void *get_wqe(struct mthca_srq *srq, int n) 71{ 72 if (srq->is_direct) 73 return srq->queue.direct.buf + (n << srq->wqe_shift); 74 else 75 return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf + 76 ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); 77} 78 79/* 80 * Return a pointer to the location within a WQE that we're using as a 81 * link when the WQE is in the free list. We use the imm field 82 * because in the Tavor case, posting a WQE may overwrite the next 83 * segment of the previous WQE, but a receive WQE will never touch the 84 * imm field. This avoids corrupting our free list if the previous 85 * WQE has already completed and been put on the free list when we 86 * post the next WQE. 87 */ 88static inline int *wqe_to_link(void *wqe) 89{ 90 return (int *) (wqe + offsetof(struct mthca_next_seg, imm)); 91} 92 93static void mthca_tavor_init_srq_context(struct mthca_dev *dev, 94 struct mthca_pd *pd, 95 struct mthca_srq *srq, 96 struct mthca_tavor_srq_context *context) 97{ 98 memset(context, 0, sizeof *context); 99 100 context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); 101 context->state_pd = cpu_to_be32(pd->pd_num); 102 context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); 103 104 if (pd->ibpd.uobject) 105 context->uar = 106 cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); 107 else 108 context->uar = cpu_to_be32(dev->driver_uar.index); 109} 110 111static void mthca_arbel_init_srq_context(struct mthca_dev *dev, 112 struct mthca_pd *pd, 113 struct mthca_srq *srq, 114 struct mthca_arbel_srq_context *context) 115{ 116 int logsize; 117 118 memset(context, 0, sizeof *context); 119 120 logsize = long_log2(srq->max) + srq->wqe_shift; 121 context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); 122 context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); 123 context->db_index = cpu_to_be32(srq->db_index); 124 context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); 125 if (pd->ibpd.uobject) 126 context->logstride_usrpage |= 127 cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); 128 else 129 context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); 130 context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); 131} 132 133static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) 134{ 135 mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue, 136 srq->is_direct, &srq->mr); 137 kfree(srq->wrid); 138} 139 140static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, 141 struct mthca_srq *srq) 142{ 143 struct mthca_data_seg *scatter; 144 void *wqe; 145 int err; 146 int i; 147 148 if (pd->ibpd.uobject) 149 return 0; 150 151 srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); 152 if (!srq->wrid) 153 return -ENOMEM; 154 155 err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, 156 MTHCA_MAX_DIRECT_SRQ_SIZE, 157 &srq->queue, &srq->is_direct, pd, 1, &srq->mr); 158 if (err) { 159 kfree(srq->wrid); 160 return err; 161 } 162 163 /* 164 * Now initialize the SRQ buffer so that all of the WQEs are 165 * linked into the list of free WQEs. In addition, set the 166 * scatter list L_Keys to the sentry value of 0x100. 167 */ 168 for (i = 0; i < srq->max; ++i) { 169 wqe = get_wqe(srq, i); 170 171 *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1; 172 173 for (scatter = wqe + sizeof (struct mthca_next_seg); 174 (void *) scatter < wqe + (1 << srq->wqe_shift); 175 ++scatter) 176 scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); 177 } 178 179 srq->last = get_wqe(srq, srq->max - 1); 180 181 return 0; 182} 183 184int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, 185 struct ib_srq_attr *attr, struct mthca_srq *srq) 186{ 187 struct mthca_mailbox *mailbox; 188 u8 status; 189 int ds; 190 int err; 191 192 /* Sanity check SRQ size before proceeding */ 193 if (attr->max_wr > dev->limits.max_srq_wqes || 194 attr->max_sge > dev->limits.max_sg) 195 return -EINVAL; 196 197 srq->max = attr->max_wr; 198 srq->max_gs = attr->max_sge; 199 srq->counter = 0; 200 201 if (mthca_is_memfree(dev)) 202 srq->max = roundup_pow_of_two(srq->max + 1); 203 204 ds = max(64UL, 205 roundup_pow_of_two(sizeof (struct mthca_next_seg) + 206 srq->max_gs * sizeof (struct mthca_data_seg))); 207 srq->wqe_shift = long_log2(ds); 208 209 srq->srqn = mthca_alloc(&dev->srq_table.alloc); 210 if (srq->srqn == -1) 211 return -ENOMEM; 212 213 if (mthca_is_memfree(dev)) { 214 err = mthca_table_get(dev, dev->srq_table.table, srq->srqn); 215 if (err) 216 goto err_out; 217 218 if (!pd->ibpd.uobject) { 219 srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, 220 srq->srqn, &srq->db); 221 if (srq->db_index < 0) { 222 err = -ENOMEM; 223 goto err_out_icm; 224 } 225 } 226 } 227 228 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 229 if (IS_ERR(mailbox)) { 230 err = PTR_ERR(mailbox); 231 goto err_out_db; 232 } 233 234 err = mthca_alloc_srq_buf(dev, pd, srq); 235 if (err) 236 goto err_out_mailbox; 237 238 spin_lock_init(&srq->lock); 239 atomic_set(&srq->refcount, 1); 240 init_waitqueue_head(&srq->wait); 241 242 if (mthca_is_memfree(dev)) 243 mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); 244 else 245 mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); 246 247 err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); 248 249 if (err) { 250 mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); 251 goto err_out_free_buf; 252 } 253 if (status) { 254 mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", 255 status); 256 err = -EINVAL; 257 goto err_out_free_buf; 258 } 259 260 spin_lock_irq(&dev->srq_table.lock); 261 if (mthca_array_set(&dev->srq_table.srq, 262 srq->srqn & (dev->limits.num_srqs - 1), 263 srq)) { 264 spin_unlock_irq(&dev->srq_table.lock); 265 goto err_out_free_srq; 266 } 267 spin_unlock_irq(&dev->srq_table.lock); 268 269 mthca_free_mailbox(dev, mailbox); 270 271 srq->first_free = 0; 272 srq->last_free = srq->max - 1; 273 274 return 0; 275 276err_out_free_srq: 277 err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); 278 if (err) 279 mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); 280 else if (status) 281 mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); 282 283err_out_free_buf: 284 if (!pd->ibpd.uobject) 285 mthca_free_srq_buf(dev, srq); 286 287err_out_mailbox: 288 mthca_free_mailbox(dev, mailbox); 289 290err_out_db: 291 if (!pd->ibpd.uobject && mthca_is_memfree(dev)) 292 mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); 293 294err_out_icm: 295 mthca_table_put(dev, dev->srq_table.table, srq->srqn); 296 297err_out: 298 mthca_free(&dev->srq_table.alloc, srq->srqn); 299 300 return err; 301} 302 303void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) 304{ 305 struct mthca_mailbox *mailbox; 306 int err; 307 u8 status; 308 309 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 310 if (IS_ERR(mailbox)) { 311 mthca_warn(dev, "No memory for mailbox to free SRQ.\n"); 312 return; 313 } 314 315 err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); 316 if (err) 317 mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); 318 else if (status) 319 mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); 320 321 spin_lock_irq(&dev->srq_table.lock); 322 mthca_array_clear(&dev->srq_table.srq, 323 srq->srqn & (dev->limits.num_srqs - 1)); 324 spin_unlock_irq(&dev->srq_table.lock); 325 326 atomic_dec(&srq->refcount); 327 wait_event(srq->wait, !atomic_read(&srq->refcount)); 328 329 if (!srq->ibsrq.uobject) { 330 mthca_free_srq_buf(dev, srq); 331 if (mthca_is_memfree(dev)) 332 mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); 333 } 334 335 mthca_table_put(dev, dev->srq_table.table, srq->srqn); 336 mthca_free(&dev->srq_table.alloc, srq->srqn); 337 mthca_free_mailbox(dev, mailbox); 338} 339 340int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 341 enum ib_srq_attr_mask attr_mask) 342{ 343 struct mthca_dev *dev = to_mdev(ibsrq->device); 344 struct mthca_srq *srq = to_msrq(ibsrq); 345 int ret; 346 u8 status; 347 348 /* We don't support resizing SRQs (yet?) */ 349 if (attr_mask & IB_SRQ_MAX_WR) 350 return -EINVAL; 351 352 if (attr_mask & IB_SRQ_LIMIT) { 353 ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); 354 if (ret) 355 return ret; 356 if (status) 357 return -EINVAL; 358 } 359 360 return 0; 361} 362 363void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 364 enum ib_event_type event_type) 365{ 366 struct mthca_srq *srq; 367 struct ib_event event; 368 369 spin_lock(&dev->srq_table.lock); 370 srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); 371 if (srq) 372 atomic_inc(&srq->refcount); 373 spin_unlock(&dev->srq_table.lock); 374 375 if (!srq) { 376 mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn); 377 return; 378 } 379 380 if (!srq->ibsrq.event_handler) 381 goto out; 382 383 event.device = &dev->ib_dev; 384 event.event = event_type; 385 event.element.srq = &srq->ibsrq; 386 srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); 387 388out: 389 if (atomic_dec_and_test(&srq->refcount)) 390 wake_up(&srq->wait); 391} 392 393/* 394 * This function must be called with IRQs disabled. 395 */ 396void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) 397{ 398 int ind; 399 400 ind = wqe_addr >> srq->wqe_shift; 401 402 spin_lock(&srq->lock); 403 404 if (likely(srq->first_free >= 0)) 405 *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; 406 else 407 srq->first_free = ind; 408 409 *wqe_to_link(get_wqe(srq, ind)) = -1; 410 srq->last_free = ind; 411 412 spin_unlock(&srq->lock); 413} 414 415int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 416 struct ib_recv_wr **bad_wr) 417{ 418 struct mthca_dev *dev = to_mdev(ibsrq->device); 419 struct mthca_srq *srq = to_msrq(ibsrq); 420 __be32 doorbell[2]; 421 unsigned long flags; 422 int err = 0; 423 int first_ind; 424 int ind; 425 int next_ind; 426 int nreq; 427 int i; 428 void *wqe; 429 void *prev_wqe; 430 431 spin_lock_irqsave(&srq->lock, flags); 432 433 first_ind = srq->first_free; 434 435 for (nreq = 0; wr; ++nreq, wr = wr->next) { 436 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { 437 nreq = 0; 438 439 doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); 440 doorbell[1] = cpu_to_be32(srq->srqn << 8); 441 442 /* 443 * Make sure that descriptors are written 444 * before doorbell is rung. 445 */ 446 wmb(); 447 448 mthca_write64(doorbell, 449 dev->kar + MTHCA_RECEIVE_DOORBELL, 450 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 451 452 first_ind = srq->first_free; 453 } 454 455 ind = srq->first_free; 456 457 if (ind < 0) { 458 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 459 err = -ENOMEM; 460 *bad_wr = wr; 461 break; 462 } 463 464 wqe = get_wqe(srq, ind); 465 next_ind = *wqe_to_link(wqe); 466 467 if (next_ind < 0) { 468 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 469 err = -ENOMEM; 470 *bad_wr = wr; 471 break; 472 } 473 474 prev_wqe = srq->last; 475 srq->last = wqe; 476 477 ((struct mthca_next_seg *) wqe)->nda_op = 0; 478 ((struct mthca_next_seg *) wqe)->ee_nds = 0; 479 /* flags field will always remain 0 */ 480 481 wqe += sizeof (struct mthca_next_seg); 482 483 if (unlikely(wr->num_sge > srq->max_gs)) { 484 err = -EINVAL; 485 *bad_wr = wr; 486 srq->last = prev_wqe; 487 break; 488 } 489 490 for (i = 0; i < wr->num_sge; ++i) { 491 ((struct mthca_data_seg *) wqe)->byte_count = 492 cpu_to_be32(wr->sg_list[i].length); 493 ((struct mthca_data_seg *) wqe)->lkey = 494 cpu_to_be32(wr->sg_list[i].lkey); 495 ((struct mthca_data_seg *) wqe)->addr = 496 cpu_to_be64(wr->sg_list[i].addr); 497 wqe += sizeof (struct mthca_data_seg); 498 } 499 500 if (i < srq->max_gs) { 501 ((struct mthca_data_seg *) wqe)->byte_count = 0; 502 ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); 503 ((struct mthca_data_seg *) wqe)->addr = 0; 504 } 505 506 ((struct mthca_next_seg *) prev_wqe)->nda_op = 507 cpu_to_be32((ind << srq->wqe_shift) | 1); 508 wmb(); 509 ((struct mthca_next_seg *) prev_wqe)->ee_nds = 510 cpu_to_be32(MTHCA_NEXT_DBD); 511 512 srq->wrid[ind] = wr->wr_id; 513 srq->first_free = next_ind; 514 } 515 516 if (likely(nreq)) { 517 doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); 518 doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); 519 520 /* 521 * Make sure that descriptors are written before 522 * doorbell is rung. 523 */ 524 wmb(); 525 526 mthca_write64(doorbell, 527 dev->kar + MTHCA_RECEIVE_DOORBELL, 528 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 529 } 530 531 spin_unlock_irqrestore(&srq->lock, flags); 532 return err; 533} 534 535int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 536 struct ib_recv_wr **bad_wr) 537{ 538 struct mthca_dev *dev = to_mdev(ibsrq->device); 539 struct mthca_srq *srq = to_msrq(ibsrq); 540 unsigned long flags; 541 int err = 0; 542 int ind; 543 int next_ind; 544 int nreq; 545 int i; 546 void *wqe; 547 548 spin_lock_irqsave(&srq->lock, flags); 549 550 for (nreq = 0; wr; ++nreq, wr = wr->next) { 551 ind = srq->first_free; 552 553 if (ind < 0) { 554 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 555 err = -ENOMEM; 556 *bad_wr = wr; 557 break; 558 } 559 560 wqe = get_wqe(srq, ind); 561 next_ind = *wqe_to_link(wqe); 562 563 if (next_ind < 0) { 564 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 565 err = -ENOMEM; 566 *bad_wr = wr; 567 break; 568 } 569 570 ((struct mthca_next_seg *) wqe)->nda_op = 571 cpu_to_be32((next_ind << srq->wqe_shift) | 1); 572 ((struct mthca_next_seg *) wqe)->ee_nds = 0; 573 /* flags field will always remain 0 */ 574 575 wqe += sizeof (struct mthca_next_seg); 576 577 if (unlikely(wr->num_sge > srq->max_gs)) { 578 err = -EINVAL; 579 *bad_wr = wr; 580 break; 581 } 582 583 for (i = 0; i < wr->num_sge; ++i) { 584 ((struct mthca_data_seg *) wqe)->byte_count = 585 cpu_to_be32(wr->sg_list[i].length); 586 ((struct mthca_data_seg *) wqe)->lkey = 587 cpu_to_be32(wr->sg_list[i].lkey); 588 ((struct mthca_data_seg *) wqe)->addr = 589 cpu_to_be64(wr->sg_list[i].addr); 590 wqe += sizeof (struct mthca_data_seg); 591 } 592 593 if (i < srq->max_gs) { 594 ((struct mthca_data_seg *) wqe)->byte_count = 0; 595 ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); 596 ((struct mthca_data_seg *) wqe)->addr = 0; 597 } 598 599 srq->wrid[ind] = wr->wr_id; 600 srq->first_free = next_ind; 601 } 602 603 if (likely(nreq)) { 604 srq->counter += nreq; 605 606 /* 607 * Make sure that descriptors are written before 608 * we write doorbell record. 609 */ 610 wmb(); 611 *srq->db = cpu_to_be32(srq->counter); 612 } 613 614 spin_unlock_irqrestore(&srq->lock, flags); 615 return err; 616} 617 618int __devinit mthca_init_srq_table(struct mthca_dev *dev) 619{ 620 int err; 621 622 if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) 623 return 0; 624 625 spin_lock_init(&dev->srq_table.lock); 626 627 err = mthca_alloc_init(&dev->srq_table.alloc, 628 dev->limits.num_srqs, 629 dev->limits.num_srqs - 1, 630 dev->limits.reserved_srqs); 631 if (err) 632 return err; 633 634 err = mthca_array_init(&dev->srq_table.srq, 635 dev->limits.num_srqs); 636 if (err) 637 mthca_alloc_cleanup(&dev->srq_table.alloc); 638 639 return err; 640} 641 642void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) 643{ 644 if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) 645 return; 646 647 mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs); 648 mthca_alloc_cleanup(&dev->srq_table.alloc); 649} 650