1/* 2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34#include <linux/slab.h> 35#include <linux/errno.h> 36 37#include "mthca_dev.h" 38#include "mthca_cmd.h" 39#include "mthca_memfree.h" 40 41struct mthca_mtt { 42 struct mthca_buddy *buddy; 43 int order; 44 u32 first_seg; 45}; 46 47/* 48 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. 49 */ 50struct mthca_mpt_entry { 51 __be32 flags; 52 __be32 page_size; 53 __be32 key; 54 __be32 pd; 55 __be64 start; 56 __be64 length; 57 __be32 lkey; 58 __be32 window_count; 59 __be32 window_count_limit; 60 __be64 mtt_seg; 61 __be32 mtt_sz; /* Arbel only */ 62 u32 reserved[2]; 63} __attribute__((packed)); 64 65#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) 66#define MTHCA_MPT_FLAG_MIO (1 << 17) 67#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15) 68#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9) 69#define MTHCA_MPT_FLAG_REGION (1 << 8) 70 71#define MTHCA_MTT_FLAG_PRESENT 1 72 73#define MTHCA_MPT_STATUS_SW 0xF0 74#define MTHCA_MPT_STATUS_HW 0x00 75 76#define SINAI_FMR_KEY_INC 0x1000000 77 78/* 79 * Buddy allocator for MTT segments (currently not very efficient 80 * since it doesn't keep a free list and just searches linearly 81 * through the bitmaps) 82 */ 83 84static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) 85{ 86 int o; 87 int m; 88 u32 seg; 89 90 spin_lock(&buddy->lock); 91 92 for (o = order; o <= buddy->max_order; ++o) 93 if (buddy->num_free[o]) { 94 m = 1 << (buddy->max_order - o); 95 seg = find_first_bit(buddy->bits[o], m); 96 if (seg < m) 97 goto found; 98 } 99 100 spin_unlock(&buddy->lock); 101 return -1; 102 103 found: 104 clear_bit(seg, buddy->bits[o]); 105 --buddy->num_free[o]; 106 107 while (o > order) { 108 --o; 109 seg <<= 1; 110 set_bit(seg ^ 1, buddy->bits[o]); 111 ++buddy->num_free[o]; 112 } 113 114 spin_unlock(&buddy->lock); 115 116 seg <<= order; 117 118 return seg; 119} 120 121static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) 122{ 123 seg >>= order; 124 125 spin_lock(&buddy->lock); 126 127 while (test_bit(seg ^ 1, buddy->bits[order])) { 128 clear_bit(seg ^ 1, buddy->bits[order]); 129 --buddy->num_free[order]; 130 seg >>= 1; 131 ++order; 132 } 133 134 set_bit(seg, buddy->bits[order]); 135 ++buddy->num_free[order]; 136 137 spin_unlock(&buddy->lock); 138} 139 140static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) 141{ 142 int i, s; 143 144 buddy->max_order = max_order; 145 spin_lock_init(&buddy->lock); 146 147 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), 148 GFP_KERNEL); 149 buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, 150 GFP_KERNEL); 151 if (!buddy->bits || !buddy->num_free) 152 goto err_out; 153 154 for (i = 0; i <= buddy->max_order; ++i) { 155 s = BITS_TO_LONGS(1 << (buddy->max_order - i)); 156 buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); 157 if (!buddy->bits[i]) 158 goto err_out_free; 159 bitmap_zero(buddy->bits[i], 160 1 << (buddy->max_order - i)); 161 } 162 163 set_bit(0, buddy->bits[buddy->max_order]); 164 buddy->num_free[buddy->max_order] = 1; 165 166 return 0; 167 168err_out_free: 169 for (i = 0; i <= buddy->max_order; ++i) 170 kfree(buddy->bits[i]); 171 172err_out: 173 kfree(buddy->bits); 174 kfree(buddy->num_free); 175 176 return -ENOMEM; 177} 178 179static void mthca_buddy_cleanup(struct mthca_buddy *buddy) 180{ 181 int i; 182 183 for (i = 0; i <= buddy->max_order; ++i) 184 kfree(buddy->bits[i]); 185 186 kfree(buddy->bits); 187 kfree(buddy->num_free); 188} 189 190static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, 191 struct mthca_buddy *buddy) 192{ 193 u32 seg = mthca_buddy_alloc(buddy, order); 194 195 if (seg == -1) 196 return -1; 197 198 if (mthca_is_memfree(dev)) 199 if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg, 200 seg + (1 << order) - 1)) { 201 mthca_buddy_free(buddy, seg, order); 202 seg = -1; 203 } 204 205 return seg; 206} 207 208static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, 209 struct mthca_buddy *buddy) 210{ 211 struct mthca_mtt *mtt; 212 int i; 213 214 if (size <= 0) 215 return ERR_PTR(-EINVAL); 216 217 mtt = kmalloc(sizeof *mtt, GFP_KERNEL); 218 if (!mtt) 219 return ERR_PTR(-ENOMEM); 220 221 mtt->buddy = buddy; 222 mtt->order = 0; 223 for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1) 224 ++mtt->order; 225 226 mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); 227 if (mtt->first_seg == -1) { 228 kfree(mtt); 229 return ERR_PTR(-ENOMEM); 230 } 231 232 return mtt; 233} 234 235struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size) 236{ 237 return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy); 238} 239 240void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt) 241{ 242 if (!mtt) 243 return; 244 245 mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order); 246 247 mthca_table_put_range(dev, dev->mr_table.mtt_table, 248 mtt->first_seg, 249 mtt->first_seg + (1 << mtt->order) - 1); 250 251 kfree(mtt); 252} 253 254static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, 255 int start_index, u64 *buffer_list, int list_len) 256{ 257 struct mthca_mailbox *mailbox; 258 __be64 *mtt_entry; 259 int err = 0; 260 int i; 261 262 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 263 if (IS_ERR(mailbox)) 264 return PTR_ERR(mailbox); 265 mtt_entry = mailbox->buf; 266 267 while (list_len > 0) { 268 mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + 269 mtt->first_seg * dev->limits.mtt_seg_size + 270 start_index * 8); 271 mtt_entry[1] = 0; 272 for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) 273 mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | 274 MTHCA_MTT_FLAG_PRESENT); 275 276 /* 277 * If we have an odd number of entries to write, add 278 * one more dummy entry for firmware efficiency. 279 */ 280 if (i & 1) 281 mtt_entry[i + 2] = 0; 282 283 err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1); 284 if (err) { 285 mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); 286 goto out; 287 } 288 289 list_len -= i; 290 start_index += i; 291 buffer_list += i; 292 } 293 294out: 295 mthca_free_mailbox(dev, mailbox); 296 return err; 297} 298 299int mthca_write_mtt_size(struct mthca_dev *dev) 300{ 301 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || 302 !(dev->mthca_flags & MTHCA_FLAG_FMR)) 303 /* 304 * Be friendly to WRITE_MTT command 305 * and leave two empty slots for the 306 * index and reserved fields of the 307 * mailbox. 308 */ 309 return PAGE_SIZE / sizeof (u64) - 2; 310 311 /* For Arbel, all MTTs must fit in the same page. */ 312 return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff; 313} 314 315static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, 316 struct mthca_mtt *mtt, int start_index, 317 u64 *buffer_list, int list_len) 318{ 319 u64 __iomem *mtts; 320 int i; 321 322 mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size + 323 start_index * sizeof (u64); 324 for (i = 0; i < list_len; ++i) 325 mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT), 326 mtts + i); 327} 328 329static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, 330 struct mthca_mtt *mtt, int start_index, 331 u64 *buffer_list, int list_len) 332{ 333 __be64 *mtts; 334 dma_addr_t dma_handle; 335 int i; 336 int s = start_index * sizeof (u64); 337 338 /* For Arbel, all MTTs must fit in the same page. */ 339 BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); 340 /* Require full segments */ 341 BUG_ON(s % dev->limits.mtt_seg_size); 342 343 mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + 344 s / dev->limits.mtt_seg_size, &dma_handle); 345 346 BUG_ON(!mtts); 347 348 dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, 349 list_len * sizeof (u64), DMA_TO_DEVICE); 350 351 for (i = 0; i < list_len; ++i) 352 mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT); 353 354 dma_sync_single_for_device(&dev->pdev->dev, dma_handle, 355 list_len * sizeof (u64), DMA_TO_DEVICE); 356} 357 358int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, 359 int start_index, u64 *buffer_list, int list_len) 360{ 361 int size = mthca_write_mtt_size(dev); 362 int chunk; 363 364 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || 365 !(dev->mthca_flags & MTHCA_FLAG_FMR)) 366 return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); 367 368 while (list_len > 0) { 369 chunk = min(size, list_len); 370 if (mthca_is_memfree(dev)) 371 mthca_arbel_write_mtt_seg(dev, mtt, start_index, 372 buffer_list, chunk); 373 else 374 mthca_tavor_write_mtt_seg(dev, mtt, start_index, 375 buffer_list, chunk); 376 377 list_len -= chunk; 378 start_index += chunk; 379 buffer_list += chunk; 380 } 381 382 return 0; 383} 384 385static inline u32 tavor_hw_index_to_key(u32 ind) 386{ 387 return ind; 388} 389 390static inline u32 tavor_key_to_hw_index(u32 key) 391{ 392 return key; 393} 394 395static inline u32 arbel_hw_index_to_key(u32 ind) 396{ 397 return (ind >> 24) | (ind << 8); 398} 399 400static inline u32 arbel_key_to_hw_index(u32 key) 401{ 402 return (key << 24) | (key >> 8); 403} 404 405static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind) 406{ 407 if (mthca_is_memfree(dev)) 408 return arbel_hw_index_to_key(ind); 409 else 410 return tavor_hw_index_to_key(ind); 411} 412 413static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) 414{ 415 if (mthca_is_memfree(dev)) 416 return arbel_key_to_hw_index(key); 417 else 418 return tavor_key_to_hw_index(key); 419} 420 421static inline u32 adjust_key(struct mthca_dev *dev, u32 key) 422{ 423 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) 424 return ((key << 20) & 0x800000) | (key & 0x7fffff); 425 else 426 return key; 427} 428 429int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, 430 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) 431{ 432 struct mthca_mailbox *mailbox; 433 struct mthca_mpt_entry *mpt_entry; 434 u32 key; 435 int i; 436 int err; 437 438 WARN_ON(buffer_size_shift >= 32); 439 440 key = mthca_alloc(&dev->mr_table.mpt_alloc); 441 if (key == -1) 442 return -ENOMEM; 443 key = adjust_key(dev, key); 444 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); 445 446 if (mthca_is_memfree(dev)) { 447 err = mthca_table_get(dev, dev->mr_table.mpt_table, key); 448 if (err) 449 goto err_out_mpt_free; 450 } 451 452 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 453 if (IS_ERR(mailbox)) { 454 err = PTR_ERR(mailbox); 455 goto err_out_table; 456 } 457 mpt_entry = mailbox->buf; 458 459 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | 460 MTHCA_MPT_FLAG_MIO | 461 MTHCA_MPT_FLAG_REGION | 462 access); 463 if (!mr->mtt) 464 mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL); 465 466 mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); 467 mpt_entry->key = cpu_to_be32(key); 468 mpt_entry->pd = cpu_to_be32(pd); 469 mpt_entry->start = cpu_to_be64(iova); 470 mpt_entry->length = cpu_to_be64(total_size); 471 472 memset(&mpt_entry->lkey, 0, 473 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); 474 475 if (mr->mtt) 476 mpt_entry->mtt_seg = 477 cpu_to_be64(dev->mr_table.mtt_base + 478 mr->mtt->first_seg * dev->limits.mtt_seg_size); 479 480 if (0) { 481 mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); 482 for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { 483 if (i % 4 == 0) 484 printk("[%02x] ", i * 4); 485 printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); 486 if ((i + 1) % 4 == 0) 487 printk("\n"); 488 } 489 } 490 491 err = mthca_SW2HW_MPT(dev, mailbox, 492 key & (dev->limits.num_mpts - 1)); 493 if (err) { 494 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); 495 goto err_out_mailbox; 496 } 497 498 mthca_free_mailbox(dev, mailbox); 499 return err; 500 501err_out_mailbox: 502 mthca_free_mailbox(dev, mailbox); 503 504err_out_table: 505 mthca_table_put(dev, dev->mr_table.mpt_table, key); 506 507err_out_mpt_free: 508 mthca_free(&dev->mr_table.mpt_alloc, key); 509 return err; 510} 511 512int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, 513 u32 access, struct mthca_mr *mr) 514{ 515 mr->mtt = NULL; 516 return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr); 517} 518 519int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, 520 u64 *buffer_list, int buffer_size_shift, 521 int list_len, u64 iova, u64 total_size, 522 u32 access, struct mthca_mr *mr) 523{ 524 int err; 525 526 mr->mtt = mthca_alloc_mtt(dev, list_len); 527 if (IS_ERR(mr->mtt)) 528 return PTR_ERR(mr->mtt); 529 530 err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len); 531 if (err) { 532 mthca_free_mtt(dev, mr->mtt); 533 return err; 534 } 535 536 err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova, 537 total_size, access, mr); 538 if (err) 539 mthca_free_mtt(dev, mr->mtt); 540 541 return err; 542} 543 544/* Free mr or fmr */ 545static void mthca_free_region(struct mthca_dev *dev, u32 lkey) 546{ 547 mthca_table_put(dev, dev->mr_table.mpt_table, 548 key_to_hw_index(dev, lkey)); 549 550 mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); 551} 552 553void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) 554{ 555 int err; 556 557 err = mthca_HW2SW_MPT(dev, NULL, 558 key_to_hw_index(dev, mr->ibmr.lkey) & 559 (dev->limits.num_mpts - 1)); 560 if (err) 561 mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err); 562 563 mthca_free_region(dev, mr->ibmr.lkey); 564 mthca_free_mtt(dev, mr->mtt); 565} 566 567int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, 568 u32 access, struct mthca_fmr *mr) 569{ 570 struct mthca_mpt_entry *mpt_entry; 571 struct mthca_mailbox *mailbox; 572 u64 mtt_seg; 573 u32 key, idx; 574 int list_len = mr->attr.max_pages; 575 int err = -ENOMEM; 576 int i; 577 578 if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32) 579 return -EINVAL; 580 581 /* For Arbel, all MTTs must fit in the same page. */ 582 if (mthca_is_memfree(dev) && 583 mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE) 584 return -EINVAL; 585 586 mr->maps = 0; 587 588 key = mthca_alloc(&dev->mr_table.mpt_alloc); 589 if (key == -1) 590 return -ENOMEM; 591 key = adjust_key(dev, key); 592 593 idx = key & (dev->limits.num_mpts - 1); 594 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); 595 596 if (mthca_is_memfree(dev)) { 597 err = mthca_table_get(dev, dev->mr_table.mpt_table, key); 598 if (err) 599 goto err_out_mpt_free; 600 601 mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL); 602 BUG_ON(!mr->mem.arbel.mpt); 603 } else 604 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + 605 sizeof *(mr->mem.tavor.mpt) * idx; 606 607 mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); 608 if (IS_ERR(mr->mtt)) { 609 err = PTR_ERR(mr->mtt); 610 goto err_out_table; 611 } 612 613 mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; 614 615 if (mthca_is_memfree(dev)) { 616 mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, 617 mr->mtt->first_seg, 618 &mr->mem.arbel.dma_handle); 619 BUG_ON(!mr->mem.arbel.mtts); 620 } else 621 mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; 622 623 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 624 if (IS_ERR(mailbox)) { 625 err = PTR_ERR(mailbox); 626 goto err_out_free_mtt; 627 } 628 629 mpt_entry = mailbox->buf; 630 631 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | 632 MTHCA_MPT_FLAG_MIO | 633 MTHCA_MPT_FLAG_REGION | 634 access); 635 636 mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12); 637 mpt_entry->key = cpu_to_be32(key); 638 mpt_entry->pd = cpu_to_be32(pd); 639 memset(&mpt_entry->start, 0, 640 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start)); 641 mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg); 642 643 if (0) { 644 mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); 645 for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { 646 if (i % 4 == 0) 647 printk("[%02x] ", i * 4); 648 printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); 649 if ((i + 1) % 4 == 0) 650 printk("\n"); 651 } 652 } 653 654 err = mthca_SW2HW_MPT(dev, mailbox, 655 key & (dev->limits.num_mpts - 1)); 656 if (err) { 657 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); 658 goto err_out_mailbox_free; 659 } 660 661 mthca_free_mailbox(dev, mailbox); 662 return 0; 663 664err_out_mailbox_free: 665 mthca_free_mailbox(dev, mailbox); 666 667err_out_free_mtt: 668 mthca_free_mtt(dev, mr->mtt); 669 670err_out_table: 671 mthca_table_put(dev, dev->mr_table.mpt_table, key); 672 673err_out_mpt_free: 674 mthca_free(&dev->mr_table.mpt_alloc, key); 675 return err; 676} 677 678int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) 679{ 680 if (fmr->maps) 681 return -EBUSY; 682 683 mthca_free_region(dev, fmr->ibmr.lkey); 684 mthca_free_mtt(dev, fmr->mtt); 685 686 return 0; 687} 688 689static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list, 690 int list_len, u64 iova) 691{ 692 int i, page_mask; 693 694 if (list_len > fmr->attr.max_pages) 695 return -EINVAL; 696 697 page_mask = (1 << fmr->attr.page_shift) - 1; 698 699 /* We are getting page lists, so va must be page aligned. */ 700 if (iova & page_mask) 701 return -EINVAL; 702 703 /* Trust the user not to pass misaligned data in page_list */ 704 if (0) 705 for (i = 0; i < list_len; ++i) { 706 if (page_list[i] & ~page_mask) 707 return -EINVAL; 708 } 709 710 if (fmr->maps >= fmr->attr.max_maps) 711 return -EINVAL; 712 713 return 0; 714} 715 716 717int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, 718 int list_len, u64 iova) 719{ 720 struct mthca_fmr *fmr = to_mfmr(ibfmr); 721 struct mthca_dev *dev = to_mdev(ibfmr->device); 722 struct mthca_mpt_entry mpt_entry; 723 u32 key; 724 int i, err; 725 726 err = mthca_check_fmr(fmr, page_list, list_len, iova); 727 if (err) 728 return err; 729 730 ++fmr->maps; 731 732 key = tavor_key_to_hw_index(fmr->ibmr.lkey); 733 key += dev->limits.num_mpts; 734 fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key); 735 736 writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); 737 738 for (i = 0; i < list_len; ++i) { 739 __be64 mtt_entry = cpu_to_be64(page_list[i] | 740 MTHCA_MTT_FLAG_PRESENT); 741 mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i); 742 } 743 744 mpt_entry.lkey = cpu_to_be32(key); 745 mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); 746 mpt_entry.start = cpu_to_be64(iova); 747 748 __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); 749 memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, 750 offsetof(struct mthca_mpt_entry, window_count) - 751 offsetof(struct mthca_mpt_entry, start)); 752 753 writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt); 754 755 return 0; 756} 757 758int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, 759 int list_len, u64 iova) 760{ 761 struct mthca_fmr *fmr = to_mfmr(ibfmr); 762 struct mthca_dev *dev = to_mdev(ibfmr->device); 763 u32 key; 764 int i, err; 765 766 err = mthca_check_fmr(fmr, page_list, list_len, iova); 767 if (err) 768 return err; 769 770 ++fmr->maps; 771 772 key = arbel_key_to_hw_index(fmr->ibmr.lkey); 773 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) 774 key += SINAI_FMR_KEY_INC; 775 else 776 key += dev->limits.num_mpts; 777 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); 778 779 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; 780 781 wmb(); 782 783 dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle, 784 list_len * sizeof(u64), DMA_TO_DEVICE); 785 786 for (i = 0; i < list_len; ++i) 787 fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] | 788 MTHCA_MTT_FLAG_PRESENT); 789 790 dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle, 791 list_len * sizeof(u64), DMA_TO_DEVICE); 792 793 fmr->mem.arbel.mpt->key = cpu_to_be32(key); 794 fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); 795 fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); 796 fmr->mem.arbel.mpt->start = cpu_to_be64(iova); 797 798 wmb(); 799 800 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW; 801 802 wmb(); 803 804 return 0; 805} 806 807void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) 808{ 809 if (!fmr->maps) 810 return; 811 812 fmr->maps = 0; 813 814 writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); 815} 816 817void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) 818{ 819 if (!fmr->maps) 820 return; 821 822 fmr->maps = 0; 823 824 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; 825} 826 827int mthca_init_mr_table(struct mthca_dev *dev) 828{ 829 phys_addr_t addr; 830 int mpts, mtts, err, i; 831 832 err = mthca_alloc_init(&dev->mr_table.mpt_alloc, 833 dev->limits.num_mpts, 834 ~0, dev->limits.reserved_mrws); 835 if (err) 836 return err; 837 838 if (!mthca_is_memfree(dev) && 839 (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) 840 dev->limits.fmr_reserved_mtts = 0; 841 else 842 dev->mthca_flags |= MTHCA_FLAG_FMR; 843 844 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) 845 mthca_dbg(dev, "Memory key throughput optimization activated.\n"); 846 847 err = mthca_buddy_init(&dev->mr_table.mtt_buddy, 848 fls(dev->limits.num_mtt_segs - 1)); 849 850 if (err) 851 goto err_mtt_buddy; 852 853 dev->mr_table.tavor_fmr.mpt_base = NULL; 854 dev->mr_table.tavor_fmr.mtt_base = NULL; 855 856 if (dev->limits.fmr_reserved_mtts) { 857 i = fls(dev->limits.fmr_reserved_mtts - 1); 858 859 if (i >= 31) { 860 mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n"); 861 err = -EINVAL; 862 goto err_fmr_mpt; 863 } 864 mpts = mtts = 1 << i; 865 } else { 866 mtts = dev->limits.num_mtt_segs; 867 mpts = dev->limits.num_mpts; 868 } 869 870 if (!mthca_is_memfree(dev) && 871 (dev->mthca_flags & MTHCA_FLAG_FMR)) { 872 873 addr = pci_resource_start(dev->pdev, 4) + 874 ((pci_resource_len(dev->pdev, 4) - 1) & 875 dev->mr_table.mpt_base); 876 877 dev->mr_table.tavor_fmr.mpt_base = 878 ioremap(addr, mpts * sizeof(struct mthca_mpt_entry)); 879 880 if (!dev->mr_table.tavor_fmr.mpt_base) { 881 mthca_warn(dev, "MPT ioremap for FMR failed.\n"); 882 err = -ENOMEM; 883 goto err_fmr_mpt; 884 } 885 886 addr = pci_resource_start(dev->pdev, 4) + 887 ((pci_resource_len(dev->pdev, 4) - 1) & 888 dev->mr_table.mtt_base); 889 890 dev->mr_table.tavor_fmr.mtt_base = 891 ioremap(addr, mtts * dev->limits.mtt_seg_size); 892 if (!dev->mr_table.tavor_fmr.mtt_base) { 893 mthca_warn(dev, "MTT ioremap for FMR failed.\n"); 894 err = -ENOMEM; 895 goto err_fmr_mtt; 896 } 897 } 898 899 if (dev->limits.fmr_reserved_mtts) { 900 err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1)); 901 if (err) 902 goto err_fmr_mtt_buddy; 903 904 /* Prevent regular MRs from using FMR keys */ 905 err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1)); 906 if (err) 907 goto err_reserve_fmr; 908 909 dev->mr_table.fmr_mtt_buddy = 910 &dev->mr_table.tavor_fmr.mtt_buddy; 911 } else 912 dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy; 913 914 /* FMR table is always the first, take reserved MTTs out of there */ 915 if (dev->limits.reserved_mtts) { 916 i = fls(dev->limits.reserved_mtts - 1); 917 918 if (mthca_alloc_mtt_range(dev, i, 919 dev->mr_table.fmr_mtt_buddy) == -1) { 920 mthca_warn(dev, "MTT table of order %d is too small.\n", 921 dev->mr_table.fmr_mtt_buddy->max_order); 922 err = -ENOMEM; 923 goto err_reserve_mtts; 924 } 925 } 926 927 return 0; 928 929err_reserve_mtts: 930err_reserve_fmr: 931 if (dev->limits.fmr_reserved_mtts) 932 mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy); 933 934err_fmr_mtt_buddy: 935 if (dev->mr_table.tavor_fmr.mtt_base) 936 iounmap(dev->mr_table.tavor_fmr.mtt_base); 937 938err_fmr_mtt: 939 if (dev->mr_table.tavor_fmr.mpt_base) 940 iounmap(dev->mr_table.tavor_fmr.mpt_base); 941 942err_fmr_mpt: 943 mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); 944 945err_mtt_buddy: 946 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); 947 948 return err; 949} 950 951void mthca_cleanup_mr_table(struct mthca_dev *dev) 952{ 953 /* XXX check if any MRs are still allocated? */ 954 if (dev->limits.fmr_reserved_mtts) 955 mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy); 956 957 mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); 958 959 if (dev->mr_table.tavor_fmr.mtt_base) 960 iounmap(dev->mr_table.tavor_fmr.mtt_base); 961 if (dev->mr_table.tavor_fmr.mpt_base) 962 iounmap(dev->mr_table.tavor_fmr.mpt_base); 963 964 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); 965} 966