1/* 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. 3 * Author: Joerg Roedel <joerg.roedel@amd.com> 4 * Leo Duran <leo.duran@amd.com> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20#include <linux/ratelimit.h> 21#include <linux/pci.h> 22#include <linux/pci-ats.h> 23#include <linux/bitmap.h> 24#include <linux/slab.h> 25#include <linux/debugfs.h> 26#include <linux/scatterlist.h> 27#include <linux/dma-mapping.h> 28#include <linux/iommu-helper.h> 29#include <linux/iommu.h> 30#include <linux/delay.h> 31#include <linux/amd-iommu.h> 32#include <linux/notifier.h> 33#include <linux/export.h> 34#include <asm/msidef.h> 35#include <asm/proto.h> 36#include <asm/iommu.h> 37#include <asm/gart.h> 38#include <asm/dma.h> 39 40#include "amd_iommu_proto.h" 41#include "amd_iommu_types.h" 42 43#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) 44 45#define LOOP_TIMEOUT 100000 46 47/* 48 * This bitmap is used to advertise the page sizes our hardware support 49 * to the IOMMU core, which will then use this information to split 50 * physically contiguous memory regions it is mapping into page sizes 51 * that we support. 52 * 53 * Traditionally the IOMMU core just handed us the mappings directly, 54 * after making sure the size is an order of a 4KiB page and that the 55 * mapping has natural alignment. 56 * 57 * To retain this behavior, we currently advertise that we support 58 * all page sizes that are an order of 4KiB. 59 * 60 * If at some point we'd like to utilize the IOMMU core's new behavior, 61 * we could change this to advertise the real page sizes we support. 62 */ 63#define AMD_IOMMU_PGSIZES (~0xFFFUL) 64 65static DEFINE_RWLOCK(amd_iommu_devtable_lock); 66 67/* A list of preallocated protection domains */ 68static LIST_HEAD(iommu_pd_list); 69static DEFINE_SPINLOCK(iommu_pd_list_lock); 70 71/* List of all available dev_data structures */ 72static LIST_HEAD(dev_data_list); 73static DEFINE_SPINLOCK(dev_data_list_lock); 74 75/* 76 * Domain for untranslated devices - only allocated 77 * if iommu=pt passed on kernel cmd line. 
78 */ 79static struct protection_domain *pt_domain; 80 81static struct iommu_ops amd_iommu_ops; 82 83static ATOMIC_NOTIFIER_HEAD(ppr_notifier); 84int amd_iommu_max_glx_val = -1; 85 86/* 87 * general struct to manage commands send to an IOMMU 88 */ 89struct iommu_cmd { 90 u32 data[4]; 91}; 92 93static void update_domain(struct protection_domain *domain); 94static int __init alloc_passthrough_domain(void); 95 96/**************************************************************************** 97 * 98 * Helper functions 99 * 100 ****************************************************************************/ 101 102static struct iommu_dev_data *alloc_dev_data(u16 devid) 103{ 104 struct iommu_dev_data *dev_data; 105 unsigned long flags; 106 107 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); 108 if (!dev_data) 109 return NULL; 110 111 dev_data->devid = devid; 112 atomic_set(&dev_data->bind, 0); 113 114 spin_lock_irqsave(&dev_data_list_lock, flags); 115 list_add_tail(&dev_data->dev_data_list, &dev_data_list); 116 spin_unlock_irqrestore(&dev_data_list_lock, flags); 117 118 return dev_data; 119} 120 121static void free_dev_data(struct iommu_dev_data *dev_data) 122{ 123 unsigned long flags; 124 125 spin_lock_irqsave(&dev_data_list_lock, flags); 126 list_del(&dev_data->dev_data_list); 127 spin_unlock_irqrestore(&dev_data_list_lock, flags); 128 129 kfree(dev_data); 130} 131 132static struct iommu_dev_data *search_dev_data(u16 devid) 133{ 134 struct iommu_dev_data *dev_data; 135 unsigned long flags; 136 137 spin_lock_irqsave(&dev_data_list_lock, flags); 138 list_for_each_entry(dev_data, &dev_data_list, dev_data_list) { 139 if (dev_data->devid == devid) 140 goto out_unlock; 141 } 142 143 dev_data = NULL; 144 145out_unlock: 146 spin_unlock_irqrestore(&dev_data_list_lock, flags); 147 148 return dev_data; 149} 150 151static struct iommu_dev_data *find_dev_data(u16 devid) 152{ 153 struct iommu_dev_data *dev_data; 154 155 dev_data = search_dev_data(devid); 156 157 if (dev_data == NULL) 158 dev_data = alloc_dev_data(devid); 159 160 return dev_data; 161} 162 163static inline u16 get_device_id(struct device *dev) 164{ 165 struct pci_dev *pdev = to_pci_dev(dev); 166 167 return calc_devid(pdev->bus->number, pdev->devfn); 168} 169 170static struct iommu_dev_data *get_dev_data(struct device *dev) 171{ 172 return dev->archdata.iommu; 173} 174 175static bool pci_iommuv2_capable(struct pci_dev *pdev) 176{ 177 static const int caps[] = { 178 PCI_EXT_CAP_ID_ATS, 179 PCI_EXT_CAP_ID_PRI, 180 PCI_EXT_CAP_ID_PASID, 181 }; 182 int i, pos; 183 184 for (i = 0; i < 3; ++i) { 185 pos = pci_find_ext_capability(pdev, caps[i]); 186 if (pos == 0) 187 return false; 188 } 189 190 return true; 191} 192 193static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) 194{ 195 struct iommu_dev_data *dev_data; 196 197 dev_data = get_dev_data(&pdev->dev); 198 199 return dev_data->errata & (1 << erratum) ? 
true : false; 200} 201 202/* 203 * In this function the list of preallocated protection domains is traversed to 204 * find the domain for a specific device 205 */ 206static struct dma_ops_domain *find_protection_domain(u16 devid) 207{ 208 struct dma_ops_domain *entry, *ret = NULL; 209 unsigned long flags; 210 u16 alias = amd_iommu_alias_table[devid]; 211 212 if (list_empty(&iommu_pd_list)) 213 return NULL; 214 215 spin_lock_irqsave(&iommu_pd_list_lock, flags); 216 217 list_for_each_entry(entry, &iommu_pd_list, list) { 218 if (entry->target_dev == devid || 219 entry->target_dev == alias) { 220 ret = entry; 221 break; 222 } 223 } 224 225 spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 226 227 return ret; 228} 229 230/* 231 * This function checks if the driver got a valid device from the caller to 232 * avoid dereferencing invalid pointers. 233 */ 234static bool check_device(struct device *dev) 235{ 236 u16 devid; 237 238 if (!dev || !dev->dma_mask) 239 return false; 240 241 /* No device or no PCI device */ 242 if (dev->bus != &pci_bus_type) 243 return false; 244 245 devid = get_device_id(dev); 246 247 /* Out of our scope? */ 248 if (devid > amd_iommu_last_bdf) 249 return false; 250 251 if (amd_iommu_rlookup_table[devid] == NULL) 252 return false; 253 254 return true; 255} 256 257static int iommu_init_device(struct device *dev) 258{ 259 struct pci_dev *pdev = to_pci_dev(dev); 260 struct iommu_dev_data *dev_data; 261 u16 alias; 262 263 if (dev->archdata.iommu) 264 return 0; 265 266 dev_data = find_dev_data(get_device_id(dev)); 267 if (!dev_data) 268 return -ENOMEM; 269 270 alias = amd_iommu_alias_table[dev_data->devid]; 271 if (alias != dev_data->devid) { 272 struct iommu_dev_data *alias_data; 273 274 alias_data = find_dev_data(alias); 275 if (alias_data == NULL) { 276 pr_err("AMD-Vi: Warning: Unhandled device %s\n", 277 dev_name(dev)); 278 free_dev_data(dev_data); 279 return -ENOTSUPP; 280 } 281 dev_data->alias_data = alias_data; 282 } 283 284 if (pci_iommuv2_capable(pdev)) { 285 struct amd_iommu *iommu; 286 287 iommu = amd_iommu_rlookup_table[dev_data->devid]; 288 dev_data->iommu_v2 = iommu->is_iommu_v2; 289 } 290 291 dev->archdata.iommu = dev_data; 292 293 return 0; 294} 295 296static void iommu_ignore_device(struct device *dev) 297{ 298 u16 devid, alias; 299 300 devid = get_device_id(dev); 301 alias = amd_iommu_alias_table[devid]; 302 303 memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); 304 memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); 305 306 amd_iommu_rlookup_table[devid] = NULL; 307 amd_iommu_rlookup_table[alias] = NULL; 308} 309 310static void iommu_uninit_device(struct device *dev) 311{ 312 /* 313 * Nothing to do here - we keep dev_data around for unplugged devices 314 * and reuse it when the device is re-plugged - not doing so would 315 * introduce a ton of races. 
316 */ 317} 318 319void __init amd_iommu_uninit_devices(void) 320{ 321 struct iommu_dev_data *dev_data, *n; 322 struct pci_dev *pdev = NULL; 323 324 for_each_pci_dev(pdev) { 325 326 if (!check_device(&pdev->dev)) 327 continue; 328 329 iommu_uninit_device(&pdev->dev); 330 } 331 332 /* Free all of our dev_data structures */ 333 list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list) 334 free_dev_data(dev_data); 335} 336 337int __init amd_iommu_init_devices(void) 338{ 339 struct pci_dev *pdev = NULL; 340 int ret = 0; 341 342 for_each_pci_dev(pdev) { 343 344 if (!check_device(&pdev->dev)) 345 continue; 346 347 ret = iommu_init_device(&pdev->dev); 348 if (ret == -ENOTSUPP) 349 iommu_ignore_device(&pdev->dev); 350 else if (ret) 351 goto out_free; 352 } 353 354 return 0; 355 356out_free: 357 358 amd_iommu_uninit_devices(); 359 360 return ret; 361} 362#ifdef CONFIG_AMD_IOMMU_STATS 363 364/* 365 * Initialization code for statistics collection 366 */ 367 368DECLARE_STATS_COUNTER(compl_wait); 369DECLARE_STATS_COUNTER(cnt_map_single); 370DECLARE_STATS_COUNTER(cnt_unmap_single); 371DECLARE_STATS_COUNTER(cnt_map_sg); 372DECLARE_STATS_COUNTER(cnt_unmap_sg); 373DECLARE_STATS_COUNTER(cnt_alloc_coherent); 374DECLARE_STATS_COUNTER(cnt_free_coherent); 375DECLARE_STATS_COUNTER(cross_page); 376DECLARE_STATS_COUNTER(domain_flush_single); 377DECLARE_STATS_COUNTER(domain_flush_all); 378DECLARE_STATS_COUNTER(alloced_io_mem); 379DECLARE_STATS_COUNTER(total_map_requests); 380DECLARE_STATS_COUNTER(complete_ppr); 381DECLARE_STATS_COUNTER(invalidate_iotlb); 382DECLARE_STATS_COUNTER(invalidate_iotlb_all); 383DECLARE_STATS_COUNTER(pri_requests); 384 385 386static struct dentry *stats_dir; 387static struct dentry *de_fflush; 388 389static void amd_iommu_stats_add(struct __iommu_counter *cnt) 390{ 391 if (stats_dir == NULL) 392 return; 393 394 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, 395 &cnt->value); 396} 397 398static void amd_iommu_stats_init(void) 399{ 400 stats_dir = debugfs_create_dir("amd-iommu", NULL); 401 if (stats_dir == NULL) 402 return; 403 404 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, 405 (u32 *)&amd_iommu_unmap_flush); 406 407 amd_iommu_stats_add(&compl_wait); 408 amd_iommu_stats_add(&cnt_map_single); 409 amd_iommu_stats_add(&cnt_unmap_single); 410 amd_iommu_stats_add(&cnt_map_sg); 411 amd_iommu_stats_add(&cnt_unmap_sg); 412 amd_iommu_stats_add(&cnt_alloc_coherent); 413 amd_iommu_stats_add(&cnt_free_coherent); 414 amd_iommu_stats_add(&cross_page); 415 amd_iommu_stats_add(&domain_flush_single); 416 amd_iommu_stats_add(&domain_flush_all); 417 amd_iommu_stats_add(&alloced_io_mem); 418 amd_iommu_stats_add(&total_map_requests); 419 amd_iommu_stats_add(&complete_ppr); 420 amd_iommu_stats_add(&invalidate_iotlb); 421 amd_iommu_stats_add(&invalidate_iotlb_all); 422 amd_iommu_stats_add(&pri_requests); 423} 424 425#endif 426 427/**************************************************************************** 428 * 429 * Interrupt handling functions 430 * 431 ****************************************************************************/ 432 433static void dump_dte_entry(u16 devid) 434{ 435 int i; 436 437 for (i = 0; i < 4; ++i) 438 pr_err("AMD-Vi: DTE[%d]: %016llx\n", i, 439 amd_iommu_dev_table[devid].data[i]); 440} 441 442static void dump_command(unsigned long phys_addr) 443{ 444 struct iommu_cmd *cmd = phys_to_virt(phys_addr); 445 int i; 446 447 for (i = 0; i < 4; ++i) 448 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); 449} 450 451static void iommu_print_event(struct 
amd_iommu *iommu, void *__evt) 452{ 453 u32 *event = __evt; 454 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; 455 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; 456 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; 457 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; 458 u64 address = (u64)(((u64)event[3]) << 32) | event[2]; 459 460 printk(KERN_ERR "AMD-Vi: Event logged ["); 461 462 switch (type) { 463 case EVENT_TYPE_ILL_DEV: 464 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " 465 "address=0x%016llx flags=0x%04x]\n", 466 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), 467 address, flags); 468 dump_dte_entry(devid); 469 break; 470 case EVENT_TYPE_IO_FAULT: 471 printk("IO_PAGE_FAULT device=%02x:%02x.%x " 472 "domain=0x%04x address=0x%016llx flags=0x%04x]\n", 473 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), 474 domid, address, flags); 475 break; 476 case EVENT_TYPE_DEV_TAB_ERR: 477 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " 478 "address=0x%016llx flags=0x%04x]\n", 479 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), 480 address, flags); 481 break; 482 case EVENT_TYPE_PAGE_TAB_ERR: 483 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " 484 "domain=0x%04x address=0x%016llx flags=0x%04x]\n", 485 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), 486 domid, address, flags); 487 break; 488 case EVENT_TYPE_ILL_CMD: 489 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); 490 dump_command(address); 491 break; 492 case EVENT_TYPE_CMD_HARD_ERR: 493 printk("COMMAND_HARDWARE_ERROR address=0x%016llx " 494 "flags=0x%04x]\n", address, flags); 495 break; 496 case EVENT_TYPE_IOTLB_INV_TO: 497 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " 498 "address=0x%016llx]\n", 499 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), 500 address); 501 break; 502 case EVENT_TYPE_INV_DEV_REQ: 503 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " 504 "address=0x%016llx flags=0x%04x]\n", 505 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), 506 address, flags); 507 break; 508 default: 509 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); 510 } 511} 512 513static void iommu_poll_events(struct amd_iommu *iommu) 514{ 515 u32 head, tail; 516 unsigned long flags; 517 518 spin_lock_irqsave(&iommu->lock, flags); 519 520 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 521 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 522 523 while (head != tail) { 524 iommu_print_event(iommu, iommu->evt_buf + head); 525 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; 526 } 527 528 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 529 530 spin_unlock_irqrestore(&iommu->lock, flags); 531} 532 533static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) 534{ 535 struct amd_iommu_fault fault; 536 volatile u64 *raw; 537 int i; 538 539 INC_STATS_COUNTER(pri_requests); 540 541 raw = (u64 *)(iommu->ppr_log + head); 542 543 /* 544 * Hardware bug: Interrupt may arrive before the entry is written to 545 * memory. If this happens we need to wait for the entry to arrive. 
546 */ 547 for (i = 0; i < LOOP_TIMEOUT; ++i) { 548 if (PPR_REQ_TYPE(raw[0]) != 0) 549 break; 550 udelay(1); 551 } 552 553 if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { 554 pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); 555 return; 556 } 557 558 fault.address = raw[1]; 559 fault.pasid = PPR_PASID(raw[0]); 560 fault.device_id = PPR_DEVID(raw[0]); 561 fault.tag = PPR_TAG(raw[0]); 562 fault.flags = PPR_FLAGS(raw[0]); 563 564 /* 565 * To detect the hardware bug we need to clear the entry 566 * to back to zero. 567 */ 568 raw[0] = raw[1] = 0; 569 570 atomic_notifier_call_chain(&ppr_notifier, 0, &fault); 571} 572 573static void iommu_poll_ppr_log(struct amd_iommu *iommu) 574{ 575 unsigned long flags; 576 u32 head, tail; 577 578 if (iommu->ppr_log == NULL) 579 return; 580 581 spin_lock_irqsave(&iommu->lock, flags); 582 583 head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 584 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 585 586 while (head != tail) { 587 588 /* Handle PPR entry */ 589 iommu_handle_ppr_entry(iommu, head); 590 591 /* Update and refresh ring-buffer state*/ 592 head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; 593 writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 594 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 595 } 596 597 /* enable ppr interrupts again */ 598 writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); 599 600 spin_unlock_irqrestore(&iommu->lock, flags); 601} 602 603irqreturn_t amd_iommu_int_thread(int irq, void *data) 604{ 605 struct amd_iommu *iommu; 606 607 for_each_iommu(iommu) { 608 iommu_poll_events(iommu); 609 iommu_poll_ppr_log(iommu); 610 } 611 612 return IRQ_HANDLED; 613} 614 615irqreturn_t amd_iommu_int_handler(int irq, void *data) 616{ 617 return IRQ_WAKE_THREAD; 618} 619 620/**************************************************************************** 621 * 622 * IOMMU command queuing functions 623 * 624 ****************************************************************************/ 625 626static int wait_on_sem(volatile u64 *sem) 627{ 628 int i = 0; 629 630 while (*sem == 0 && i < LOOP_TIMEOUT) { 631 udelay(1); 632 i += 1; 633 } 634 635 if (i == LOOP_TIMEOUT) { 636 pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); 637 return -EIO; 638 } 639 640 return 0; 641} 642 643static void copy_cmd_to_buffer(struct amd_iommu *iommu, 644 struct iommu_cmd *cmd, 645 u32 tail) 646{ 647 u8 *target; 648 649 target = iommu->cmd_buf + tail; 650 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 651 652 /* Copy command to buffer */ 653 memcpy(target, cmd, sizeof(*cmd)); 654 655 /* Tell the IOMMU about it */ 656 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 657} 658 659static void build_completion_wait(struct iommu_cmd *cmd, u64 address) 660{ 661 WARN_ON(address & 0x7ULL); 662 663 memset(cmd, 0, sizeof(*cmd)); 664 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; 665 cmd->data[1] = upper_32_bits(__pa(address)); 666 cmd->data[2] = 1; 667 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); 668} 669 670static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) 671{ 672 memset(cmd, 0, sizeof(*cmd)); 673 cmd->data[0] = devid; 674 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); 675} 676 677static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, 678 size_t size, u16 domid, int pde) 679{ 680 u64 pages; 681 int s; 682 683 pages = iommu_num_pages(address, size, PAGE_SIZE); 684 s = 0; 685 686 if (pages > 1) { 687 /* 688 * If we have to flush more than one page, flush all 689 * TLB entries for this domain 690 
*/ 691 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 692 s = 1; 693 } 694 695 address &= PAGE_MASK; 696 697 memset(cmd, 0, sizeof(*cmd)); 698 cmd->data[1] |= domid; 699 cmd->data[2] = lower_32_bits(address); 700 cmd->data[3] = upper_32_bits(address); 701 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); 702 if (s) /* size bit - we flush more than one 4kb page */ 703 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 704 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 705 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 706} 707 708static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, 709 u64 address, size_t size) 710{ 711 u64 pages; 712 int s; 713 714 pages = iommu_num_pages(address, size, PAGE_SIZE); 715 s = 0; 716 717 if (pages > 1) { 718 /* 719 * If we have to flush more than one page, flush all 720 * TLB entries for this domain 721 */ 722 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 723 s = 1; 724 } 725 726 address &= PAGE_MASK; 727 728 memset(cmd, 0, sizeof(*cmd)); 729 cmd->data[0] = devid; 730 cmd->data[0] |= (qdep & 0xff) << 24; 731 cmd->data[1] = devid; 732 cmd->data[2] = lower_32_bits(address); 733 cmd->data[3] = upper_32_bits(address); 734 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); 735 if (s) 736 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 737} 738 739static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, 740 u64 address, bool size) 741{ 742 memset(cmd, 0, sizeof(*cmd)); 743 744 address &= ~(0xfffULL); 745 746 cmd->data[0] = pasid & PASID_MASK; 747 cmd->data[1] = domid; 748 cmd->data[2] = lower_32_bits(address); 749 cmd->data[3] = upper_32_bits(address); 750 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 751 cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; 752 if (size) 753 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 754 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); 755} 756 757static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, 758 int qdep, u64 address, bool size) 759{ 760 memset(cmd, 0, sizeof(*cmd)); 761 762 address &= ~(0xfffULL); 763 764 cmd->data[0] = devid; 765 cmd->data[0] |= (pasid & 0xff) << 16; 766 cmd->data[0] |= (qdep & 0xff) << 24; 767 cmd->data[1] = devid; 768 cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; 769 cmd->data[2] = lower_32_bits(address); 770 cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; 771 cmd->data[3] = upper_32_bits(address); 772 if (size) 773 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 774 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); 775} 776 777static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, 778 int status, int tag, bool gn) 779{ 780 memset(cmd, 0, sizeof(*cmd)); 781 782 cmd->data[0] = devid; 783 if (gn) { 784 cmd->data[1] = pasid & PASID_MASK; 785 cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; 786 } 787 cmd->data[3] = tag & 0x1ff; 788 cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT; 789 790 CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR); 791} 792 793static void build_inv_all(struct iommu_cmd *cmd) 794{ 795 memset(cmd, 0, sizeof(*cmd)); 796 CMD_SET_TYPE(cmd, CMD_INV_ALL); 797} 798 799/* 800 * Writes the command to the IOMMUs command buffer and informs the 801 * hardware about the new command. 
802 */ 803static int iommu_queue_command_sync(struct amd_iommu *iommu, 804 struct iommu_cmd *cmd, 805 bool sync) 806{ 807 u32 left, tail, head, next_tail; 808 unsigned long flags; 809 810 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); 811 812again: 813 spin_lock_irqsave(&iommu->lock, flags); 814 815 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 816 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 817 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 818 left = (head - next_tail) % iommu->cmd_buf_size; 819 820 if (left <= 2) { 821 struct iommu_cmd sync_cmd; 822 volatile u64 sem = 0; 823 int ret; 824 825 build_completion_wait(&sync_cmd, (u64)&sem); 826 copy_cmd_to_buffer(iommu, &sync_cmd, tail); 827 828 spin_unlock_irqrestore(&iommu->lock, flags); 829 830 if ((ret = wait_on_sem(&sem)) != 0) 831 return ret; 832 833 goto again; 834 } 835 836 copy_cmd_to_buffer(iommu, cmd, tail); 837 838 /* We need to sync now to make sure all commands are processed */ 839 iommu->need_sync = sync; 840 841 spin_unlock_irqrestore(&iommu->lock, flags); 842 843 return 0; 844} 845 846static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) 847{ 848 return iommu_queue_command_sync(iommu, cmd, true); 849} 850 851/* 852 * This function queues a completion wait command into the command 853 * buffer of an IOMMU 854 */ 855static int iommu_completion_wait(struct amd_iommu *iommu) 856{ 857 struct iommu_cmd cmd; 858 volatile u64 sem = 0; 859 int ret; 860 861 if (!iommu->need_sync) 862 return 0; 863 864 build_completion_wait(&cmd, (u64)&sem); 865 866 ret = iommu_queue_command_sync(iommu, &cmd, false); 867 if (ret) 868 return ret; 869 870 return wait_on_sem(&sem); 871} 872 873static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) 874{ 875 struct iommu_cmd cmd; 876 877 build_inv_dte(&cmd, devid); 878 879 return iommu_queue_command(iommu, &cmd); 880} 881 882static void iommu_flush_dte_all(struct amd_iommu *iommu) 883{ 884 u32 devid; 885 886 for (devid = 0; devid <= 0xffff; ++devid) 887 iommu_flush_dte(iommu, devid); 888 889 iommu_completion_wait(iommu); 890} 891 892/* 893 * This function uses heavy locking and may disable irqs for some time. But 894 * this is no issue because it is only called during resume. 
895 */ 896static void iommu_flush_tlb_all(struct amd_iommu *iommu) 897{ 898 u32 dom_id; 899 900 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { 901 struct iommu_cmd cmd; 902 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 903 dom_id, 1); 904 iommu_queue_command(iommu, &cmd); 905 } 906 907 iommu_completion_wait(iommu); 908} 909 910static void iommu_flush_all(struct amd_iommu *iommu) 911{ 912 struct iommu_cmd cmd; 913 914 build_inv_all(&cmd); 915 916 iommu_queue_command(iommu, &cmd); 917 iommu_completion_wait(iommu); 918} 919 920void iommu_flush_all_caches(struct amd_iommu *iommu) 921{ 922 if (iommu_feature(iommu, FEATURE_IA)) { 923 iommu_flush_all(iommu); 924 } else { 925 iommu_flush_dte_all(iommu); 926 iommu_flush_tlb_all(iommu); 927 } 928} 929 930/* 931 * Command send function for flushing on-device TLB 932 */ 933static int device_flush_iotlb(struct iommu_dev_data *dev_data, 934 u64 address, size_t size) 935{ 936 struct amd_iommu *iommu; 937 struct iommu_cmd cmd; 938 int qdep; 939 940 qdep = dev_data->ats.qdep; 941 iommu = amd_iommu_rlookup_table[dev_data->devid]; 942 943 build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size); 944 945 return iommu_queue_command(iommu, &cmd); 946} 947 948/* 949 * Command send function for invalidating a device table entry 950 */ 951static int device_flush_dte(struct iommu_dev_data *dev_data) 952{ 953 struct amd_iommu *iommu; 954 int ret; 955 956 iommu = amd_iommu_rlookup_table[dev_data->devid]; 957 958 ret = iommu_flush_dte(iommu, dev_data->devid); 959 if (ret) 960 return ret; 961 962 if (dev_data->ats.enabled) 963 ret = device_flush_iotlb(dev_data, 0, ~0UL); 964 965 return ret; 966} 967 968/* 969 * TLB invalidation function which is called from the mapping functions. 970 * It invalidates a single PTE if the range to flush is within a single 971 * page. Otherwise it flushes the whole TLB of the IOMMU. 
972 */ 973static void __domain_flush_pages(struct protection_domain *domain, 974 u64 address, size_t size, int pde) 975{ 976 struct iommu_dev_data *dev_data; 977 struct iommu_cmd cmd; 978 int ret = 0, i; 979 980 build_inv_iommu_pages(&cmd, address, size, domain->id, pde); 981 982 for (i = 0; i < amd_iommus_present; ++i) { 983 if (!domain->dev_iommu[i]) 984 continue; 985 986 /* 987 * Devices of this domain are behind this IOMMU 988 * We need a TLB flush 989 */ 990 ret |= iommu_queue_command(amd_iommus[i], &cmd); 991 } 992 993 list_for_each_entry(dev_data, &domain->dev_list, list) { 994 995 if (!dev_data->ats.enabled) 996 continue; 997 998 ret |= device_flush_iotlb(dev_data, address, size); 999 } 1000 1001 WARN_ON(ret); 1002} 1003 1004static void domain_flush_pages(struct protection_domain *domain, 1005 u64 address, size_t size) 1006{ 1007 __domain_flush_pages(domain, address, size, 0); 1008} 1009 1010/* Flush the whole IO/TLB for a given protection domain */ 1011static void domain_flush_tlb(struct protection_domain *domain) 1012{ 1013 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); 1014} 1015 1016/* Flush the whole IO/TLB for a given protection domain - including PDE */ 1017static void domain_flush_tlb_pde(struct protection_domain *domain) 1018{ 1019 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); 1020} 1021 1022static void domain_flush_complete(struct protection_domain *domain) 1023{ 1024 int i; 1025 1026 for (i = 0; i < amd_iommus_present; ++i) { 1027 if (!domain->dev_iommu[i]) 1028 continue; 1029 1030 /* 1031 * Devices of this domain are behind this IOMMU 1032 * We need to wait for completion of all commands. 1033 */ 1034 iommu_completion_wait(amd_iommus[i]); 1035 } 1036} 1037 1038 1039/* 1040 * This function flushes the DTEs for all devices in domain 1041 */ 1042static void domain_flush_devices(struct protection_domain *domain) 1043{ 1044 struct iommu_dev_data *dev_data; 1045 1046 list_for_each_entry(dev_data, &domain->dev_list, list) 1047 device_flush_dte(dev_data); 1048} 1049 1050/**************************************************************************** 1051 * 1052 * The functions below are used the create the page table mappings for 1053 * unity mapped regions. 1054 * 1055 ****************************************************************************/ 1056 1057/* 1058 * This function is used to add another level to an IO page table. Adding 1059 * another level increases the size of the address space by 9 bits to a size up 1060 * to 64 bits. 
1061 */ 1062static bool increase_address_space(struct protection_domain *domain, 1063 gfp_t gfp) 1064{ 1065 u64 *pte; 1066 1067 if (domain->mode == PAGE_MODE_6_LEVEL) 1068 /* address space already 64 bit large */ 1069 return false; 1070 1071 pte = (void *)get_zeroed_page(gfp); 1072 if (!pte) 1073 return false; 1074 1075 *pte = PM_LEVEL_PDE(domain->mode, 1076 virt_to_phys(domain->pt_root)); 1077 domain->pt_root = pte; 1078 domain->mode += 1; 1079 domain->updated = true; 1080 1081 return true; 1082} 1083 1084static u64 *alloc_pte(struct protection_domain *domain, 1085 unsigned long address, 1086 unsigned long page_size, 1087 u64 **pte_page, 1088 gfp_t gfp) 1089{ 1090 int level, end_lvl; 1091 u64 *pte, *page; 1092 1093 BUG_ON(!is_power_of_2(page_size)); 1094 1095 while (address > PM_LEVEL_SIZE(domain->mode)) 1096 increase_address_space(domain, gfp); 1097 1098 level = domain->mode - 1; 1099 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; 1100 address = PAGE_SIZE_ALIGN(address, page_size); 1101 end_lvl = PAGE_SIZE_LEVEL(page_size); 1102 1103 while (level > end_lvl) { 1104 if (!IOMMU_PTE_PRESENT(*pte)) { 1105 page = (u64 *)get_zeroed_page(gfp); 1106 if (!page) 1107 return NULL; 1108 *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); 1109 } 1110 1111 /* No level skipping support yet */ 1112 if (PM_PTE_LEVEL(*pte) != level) 1113 return NULL; 1114 1115 level -= 1; 1116 1117 pte = IOMMU_PTE_PAGE(*pte); 1118 1119 if (pte_page && level == end_lvl) 1120 *pte_page = pte; 1121 1122 pte = &pte[PM_LEVEL_INDEX(level, address)]; 1123 } 1124 1125 return pte; 1126} 1127 1128/* 1129 * This function checks if there is a PTE for a given dma address. If 1130 * there is one, it returns the pointer to it. 1131 */ 1132static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) 1133{ 1134 int level; 1135 u64 *pte; 1136 1137 if (address > PM_LEVEL_SIZE(domain->mode)) 1138 return NULL; 1139 1140 level = domain->mode - 1; 1141 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; 1142 1143 while (level > 0) { 1144 1145 /* Not Present */ 1146 if (!IOMMU_PTE_PRESENT(*pte)) 1147 return NULL; 1148 1149 /* Large PTE */ 1150 if (PM_PTE_LEVEL(*pte) == 0x07) { 1151 unsigned long pte_mask, __pte; 1152 1153 /* 1154 * If we have a series of large PTEs, make 1155 * sure to return a pointer to the first one. 1156 */ 1157 pte_mask = PTE_PAGE_SIZE(*pte); 1158 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); 1159 __pte = ((unsigned long)pte) & pte_mask; 1160 1161 return (u64 *)__pte; 1162 } 1163 1164 /* No level skipping support yet */ 1165 if (PM_PTE_LEVEL(*pte) != level) 1166 return NULL; 1167 1168 level -= 1; 1169 1170 /* Walk to the next level */ 1171 pte = IOMMU_PTE_PAGE(*pte); 1172 pte = &pte[PM_LEVEL_INDEX(level, address)]; 1173 } 1174 1175 return pte; 1176} 1177 1178/* 1179 * Generic mapping functions. It maps a physical address into a DMA 1180 * address space. It allocates the page table pages if necessary. 1181 * In the future it can be extended to a generic mapping function 1182 * supporting all features of AMD IOMMU page tables like level skipping 1183 * and full 64 bit address spaces. 
1184 */ 1185static int iommu_map_page(struct protection_domain *dom, 1186 unsigned long bus_addr, 1187 unsigned long phys_addr, 1188 int prot, 1189 unsigned long page_size) 1190{ 1191 u64 __pte, *pte; 1192 int i, count; 1193 1194 if (!(prot & IOMMU_PROT_MASK)) 1195 return -EINVAL; 1196 1197 bus_addr = PAGE_ALIGN(bus_addr); 1198 phys_addr = PAGE_ALIGN(phys_addr); 1199 count = PAGE_SIZE_PTE_COUNT(page_size); 1200 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); 1201 1202 for (i = 0; i < count; ++i) 1203 if (IOMMU_PTE_PRESENT(pte[i])) 1204 return -EBUSY; 1205 1206 if (page_size > PAGE_SIZE) { 1207 __pte = PAGE_SIZE_PTE(phys_addr, page_size); 1208 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; 1209 } else 1210 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; 1211 1212 if (prot & IOMMU_PROT_IR) 1213 __pte |= IOMMU_PTE_IR; 1214 if (prot & IOMMU_PROT_IW) 1215 __pte |= IOMMU_PTE_IW; 1216 1217 for (i = 0; i < count; ++i) 1218 pte[i] = __pte; 1219 1220 update_domain(dom); 1221 1222 return 0; 1223} 1224 1225static unsigned long iommu_unmap_page(struct protection_domain *dom, 1226 unsigned long bus_addr, 1227 unsigned long page_size) 1228{ 1229 unsigned long long unmap_size, unmapped; 1230 u64 *pte; 1231 1232 BUG_ON(!is_power_of_2(page_size)); 1233 1234 unmapped = 0; 1235 1236 while (unmapped < page_size) { 1237 1238 pte = fetch_pte(dom, bus_addr); 1239 1240 if (!pte) { 1241 /* 1242 * No PTE for this address 1243 * move forward in 4kb steps 1244 */ 1245 unmap_size = PAGE_SIZE; 1246 } else if (PM_PTE_LEVEL(*pte) == 0) { 1247 /* 4kb PTE found for this address */ 1248 unmap_size = PAGE_SIZE; 1249 *pte = 0ULL; 1250 } else { 1251 int count, i; 1252 1253 /* Large PTE found which maps this address */ 1254 unmap_size = PTE_PAGE_SIZE(*pte); 1255 count = PAGE_SIZE_PTE_COUNT(unmap_size); 1256 for (i = 0; i < count; i++) 1257 pte[i] = 0ULL; 1258 } 1259 1260 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; 1261 unmapped += unmap_size; 1262 } 1263 1264 BUG_ON(!is_power_of_2(unmapped)); 1265 1266 return unmapped; 1267} 1268 1269/* 1270 * This function checks if a specific unity mapping entry is needed for 1271 * this specific IOMMU. 1272 */ 1273static int iommu_for_unity_map(struct amd_iommu *iommu, 1274 struct unity_map_entry *entry) 1275{ 1276 u16 bdf, i; 1277 1278 for (i = entry->devid_start; i <= entry->devid_end; ++i) { 1279 bdf = amd_iommu_alias_table[i]; 1280 if (amd_iommu_rlookup_table[bdf] == iommu) 1281 return 1; 1282 } 1283 1284 return 0; 1285} 1286 1287/* 1288 * This function actually applies the mapping to the page table of the 1289 * dma_ops domain. 1290 */ 1291static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 1292 struct unity_map_entry *e) 1293{ 1294 u64 addr; 1295 int ret; 1296 1297 for (addr = e->address_start; addr < e->address_end; 1298 addr += PAGE_SIZE) { 1299 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, 1300 PAGE_SIZE); 1301 if (ret) 1302 return ret; 1303 /* 1304 * if unity mapping is in aperture range mark the page 1305 * as allocated in the aperture 1306 */ 1307 if (addr < dma_dom->aperture_size) 1308 __set_bit(addr >> PAGE_SHIFT, 1309 dma_dom->aperture[0]->bitmap); 1310 } 1311 1312 return 0; 1313} 1314 1315/* 1316 * Init the unity mappings for a specific IOMMU in the system 1317 * 1318 * Basically iterates over all unity mapping entries and applies them to 1319 * the default domain DMA of that IOMMU if necessary. 
1320 */ 1321static int iommu_init_unity_mappings(struct amd_iommu *iommu) 1322{ 1323 struct unity_map_entry *entry; 1324 int ret; 1325 1326 list_for_each_entry(entry, &amd_iommu_unity_map, list) { 1327 if (!iommu_for_unity_map(iommu, entry)) 1328 continue; 1329 ret = dma_ops_unity_map(iommu->default_dom, entry); 1330 if (ret) 1331 return ret; 1332 } 1333 1334 return 0; 1335} 1336 1337/* 1338 * Inits the unity mappings required for a specific device 1339 */ 1340static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, 1341 u16 devid) 1342{ 1343 struct unity_map_entry *e; 1344 int ret; 1345 1346 list_for_each_entry(e, &amd_iommu_unity_map, list) { 1347 if (!(devid >= e->devid_start && devid <= e->devid_end)) 1348 continue; 1349 ret = dma_ops_unity_map(dma_dom, e); 1350 if (ret) 1351 return ret; 1352 } 1353 1354 return 0; 1355} 1356 1357/**************************************************************************** 1358 * 1359 * The next functions belong to the address allocator for the dma_ops 1360 * interface functions. They work like the allocators in the other IOMMU 1361 * drivers. Its basically a bitmap which marks the allocated pages in 1362 * the aperture. Maybe it could be enhanced in the future to a more 1363 * efficient allocator. 1364 * 1365 ****************************************************************************/ 1366 1367/* 1368 * The address allocator core functions. 1369 * 1370 * called with domain->lock held 1371 */ 1372 1373/* 1374 * Used to reserve address ranges in the aperture (e.g. for exclusion 1375 * ranges. 1376 */ 1377static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, 1378 unsigned long start_page, 1379 unsigned int pages) 1380{ 1381 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; 1382 1383 if (start_page + pages > last_page) 1384 pages = last_page - start_page; 1385 1386 for (i = start_page; i < start_page + pages; ++i) { 1387 int index = i / APERTURE_RANGE_PAGES; 1388 int page = i % APERTURE_RANGE_PAGES; 1389 __set_bit(page, dom->aperture[index]->bitmap); 1390 } 1391} 1392 1393/* 1394 * This function is used to add a new aperture range to an existing 1395 * aperture in case of dma_ops domain allocation or address allocation 1396 * failure. 
1397 */ 1398static int alloc_new_range(struct dma_ops_domain *dma_dom, 1399 bool populate, gfp_t gfp) 1400{ 1401 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; 1402 struct amd_iommu *iommu; 1403 unsigned long i, old_size; 1404 1405#ifdef CONFIG_IOMMU_STRESS 1406 populate = false; 1407#endif 1408 1409 if (index >= APERTURE_MAX_RANGES) 1410 return -ENOMEM; 1411 1412 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); 1413 if (!dma_dom->aperture[index]) 1414 return -ENOMEM; 1415 1416 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); 1417 if (!dma_dom->aperture[index]->bitmap) 1418 goto out_free; 1419 1420 dma_dom->aperture[index]->offset = dma_dom->aperture_size; 1421 1422 if (populate) { 1423 unsigned long address = dma_dom->aperture_size; 1424 int i, num_ptes = APERTURE_RANGE_PAGES / 512; 1425 u64 *pte, *pte_page; 1426 1427 for (i = 0; i < num_ptes; ++i) { 1428 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, 1429 &pte_page, gfp); 1430 if (!pte) 1431 goto out_free; 1432 1433 dma_dom->aperture[index]->pte_pages[i] = pte_page; 1434 1435 address += APERTURE_RANGE_SIZE / 64; 1436 } 1437 } 1438 1439 old_size = dma_dom->aperture_size; 1440 dma_dom->aperture_size += APERTURE_RANGE_SIZE; 1441 1442 /* Reserve address range used for MSI messages */ 1443 if (old_size < MSI_ADDR_BASE_LO && 1444 dma_dom->aperture_size > MSI_ADDR_BASE_LO) { 1445 unsigned long spage; 1446 int pages; 1447 1448 pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE); 1449 spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT; 1450 1451 dma_ops_reserve_addresses(dma_dom, spage, pages); 1452 } 1453 1454 /* Initialize the exclusion range if necessary */ 1455 for_each_iommu(iommu) { 1456 if (iommu->exclusion_start && 1457 iommu->exclusion_start >= dma_dom->aperture[index]->offset 1458 && iommu->exclusion_start < dma_dom->aperture_size) { 1459 unsigned long startpage; 1460 int pages = iommu_num_pages(iommu->exclusion_start, 1461 iommu->exclusion_length, 1462 PAGE_SIZE); 1463 startpage = iommu->exclusion_start >> PAGE_SHIFT; 1464 dma_ops_reserve_addresses(dma_dom, startpage, pages); 1465 } 1466 } 1467 1468 /* 1469 * Check for areas already mapped as present in the new aperture 1470 * range and mark those pages as reserved in the allocator. Such 1471 * mappings may already exist as a result of requested unity 1472 * mappings for devices. 
1473 */ 1474 for (i = dma_dom->aperture[index]->offset; 1475 i < dma_dom->aperture_size; 1476 i += PAGE_SIZE) { 1477 u64 *pte = fetch_pte(&dma_dom->domain, i); 1478 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 1479 continue; 1480 1481 dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1); 1482 } 1483 1484 update_domain(&dma_dom->domain); 1485 1486 return 0; 1487 1488out_free: 1489 update_domain(&dma_dom->domain); 1490 1491 free_page((unsigned long)dma_dom->aperture[index]->bitmap); 1492 1493 kfree(dma_dom->aperture[index]); 1494 dma_dom->aperture[index] = NULL; 1495 1496 return -ENOMEM; 1497} 1498 1499static unsigned long dma_ops_area_alloc(struct device *dev, 1500 struct dma_ops_domain *dom, 1501 unsigned int pages, 1502 unsigned long align_mask, 1503 u64 dma_mask, 1504 unsigned long start) 1505{ 1506 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; 1507 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; 1508 int i = start >> APERTURE_RANGE_SHIFT; 1509 unsigned long boundary_size; 1510 unsigned long address = -1; 1511 unsigned long limit; 1512 1513 next_bit >>= PAGE_SHIFT; 1514 1515 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 1516 PAGE_SIZE) >> PAGE_SHIFT; 1517 1518 for (;i < max_index; ++i) { 1519 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; 1520 1521 if (dom->aperture[i]->offset >= dma_mask) 1522 break; 1523 1524 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, 1525 dma_mask >> PAGE_SHIFT); 1526 1527 address = iommu_area_alloc(dom->aperture[i]->bitmap, 1528 limit, next_bit, pages, 0, 1529 boundary_size, align_mask); 1530 if (address != -1) { 1531 address = dom->aperture[i]->offset + 1532 (address << PAGE_SHIFT); 1533 dom->next_address = address + (pages << PAGE_SHIFT); 1534 break; 1535 } 1536 1537 next_bit = 0; 1538 } 1539 1540 return address; 1541} 1542 1543static unsigned long dma_ops_alloc_addresses(struct device *dev, 1544 struct dma_ops_domain *dom, 1545 unsigned int pages, 1546 unsigned long align_mask, 1547 u64 dma_mask) 1548{ 1549 unsigned long address; 1550 1551#ifdef CONFIG_IOMMU_STRESS 1552 dom->next_address = 0; 1553 dom->need_flush = true; 1554#endif 1555 1556 address = dma_ops_area_alloc(dev, dom, pages, align_mask, 1557 dma_mask, dom->next_address); 1558 1559 if (address == -1) { 1560 dom->next_address = 0; 1561 address = dma_ops_area_alloc(dev, dom, pages, align_mask, 1562 dma_mask, 0); 1563 dom->need_flush = true; 1564 } 1565 1566 if (unlikely(address == -1)) 1567 address = DMA_ERROR_CODE; 1568 1569 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); 1570 1571 return address; 1572} 1573 1574/* 1575 * The address free function. 1576 * 1577 * called with domain->lock held 1578 */ 1579static void dma_ops_free_addresses(struct dma_ops_domain *dom, 1580 unsigned long address, 1581 unsigned int pages) 1582{ 1583 unsigned i = address >> APERTURE_RANGE_SHIFT; 1584 struct aperture_range *range = dom->aperture[i]; 1585 1586 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); 1587 1588#ifdef CONFIG_IOMMU_STRESS 1589 if (i < 4) 1590 return; 1591#endif 1592 1593 if (address >= dom->next_address) 1594 dom->need_flush = true; 1595 1596 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; 1597 1598 bitmap_clear(range->bitmap, address, pages); 1599 1600} 1601 1602/**************************************************************************** 1603 * 1604 * The next functions belong to the domain allocation. A domain is 1605 * allocated for every IOMMU as the default domain. 
If device isolation 1606 * is enabled, every device get its own domain. The most important thing 1607 * about domains is the page table mapping the DMA address space they 1608 * contain. 1609 * 1610 ****************************************************************************/ 1611 1612/* 1613 * This function adds a protection domain to the global protection domain list 1614 */ 1615static void add_domain_to_list(struct protection_domain *domain) 1616{ 1617 unsigned long flags; 1618 1619 spin_lock_irqsave(&amd_iommu_pd_lock, flags); 1620 list_add(&domain->list, &amd_iommu_pd_list); 1621 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); 1622} 1623 1624/* 1625 * This function removes a protection domain to the global 1626 * protection domain list 1627 */ 1628static void del_domain_from_list(struct protection_domain *domain) 1629{ 1630 unsigned long flags; 1631 1632 spin_lock_irqsave(&amd_iommu_pd_lock, flags); 1633 list_del(&domain->list); 1634 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); 1635} 1636 1637static u16 domain_id_alloc(void) 1638{ 1639 unsigned long flags; 1640 int id; 1641 1642 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1643 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); 1644 BUG_ON(id == 0); 1645 if (id > 0 && id < MAX_DOMAIN_ID) 1646 __set_bit(id, amd_iommu_pd_alloc_bitmap); 1647 else 1648 id = 0; 1649 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1650 1651 return id; 1652} 1653 1654static void domain_id_free(int id) 1655{ 1656 unsigned long flags; 1657 1658 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1659 if (id > 0 && id < MAX_DOMAIN_ID) 1660 __clear_bit(id, amd_iommu_pd_alloc_bitmap); 1661 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1662} 1663 1664static void free_pagetable(struct protection_domain *domain) 1665{ 1666 int i, j; 1667 u64 *p1, *p2, *p3; 1668 1669 p1 = domain->pt_root; 1670 1671 if (!p1) 1672 return; 1673 1674 for (i = 0; i < 512; ++i) { 1675 if (!IOMMU_PTE_PRESENT(p1[i])) 1676 continue; 1677 1678 p2 = IOMMU_PTE_PAGE(p1[i]); 1679 for (j = 0; j < 512; ++j) { 1680 if (!IOMMU_PTE_PRESENT(p2[j])) 1681 continue; 1682 p3 = IOMMU_PTE_PAGE(p2[j]); 1683 free_page((unsigned long)p3); 1684 } 1685 1686 free_page((unsigned long)p2); 1687 } 1688 1689 free_page((unsigned long)p1); 1690 1691 domain->pt_root = NULL; 1692} 1693 1694static void free_gcr3_tbl_level1(u64 *tbl) 1695{ 1696 u64 *ptr; 1697 int i; 1698 1699 for (i = 0; i < 512; ++i) { 1700 if (!(tbl[i] & GCR3_VALID)) 1701 continue; 1702 1703 ptr = __va(tbl[i] & PAGE_MASK); 1704 1705 free_page((unsigned long)ptr); 1706 } 1707} 1708 1709static void free_gcr3_tbl_level2(u64 *tbl) 1710{ 1711 u64 *ptr; 1712 int i; 1713 1714 for (i = 0; i < 512; ++i) { 1715 if (!(tbl[i] & GCR3_VALID)) 1716 continue; 1717 1718 ptr = __va(tbl[i] & PAGE_MASK); 1719 1720 free_gcr3_tbl_level1(ptr); 1721 } 1722} 1723 1724static void free_gcr3_table(struct protection_domain *domain) 1725{ 1726 if (domain->glx == 2) 1727 free_gcr3_tbl_level2(domain->gcr3_tbl); 1728 else if (domain->glx == 1) 1729 free_gcr3_tbl_level1(domain->gcr3_tbl); 1730 else if (domain->glx != 0) 1731 BUG(); 1732 1733 free_page((unsigned long)domain->gcr3_tbl); 1734} 1735 1736/* 1737 * Free a domain, only used if something went wrong in the 1738 * allocation path and we need to free an already allocated page table 1739 */ 1740static void dma_ops_domain_free(struct dma_ops_domain *dom) 1741{ 1742 int i; 1743 1744 if (!dom) 1745 return; 1746 1747 del_domain_from_list(&dom->domain); 1748 1749 
free_pagetable(&dom->domain); 1750 1751 for (i = 0; i < APERTURE_MAX_RANGES; ++i) { 1752 if (!dom->aperture[i]) 1753 continue; 1754 free_page((unsigned long)dom->aperture[i]->bitmap); 1755 kfree(dom->aperture[i]); 1756 } 1757 1758 kfree(dom); 1759} 1760 1761/* 1762 * Allocates a new protection domain usable for the dma_ops functions. 1763 * It also initializes the page table and the address allocator data 1764 * structures required for the dma_ops interface 1765 */ 1766static struct dma_ops_domain *dma_ops_domain_alloc(void) 1767{ 1768 struct dma_ops_domain *dma_dom; 1769 1770 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); 1771 if (!dma_dom) 1772 return NULL; 1773 1774 spin_lock_init(&dma_dom->domain.lock); 1775 1776 dma_dom->domain.id = domain_id_alloc(); 1777 if (dma_dom->domain.id == 0) 1778 goto free_dma_dom; 1779 INIT_LIST_HEAD(&dma_dom->domain.dev_list); 1780 dma_dom->domain.mode = PAGE_MODE_2_LEVEL; 1781 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1782 dma_dom->domain.flags = PD_DMA_OPS_MASK; 1783 dma_dom->domain.priv = dma_dom; 1784 if (!dma_dom->domain.pt_root) 1785 goto free_dma_dom; 1786 1787 dma_dom->need_flush = false; 1788 dma_dom->target_dev = 0xffff; 1789 1790 add_domain_to_list(&dma_dom->domain); 1791 1792 if (alloc_new_range(dma_dom, true, GFP_KERNEL)) 1793 goto free_dma_dom; 1794 1795 /* 1796 * mark the first page as allocated so we never return 0 as 1797 * a valid dma-address. So we can use 0 as error value 1798 */ 1799 dma_dom->aperture[0]->bitmap[0] = 1; 1800 dma_dom->next_address = 0; 1801 1802 1803 return dma_dom; 1804 1805free_dma_dom: 1806 dma_ops_domain_free(dma_dom); 1807 1808 return NULL; 1809} 1810 1811/* 1812 * little helper function to check whether a given protection domain is a 1813 * dma_ops domain 1814 */ 1815static bool dma_ops_domain(struct protection_domain *domain) 1816{ 1817 return domain->flags & PD_DMA_OPS_MASK; 1818} 1819 1820static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) 1821{ 1822 u64 pte_root = 0; 1823 u64 flags = 0; 1824 1825 if (domain->mode != PAGE_MODE_NONE) 1826 pte_root = virt_to_phys(domain->pt_root); 1827 1828 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 1829 << DEV_ENTRY_MODE_SHIFT; 1830 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 1831 1832 flags = amd_iommu_dev_table[devid].data[1]; 1833 1834 if (ats) 1835 flags |= DTE_FLAG_IOTLB; 1836 1837 if (domain->flags & PD_IOMMUV2_MASK) { 1838 u64 gcr3 = __pa(domain->gcr3_tbl); 1839 u64 glx = domain->glx; 1840 u64 tmp; 1841 1842 pte_root |= DTE_FLAG_GV; 1843 pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; 1844 1845 /* First mask out possible old values for GCR3 table */ 1846 tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; 1847 flags &= ~tmp; 1848 1849 tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; 1850 flags &= ~tmp; 1851 1852 /* Encode GCR3 table into DTE */ 1853 tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; 1854 pte_root |= tmp; 1855 1856 tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; 1857 flags |= tmp; 1858 1859 tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; 1860 flags |= tmp; 1861 } 1862 1863 flags &= ~(0xffffUL); 1864 flags |= domain->id; 1865 1866 amd_iommu_dev_table[devid].data[1] = flags; 1867 amd_iommu_dev_table[devid].data[0] = pte_root; 1868} 1869 1870static void clear_dte_entry(u16 devid) 1871{ 1872 /* remove entry from the device table seen by the hardware */ 1873 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; 1874 amd_iommu_dev_table[devid].data[1] = 0; 
1875 1876 amd_iommu_apply_erratum_63(devid); 1877} 1878 1879static void do_attach(struct iommu_dev_data *dev_data, 1880 struct protection_domain *domain) 1881{ 1882 struct amd_iommu *iommu; 1883 bool ats; 1884 1885 iommu = amd_iommu_rlookup_table[dev_data->devid]; 1886 ats = dev_data->ats.enabled; 1887 1888 /* Update data structures */ 1889 dev_data->domain = domain; 1890 list_add(&dev_data->list, &domain->dev_list); 1891 set_dte_entry(dev_data->devid, domain, ats); 1892 1893 /* Do reference counting */ 1894 domain->dev_iommu[iommu->index] += 1; 1895 domain->dev_cnt += 1; 1896 1897 /* Flush the DTE entry */ 1898 device_flush_dte(dev_data); 1899} 1900 1901static void do_detach(struct iommu_dev_data *dev_data) 1902{ 1903 struct amd_iommu *iommu; 1904 1905 iommu = amd_iommu_rlookup_table[dev_data->devid]; 1906 1907 /* decrease reference counters */ 1908 dev_data->domain->dev_iommu[iommu->index] -= 1; 1909 dev_data->domain->dev_cnt -= 1; 1910 1911 /* Update data structures */ 1912 dev_data->domain = NULL; 1913 list_del(&dev_data->list); 1914 clear_dte_entry(dev_data->devid); 1915 1916 /* Flush the DTE entry */ 1917 device_flush_dte(dev_data); 1918} 1919 1920/* 1921 * If a device is not yet associated with a domain, this function does 1922 * assigns it visible for the hardware 1923 */ 1924static int __attach_device(struct iommu_dev_data *dev_data, 1925 struct protection_domain *domain) 1926{ 1927 int ret; 1928 1929 /* lock domain */ 1930 spin_lock(&domain->lock); 1931 1932 if (dev_data->alias_data != NULL) { 1933 struct iommu_dev_data *alias_data = dev_data->alias_data; 1934 1935 /* Some sanity checks */ 1936 ret = -EBUSY; 1937 if (alias_data->domain != NULL && 1938 alias_data->domain != domain) 1939 goto out_unlock; 1940 1941 if (dev_data->domain != NULL && 1942 dev_data->domain != domain) 1943 goto out_unlock; 1944 1945 /* Do real assignment */ 1946 if (alias_data->domain == NULL) 1947 do_attach(alias_data, domain); 1948 1949 atomic_inc(&alias_data->bind); 1950 } 1951 1952 if (dev_data->domain == NULL) 1953 do_attach(dev_data, domain); 1954 1955 atomic_inc(&dev_data->bind); 1956 1957 ret = 0; 1958 1959out_unlock: 1960 1961 /* ready */ 1962 spin_unlock(&domain->lock); 1963 1964 return ret; 1965} 1966 1967 1968static void pdev_iommuv2_disable(struct pci_dev *pdev) 1969{ 1970 pci_disable_ats(pdev); 1971 pci_disable_pri(pdev); 1972 pci_disable_pasid(pdev); 1973} 1974 1975/* FIXME: Change generic reset-function to do the same */ 1976static int pri_reset_while_enabled(struct pci_dev *pdev) 1977{ 1978 u16 control; 1979 int pos; 1980 1981 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 1982 if (!pos) 1983 return -EINVAL; 1984 1985 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 1986 control |= PCI_PRI_CTRL_RESET; 1987 pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); 1988 1989 return 0; 1990} 1991 1992static int pdev_iommuv2_enable(struct pci_dev *pdev) 1993{ 1994 bool reset_enable; 1995 int reqs, ret; 1996 1997 /* FIXME: Hardcode number of outstanding requests for now */ 1998 reqs = 32; 1999 if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE)) 2000 reqs = 1; 2001 reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET); 2002 2003 /* Only allow access to user-accessible pages */ 2004 ret = pci_enable_pasid(pdev, 0); 2005 if (ret) 2006 goto out_err; 2007 2008 /* First reset the PRI state of the device */ 2009 ret = pci_reset_pri(pdev); 2010 if (ret) 2011 goto out_err; 2012 2013 /* Enable PRI */ 2014 ret = pci_enable_pri(pdev, reqs); 2015 if (ret) 
2016 goto out_err; 2017 2018 if (reset_enable) { 2019 ret = pri_reset_while_enabled(pdev); 2020 if (ret) 2021 goto out_err; 2022 } 2023 2024 ret = pci_enable_ats(pdev, PAGE_SHIFT); 2025 if (ret) 2026 goto out_err; 2027 2028 return 0; 2029 2030out_err: 2031 pci_disable_pri(pdev); 2032 pci_disable_pasid(pdev); 2033 2034 return ret; 2035} 2036 2037/* FIXME: Move this to PCI code */ 2038#define PCI_PRI_TLP_OFF (1 << 2) 2039 2040bool pci_pri_tlp_required(struct pci_dev *pdev) 2041{ 2042 u16 control; 2043 int pos; 2044 2045 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 2046 if (!pos) 2047 return false; 2048 2049 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 2050 2051 return (control & PCI_PRI_TLP_OFF) ? true : false; 2052} 2053 2054/* 2055 * If a device is not yet associated with a domain, this function does 2056 * assigns it visible for the hardware 2057 */ 2058static int attach_device(struct device *dev, 2059 struct protection_domain *domain) 2060{ 2061 struct pci_dev *pdev = to_pci_dev(dev); 2062 struct iommu_dev_data *dev_data; 2063 unsigned long flags; 2064 int ret; 2065 2066 dev_data = get_dev_data(dev); 2067 2068 if (domain->flags & PD_IOMMUV2_MASK) { 2069 if (!dev_data->iommu_v2 || !dev_data->passthrough) 2070 return -EINVAL; 2071 2072 if (pdev_iommuv2_enable(pdev) != 0) 2073 return -EINVAL; 2074 2075 dev_data->ats.enabled = true; 2076 dev_data->ats.qdep = pci_ats_queue_depth(pdev); 2077 dev_data->pri_tlp = pci_pri_tlp_required(pdev); 2078 } else if (amd_iommu_iotlb_sup && 2079 pci_enable_ats(pdev, PAGE_SHIFT) == 0) { 2080 dev_data->ats.enabled = true; 2081 dev_data->ats.qdep = pci_ats_queue_depth(pdev); 2082 } 2083 2084 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2085 ret = __attach_device(dev_data, domain); 2086 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2087 2088 /* 2089 * We might boot into a crash-kernel here. The crashed kernel 2090 * left the caches in the IOMMU dirty. So we have to flush 2091 * here to evict all dirty stuff. 2092 */ 2093 domain_flush_tlb_pde(domain); 2094 2095 return ret; 2096} 2097 2098/* 2099 * Removes a device from a protection domain (unlocked) 2100 */ 2101static void __detach_device(struct iommu_dev_data *dev_data) 2102{ 2103 struct protection_domain *domain; 2104 unsigned long flags; 2105 2106 BUG_ON(!dev_data->domain); 2107 2108 domain = dev_data->domain; 2109 2110 spin_lock_irqsave(&domain->lock, flags); 2111 2112 if (dev_data->alias_data != NULL) { 2113 struct iommu_dev_data *alias_data = dev_data->alias_data; 2114 2115 if (atomic_dec_and_test(&alias_data->bind)) 2116 do_detach(alias_data); 2117 } 2118 2119 if (atomic_dec_and_test(&dev_data->bind)) 2120 do_detach(dev_data); 2121 2122 spin_unlock_irqrestore(&domain->lock, flags); 2123 2124 /* 2125 * If we run in passthrough mode the device must be assigned to the 2126 * passthrough domain if it is detached from any other domain. 2127 * Make sure we can deassign from the pt_domain itself. 
2128 */ 2129 if (dev_data->passthrough && 2130 (dev_data->domain == NULL && domain != pt_domain)) 2131 __attach_device(dev_data, pt_domain); 2132} 2133 2134/* 2135 * Removes a device from a protection domain (with devtable_lock held) 2136 */ 2137static void detach_device(struct device *dev) 2138{ 2139 struct protection_domain *domain; 2140 struct iommu_dev_data *dev_data; 2141 unsigned long flags; 2142 2143 dev_data = get_dev_data(dev); 2144 domain = dev_data->domain; 2145 2146 /* lock device table */ 2147 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2148 __detach_device(dev_data); 2149 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2150 2151 if (domain->flags & PD_IOMMUV2_MASK) 2152 pdev_iommuv2_disable(to_pci_dev(dev)); 2153 else if (dev_data->ats.enabled) 2154 pci_disable_ats(to_pci_dev(dev)); 2155 2156 dev_data->ats.enabled = false; 2157} 2158 2159/* 2160 * Find out the protection domain structure for a given PCI device. This 2161 * will give us the pointer to the page table root for example. 2162 */ 2163static struct protection_domain *domain_for_device(struct device *dev) 2164{ 2165 struct iommu_dev_data *dev_data; 2166 struct protection_domain *dom = NULL; 2167 unsigned long flags; 2168 2169 dev_data = get_dev_data(dev); 2170 2171 if (dev_data->domain) 2172 return dev_data->domain; 2173 2174 if (dev_data->alias_data != NULL) { 2175 struct iommu_dev_data *alias_data = dev_data->alias_data; 2176 2177 read_lock_irqsave(&amd_iommu_devtable_lock, flags); 2178 if (alias_data->domain != NULL) { 2179 __attach_device(dev_data, alias_data->domain); 2180 dom = alias_data->domain; 2181 } 2182 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2183 } 2184 2185 return dom; 2186} 2187 2188static int device_change_notifier(struct notifier_block *nb, 2189 unsigned long action, void *data) 2190{ 2191 struct dma_ops_domain *dma_domain; 2192 struct protection_domain *domain; 2193 struct iommu_dev_data *dev_data; 2194 struct device *dev = data; 2195 struct amd_iommu *iommu; 2196 unsigned long flags; 2197 u16 devid; 2198 2199 if (!check_device(dev)) 2200 return 0; 2201 2202 devid = get_device_id(dev); 2203 iommu = amd_iommu_rlookup_table[devid]; 2204 dev_data = get_dev_data(dev); 2205 2206 switch (action) { 2207 case BUS_NOTIFY_UNBOUND_DRIVER: 2208 2209 domain = domain_for_device(dev); 2210 2211 if (!domain) 2212 goto out; 2213 if (dev_data->passthrough) 2214 break; 2215 detach_device(dev); 2216 break; 2217 case BUS_NOTIFY_ADD_DEVICE: 2218 2219 iommu_init_device(dev); 2220 2221 domain = domain_for_device(dev); 2222 2223 /* allocate a protection domain if a device is added */ 2224 dma_domain = find_protection_domain(devid); 2225 if (dma_domain) 2226 goto out; 2227 dma_domain = dma_ops_domain_alloc(); 2228 if (!dma_domain) 2229 goto out; 2230 dma_domain->target_dev = devid; 2231 2232 spin_lock_irqsave(&iommu_pd_list_lock, flags); 2233 list_add_tail(&dma_domain->list, &iommu_pd_list); 2234 spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 2235 2236 break; 2237 case BUS_NOTIFY_DEL_DEVICE: 2238 2239 iommu_uninit_device(dev); 2240 2241 default: 2242 goto out; 2243 } 2244 2245 iommu_completion_wait(iommu); 2246 2247out: 2248 return 0; 2249} 2250 2251static struct notifier_block device_nb = { 2252 .notifier_call = device_change_notifier, 2253}; 2254 2255void amd_iommu_init_notifier(void) 2256{ 2257 bus_register_notifier(&pci_bus_type, &device_nb); 2258} 2259 2260/***************************************************************************** 2261 * 2262 * The next functions belong to the 
dma_ops mapping/unmapping code. 2263 * 2264 *****************************************************************************/ 2265 2266/* 2267 * In the dma_ops path we only have the struct device. This function 2268 * finds the corresponding IOMMU, the protection domain and the 2269 * requestor id for a given device. 2270 * If the device is not yet associated with a domain this is also done 2271 * in this function. 2272 */ 2273static struct protection_domain *get_domain(struct device *dev) 2274{ 2275 struct protection_domain *domain; 2276 struct dma_ops_domain *dma_dom; 2277 u16 devid = get_device_id(dev); 2278 2279 if (!check_device(dev)) 2280 return ERR_PTR(-EINVAL); 2281 2282 domain = domain_for_device(dev); 2283 if (domain != NULL && !dma_ops_domain(domain)) 2284 return ERR_PTR(-EBUSY); 2285 2286 if (domain != NULL) 2287 return domain; 2288 2289 /* Device not bound yet - bind it */ 2290 dma_dom = find_protection_domain(devid); 2291 if (!dma_dom) 2292 dma_dom = amd_iommu_rlookup_table[devid]->default_dom; 2293 attach_device(dev, &dma_dom->domain); 2294 DUMP_printk("Using protection domain %d for device %s\n", 2295 dma_dom->domain.id, dev_name(dev)); 2296 2297 return &dma_dom->domain; 2298} 2299 2300static void update_device_table(struct protection_domain *domain) 2301{ 2302 struct iommu_dev_data *dev_data; 2303 2304 list_for_each_entry(dev_data, &domain->dev_list, list) 2305 set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled); 2306} 2307 2308static void update_domain(struct protection_domain *domain) 2309{ 2310 if (!domain->updated) 2311 return; 2312 2313 update_device_table(domain); 2314 2315 domain_flush_devices(domain); 2316 domain_flush_tlb_pde(domain); 2317 2318 domain->updated = false; 2319} 2320 2321/* 2322 * This function fetches the PTE for a given address in the aperture 2323 */ 2324static u64* dma_ops_get_pte(struct dma_ops_domain *dom, 2325 unsigned long address) 2326{ 2327 struct aperture_range *aperture; 2328 u64 *pte, *pte_page; 2329 2330 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; 2331 if (!aperture) 2332 return NULL; 2333 2334 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; 2335 if (!pte) { 2336 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, 2337 GFP_ATOMIC); 2338 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; 2339 } else 2340 pte += PM_LEVEL_INDEX(0, address); 2341 2342 update_domain(&dom->domain); 2343 2344 return pte; 2345} 2346 2347/* 2348 * This is the generic map function. It maps one 4KiB page at paddr to 2349 * the given address in the DMA address space for the domain. 2350 */ 2351static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, 2352 unsigned long address, 2353 phys_addr_t paddr, 2354 int direction) 2355{ 2356 u64 *pte, __pte; 2357 2358 WARN_ON(address > dom->aperture_size); 2359 2360 paddr &= PAGE_MASK; 2361 2362 pte = dma_ops_get_pte(dom, address); 2363 if (!pte) 2364 return DMA_ERROR_CODE; 2365 2366 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; 2367 2368 if (direction == DMA_TO_DEVICE) 2369 __pte |= IOMMU_PTE_IR; 2370 else if (direction == DMA_FROM_DEVICE) 2371 __pte |= IOMMU_PTE_IW; 2372 else if (direction == DMA_BIDIRECTIONAL) 2373 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; 2374 2375 WARN_ON(*pte); 2376 2377 *pte = __pte; 2378 2379 return (dma_addr_t)address; 2380} 2381 2382/* 2383 * The generic unmapping function for one page in the DMA address space.
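 * The PTE is simply cleared here; flushing the IOTLB is left to the callers.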
2384 */ 2385static void dma_ops_domain_unmap(struct dma_ops_domain *dom, 2386 unsigned long address) 2387{ 2388 struct aperture_range *aperture; 2389 u64 *pte; 2390 2391 if (address >= dom->aperture_size) 2392 return; 2393 2394 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; 2395 if (!aperture) 2396 return; 2397 2398 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; 2399 if (!pte) 2400 return; 2401 2402 pte += PM_LEVEL_INDEX(0, address); 2403 2404 WARN_ON(!*pte); 2405 2406 *pte = 0ULL; 2407} 2408 2409/* 2410 * This function contains common code for mapping of a physically 2411 * contiguous memory region into DMA address space. It is used by all 2412 * mapping functions provided with this IOMMU driver. 2413 * Must be called with the domain lock held. 2414 */ 2415static dma_addr_t __map_single(struct device *dev, 2416 struct dma_ops_domain *dma_dom, 2417 phys_addr_t paddr, 2418 size_t size, 2419 int dir, 2420 bool align, 2421 u64 dma_mask) 2422{ 2423 dma_addr_t offset = paddr & ~PAGE_MASK; 2424 dma_addr_t address, start, ret; 2425 unsigned int pages; 2426 unsigned long align_mask = 0; 2427 int i; 2428 2429 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 2430 paddr &= PAGE_MASK; 2431 2432 INC_STATS_COUNTER(total_map_requests); 2433 2434 if (pages > 1) 2435 INC_STATS_COUNTER(cross_page); 2436 2437 if (align) 2438 align_mask = (1UL << get_order(size)) - 1; 2439 2440retry: 2441 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 2442 dma_mask); 2443 if (unlikely(address == DMA_ERROR_CODE)) { 2444 /* 2445 * setting next_address here will let the address 2446 * allocator only scan the new allocated range in the 2447 * first run. This is a small optimization. 2448 */ 2449 dma_dom->next_address = dma_dom->aperture_size; 2450 2451 if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) 2452 goto out; 2453 2454 /* 2455 * aperture was successfully enlarged by 128 MB, try 2456 * allocation again 2457 */ 2458 goto retry; 2459 } 2460 2461 start = address; 2462 for (i = 0; i < pages; ++i) { 2463 ret = dma_ops_domain_map(dma_dom, start, paddr, dir); 2464 if (ret == DMA_ERROR_CODE) 2465 goto out_unmap; 2466 2467 paddr += PAGE_SIZE; 2468 start += PAGE_SIZE; 2469 } 2470 address += offset; 2471 2472 ADD_STATS_COUNTER(alloced_io_mem, size); 2473 2474 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 2475 domain_flush_tlb(&dma_dom->domain); 2476 dma_dom->need_flush = false; 2477 } else if (unlikely(amd_iommu_np_cache)) 2478 domain_flush_pages(&dma_dom->domain, address, size); 2479 2480out: 2481 return address; 2482 2483out_unmap: 2484 2485 for (--i; i >= 0; --i) { 2486 start -= PAGE_SIZE; 2487 dma_ops_domain_unmap(dma_dom, start); 2488 } 2489 2490 dma_ops_free_addresses(dma_dom, address, pages); 2491 2492 return DMA_ERROR_CODE; 2493} 2494 2495/* 2496 * Does the reverse of the __map_single function. 
Must be called with 2497 * the domain lock held too 2498 */ 2499static void __unmap_single(struct dma_ops_domain *dma_dom, 2500 dma_addr_t dma_addr, 2501 size_t size, 2502 int dir) 2503{ 2504 dma_addr_t flush_addr; 2505 dma_addr_t i, start; 2506 unsigned int pages; 2507 2508 if ((dma_addr == DMA_ERROR_CODE) || 2509 (dma_addr + size > dma_dom->aperture_size)) 2510 return; 2511 2512 flush_addr = dma_addr; 2513 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 2514 dma_addr &= PAGE_MASK; 2515 start = dma_addr; 2516 2517 for (i = 0; i < pages; ++i) { 2518 dma_ops_domain_unmap(dma_dom, start); 2519 start += PAGE_SIZE; 2520 } 2521 2522 SUB_STATS_COUNTER(alloced_io_mem, size); 2523 2524 dma_ops_free_addresses(dma_dom, dma_addr, pages); 2525 2526 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 2527 domain_flush_pages(&dma_dom->domain, flush_addr, size); 2528 dma_dom->need_flush = false; 2529 } 2530} 2531 2532/* 2533 * The exported map_single function for dma_ops. 2534 */ 2535static dma_addr_t map_page(struct device *dev, struct page *page, 2536 unsigned long offset, size_t size, 2537 enum dma_data_direction dir, 2538 struct dma_attrs *attrs) 2539{ 2540 unsigned long flags; 2541 struct protection_domain *domain; 2542 dma_addr_t addr; 2543 u64 dma_mask; 2544 phys_addr_t paddr = page_to_phys(page) + offset; 2545 2546 INC_STATS_COUNTER(cnt_map_single); 2547 2548 domain = get_domain(dev); 2549 if (PTR_ERR(domain) == -EINVAL) 2550 return (dma_addr_t)paddr; 2551 else if (IS_ERR(domain)) 2552 return DMA_ERROR_CODE; 2553 2554 dma_mask = *dev->dma_mask; 2555 2556 spin_lock_irqsave(&domain->lock, flags); 2557 2558 addr = __map_single(dev, domain->priv, paddr, size, dir, false, 2559 dma_mask); 2560 if (addr == DMA_ERROR_CODE) 2561 goto out; 2562 2563 domain_flush_complete(domain); 2564 2565out: 2566 spin_unlock_irqrestore(&domain->lock, flags); 2567 2568 return addr; 2569} 2570 2571/* 2572 * The exported unmap_single function for dma_ops. 2573 */ 2574static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, 2575 enum dma_data_direction dir, struct dma_attrs *attrs) 2576{ 2577 unsigned long flags; 2578 struct protection_domain *domain; 2579 2580 INC_STATS_COUNTER(cnt_unmap_single); 2581 2582 domain = get_domain(dev); 2583 if (IS_ERR(domain)) 2584 return; 2585 2586 spin_lock_irqsave(&domain->lock, flags); 2587 2588 __unmap_single(domain->priv, dma_addr, size, dir); 2589 2590 domain_flush_complete(domain); 2591 2592 spin_unlock_irqrestore(&domain->lock, flags); 2593} 2594 2595/* 2596 * This is a special map_sg function which is used if we should map a 2597 * device which is not handled by an AMD IOMMU in the system. 2598 */ 2599static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, 2600 int nelems, int dir) 2601{ 2602 struct scatterlist *s; 2603 int i; 2604 2605 for_each_sg(sglist, s, nelems, i) { 2606 s->dma_address = (dma_addr_t)sg_phys(s); 2607 s->dma_length = s->length; 2608 } 2609 2610 return nelems; 2611} 2612 2613/* 2614 * The exported map_sg function for dma_ops (handles scatter-gather 2615 * lists). 
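 * Each scatterlist element is mapped separately through __map_single().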
2616 */ 2617static int map_sg(struct device *dev, struct scatterlist *sglist, 2618 int nelems, enum dma_data_direction dir, 2619 struct dma_attrs *attrs) 2620{ 2621 unsigned long flags; 2622 struct protection_domain *domain; 2623 int i; 2624 struct scatterlist *s; 2625 phys_addr_t paddr; 2626 int mapped_elems = 0; 2627 u64 dma_mask; 2628 2629 INC_STATS_COUNTER(cnt_map_sg); 2630 2631 domain = get_domain(dev); 2632 if (PTR_ERR(domain) == -EINVAL) 2633 return map_sg_no_iommu(dev, sglist, nelems, dir); 2634 else if (IS_ERR(domain)) 2635 return 0; 2636 2637 dma_mask = *dev->dma_mask; 2638 2639 spin_lock_irqsave(&domain->lock, flags); 2640 2641 for_each_sg(sglist, s, nelems, i) { 2642 paddr = sg_phys(s); 2643 2644 s->dma_address = __map_single(dev, domain->priv, 2645 paddr, s->length, dir, false, 2646 dma_mask); 2647 2648 if (s->dma_address) { 2649 s->dma_length = s->length; 2650 mapped_elems++; 2651 } else 2652 goto unmap; 2653 } 2654 2655 domain_flush_complete(domain); 2656 2657out: 2658 spin_unlock_irqrestore(&domain->lock, flags); 2659 2660 return mapped_elems; 2661unmap: 2662 for_each_sg(sglist, s, mapped_elems, i) { 2663 if (s->dma_address) 2664 __unmap_single(domain->priv, s->dma_address, 2665 s->dma_length, dir); 2666 s->dma_address = s->dma_length = 0; 2667 } 2668 2669 mapped_elems = 0; 2670 2671 goto out; 2672} 2673 2674/* 2675 * The exported unmap_sg function for dma_ops (handles scatter-gather 2676 * lists). 2677 */ 2678static void unmap_sg(struct device *dev, struct scatterlist *sglist, 2679 int nelems, enum dma_data_direction dir, 2680 struct dma_attrs *attrs) 2681{ 2682 unsigned long flags; 2683 struct protection_domain *domain; 2684 struct scatterlist *s; 2685 int i; 2686 2687 INC_STATS_COUNTER(cnt_unmap_sg); 2688 2689 domain = get_domain(dev); 2690 if (IS_ERR(domain)) 2691 return; 2692 2693 spin_lock_irqsave(&domain->lock, flags); 2694 2695 for_each_sg(sglist, s, nelems, i) { 2696 __unmap_single(domain->priv, s->dma_address, 2697 s->dma_length, dir); 2698 s->dma_address = s->dma_length = 0; 2699 } 2700 2701 domain_flush_complete(domain); 2702 2703 spin_unlock_irqrestore(&domain->lock, flags); 2704} 2705 2706/* 2707 * The exported alloc_coherent function for dma_ops.
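 * Allocates zeroed pages and maps them into the domain with a bidirectional mapping.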
2708 */ 2709static void *alloc_coherent(struct device *dev, size_t size, 2710 dma_addr_t *dma_addr, gfp_t flag) 2711{ 2712 unsigned long flags; 2713 void *virt_addr; 2714 struct protection_domain *domain; 2715 phys_addr_t paddr; 2716 u64 dma_mask = dev->coherent_dma_mask; 2717 2718 INC_STATS_COUNTER(cnt_alloc_coherent); 2719 2720 domain = get_domain(dev); 2721 if (PTR_ERR(domain) == -EINVAL) { 2722 virt_addr = (void *)__get_free_pages(flag, get_order(size)); 2723 *dma_addr = __pa(virt_addr); 2724 return virt_addr; 2725 } else if (IS_ERR(domain)) 2726 return NULL; 2727 2728 dma_mask = dev->coherent_dma_mask; 2729 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 2730 flag |= __GFP_ZERO; 2731 2732 virt_addr = (void *)__get_free_pages(flag, get_order(size)); 2733 if (!virt_addr) 2734 return NULL; 2735 2736 paddr = virt_to_phys(virt_addr); 2737 2738 if (!dma_mask) 2739 dma_mask = *dev->dma_mask; 2740 2741 spin_lock_irqsave(&domain->lock, flags); 2742 2743 *dma_addr = __map_single(dev, domain->priv, paddr, 2744 size, DMA_BIDIRECTIONAL, true, dma_mask); 2745 2746 if (*dma_addr == DMA_ERROR_CODE) { 2747 spin_unlock_irqrestore(&domain->lock, flags); 2748 goto out_free; 2749 } 2750 2751 domain_flush_complete(domain); 2752 2753 spin_unlock_irqrestore(&domain->lock, flags); 2754 2755 return virt_addr; 2756 2757out_free: 2758 2759 free_pages((unsigned long)virt_addr, get_order(size)); 2760 2761 return NULL; 2762} 2763 2764/* 2765 * The exported free_coherent function for dma_ops. 2766 */ 2767static void free_coherent(struct device *dev, size_t size, 2768 void *virt_addr, dma_addr_t dma_addr) 2769{ 2770 unsigned long flags; 2771 struct protection_domain *domain; 2772 2773 INC_STATS_COUNTER(cnt_free_coherent); 2774 2775 domain = get_domain(dev); 2776 if (IS_ERR(domain)) 2777 goto free_mem; 2778 2779 spin_lock_irqsave(&domain->lock, flags); 2780 2781 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2782 2783 domain_flush_complete(domain); 2784 2785 spin_unlock_irqrestore(&domain->lock, flags); 2786 2787free_mem: 2788 free_pages((unsigned long)virt_addr, get_order(size)); 2789} 2790 2791/* 2792 * This function is called by the DMA layer to find out if we can handle a 2793 * particular device. It is part of the dma_ops. 2794 */ 2795static int amd_iommu_dma_supported(struct device *dev, u64 mask) 2796{ 2797 return check_device(dev); 2798} 2799 2800/* 2801 * The function for pre-allocating protection domains. 2802 * 2803 * If the driver core informs the DMA layer when a driver grabs a device 2804 * we don't need to preallocate the protection domains anymore. 2805 * For now we have to. 2806 */ 2807static void prealloc_protection_domains(void) 2808{ 2809 struct iommu_dev_data *dev_data; 2810 struct dma_ops_domain *dma_dom; 2811 struct pci_dev *dev = NULL; 2812 u16 devid; 2813 2814 for_each_pci_dev(dev) { 2815 2816 /* Do we handle this device? */ 2817 if (!check_device(&dev->dev)) 2818 continue; 2819 2820 dev_data = get_dev_data(&dev->dev); 2821 if (!amd_iommu_force_isolation && dev_data->iommu_v2) { 2822 /* Make sure passthrough domain is allocated */ 2823 alloc_passthrough_domain(); 2824 dev_data->passthrough = true; 2825 attach_device(&dev->dev, pt_domain); 2826 pr_info("AMD-Vi: Using passthrough domain for device %s\n", 2827 dev_name(&dev->dev)); 2828 } 2829 2830 /* Is there already any domain for it?
*/ 2831 if (domain_for_device(&dev->dev)) 2832 continue; 2833 2834 devid = get_device_id(&dev->dev); 2835 2836 dma_dom = dma_ops_domain_alloc(); 2837 if (!dma_dom) 2838 continue; 2839 init_unity_mappings_for_device(dma_dom, devid); 2840 dma_dom->target_dev = devid; 2841 2842 attach_device(&dev->dev, &dma_dom->domain); 2843 2844 list_add_tail(&dma_dom->list, &iommu_pd_list); 2845 } 2846} 2847 2848static struct dma_map_ops amd_iommu_dma_ops = { 2849 .alloc_coherent = alloc_coherent, 2850 .free_coherent = free_coherent, 2851 .map_page = map_page, 2852 .unmap_page = unmap_page, 2853 .map_sg = map_sg, 2854 .unmap_sg = unmap_sg, 2855 .dma_supported = amd_iommu_dma_supported, 2856}; 2857 2858static unsigned device_dma_ops_init(void) 2859{ 2860 struct iommu_dev_data *dev_data; 2861 struct pci_dev *pdev = NULL; 2862 unsigned unhandled = 0; 2863 2864 for_each_pci_dev(pdev) { 2865 if (!check_device(&pdev->dev)) { 2866 2867 iommu_ignore_device(&pdev->dev); 2868 2869 unhandled += 1; 2870 continue; 2871 } 2872 2873 dev_data = get_dev_data(&pdev->dev); 2874 2875 if (!dev_data->passthrough) 2876 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; 2877 else 2878 pdev->dev.archdata.dma_ops = &nommu_dma_ops; 2879 } 2880 2881 return unhandled; 2882} 2883 2884/* 2885 * The function which clues the AMD IOMMU driver into dma_ops. 2886 */ 2887 2888void __init amd_iommu_init_api(void) 2889{ 2890 bus_set_iommu(&pci_bus_type, &amd_iommu_ops); 2891} 2892 2893int __init amd_iommu_init_dma_ops(void) 2894{ 2895 struct amd_iommu *iommu; 2896 int ret, unhandled; 2897 2898 /* 2899 * first allocate a default protection domain for every IOMMU we 2900 * found in the system. Devices not assigned to any other 2901 * protection domain will be assigned to the default one. 2902 */ 2903 for_each_iommu(iommu) { 2904 iommu->default_dom = dma_ops_domain_alloc(); 2905 if (iommu->default_dom == NULL) 2906 return -ENOMEM; 2907 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 2908 ret = iommu_init_unity_mappings(iommu); 2909 if (ret) 2910 goto free_domains; 2911 } 2912 2913 /* 2914 * Pre-allocate the protection domains for each device. 2915 */ 2916 prealloc_protection_domains(); 2917 2918 iommu_detected = 1; 2919 swiotlb = 0; 2920 2921 /* Make the driver finally visible to the drivers */ 2922 unhandled = device_dma_ops_init(); 2923 if (unhandled && max_pfn > MAX_DMA32_PFN) { 2924 /* There are unhandled devices - initialize swiotlb for them */ 2925 swiotlb = 1; 2926 } 2927 2928 amd_iommu_stats_init(); 2929 2930 return 0; 2931 2932free_domains: 2933 2934 for_each_iommu(iommu) { 2935 if (iommu->default_dom) 2936 dma_ops_domain_free(iommu->default_dom); 2937 } 2938 2939 return ret; 2940} 2941 2942/***************************************************************************** 2943 * 2944 * The following functions belong to the exported interface of AMD IOMMU 2945 * 2946 * This interface allows access to lower level functions of the IOMMU 2947 * like protection domain handling and assignment of devices to domains 2948 * which is not possible with the dma_ops interface.
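 * These entry points are wired up via the amd_iommu_ops structure and the amd_iommu_* exports further below.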
2949 * 2950 *****************************************************************************/ 2951 2952static void cleanup_domain(struct protection_domain *domain) 2953{ 2954 struct iommu_dev_data *dev_data, *next; 2955 unsigned long flags; 2956 2957 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2958 2959 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { 2960 __detach_device(dev_data); 2961 atomic_set(&dev_data->bind, 0); 2962 } 2963 2964 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2965} 2966 2967static void protection_domain_free(struct protection_domain *domain) 2968{ 2969 if (!domain) 2970 return; 2971 2972 del_domain_from_list(domain); 2973 2974 if (domain->id) 2975 domain_id_free(domain->id); 2976 2977 kfree(domain); 2978} 2979 2980static struct protection_domain *protection_domain_alloc(void) 2981{ 2982 struct protection_domain *domain; 2983 2984 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 2985 if (!domain) 2986 return NULL; 2987 2988 spin_lock_init(&domain->lock); 2989 mutex_init(&domain->api_lock); 2990 domain->id = domain_id_alloc(); 2991 if (!domain->id) 2992 goto out_err; 2993 INIT_LIST_HEAD(&domain->dev_list); 2994 2995 add_domain_to_list(domain); 2996 2997 return domain; 2998 2999out_err: 3000 kfree(domain); 3001 3002 return NULL; 3003} 3004 3005static int __init alloc_passthrough_domain(void) 3006{ 3007 if (pt_domain != NULL) 3008 return 0; 3009 3010 /* allocate passthrough domain */ 3011 pt_domain = protection_domain_alloc(); 3012 if (!pt_domain) 3013 return -ENOMEM; 3014 3015 pt_domain->mode = PAGE_MODE_NONE; 3016 3017 return 0; 3018} 3019static int amd_iommu_domain_init(struct iommu_domain *dom) 3020{ 3021 struct protection_domain *domain; 3022 3023 domain = protection_domain_alloc(); 3024 if (!domain) 3025 goto out_free; 3026 3027 domain->mode = PAGE_MODE_3_LEVEL; 3028 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); 3029 if (!domain->pt_root) 3030 goto out_free; 3031 3032 domain->iommu_domain = dom; 3033 3034 dom->priv = domain; 3035 3036 return 0; 3037 3038out_free: 3039 protection_domain_free(domain); 3040 3041 return -ENOMEM; 3042} 3043 3044static void amd_iommu_domain_destroy(struct iommu_domain *dom) 3045{ 3046 struct protection_domain *domain = dom->priv; 3047 3048 if (!domain) 3049 return; 3050 3051 if (domain->dev_cnt > 0) 3052 cleanup_domain(domain); 3053 3054 BUG_ON(domain->dev_cnt != 0); 3055 3056 if (domain->mode != PAGE_MODE_NONE) 3057 free_pagetable(domain); 3058 3059 if (domain->flags & PD_IOMMUV2_MASK) 3060 free_gcr3_table(domain); 3061 3062 protection_domain_free(domain); 3063 3064 dom->priv = NULL; 3065} 3066 3067static void amd_iommu_detach_device(struct iommu_domain *dom, 3068 struct device *dev) 3069{ 3070 struct iommu_dev_data *dev_data = dev->archdata.iommu; 3071 struct amd_iommu *iommu; 3072 u16 devid; 3073 3074 if (!check_device(dev)) 3075 return; 3076 3077 devid = get_device_id(dev); 3078 3079 if (dev_data->domain != NULL) 3080 detach_device(dev); 3081 3082 iommu = amd_iommu_rlookup_table[devid]; 3083 if (!iommu) 3084 return; 3085 3086 iommu_completion_wait(iommu); 3087} 3088 3089static int amd_iommu_attach_device(struct iommu_domain *dom, 3090 struct device *dev) 3091{ 3092 struct protection_domain *domain = dom->priv; 3093 struct iommu_dev_data *dev_data; 3094 struct amd_iommu *iommu; 3095 int ret; 3096 3097 if (!check_device(dev)) 3098 return -EINVAL; 3099 3100 dev_data = dev->archdata.iommu; 3101 3102 iommu = amd_iommu_rlookup_table[dev_data->devid]; 3103 if (!iommu) 3104 return -EINVAL; 3105 3106 
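	/* The device may already be bound to another domain - detach it first */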
if (dev_data->domain) 3107 detach_device(dev); 3108 3109 ret = attach_device(dev, domain); 3110 3111 iommu_completion_wait(iommu); 3112 3113 return ret; 3114} 3115 3116static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, 3117 phys_addr_t paddr, size_t page_size, int iommu_prot) 3118{ 3119 struct protection_domain *domain = dom->priv; 3120 int prot = 0; 3121 int ret; 3122 3123 if (domain->mode == PAGE_MODE_NONE) 3124 return -EINVAL; 3125 3126 if (iommu_prot & IOMMU_READ) 3127 prot |= IOMMU_PROT_IR; 3128 if (iommu_prot & IOMMU_WRITE) 3129 prot |= IOMMU_PROT_IW; 3130 3131 mutex_lock(&domain->api_lock); 3132 ret = iommu_map_page(domain, iova, paddr, prot, page_size); 3133 mutex_unlock(&domain->api_lock); 3134 3135 return ret; 3136} 3137 3138static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, 3139 size_t page_size) 3140{ 3141 struct protection_domain *domain = dom->priv; 3142 size_t unmap_size; 3143 3144 if (domain->mode == PAGE_MODE_NONE) 3145 return -EINVAL; 3146 3147 mutex_lock(&domain->api_lock); 3148 unmap_size = iommu_unmap_page(domain, iova, page_size); 3149 mutex_unlock(&domain->api_lock); 3150 3151 domain_flush_tlb_pde(domain); 3152 3153 return unmap_size; 3154} 3155 3156static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 3157 unsigned long iova) 3158{ 3159 struct protection_domain *domain = dom->priv; 3160 unsigned long offset_mask; 3161 phys_addr_t paddr; 3162 u64 *pte, __pte; 3163 3164 if (domain->mode == PAGE_MODE_NONE) 3165 return iova; 3166 3167 pte = fetch_pte(domain, iova); 3168 3169 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 3170 return 0; 3171 3172 if (PM_PTE_LEVEL(*pte) == 0) 3173 offset_mask = PAGE_SIZE - 1; 3174 else 3175 offset_mask = PTE_PAGE_SIZE(*pte) - 1; 3176 3177 __pte = *pte & PM_ADDR_MASK; 3178 paddr = (__pte & ~offset_mask) | (iova & offset_mask); 3179 3180 return paddr; 3181} 3182 3183static int amd_iommu_domain_has_cap(struct iommu_domain *domain, 3184 unsigned long cap) 3185{ 3186 switch (cap) { 3187 case IOMMU_CAP_CACHE_COHERENCY: 3188 return 1; 3189 } 3190 3191 return 0; 3192} 3193 3194static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) 3195{ 3196 struct iommu_dev_data *dev_data = dev->archdata.iommu; 3197 struct pci_dev *pdev = to_pci_dev(dev); 3198 u16 devid; 3199 3200 if (!dev_data) 3201 return -ENODEV; 3202 3203 if (pdev->is_virtfn || !iommu_group_mf) 3204 devid = dev_data->devid; 3205 else 3206 devid = calc_devid(pdev->bus->number, 3207 PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); 3208 3209 *groupid = amd_iommu_alias_table[devid]; 3210 3211 return 0; 3212} 3213 3214static struct iommu_ops amd_iommu_ops = { 3215 .domain_init = amd_iommu_domain_init, 3216 .domain_destroy = amd_iommu_domain_destroy, 3217 .attach_dev = amd_iommu_attach_device, 3218 .detach_dev = amd_iommu_detach_device, 3219 .map = amd_iommu_map, 3220 .unmap = amd_iommu_unmap, 3221 .iova_to_phys = amd_iommu_iova_to_phys, 3222 .domain_has_cap = amd_iommu_domain_has_cap, 3223 .device_group = amd_iommu_device_group, 3224 .pgsize_bitmap = AMD_IOMMU_PGSIZES, 3225}; 3226 3227/***************************************************************************** 3228 * 3229 * The next functions do a basic initialization of IOMMU for pass through 3230 * mode 3231 * 3232 * In passthrough mode the IOMMU is initialized and enabled but not used for 3233 * DMA-API translation. 
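 * Devices are attached to the global passthrough domain (pt_domain) instead.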
3234 * 3235 *****************************************************************************/ 3236 3237int __init amd_iommu_init_passthrough(void) 3238{ 3239 struct iommu_dev_data *dev_data; 3240 struct pci_dev *dev = NULL; 3241 struct amd_iommu *iommu; 3242 u16 devid; 3243 int ret; 3244 3245 ret = alloc_passthrough_domain(); 3246 if (ret) 3247 return ret; 3248 3249 for_each_pci_dev(dev) { 3250 if (!check_device(&dev->dev)) 3251 continue; 3252 3253 dev_data = get_dev_data(&dev->dev); 3254 dev_data->passthrough = true; 3255 3256 devid = get_device_id(&dev->dev); 3257 3258 iommu = amd_iommu_rlookup_table[devid]; 3259 if (!iommu) 3260 continue; 3261 3262 attach_device(&dev->dev, pt_domain); 3263 } 3264 3265 amd_iommu_stats_init(); 3266 3267 pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); 3268 3269 return 0; 3270} 3271 3272/* IOMMUv2 specific functions */ 3273int amd_iommu_register_ppr_notifier(struct notifier_block *nb) 3274{ 3275 return atomic_notifier_chain_register(&ppr_notifier, nb); 3276} 3277EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); 3278 3279int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) 3280{ 3281 return atomic_notifier_chain_unregister(&ppr_notifier, nb); 3282} 3283EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); 3284 3285void amd_iommu_domain_direct_map(struct iommu_domain *dom) 3286{ 3287 struct protection_domain *domain = dom->priv; 3288 unsigned long flags; 3289 3290 spin_lock_irqsave(&domain->lock, flags); 3291 3292 /* Update data structure */ 3293 domain->mode = PAGE_MODE_NONE; 3294 domain->updated = true; 3295 3296 /* Make changes visible to IOMMUs */ 3297 update_domain(domain); 3298 3299 /* Page-table is not visible to IOMMU anymore, so free it */ 3300 free_pagetable(domain); 3301 3302 spin_unlock_irqrestore(&domain->lock, flags); 3303} 3304EXPORT_SYMBOL(amd_iommu_domain_direct_map); 3305 3306int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) 3307{ 3308 struct protection_domain *domain = dom->priv; 3309 unsigned long flags; 3310 int levels, ret; 3311 3312 if (pasids <= 0 || pasids > (PASID_MASK + 1)) 3313 return -EINVAL; 3314 3315 /* Number of GCR3 table levels required */ 3316 for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) 3317 levels += 1; 3318 3319 if (levels > amd_iommu_max_glx_val) 3320 return -EINVAL; 3321 3322 spin_lock_irqsave(&domain->lock, flags); 3323 3324 /* 3325 * Save us all sanity checks whether devices already in the 3326 * domain support IOMMUv2. Just force that the domain has no 3327 * devices attached when it is switched into IOMMUv2 mode. 
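 * The GCR3 root table is allocated below; per-PASID entries are installed later via amd_iommu_domain_set_gcr3().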
3328 */ 3329 ret = -EBUSY; 3330 if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) 3331 goto out; 3332 3333 ret = -ENOMEM; 3334 domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); 3335 if (domain->gcr3_tbl == NULL) 3336 goto out; 3337 3338 domain->glx = levels; 3339 domain->flags |= PD_IOMMUV2_MASK; 3340 domain->updated = true; 3341 3342 update_domain(domain); 3343 3344 ret = 0; 3345 3346out: 3347 spin_unlock_irqrestore(&domain->lock, flags); 3348 3349 return ret; 3350} 3351EXPORT_SYMBOL(amd_iommu_domain_enable_v2); 3352 3353static int __flush_pasid(struct protection_domain *domain, int pasid, 3354 u64 address, bool size) 3355{ 3356 struct iommu_dev_data *dev_data; 3357 struct iommu_cmd cmd; 3358 int i, ret; 3359 3360 if (!(domain->flags & PD_IOMMUV2_MASK)) 3361 return -EINVAL; 3362 3363 build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size); 3364 3365 /* 3366 * IOMMU TLB needs to be flushed before Device TLB to 3367 * prevent device TLB refill from IOMMU TLB 3368 */ 3369 for (i = 0; i < amd_iommus_present; ++i) { 3370 if (domain->dev_iommu[i] == 0) 3371 continue; 3372 3373 ret = iommu_queue_command(amd_iommus[i], &cmd); 3374 if (ret != 0) 3375 goto out; 3376 } 3377 3378 /* Wait until IOMMU TLB flushes are complete */ 3379 domain_flush_complete(domain); 3380 3381 /* Now flush device TLBs */ 3382 list_for_each_entry(dev_data, &domain->dev_list, list) { 3383 struct amd_iommu *iommu; 3384 int qdep; 3385 3386 BUG_ON(!dev_data->ats.enabled); 3387 3388 qdep = dev_data->ats.qdep; 3389 iommu = amd_iommu_rlookup_table[dev_data->devid]; 3390 3391 build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, 3392 qdep, address, size); 3393 3394 ret = iommu_queue_command(iommu, &cmd); 3395 if (ret != 0) 3396 goto out; 3397 } 3398 3399 /* Wait until all device TLBs are flushed */ 3400 domain_flush_complete(domain); 3401 3402 ret = 0; 3403 3404out: 3405 3406 return ret; 3407} 3408 3409static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid, 3410 u64 address) 3411{ 3412 INC_STATS_COUNTER(invalidate_iotlb); 3413 3414 return __flush_pasid(domain, pasid, address, false); 3415} 3416 3417int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, 3418 u64 address) 3419{ 3420 struct protection_domain *domain = dom->priv; 3421 unsigned long flags; 3422 int ret; 3423 3424 spin_lock_irqsave(&domain->lock, flags); 3425 ret = __amd_iommu_flush_page(domain, pasid, address); 3426 spin_unlock_irqrestore(&domain->lock, flags); 3427 3428 return ret; 3429} 3430EXPORT_SYMBOL(amd_iommu_flush_page); 3431 3432static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid) 3433{ 3434 INC_STATS_COUNTER(invalidate_iotlb_all); 3435 3436 return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 3437 true); 3438} 3439 3440int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid) 3441{ 3442 struct protection_domain *domain = dom->priv; 3443 unsigned long flags; 3444 int ret; 3445 3446 spin_lock_irqsave(&domain->lock, flags); 3447 ret = __amd_iommu_flush_tlb(domain, pasid); 3448 spin_unlock_irqrestore(&domain->lock, flags); 3449 3450 return ret; 3451} 3452EXPORT_SYMBOL(amd_iommu_flush_tlb); 3453 3454static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) 3455{ 3456 int index; 3457 u64 *pte; 3458 3459 while (true) { 3460 3461 index = (pasid >> (9 * level)) & 0x1ff; 3462 pte = &root[index]; 3463 3464 if (level == 0) 3465 break; 3466 3467 if (!(*pte & GCR3_VALID)) { 3468 if (!alloc) 3469 return NULL; 3470 3471 root = (void *)get_zeroed_page(GFP_ATOMIC); 3472 
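			/* The atomic allocation of a new table level may fail */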
if (root == NULL) 3473 return NULL; 3474 3475 *pte = __pa(root) | GCR3_VALID; 3476 } 3477 3478 root = __va(*pte & PAGE_MASK); 3479 3480 level -= 1; 3481 } 3482 3483 return pte; 3484} 3485 3486static int __set_gcr3(struct protection_domain *domain, int pasid, 3487 unsigned long cr3) 3488{ 3489 u64 *pte; 3490 3491 if (domain->mode != PAGE_MODE_NONE) 3492 return -EINVAL; 3493 3494 pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); 3495 if (pte == NULL) 3496 return -ENOMEM; 3497 3498 *pte = (cr3 & PAGE_MASK) | GCR3_VALID; 3499 3500 return __amd_iommu_flush_tlb(domain, pasid); 3501} 3502 3503static int __clear_gcr3(struct protection_domain *domain, int pasid) 3504{ 3505 u64 *pte; 3506 3507 if (domain->mode != PAGE_MODE_NONE) 3508 return -EINVAL; 3509 3510 pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); 3511 if (pte == NULL) 3512 return 0; 3513 3514 *pte = 0; 3515 3516 return __amd_iommu_flush_tlb(domain, pasid); 3517} 3518 3519int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, 3520 unsigned long cr3) 3521{ 3522 struct protection_domain *domain = dom->priv; 3523 unsigned long flags; 3524 int ret; 3525 3526 spin_lock_irqsave(&domain->lock, flags); 3527 ret = __set_gcr3(domain, pasid, cr3); 3528 spin_unlock_irqrestore(&domain->lock, flags); 3529 3530 return ret; 3531} 3532EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); 3533 3534int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid) 3535{ 3536 struct protection_domain *domain = dom->priv; 3537 unsigned long flags; 3538 int ret; 3539 3540 spin_lock_irqsave(&domain->lock, flags); 3541 ret = __clear_gcr3(domain, pasid); 3542 spin_unlock_irqrestore(&domain->lock, flags); 3543 3544 return ret; 3545} 3546EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); 3547 3548int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, 3549 int status, int tag) 3550{ 3551 struct iommu_dev_data *dev_data; 3552 struct amd_iommu *iommu; 3553 struct iommu_cmd cmd; 3554 3555 INC_STATS_COUNTER(complete_ppr); 3556 3557 dev_data = get_dev_data(&pdev->dev); 3558 iommu = amd_iommu_rlookup_table[dev_data->devid]; 3559 3560 build_complete_ppr(&cmd, dev_data->devid, pasid, status, 3561 tag, dev_data->pri_tlp); 3562 3563 return iommu_queue_command(iommu, &cmd); 3564} 3565EXPORT_SYMBOL(amd_iommu_complete_ppr); 3566 3567struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) 3568{ 3569 struct protection_domain *domain; 3570 3571 domain = get_domain(&pdev->dev); 3572 if (IS_ERR(domain)) 3573 return NULL; 3574 3575 /* Only return IOMMUv2 domains */ 3576 if (!(domain->flags & PD_IOMMUV2_MASK)) 3577 return NULL; 3578 3579 return domain->iommu_domain; 3580} 3581EXPORT_SYMBOL(amd_iommu_get_v2_domain); 3582 3583void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) 3584{ 3585 struct iommu_dev_data *dev_data; 3586 3587 if (!amd_iommu_v2_supported()) 3588 return; 3589 3590 dev_data = get_dev_data(&pdev->dev); 3591 dev_data->errata |= (1 << erratum); 3592} 3593EXPORT_SYMBOL(amd_iommu_enable_device_erratum); 3594 3595int amd_iommu_device_info(struct pci_dev *pdev, 3596 struct amd_iommu_device_info *info) 3597{ 3598 int max_pasids; 3599 int pos; 3600 3601 if (pdev == NULL || info == NULL) 3602 return -EINVAL; 3603 3604 if (!amd_iommu_v2_supported()) 3605 return -EINVAL; 3606 3607 memset(info, 0, sizeof(*info)); 3608 3609 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); 3610 if (pos) 3611 info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; 3612 3613 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 3614 if 
(pos) 3615 info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; 3616 3617 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); 3618 if (pos) { 3619 int features; 3620 3621 max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); 3622 max_pasids = min(max_pasids, (1 << 20)); 3623 3624 info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; 3625 info->max_pasids = min(pci_max_pasids(pdev), max_pasids); 3626 3627 features = pci_pasid_features(pdev); 3628 if (features & PCI_PASID_CAP_EXEC) 3629 info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; 3630 if (features & PCI_PASID_CAP_PRIV) 3631 info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; 3632 } 3633 3634 return 0; 3635} 3636EXPORT_SYMBOL(amd_iommu_device_info); 3637