/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}
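/*
 * Example of the AGAW arithmetic above: agaw 2 means a 4-level page
 * table (agaw_to_level(2) == 4) covering 30 + 2 * 9 == 48 bits of
 * address, which matches DEFAULT_DOMAIN_ADDRESS_WIDTH; each additional
 * level adds LEVEL_STRIDE (9) bits, i.e. a factor of 512.
 */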
static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}

static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}

static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return root_present(root) ?
		(struct context_entry *)phys_to_virt(root->val & VTD_PAGE_MASK) :
		NULL;
}
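/*
 * A root table is a single 4KiB page of 16-byte root entries, i.e.
 * ROOT_ENTRY_NR == 256 entries indexed by PCI bus number.  Each present
 * root entry points to a context table of 256 16-byte entries indexed
 * by devfn, so one (bus, devfn) pair selects exactly one context entry.
 */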
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: avail
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & (1 << 7));
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
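/*
 * A 4KiB page holds 512 of these 8-byte PTEs; first_pte_in_page() above
 * is how the range-walking loops later in this file notice that they
 * have crossed into a new page of PTEs, so that cache flushes can be
 * batched one PTE page at a time.
 */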
/*
 * This domain is a static identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine; more than one device
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)

/* define the limit of IOMMUs supported in each domain */
#ifdef CONFIG_X86
# define	IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
#else
# define	IOMMU_UNITS_SUPPORTED	64
#endif

struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
					/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	int segment;		/* PCI domain */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static struct iommu_ops intel_iommu_ops;

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			printk(KERN_INFO "Intel-IOMMU: enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable supported super page\n");
			intel_iommu_superpage = 0;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
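/*
 * Options are comma-separated and may be combined: booting with, say,
 * "intel_iommu=on,sp_off" enables DMA remapping but disables superpage
 * use.  Unrecognized tokens are silently skipped.
 */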
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}


static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
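/*
 * Example: if cap_sagaw() reports only 3- and 4-level support (bits 1
 * and 2 set), a request for the 48-bit default width lands directly on
 * agaw 2, and iommu_calculate_max_sagaw() also settles on agaw 2 after
 * scanning down from MAX_AGAW_WIDTH.
 */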
/* This function only returns a single iommu in a domain */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);

	iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
}

static void domain_update_iommu_snooping(struct dmar_domain *domain)
{
	int i;

	domain->iommu_snooping = 1;

	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_sc_support(g_iommus[i]->ecap)) {
			domain->iommu_snooping = 0;
			break;
		}
	}
}

static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		domain->iommu_superpage = 0;
		return;
	}

	/* set iommu_superpage to the smallest common denominator */
	for_each_active_iommu(iommu, drhd) {
		mask &= cap_super_page_val(iommu->cap);
		if (!mask)
			break;
	}
	domain->iommu_superpage = fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
	domain_update_iommu_superpage(domain);
}
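/*
 * device_to_iommu() matches a device to its DRHD unit either directly,
 * via a bridge whose secondary..subordinate bus range contains the
 * device's bus, or via an INCLUDE_ALL unit that covers every remaining
 * device on the segment.
 */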
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		if (segment != drhd->segment)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++) {
			if (drhd->devices[i] &&
			    drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;
			if (drhd->devices[i] &&
			    drhd->devices[i]->subordinate &&
			    drhd->devices[i]->subordinate->number <= bus &&
			    drhd->devices[i]->subordinate->subordinate >= bus)
				return drhd->iommu;
		}

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
						     u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)
				alloc_pgtable_page(iommu->node);
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
				    sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
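/*
 * pfn_to_dma_pte() walks (and, when needed, allocates) the page table
 * down to target_level: 0 means "stop at whatever leaf is present",
 * while target_level 1/2/... pins the walk to the level needed for a
 * 4KiB/2MiB/... mapping.
 */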
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int target_level)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);
	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
	parent = domain->pgd;

	while (level > 0) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			} else {
				dma_pte_addr(pte);
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	return pte;
}


/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (pte->val & DMA_PTE_LARGE_PAGE) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte; a tlb flush should follow */
static int dma_pte_clear_range(struct dmar_domain *domain,
			       unsigned long start_pfn,
			       unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;
	int order;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);

	order = (large_page - 1) * 9;
	return order;
}
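/*
 * Note on the return value above: (large_page - 1) * 9 converts the
 * last page-table level touched into a 4KiB-page order, so a caller
 * that just cleared 2MiB PTEs (level 2) gets back order 9.
 */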
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *first_pte, *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	unsigned long tmp;
	int large_page = 2;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start_pfn, level);

		/* If we can't even clear one PTE at this level, we're done */
		if (tmp + level_size(level) - 1 > last_pfn)
			return;

		do {
			large_page = level;
			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
			if (large_page > level)
				level = large_page + 1;
			if (!pte) {
				tmp = align_to_level(tmp + 1, level + 1);
				continue;
			}
			do {
				if (dma_pte_present(pte)) {
					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
					dma_clear_pte(pte);
				}
				pte++;
				tmp += level_size(level);
			} while (!first_pte_in_page(pte) &&
				 tmp + level_size(level) - 1 <= last_pfn);

			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);

		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
		level++;
	}
	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 sts;
	unsigned long flag;

	addr = iommu->root_entry;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
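/*
 * The invalidation routines below poll the hardware for completion via
 * IOMMU_WAIT_OP() (linux/intel-iommu.h), busy-waiting with the register
 * lock held and interrupts off; the macro panics if the hardware never
 * responds.
 */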
/* return value determines if we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			 (unsigned long long)DMA_TLB_IIRG(type),
			 (unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *iommu_support_dev_iotlb(
	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
{
	int found = 0;
	unsigned long flags;
	struct device_domain_info *info;
	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);

	if (!ecap_dev_iotlb_support(iommu->ecap))
		return NULL;

	if (!iommu->qi)
		return NULL;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link)
		if (info->bus == bus && info->devfn == devfn) {
			found = 1;
			break;
		}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (!found || !info->dev)
		return NULL;

	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
		return NULL;

	if (!dmar_find_matched_atsr_unit(info->dev))
		return NULL;

	info->iommu = iommu;

	return info;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	if (!info)
		return;

	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	if (!info->dev || !pci_ats_enabled(info->dev))
		return;

	pci_disable_ats(info->dev);
}
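/*
 * The source-id used below is the PCI requester ID: bus number in the
 * high byte, devfn in the low byte, so e.g. device 02:00.1 flushes
 * with sid 0x0201.
 */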
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->dev || !pci_ats_enabled(info->dev))
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = pci_ats_queue_depth(info->dev);
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
				  unsigned long pfn, unsigned int pages, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;

	BUG_ON(pages == 0);

	/*
	 * Fall back to domain-selective flush if there is no PSI support or
	 * the size is too big.
	 * PSI requires the page size to be 2^x, and the base address to be
	 * naturally aligned to that size.
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
					 DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr, mask,
					 DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present
	 * require a flush. However, the device IOTLB doesn't need to be
	 * flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
}
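/*
 * Example for the mask math above: flushing 7 pages rounds up to 8,
 * so mask == 3 and the hardware invalidates a naturally aligned
 * 8-page (32KiB) region containing the base address.
 */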
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}


static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
		 ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
				 GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain id 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}


static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);

void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;
	unsigned long flags;

	if ((iommu->domains) && (iommu->domain_ids)) {
		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
			domain = iommu->domains[i];
			clear_bit(i, iommu->domain_ids);

			spin_lock_irqsave(&domain->iommu_lock, flags);
			if (--domain->iommu_count == 0) {
				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
					vm_domain_exit(domain);
				else
					domain_exit(domain);
			}
			spin_unlock_irqrestore(&domain->iommu_lock, flags);
		}
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		irq_set_handler_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	g_iommus[iommu->seq_id] = NULL;

	/* if all iommus are freed, free g_iommus */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (g_iommus[i])
			break;
	}

	if (i == g_num_of_iommus)
		kfree(g_iommus);

	/* free context mapping */
	free_context_table(iommu);
}

static struct dmar_domain *alloc_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->nid = -1;
	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
	domain->flags = 0;

	return domain;
}

static int iommu_attach_domain(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num;
	unsigned long ndomains;
	unsigned long flags;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);

	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return -ENOMEM;
	}

	domain->id = num;
	set_bit(num, iommu->domain_ids);
	set_bit(iommu->seq_id, domain->iommu_bmp);
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_detach_domain(struct dmar_domain *domain,
				struct intel_iommu *iommu)
{
	unsigned long flags;
	int num, ndomains;
	int found = 0;

	spin_lock_irqsave(&iommu->lock, flags);
	ndomains = cap_ndoms(iommu->cap);
	for_each_set_bit(num, iommu->domain_ids, ndomains) {
		if (iommu->domains[num] == domain) {
			found = 1;
			break;
		}
	}

	if (found) {
		clear_bit(num, iommu->domain_ids);
		clear_bit(iommu->seq_id, domain->iommu_bmp);
		iommu->domains[num] = NULL;
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
			  &reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
			    IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		printk(KERN_ERR "Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				printk(KERN_ERR "Reserve iova failed\n");
				return -ENODEV;
			}
		}
	}
	return 0;
}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
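/*
 * Example: a 48-bit guest width is already a whole number of 9-bit
 * levels above the 12-bit page offset ((48 - 12) % 9 == 0) and is kept
 * as-is, while 40 bits rounds up to the next supportable width, 48.
 */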
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	domain->iommu_count = 1;
	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	/* Flush any lazy unmaps that may reference this domain */
	if (!intel_iommu_strict)
		flush_unmaps_timeout(0);

	domain_remove_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	for_each_active_iommu(iommu, drhd)
		if (test_bit(iommu->seq_id, domain->iommu_bmp))
			iommu_detach_domain(domain, iommu);

	free_domain_mem(domain);
}
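/*
 * Translation types programmed below: CONTEXT_TT_MULTI_LEVEL walks the
 * page tables for every request, CONTEXT_TT_DEV_IOTLB additionally lets
 * ATS-capable devices cache translations, and CONTEXT_TT_PASS_THROUGH
 * bypasses translation entirely.
 */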
static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
				      u8 bus, u8 devfn, int translation)
{
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;
	struct dma_pte *pgd;
	unsigned long num;
	unsigned long ndomains;
	int id;
	int agaw;
	struct device_domain_info *info = NULL;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);
	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
	       translation != CONTEXT_TT_MULTI_LEVEL);

	iommu = device_to_iommu(segment, bus, devfn);
	if (!iommu)
		return -ENODEV;

	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	id = domain->id;
	pgd = domain->pgd;

	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
		int found = 0;

		/* find an available domain id for this device in iommu */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == domain) {
				id = num;
				found = 1;
				break;
			}
		}

		if (found == 0) {
			num = find_first_zero_bit(iommu->domain_ids, ndomains);
			if (num >= ndomains) {
				spin_unlock_irqrestore(&iommu->lock, flags);
				printk(KERN_ERR "IOMMU: no free domain ids\n");
				return -EFAULT;
			}

			set_bit(num, iommu->domain_ids);
			iommu->domains[num] = domain;
			id = num;
		}

		/* Skip top levels of page tables for
		 * an iommu which has less agaw than the default.
		 * Unnecessary for PT mode.
		 */
		if (translation != CONTEXT_TT_PASS_THROUGH) {
			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd)) {
					spin_unlock_irqrestore(&iommu->lock, flags);
					return -ENOMEM;
				}
			}
		}
	}

	context_set_domain_id(context, id);

	if (translation != CONTEXT_TT_PASS_THROUGH) {
		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
		translation = info ? CONTEXT_TT_DEV_IOTLB :
				     CONTEXT_TT_MULTI_LEVEL;
	}
	/*
	 * In pass through mode, AW must be programmed to indicate the largest
	 * AGAW value supported by hardware. And ASR is ignored by hardware.
	 */
	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
		context_set_address_width(context, iommu->msagaw);
	else {
		context_set_address_root(context, virt_to_phys(pgd));
		context_set_address_width(context, iommu->agaw);
	}

	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);
	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
		domain->iommu_count++;
		if (domain->iommu_count == 1)
			domain->nid = iommu->node;
		domain_update_iommu_cap(domain);
	}
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
	return 0;
}

static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
		       int translation)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
					 pdev->bus->number, pdev->devfn,
					 translation);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain,
						 pci_domain_nr(parent->bus),
						 parent->bus->number,
						 parent->devfn, translation);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
		return domain_context_mapping_one(domain,
					pci_domain_nr(tmp->subordinate),
					tmp->subordinate->number, 0,
					translation);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
						  pci_domain_nr(tmp->bus),
						  tmp->bus->number,
						  tmp->devfn,
						  translation);
}
static int domain_context_mapped(struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return -ENODEV;

	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
					    parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (pci_is_pcie(tmp))
		return device_context_mapped(iommu, tmp->subordinate->number,
					     0);
	else
		return device_context_mapped(iommu, tmp->bus->number,
					     tmp->devfn);
}

/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}

/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
{
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}
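/*
 * Example: mapping 1024 pages whose IOVA and physical PFNs are both
 * 512-aligned returns level 2, so the first 2MiB can use a superpage;
 * a 511-page mapping stays at level 1 because it cannot fill one.
 */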
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned long sg_res;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	if (sg)
		sg_res = 0;
	else {
		sg_res = nr_pages + 1;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
			sg->dma_length = sg->length;
			pteval = page_to_phys(sg_page(sg)) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* It is a large page */
			if (largepage_lvl > 1)
				pteval |= DMA_PTE_LARGE_PAGE;
			else
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;

		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			static int dumps = 5;
			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
			       iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}

static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}

static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	if (!iommu)
		return;

	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
				   DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
				  struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

/*
 * find_domain
 * Note: struct pci_dev->dev.archdata.iommu stores the domain info.
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}
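/*
 * get_domain_for_dev() below either reuses the domain already attached
 * to the device, shares the domain of an upstream PCIe-to-PCI bridge,
 * or allocates and initializes a fresh one, rechecking under the lock
 * in case another CPU raced to the same device.
 */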
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;
	int segment;
	int ret;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	segment = pci_domain_nr(pdev->bus);

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (pci_is_pcie(dev_tmp)) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->segment == segment &&
			    info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, use it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	domain = alloc_domain();
	if (!domain)
		goto error;

	/* Allocate a new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
		       pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	ret = iommu_attach_domain(domain, iommu);
	if (ret) {
		free_domain_mem(domain);
		goto error;
	}

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->segment = segment;
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, use it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->segment == segment &&
			    tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
			spin_unlock_irqrestore(&device_domain_lock, flags);
		}
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->segment = segment;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}

static int iommu_identity_mapping;
#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
{
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		return -ENOMEM;
	}

	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
		 start, end, domain->id);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
				  DMA_PTE_READ|DMA_PTE_WRITE);
}
	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
		 start, end, domain->id);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
				  DMA_PTE_READ|DMA_PTE_WRITE);
}

static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820 and so didn't get set up
	   in si_domain to start with */
	if (domain == si_domain && hw_pass_through) {
		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
		       pci_name(pdev), start, end);
		return 0;
	}

	printk(KERN_INFO
	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
	       pci_name(pdev), start, end);

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			dmi_get_system_info(DMI_BIOS_VENDOR),
			dmi_get_system_info(DMI_BIOS_VERSION),
			dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	ret = iommu_domain_identity_map(domain, start, end);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
	if (ret)
		goto error;

	return 0;

 error:
	domain_exit(domain);
	return ret;
}

static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
					 struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
					  rmrr->end_address);
}

#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */

static int md_domain_init(struct dmar_domain *domain, int guest_width);

static int __init si_domain_init(int hw)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int nid, ret = 0;

	si_domain = alloc_domain();
	if (!si_domain)
		return -EFAULT;

	pr_debug("Identity
mapping domain is domain %d\n", si_domain->id); 2227 2228 for_each_active_iommu(iommu, drhd) { 2229 ret = iommu_attach_domain(si_domain, iommu); 2230 if (ret) { 2231 domain_exit(si_domain); 2232 return -EFAULT; 2233 } 2234 } 2235 2236 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2237 domain_exit(si_domain); 2238 return -EFAULT; 2239 } 2240 2241 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2242 2243 if (hw) 2244 return 0; 2245 2246 for_each_online_node(nid) { 2247 unsigned long start_pfn, end_pfn; 2248 int i; 2249 2250 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { 2251 ret = iommu_domain_identity_map(si_domain, 2252 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); 2253 if (ret) 2254 return ret; 2255 } 2256 } 2257 2258 return 0; 2259} 2260 2261static void domain_remove_one_dev_info(struct dmar_domain *domain, 2262 struct pci_dev *pdev); 2263static int identity_mapping(struct pci_dev *pdev) 2264{ 2265 struct device_domain_info *info; 2266 2267 if (likely(!iommu_identity_mapping)) 2268 return 0; 2269 2270 info = pdev->dev.archdata.iommu; 2271 if (info && info != DUMMY_DEVICE_DOMAIN_INFO) 2272 return (info->domain == si_domain); 2273 2274 return 0; 2275} 2276 2277static int domain_add_dev_info(struct dmar_domain *domain, 2278 struct pci_dev *pdev, 2279 int translation) 2280{ 2281 struct device_domain_info *info; 2282 unsigned long flags; 2283 int ret; 2284 2285 info = alloc_devinfo_mem(); 2286 if (!info) 2287 return -ENOMEM; 2288 2289 info->segment = pci_domain_nr(pdev->bus); 2290 info->bus = pdev->bus->number; 2291 info->devfn = pdev->devfn; 2292 info->dev = pdev; 2293 info->domain = domain; 2294 2295 spin_lock_irqsave(&device_domain_lock, flags); 2296 list_add(&info->link, &domain->devices); 2297 list_add(&info->global, &device_domain_list); 2298 pdev->dev.archdata.iommu = info; 2299 spin_unlock_irqrestore(&device_domain_lock, flags); 2300 2301 ret = domain_context_mapping(domain, pdev, translation); 2302 if (ret) { 2303 spin_lock_irqsave(&device_domain_lock, flags); 2304 list_del(&info->link); 2305 list_del(&info->global); 2306 pdev->dev.archdata.iommu = NULL; 2307 spin_unlock_irqrestore(&device_domain_lock, flags); 2308 free_devinfo_mem(info); 2309 return ret; 2310 } 2311 2312 return 0; 2313} 2314 2315static int iommu_should_identity_map(struct pci_dev *pdev, int startup) 2316{ 2317 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 2318 return 1; 2319 2320 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) 2321 return 1; 2322 2323 if (!(iommu_identity_mapping & IDENTMAP_ALL)) 2324 return 0; 2325 2326 /* 2327 * We want to start off with all devices in the 1:1 domain, and 2328 * take them out later if we find they can't access all of memory. 2329 * 2330 * However, we can't do this for PCI devices behind bridges, 2331 * because all PCI devices behind the same bridge will end up 2332 * with the same source-id on their transactions. 2333 * 2334 * Practically speaking, we can't change things around for these 2335 * devices at run-time, because we can't be sure there'll be no 2336 * DMA transactions in flight for any of their siblings. 2337 * 2338 * So PCI devices (unless they're on the root bus) as well as 2339 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of 2340 * the 1:1 domain, just in _case_ one of their siblings turns out 2341 * not to be able to map all of memory. 
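	 *
	 * For example, if a 32-bit-only device shares a PCIe-to-PCI bridge
	 * with a 64-bit-capable sibling, both devices' DMA arrives tagged
	 * with the bridge's source-id; pulling one of them out of the 1:1
	 * domain later would silently re-map the other as well.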
	 */
	if (!pci_is_pcie(pdev)) {
		if (!pci_is_root_bus(pdev->bus))
			return 0;
		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
			return 0;
	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
		return 0;

	/*
	 * At boot time, we don't yet know if devices will be 64-bit capable.
	 * Assume that they will -- if they turn out not to be, then we can
	 * take them out of the 1:1 domain later.
	 */
	if (!startup) {
		/*
		 * If the device's dma_mask is less than the system's memory
		 * size then this is not a candidate for identity mapping.
		 */
		u64 dma_mask = pdev->dma_mask;

		if (pdev->dev.coherent_dma_mask &&
		    pdev->dev.coherent_dma_mask < dma_mask)
			dma_mask = pdev->dev.coherent_dma_mask;

		return dma_mask >= dma_get_required_mask(&pdev->dev);
	}

	return 1;
}

static int __init iommu_prepare_static_identity_mapping(int hw)
{
	struct pci_dev *pdev = NULL;
	int ret;

	ret = si_domain_init(hw);
	if (ret)
		return -EFAULT;

	for_each_pci_dev(pdev) {
		if (iommu_should_identity_map(pdev, 1)) {
			ret = domain_add_dev_info(si_domain, pdev,
					     hw ? CONTEXT_TT_PASS_THROUGH :
						  CONTEXT_TT_MULTI_LEVEL);
			if (ret) {
				/* device not associated with an iommu */
				if (ret == -ENODEV)
					continue;
				return ret;
			}
			pr_info("IOMMU: %s identity mapping for device %s\n",
				hw ? "hardware" : "software", pci_name(pdev));
		}
	}

	return 0;
}

static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int i, ret;

	/*
	 * for each drhd
	 *    allocate root
	 *    initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		/*
		 * lock not needed as this is only incremented in the single
		 * threaded kernel __init code path; all other accesses are
		 * read-only
		 */
		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
			g_num_of_iommus++;
			continue;
		}
		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
			    IOMMU_UNITS_SUPPORTED);
	}

	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
			   GFP_KERNEL);
	if (!g_iommus) {
		printk(KERN_ERR "Allocating global iommu array failed\n");
		ret = -ENOMEM;
		goto error;
	}

	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		ret = -ENOMEM;
		goto error;
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;
		g_iommus[iommu->seq_id] = iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto error;

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMUs. Need to split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
		if (!ecap_pass_through(iommu->ecap))
			hw_pass_through = 0;
	}

	/*
	 * Start from a sane iommu hardware state.
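	 * That is: clear any faults left over from firmware or a previous
	 * kernel, and tear down a queued-invalidation setup that firmware
	 * may have left enabled, before re-enabling it ourselves below.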
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		/*
		 * If the queued invalidation is already initialized by us
		 * (for example, while enabling interrupt-remapping) then
		 * we've already got things rolling from a sane state.
		 */
		if (iommu->qi)
			continue;

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already enabled
		 * before OS handover.
		 */
		dmar_disable_qi(iommu);
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		if (dmar_enable_qi(iommu)) {
			/*
			 * Queued Invalidate not enabled, use Register Based
			 * Invalidate
			 */
			iommu->flush.flush_context = __iommu_flush_context;
			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
			       "invalidation\n",
			       iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		} else {
			iommu->flush.flush_context = qi_flush_context;
			iommu->flush.flush_iotlb = qi_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
			       "invalidation\n",
			       iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		}
	}

	if (iommu_pass_through)
		iommu_identity_mapping |= IDENTMAP_ALL;

#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
	iommu_identity_mapping |= IDENTMAP_GFX;
#endif

	check_tylersburg_isoch();

	/*
	 * If identity mapping was requested for any class of device
	 * (pass-through, gfx, azalia), set up the static identity domain
	 * and context entries for the matching devices now.
	 */
	if (iommu_identity_mapping) {
		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
		if (ret) {
			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
			goto error;
		}
	}
	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
	for_each_rmrr_units(rmrr) {
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/*
			 * Some BIOSes list non-existent devices in the DMAR
			 * table.
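			 * Such entries show up as NULL pointers; skip them
			 * below.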
2566 */ 2567 if (!pdev) 2568 continue; 2569 ret = iommu_prepare_rmrr_dev(rmrr, pdev); 2570 if (ret) 2571 printk(KERN_ERR 2572 "IOMMU: mapping reserved region failed\n"); 2573 } 2574 } 2575 2576 iommu_prepare_isa(); 2577 2578 /* 2579 * for each drhd 2580 * enable fault log 2581 * global invalidate context cache 2582 * global invalidate iotlb 2583 * enable translation 2584 */ 2585 for_each_drhd_unit(drhd) { 2586 if (drhd->ignored) { 2587 /* 2588 * we always have to disable PMRs or DMA may fail on 2589 * this device 2590 */ 2591 if (force_on) 2592 iommu_disable_protect_mem_regions(drhd->iommu); 2593 continue; 2594 } 2595 iommu = drhd->iommu; 2596 2597 iommu_flush_write_buffer(iommu); 2598 2599 ret = dmar_set_interrupt(iommu); 2600 if (ret) 2601 goto error; 2602 2603 iommu_set_root_entry(iommu); 2604 2605 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 2606 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 2607 2608 ret = iommu_enable_translation(iommu); 2609 if (ret) 2610 goto error; 2611 2612 iommu_disable_protect_mem_regions(iommu); 2613 } 2614 2615 return 0; 2616error: 2617 for_each_drhd_unit(drhd) { 2618 if (drhd->ignored) 2619 continue; 2620 iommu = drhd->iommu; 2621 free_iommu(iommu); 2622 } 2623 kfree(g_iommus); 2624 return ret; 2625} 2626 2627/* This takes a number of _MM_ pages, not VTD pages */ 2628static struct iova *intel_alloc_iova(struct device *dev, 2629 struct dmar_domain *domain, 2630 unsigned long nrpages, uint64_t dma_mask) 2631{ 2632 struct pci_dev *pdev = to_pci_dev(dev); 2633 struct iova *iova = NULL; 2634 2635 /* Restrict dma_mask to the width that the iommu can handle */ 2636 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); 2637 2638 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { 2639 /* 2640 * First try to allocate an io virtual address in 2641 * DMA_BIT_MASK(32) and if that fails then try allocating 2642 * from higher range 2643 */ 2644 iova = alloc_iova(&domain->iovad, nrpages, 2645 IOVA_PFN(DMA_BIT_MASK(32)), 1); 2646 if (iova) 2647 return iova; 2648 } 2649 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); 2650 if (unlikely(!iova)) { 2651 printk(KERN_ERR "Allocating %ld-page iova for %s failed", 2652 nrpages, pci_name(pdev)); 2653 return NULL; 2654 } 2655 2656 return iova; 2657} 2658 2659static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) 2660{ 2661 struct dmar_domain *domain; 2662 int ret; 2663 2664 domain = get_domain_for_dev(pdev, 2665 DEFAULT_DOMAIN_ADDRESS_WIDTH); 2666 if (!domain) { 2667 printk(KERN_ERR 2668 "Allocating domain for %s failed", pci_name(pdev)); 2669 return NULL; 2670 } 2671 2672 /* make sure context mapping is ok */ 2673 if (unlikely(!domain_context_mapped(pdev))) { 2674 ret = domain_context_mapping(domain, pdev, 2675 CONTEXT_TT_MULTI_LEVEL); 2676 if (ret) { 2677 printk(KERN_ERR 2678 "Domain context map for %s failed", 2679 pci_name(pdev)); 2680 return NULL; 2681 } 2682 } 2683 2684 return domain; 2685} 2686 2687static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) 2688{ 2689 struct device_domain_info *info; 2690 2691 /* No lock here, assumes no domain exit in normal case */ 2692 info = dev->dev.archdata.iommu; 2693 if (likely(info)) 2694 return info->domain; 2695 2696 return __get_valid_domain_for_dev(dev); 2697} 2698 2699static int iommu_dummy(struct pci_dev *pdev) 2700{ 2701 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; 2702} 2703 2704/* Check if the pdev needs to go through non-identity map and unmap 
   process. */
static int iommu_no_mapping(struct device *dev)
{
	struct pci_dev *pdev;
	int found;

	if (unlikely(dev->bus != &pci_bus_type))
		return 1;

	pdev = to_pci_dev(dev);
	if (iommu_dummy(pdev))
		return 1;

	if (!iommu_identity_mapping)
		return 0;

	found = identity_mapping(pdev);
	if (found) {
		if (iommu_should_identity_map(pdev, 0))
			return 1;
		else {
			/*
			 * The 32-bit device is removed from si_domain and
			 * falls back to non-identity mapping.
			 */
			domain_remove_one_dev_info(si_domain, pdev);
			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
			       pci_name(pdev));
			return 0;
		}
	} else {
		/*
		 * When a 64-bit DMA device is detached from a VM, the device
		 * is put back into si_domain for identity mapping.
		 */
		if (iommu_should_identity_map(pdev, 0)) {
			int ret;
			ret = domain_add_dev_info(si_domain, pdev,
						  hw_pass_through ?
						  CONTEXT_TT_PASS_THROUGH :
						  CONTEXT_TT_MULTI_LEVEL);
			if (!ret) {
				printk(KERN_INFO "64bit %s uses identity mapping\n",
				       pci_name(pdev));
				return 1;
			}
		}
	}

	return 0;
}

static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
				     size_t size, int dir, u64 dma_mask)
{
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	phys_addr_t start_paddr;
	struct iova *iova;
	int prot = 0;
	int ret;
	struct intel_iommu *iommu;
	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;

	BUG_ON(dir == DMA_NONE);

	if (iommu_no_mapping(hwdev))
		return paddr;

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);
	size = aligned_nrpages(paddr, size);

	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
	if (!iova)
		goto error;

	/*
	 * Check if DMAR supports zero-length reads on write-only
	 * mappings.
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * The range paddr to (paddr + size) might span partial pages; we
	 * should map the whole page. Note: if two parts of one page are
	 * mapped separately, we might have two guest addresses mapping to
	 * the same host paddr, but this is not a big problem.
	 */
	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
				 mm_to_dma_pfn(paddr_pfn), size, prot);
	if (ret)
		goto error;

	/* it's a non-present to present mapping.
Only flush if caching mode */ 2805 if (cap_caching_mode(iommu->cap)) 2806 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1); 2807 else 2808 iommu_flush_write_buffer(iommu); 2809 2810 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; 2811 start_paddr += paddr & ~PAGE_MASK; 2812 return start_paddr; 2813 2814error: 2815 if (iova) 2816 __free_iova(&domain->iovad, iova); 2817 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", 2818 pci_name(pdev), size, (unsigned long long)paddr, dir); 2819 return 0; 2820} 2821 2822static dma_addr_t intel_map_page(struct device *dev, struct page *page, 2823 unsigned long offset, size_t size, 2824 enum dma_data_direction dir, 2825 struct dma_attrs *attrs) 2826{ 2827 return __intel_map_single(dev, page_to_phys(page) + offset, size, 2828 dir, to_pci_dev(dev)->dma_mask); 2829} 2830 2831static void flush_unmaps(void) 2832{ 2833 int i, j; 2834 2835 timer_on = 0; 2836 2837 /* just flush them all */ 2838 for (i = 0; i < g_num_of_iommus; i++) { 2839 struct intel_iommu *iommu = g_iommus[i]; 2840 if (!iommu) 2841 continue; 2842 2843 if (!deferred_flush[i].next) 2844 continue; 2845 2846 /* In caching mode, global flushes turn emulation expensive */ 2847 if (!cap_caching_mode(iommu->cap)) 2848 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2849 DMA_TLB_GLOBAL_FLUSH); 2850 for (j = 0; j < deferred_flush[i].next; j++) { 2851 unsigned long mask; 2852 struct iova *iova = deferred_flush[i].iova[j]; 2853 struct dmar_domain *domain = deferred_flush[i].domain[j]; 2854 2855 /* On real hardware multiple invalidations are expensive */ 2856 if (cap_caching_mode(iommu->cap)) 2857 iommu_flush_iotlb_psi(iommu, domain->id, 2858 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0); 2859 else { 2860 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); 2861 iommu_flush_dev_iotlb(deferred_flush[i].domain[j], 2862 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); 2863 } 2864 __free_iova(&deferred_flush[i].domain[j]->iovad, iova); 2865 } 2866 deferred_flush[i].next = 0; 2867 } 2868 2869 list_size = 0; 2870} 2871 2872static void flush_unmaps_timeout(unsigned long data) 2873{ 2874 unsigned long flags; 2875 2876 spin_lock_irqsave(&async_umap_flush_lock, flags); 2877 flush_unmaps(); 2878 spin_unlock_irqrestore(&async_umap_flush_lock, flags); 2879} 2880 2881static void add_unmap(struct dmar_domain *dom, struct iova *iova) 2882{ 2883 unsigned long flags; 2884 int next, iommu_id; 2885 struct intel_iommu *iommu; 2886 2887 spin_lock_irqsave(&async_umap_flush_lock, flags); 2888 if (list_size == HIGH_WATER_MARK) 2889 flush_unmaps(); 2890 2891 iommu = domain_get_iommu(dom); 2892 iommu_id = iommu->seq_id; 2893 2894 next = deferred_flush[iommu_id].next; 2895 deferred_flush[iommu_id].domain[next] = dom; 2896 deferred_flush[iommu_id].iova[next] = iova; 2897 deferred_flush[iommu_id].next++; 2898 2899 if (!timer_on) { 2900 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); 2901 timer_on = 1; 2902 } 2903 list_size++; 2904 spin_unlock_irqrestore(&async_umap_flush_lock, flags); 2905} 2906 2907static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, 2908 size_t size, enum dma_data_direction dir, 2909 struct dma_attrs *attrs) 2910{ 2911 struct pci_dev *pdev = to_pci_dev(dev); 2912 struct dmar_domain *domain; 2913 unsigned long start_pfn, last_pfn; 2914 struct iova *iova; 2915 struct intel_iommu *iommu; 2916 2917 if (iommu_no_mapping(dev)) 2918 return; 2919 2920 domain = find_domain(pdev); 2921 BUG_ON(!domain); 2922 2923 iommu = domain_get_iommu(domain); 
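	/*
	 * iova->pfn_lo/pfn_hi below are in MM-page units, while
	 * dma_pte_clear_range() works in VT-d (4KiB) pages; e.g. with
	 * 64KiB kernel pages each MM pfn spans 16 VT-d pfns, hence the
	 * mm_to_dma_pfn() conversions.
	 */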
2924 2925 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2926 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", 2927 (unsigned long long)dev_addr)) 2928 return; 2929 2930 start_pfn = mm_to_dma_pfn(iova->pfn_lo); 2931 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; 2932 2933 pr_debug("Device %s unmapping: pfn %lx-%lx\n", 2934 pci_name(pdev), start_pfn, last_pfn); 2935 2936 /* clear the whole page */ 2937 dma_pte_clear_range(domain, start_pfn, last_pfn); 2938 2939 /* free page tables */ 2940 dma_pte_free_pagetable(domain, start_pfn, last_pfn); 2941 2942 if (intel_iommu_strict) { 2943 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, 2944 last_pfn - start_pfn + 1, 0); 2945 /* free iova */ 2946 __free_iova(&domain->iovad, iova); 2947 } else { 2948 add_unmap(domain, iova); 2949 /* 2950 * queue up the release of the unmap to save the 1/6th of the 2951 * cpu used up by the iotlb flush operation... 2952 */ 2953 } 2954} 2955 2956static void *intel_alloc_coherent(struct device *hwdev, size_t size, 2957 dma_addr_t *dma_handle, gfp_t flags, 2958 struct dma_attrs *attrs) 2959{ 2960 void *vaddr; 2961 int order; 2962 2963 size = PAGE_ALIGN(size); 2964 order = get_order(size); 2965 2966 if (!iommu_no_mapping(hwdev)) 2967 flags &= ~(GFP_DMA | GFP_DMA32); 2968 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) { 2969 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32)) 2970 flags |= GFP_DMA; 2971 else 2972 flags |= GFP_DMA32; 2973 } 2974 2975 vaddr = (void *)__get_free_pages(flags, order); 2976 if (!vaddr) 2977 return NULL; 2978 memset(vaddr, 0, size); 2979 2980 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size, 2981 DMA_BIDIRECTIONAL, 2982 hwdev->coherent_dma_mask); 2983 if (*dma_handle) 2984 return vaddr; 2985 free_pages((unsigned long)vaddr, order); 2986 return NULL; 2987} 2988 2989static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, 2990 dma_addr_t dma_handle, struct dma_attrs *attrs) 2991{ 2992 int order; 2993 2994 size = PAGE_ALIGN(size); 2995 order = get_order(size); 2996 2997 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); 2998 free_pages((unsigned long)vaddr, order); 2999} 3000 3001static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, 3002 int nelems, enum dma_data_direction dir, 3003 struct dma_attrs *attrs) 3004{ 3005 struct pci_dev *pdev = to_pci_dev(hwdev); 3006 struct dmar_domain *domain; 3007 unsigned long start_pfn, last_pfn; 3008 struct iova *iova; 3009 struct intel_iommu *iommu; 3010 3011 if (iommu_no_mapping(hwdev)) 3012 return; 3013 3014 domain = find_domain(pdev); 3015 BUG_ON(!domain); 3016 3017 iommu = domain_get_iommu(domain); 3018 3019 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 3020 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n", 3021 (unsigned long long)sglist[0].dma_address)) 3022 return; 3023 3024 start_pfn = mm_to_dma_pfn(iova->pfn_lo); 3025 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; 3026 3027 /* clear the whole page */ 3028 dma_pte_clear_range(domain, start_pfn, last_pfn); 3029 3030 /* free page tables */ 3031 dma_pte_free_pagetable(domain, start_pfn, last_pfn); 3032 3033 if (intel_iommu_strict) { 3034 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, 3035 last_pfn - start_pfn + 1, 0); 3036 /* free iova */ 3037 __free_iova(&domain->iovad, iova); 3038 } else { 3039 add_unmap(domain, iova); 3040 /* 3041 * queue up the release of the unmap to save the 1/6th of the 3042 * cpu used up by the iotlb flush 
operation... 3043 */ 3044 } 3045} 3046 3047static int intel_nontranslate_map_sg(struct device *hddev, 3048 struct scatterlist *sglist, int nelems, int dir) 3049{ 3050 int i; 3051 struct scatterlist *sg; 3052 3053 for_each_sg(sglist, sg, nelems, i) { 3054 BUG_ON(!sg_page(sg)); 3055 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; 3056 sg->dma_length = sg->length; 3057 } 3058 return nelems; 3059} 3060 3061static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 3062 enum dma_data_direction dir, struct dma_attrs *attrs) 3063{ 3064 int i; 3065 struct pci_dev *pdev = to_pci_dev(hwdev); 3066 struct dmar_domain *domain; 3067 size_t size = 0; 3068 int prot = 0; 3069 struct iova *iova = NULL; 3070 int ret; 3071 struct scatterlist *sg; 3072 unsigned long start_vpfn; 3073 struct intel_iommu *iommu; 3074 3075 BUG_ON(dir == DMA_NONE); 3076 if (iommu_no_mapping(hwdev)) 3077 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 3078 3079 domain = get_valid_domain_for_dev(pdev); 3080 if (!domain) 3081 return 0; 3082 3083 iommu = domain_get_iommu(domain); 3084 3085 for_each_sg(sglist, sg, nelems, i) 3086 size += aligned_nrpages(sg->offset, sg->length); 3087 3088 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), 3089 pdev->dma_mask); 3090 if (!iova) { 3091 sglist->dma_length = 0; 3092 return 0; 3093 } 3094 3095 /* 3096 * Check if DMAR supports zero-length reads on write only 3097 * mappings.. 3098 */ 3099 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3100 !cap_zlr(iommu->cap)) 3101 prot |= DMA_PTE_READ; 3102 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3103 prot |= DMA_PTE_WRITE; 3104 3105 start_vpfn = mm_to_dma_pfn(iova->pfn_lo); 3106 3107 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); 3108 if (unlikely(ret)) { 3109 /* clear the page */ 3110 dma_pte_clear_range(domain, start_vpfn, 3111 start_vpfn + size - 1); 3112 /* free page tables */ 3113 dma_pte_free_pagetable(domain, start_vpfn, 3114 start_vpfn + size - 1); 3115 /* free iova */ 3116 __free_iova(&domain->iovad, iova); 3117 return 0; 3118 } 3119 3120 /* it's a non-present to present mapping. 
Only flush if caching mode */ 3121 if (cap_caching_mode(iommu->cap)) 3122 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1); 3123 else 3124 iommu_flush_write_buffer(iommu); 3125 3126 return nelems; 3127} 3128 3129static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) 3130{ 3131 return !dma_addr; 3132} 3133 3134struct dma_map_ops intel_dma_ops = { 3135 .alloc = intel_alloc_coherent, 3136 .free = intel_free_coherent, 3137 .map_sg = intel_map_sg, 3138 .unmap_sg = intel_unmap_sg, 3139 .map_page = intel_map_page, 3140 .unmap_page = intel_unmap_page, 3141 .mapping_error = intel_mapping_error, 3142}; 3143 3144static inline int iommu_domain_cache_init(void) 3145{ 3146 int ret = 0; 3147 3148 iommu_domain_cache = kmem_cache_create("iommu_domain", 3149 sizeof(struct dmar_domain), 3150 0, 3151 SLAB_HWCACHE_ALIGN, 3152 3153 NULL); 3154 if (!iommu_domain_cache) { 3155 printk(KERN_ERR "Couldn't create iommu_domain cache\n"); 3156 ret = -ENOMEM; 3157 } 3158 3159 return ret; 3160} 3161 3162static inline int iommu_devinfo_cache_init(void) 3163{ 3164 int ret = 0; 3165 3166 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", 3167 sizeof(struct device_domain_info), 3168 0, 3169 SLAB_HWCACHE_ALIGN, 3170 NULL); 3171 if (!iommu_devinfo_cache) { 3172 printk(KERN_ERR "Couldn't create devinfo cache\n"); 3173 ret = -ENOMEM; 3174 } 3175 3176 return ret; 3177} 3178 3179static inline int iommu_iova_cache_init(void) 3180{ 3181 int ret = 0; 3182 3183 iommu_iova_cache = kmem_cache_create("iommu_iova", 3184 sizeof(struct iova), 3185 0, 3186 SLAB_HWCACHE_ALIGN, 3187 NULL); 3188 if (!iommu_iova_cache) { 3189 printk(KERN_ERR "Couldn't create iova cache\n"); 3190 ret = -ENOMEM; 3191 } 3192 3193 return ret; 3194} 3195 3196static int __init iommu_init_mempool(void) 3197{ 3198 int ret; 3199 ret = iommu_iova_cache_init(); 3200 if (ret) 3201 return ret; 3202 3203 ret = iommu_domain_cache_init(); 3204 if (ret) 3205 goto domain_error; 3206 3207 ret = iommu_devinfo_cache_init(); 3208 if (!ret) 3209 return ret; 3210 3211 kmem_cache_destroy(iommu_domain_cache); 3212domain_error: 3213 kmem_cache_destroy(iommu_iova_cache); 3214 3215 return -ENOMEM; 3216} 3217 3218static void __init iommu_exit_mempool(void) 3219{ 3220 kmem_cache_destroy(iommu_devinfo_cache); 3221 kmem_cache_destroy(iommu_domain_cache); 3222 kmem_cache_destroy(iommu_iova_cache); 3223 3224} 3225 3226static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 3227{ 3228 struct dmar_drhd_unit *drhd; 3229 u32 vtbar; 3230 int rc; 3231 3232 /* We know that this device on this chipset has its own IOMMU. 3233 * If we find it under a different IOMMU, then the BIOS is lying 3234 * to us. Hope that the IOMMU for this device is actually 3235 * disabled, and it needs no translation... 
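	 * The check below reads the chipset VTBAR register (config offset
	 * 0xb0 of device 00.0 on this device's bus) and verifies that the
	 * DRHD the BIOS matched to this device really lives at
	 * VTBAR + 0xa000; if not, the device is marked as needing no
	 * translation.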
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return;
	}
	vtbar &= 0xffff0000;

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
			    TAINT_FIRMWARE_WORKAROUND,
			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);

static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
			    !IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* This IOMMU has *only* gfx devices. Either bypass it or
		   set the gfx_mapped flag, as appropriate */
		if (dmar_map_gfx) {
			intel_iommu_gfx_mapped = 1;
		} else {
			drhd->ignored = 1;
			for (i = 0; i < drhd->devices_cnt; i++) {
				if (!drhd->devices[i])
					continue;
				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
			}
		}
	}
}

#ifdef CONFIG_SUSPEND
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	for_each_active_iommu(iommu, drhd)
		if (iommu->qi)
			dmar_reenable_qi(iommu);

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
		if (iommu_enable_translation(iommu))
			return 1;
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}

static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}

static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	for_each_active_iommu(iommu, drhd) {
		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
					     GFP_ATOMIC);
		if (!iommu->iommu_state)
			goto nomem;
	}

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;

nomem:
	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);

	return -ENOMEM;
}

static void iommu_resume(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR cannot resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR cannot resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
			iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
			iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
			iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
			iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}

	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);
}

static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_SUSPEND */

LIST_HEAD(dmar_rmrr_units);

static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
{
	list_add(&rmrr->list, &dmar_rmrr_units);
}


int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
{
	struct acpi_dmar_reserved_memory *rmrr;
	struct dmar_rmrr_unit *rmrru;

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		return -ENOMEM;

	rmrru->hdr = header;
	rmrr = (struct acpi_dmar_reserved_memory *)header;
	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;

	dmar_register_rmrr_unit(rmrru);
	return 0;
}

static int __init
rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
{
	struct acpi_dmar_reserved_memory *rmrr;
	int ret;

	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
				   ((void *)rmrr) + rmrr->header.length,
				   &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);

	if (ret || (rmrru->devices_cnt == 0)) {
		list_del(&rmrru->list);
		kfree(rmrru);
	}
	return ret;
}

static LIST_HEAD(dmar_atsr_units);

int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	atsru->hdr = hdr;
	atsru->include_all = atsr->flags & 0x1;

	list_add(&atsru->list, &dmar_atsr_units);

	return 0;
}

static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
{
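	/*
	 * Resolve the ATSR device-scope list into pci_dev pointers.  An
	 * INCLUDE_ALL ATSR carries no explicit scope, so there is nothing
	 * to parse for it.
	 */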
int rc; 3503 struct acpi_dmar_atsr *atsr; 3504 3505 if (atsru->include_all) 3506 return 0; 3507 3508 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 3509 rc = dmar_parse_dev_scope((void *)(atsr + 1), 3510 (void *)atsr + atsr->header.length, 3511 &atsru->devices_cnt, &atsru->devices, 3512 atsr->segment); 3513 if (rc || !atsru->devices_cnt) { 3514 list_del(&atsru->list); 3515 kfree(atsru); 3516 } 3517 3518 return rc; 3519} 3520 3521int dmar_find_matched_atsr_unit(struct pci_dev *dev) 3522{ 3523 int i; 3524 struct pci_bus *bus; 3525 struct acpi_dmar_atsr *atsr; 3526 struct dmar_atsr_unit *atsru; 3527 3528 dev = pci_physfn(dev); 3529 3530 list_for_each_entry(atsru, &dmar_atsr_units, list) { 3531 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 3532 if (atsr->segment == pci_domain_nr(dev->bus)) 3533 goto found; 3534 } 3535 3536 return 0; 3537 3538found: 3539 for (bus = dev->bus; bus; bus = bus->parent) { 3540 struct pci_dev *bridge = bus->self; 3541 3542 if (!bridge || !pci_is_pcie(bridge) || 3543 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) 3544 return 0; 3545 3546 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { 3547 for (i = 0; i < atsru->devices_cnt; i++) 3548 if (atsru->devices[i] == bridge) 3549 return 1; 3550 break; 3551 } 3552 } 3553 3554 if (atsru->include_all) 3555 return 1; 3556 3557 return 0; 3558} 3559 3560int __init dmar_parse_rmrr_atsr_dev(void) 3561{ 3562 struct dmar_rmrr_unit *rmrr, *rmrr_n; 3563 struct dmar_atsr_unit *atsr, *atsr_n; 3564 int ret = 0; 3565 3566 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { 3567 ret = rmrr_parse_dev(rmrr); 3568 if (ret) 3569 return ret; 3570 } 3571 3572 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { 3573 ret = atsr_parse_dev(atsr); 3574 if (ret) 3575 return ret; 3576 } 3577 3578 return ret; 3579} 3580 3581/* 3582 * Here we only respond to action of unbound device from driver. 3583 * 3584 * Added device is not attached to its DMAR domain here yet. That will happen 3585 * when mapping the device to iova. 
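 * (Concretely: only BUS_NOTIFY_UNBOUND_DRIVER is handled below, and only
 * when iommu_pass_through is not set; all other events fall through.)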
3586 */ 3587static int device_notifier(struct notifier_block *nb, 3588 unsigned long action, void *data) 3589{ 3590 struct device *dev = data; 3591 struct pci_dev *pdev = to_pci_dev(dev); 3592 struct dmar_domain *domain; 3593 3594 if (iommu_no_mapping(dev)) 3595 return 0; 3596 3597 domain = find_domain(pdev); 3598 if (!domain) 3599 return 0; 3600 3601 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) { 3602 domain_remove_one_dev_info(domain, pdev); 3603 3604 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && 3605 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && 3606 list_empty(&domain->devices)) 3607 domain_exit(domain); 3608 } 3609 3610 return 0; 3611} 3612 3613static struct notifier_block device_nb = { 3614 .notifier_call = device_notifier, 3615}; 3616 3617int __init intel_iommu_init(void) 3618{ 3619 int ret = 0; 3620 3621 /* VT-d is required for a TXT/tboot launch, so enforce that */ 3622 force_on = tboot_force_iommu(); 3623 3624 if (dmar_table_init()) { 3625 if (force_on) 3626 panic("tboot: Failed to initialize DMAR table\n"); 3627 return -ENODEV; 3628 } 3629 3630 if (dmar_dev_scope_init() < 0) { 3631 if (force_on) 3632 panic("tboot: Failed to initialize DMAR device scope\n"); 3633 return -ENODEV; 3634 } 3635 3636 if (no_iommu || dmar_disabled) 3637 return -ENODEV; 3638 3639 if (iommu_init_mempool()) { 3640 if (force_on) 3641 panic("tboot: Failed to initialize iommu memory\n"); 3642 return -ENODEV; 3643 } 3644 3645 if (list_empty(&dmar_rmrr_units)) 3646 printk(KERN_INFO "DMAR: No RMRR found\n"); 3647 3648 if (list_empty(&dmar_atsr_units)) 3649 printk(KERN_INFO "DMAR: No ATSR found\n"); 3650 3651 if (dmar_init_reserved_ranges()) { 3652 if (force_on) 3653 panic("tboot: Failed to reserve iommu ranges\n"); 3654 return -ENODEV; 3655 } 3656 3657 init_no_remapping_devices(); 3658 3659 ret = init_dmars(); 3660 if (ret) { 3661 if (force_on) 3662 panic("tboot: Failed to initialize DMARs\n"); 3663 printk(KERN_ERR "IOMMU: dmar init failed\n"); 3664 put_iova_domain(&reserved_iova_list); 3665 iommu_exit_mempool(); 3666 return ret; 3667 } 3668 printk(KERN_INFO 3669 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); 3670 3671 init_timer(&unmap_timer); 3672#ifdef CONFIG_SWIOTLB 3673 swiotlb = 0; 3674#endif 3675 dma_ops = &intel_dma_ops; 3676 3677 init_iommu_pm_ops(); 3678 3679 bus_set_iommu(&pci_bus_type, &intel_iommu_ops); 3680 3681 bus_register_notifier(&pci_bus_type, &device_nb); 3682 3683 intel_iommu_enabled = 1; 3684 3685 return 0; 3686} 3687 3688static void iommu_detach_dependent_devices(struct intel_iommu *iommu, 3689 struct pci_dev *pdev) 3690{ 3691 struct pci_dev *tmp, *parent; 3692 3693 if (!iommu || !pdev) 3694 return; 3695 3696 /* dependent device detach */ 3697 tmp = pci_find_upstream_pcie_bridge(pdev); 3698 /* Secondary interface's bus number and devfn 0 */ 3699 if (tmp) { 3700 parent = pdev->bus->self; 3701 while (parent != tmp) { 3702 iommu_detach_dev(iommu, parent->bus->number, 3703 parent->devfn); 3704 parent = parent->bus->self; 3705 } 3706 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ 3707 iommu_detach_dev(iommu, 3708 tmp->subordinate->number, 0); 3709 else /* this is a legacy PCI bridge */ 3710 iommu_detach_dev(iommu, tmp->bus->number, 3711 tmp->devfn); 3712 } 3713} 3714 3715static void domain_remove_one_dev_info(struct dmar_domain *domain, 3716 struct pci_dev *pdev) 3717{ 3718 struct device_domain_info *info; 3719 struct intel_iommu *iommu; 3720 unsigned long flags; 3721 int found = 0; 3722 struct list_head *entry, *tmp; 3723 3724 iommu 
= device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_safe(entry, tmp, &domain->devices) {
		info = list_entry(entry, struct device_domain_info, link);
		if (info->segment == pci_domain_nr(pdev->bus) &&
		    info->bus == pdev->bus->number &&
		    info->devfn == pdev->devfn) {
			list_del(&info->link);
			list_del(&info->global);
			if (info->dev)
				info->dev->dev.archdata.iommu = NULL;
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_disable_dev_iotlb(info);
			iommu_detach_dev(iommu, info->bus, info->devfn);
			iommu_detach_dependent_devices(iommu, pdev);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there are no other devices under the same iommu
		 * owned by this domain, clear this iommu from iommu_bmp
		 * and update the iommu count and coherency
		 */
		if (iommu == device_to_iommu(info->segment, info->bus,
					     info->devfn))
			found = 1;
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_cap(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);

		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
			spin_lock_irqsave(&iommu->lock, tmp_flags);
			clear_bit(domain->id, iommu->domain_ids);
			iommu->domains[domain->id] = NULL;
			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
		}
	}
}

static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags1, flags2;

	spin_lock_irqsave(&device_domain_lock, flags1);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
				  struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;

		spin_unlock_irqrestore(&device_domain_lock, flags1);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		iommu_detach_dependent_devices(iommu, info->dev);

		/* clear this iommu in iommu_bmp, update iommu count
		 * and capabilities
		 */
		spin_lock_irqsave(&domain->iommu_lock, flags2);
		if (test_and_clear_bit(iommu->seq_id,
				       domain->iommu_bmp)) {
			domain->iommu_count--;
			domain_update_iommu_cap(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags2);

		free_devinfo_mem(info);
		spin_lock_irqsave(&device_domain_lock, flags1);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags1);
}

/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;

static struct dmar_domain *iommu_alloc_vm_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->id = vm_domid++;
	domain->nid = -1;
	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
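	/*
	 * VM domain ids come from the private vm_domid counter above and
	 * are never programmed into context entries; they only identify
	 * the domain within the driver.
	 */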
	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

	return domain;
}

static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	INIT_LIST_HEAD(&domain->devices);

	domain->iommu_count = 0;
	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;
	domain->nid = -1;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}

static void iommu_free_vm_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	unsigned long i;
	unsigned long ndomains;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;

		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(i, iommu->domain_ids, ndomains) {
			if (iommu->domains[i] == domain) {
				spin_lock_irqsave(&iommu->lock, flags);
				clear_bit(i, iommu->domain_ids);
				iommu->domains[i] = NULL;
				spin_unlock_irqrestore(&iommu->lock, flags);
				break;
			}
		}
	}
}

static void vm_domain_exit(struct dmar_domain *domain)
{
	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	vm_domain_remove_all_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	iommu_free_vm_domain(domain);
	free_domain_mem(domain);
}

static int intel_iommu_domain_init(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain;

	dmar_domain = iommu_alloc_vm_domain();
	if (!dmar_domain) {
		printk(KERN_ERR
			"intel_iommu_domain_init: dmar_domain == NULL\n");
		return -ENOMEM;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_init() failed\n");
		vm_domain_exit(dmar_domain);
		return -ENOMEM;
	}
	domain_update_iommu_cap(dmar_domain);
	domain->priv = dmar_domain;

	return 0;
}

static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain->priv = NULL;
	vm_domain_exit(dmar_domain);
}

static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct intel_iommu *iommu;
	int addr_width;

	/* normally pdev is not mapped */
	if (unlikely(domain_context_mapped(pdev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(pdev);
		if (old_domain) {
			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
domain_remove_one_dev_info(old_domain, pdev); 3965 else 3966 domain_remove_dev_info(old_domain); 3967 } 3968 } 3969 3970 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, 3971 pdev->devfn); 3972 if (!iommu) 3973 return -ENODEV; 3974 3975 /* check if this iommu agaw is sufficient for max mapped address */ 3976 addr_width = agaw_to_width(iommu->agaw); 3977 if (addr_width > cap_mgaw(iommu->cap)) 3978 addr_width = cap_mgaw(iommu->cap); 3979 3980 if (dmar_domain->max_addr > (1LL << addr_width)) { 3981 printk(KERN_ERR "%s: iommu width (%d) is not " 3982 "sufficient for the mapped address (%llx)\n", 3983 __func__, addr_width, dmar_domain->max_addr); 3984 return -EFAULT; 3985 } 3986 dmar_domain->gaw = addr_width; 3987 3988 /* 3989 * Knock out extra levels of page tables if necessary 3990 */ 3991 while (iommu->agaw < dmar_domain->agaw) { 3992 struct dma_pte *pte; 3993 3994 pte = dmar_domain->pgd; 3995 if (dma_pte_present(pte)) { 3996 dmar_domain->pgd = (struct dma_pte *) 3997 phys_to_virt(dma_pte_addr(pte)); 3998 free_pgtable_page(pte); 3999 } 4000 dmar_domain->agaw--; 4001 } 4002 4003 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); 4004} 4005 4006static void intel_iommu_detach_device(struct iommu_domain *domain, 4007 struct device *dev) 4008{ 4009 struct dmar_domain *dmar_domain = domain->priv; 4010 struct pci_dev *pdev = to_pci_dev(dev); 4011 4012 domain_remove_one_dev_info(dmar_domain, pdev); 4013} 4014 4015static int intel_iommu_map(struct iommu_domain *domain, 4016 unsigned long iova, phys_addr_t hpa, 4017 size_t size, int iommu_prot) 4018{ 4019 struct dmar_domain *dmar_domain = domain->priv; 4020 u64 max_addr; 4021 int prot = 0; 4022 int ret; 4023 4024 if (iommu_prot & IOMMU_READ) 4025 prot |= DMA_PTE_READ; 4026 if (iommu_prot & IOMMU_WRITE) 4027 prot |= DMA_PTE_WRITE; 4028 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 4029 prot |= DMA_PTE_SNP; 4030 4031 max_addr = iova + size; 4032 if (dmar_domain->max_addr < max_addr) { 4033 u64 end; 4034 4035 /* check if minimum agaw is sufficient for mapped address */ 4036 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1; 4037 if (end < max_addr) { 4038 printk(KERN_ERR "%s: iommu width (%d) is not " 4039 "sufficient for the mapped address (%llx)\n", 4040 __func__, dmar_domain->gaw, max_addr); 4041 return -EFAULT; 4042 } 4043 dmar_domain->max_addr = max_addr; 4044 } 4045 /* Round up size to next multiple of PAGE_SIZE, if it and 4046 the low bits of hpa would take us onto the next page */ 4047 size = aligned_nrpages(hpa, size); 4048 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, 4049 hpa >> VTD_PAGE_SHIFT, size, prot); 4050 return ret; 4051} 4052 4053static size_t intel_iommu_unmap(struct iommu_domain *domain, 4054 unsigned long iova, size_t size) 4055{ 4056 struct dmar_domain *dmar_domain = domain->priv; 4057 int order; 4058 4059 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, 4060 (iova + size - 1) >> VTD_PAGE_SHIFT); 4061 4062 if (dmar_domain->max_addr == iova + size) 4063 dmar_domain->max_addr = iova; 4064 4065 return PAGE_SIZE << order; 4066} 4067 4068static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 4069 unsigned long iova) 4070{ 4071 struct dmar_domain *dmar_domain = domain->priv; 4072 struct dma_pte *pte; 4073 u64 phys = 0; 4074 4075 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); 4076 if (pte) 4077 phys = dma_pte_addr(pte); 4078 4079 return phys; 4080} 4081 4082static int intel_iommu_domain_has_cap(struct iommu_domain 
*domain,
					   unsigned long cap)
{
	struct dmar_domain *dmar_domain = domain->priv;

	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return dmar_domain->iommu_snooping;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return intr_remapping_enabled;

	return 0;
}

/*
 * Group numbers are arbitrary.  Devices with the same group number
 * indicate that the iommu cannot differentiate between them.  To avoid
 * tracking used groups we just use the seg|bus|devfn of the lowest
 * level at which we're able to differentiate devices.
 */
static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev *bridge;
	union {
		struct {
			u8 devfn;
			u8 bus;
			u16 segment;
		} pci;
		u32 group;
	} id;

	if (iommu_no_mapping(dev))
		return -ENODEV;

	id.pci.segment = pci_domain_nr(pdev->bus);
	id.pci.bus = pdev->bus->number;
	id.pci.devfn = pdev->devfn;

	if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
		return -ENODEV;

	bridge = pci_find_upstream_pcie_bridge(pdev);
	if (bridge) {
		if (pci_is_pcie(bridge)) {
			id.pci.bus = bridge->subordinate->number;
			id.pci.devfn = 0;
		} else {
			id.pci.bus = bridge->bus->number;
			id.pci.devfn = bridge->devfn;
		}
	}

	if (!pdev->is_virtfn && iommu_group_mf)
		id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);

	*groupid = id.group;

	return 0;
}

static struct iommu_ops intel_iommu_ops = {
	.domain_init	= intel_iommu_domain_init,
	.domain_destroy = intel_iommu_domain_destroy,
	.attach_dev	= intel_iommu_attach_device,
	.detach_dev	= intel_iommu_detach_device,
	.map		= intel_iommu_map,
	.unmap		= intel_iommu_unmap,
	.iova_to_phys	= intel_iommu_iova_to_phys,
	.domain_has_cap = intel_iommu_domain_has_cap,
	.device_group	= intel_iommu_device_group,
	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
};

static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it:
	 */
	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;

	/* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
	if (dev->revision == 0x07) {
		printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
		dmar_map_gfx = 0;
	}
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;
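		/*
		 * Strict mode makes intel_unmap_page()/intel_unmap_sg()
		 * flush the IOTLB synchronously and free the IOVA at once,
		 * instead of batching flushes via add_unmap().
		 */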
} 4199} 4200DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); 4201DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); 4202DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); 4203DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); 4204 4205/* On Tylersburg chipsets, some BIOSes have been known to enable the 4206 ISOCH DMAR unit for the Azalia sound device, but not give it any 4207 TLB entries, which causes it to deadlock. Check for that. We do 4208 this in a function called from init_dmars(), instead of in a PCI 4209 quirk, because we don't want to print the obnoxious "BIOS broken" 4210 message if VT-d is actually disabled. 4211*/ 4212static void __init check_tylersburg_isoch(void) 4213{ 4214 struct pci_dev *pdev; 4215 uint32_t vtisochctrl; 4216 4217 /* If there's no Azalia in the system anyway, forget it. */ 4218 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL); 4219 if (!pdev) 4220 return; 4221 pci_dev_put(pdev); 4222 4223 /* System Management Registers. Might be hidden, in which case 4224 we can't do the sanity check. But that's OK, because the 4225 known-broken BIOSes _don't_ actually hide it, so far. */ 4226 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL); 4227 if (!pdev) 4228 return; 4229 4230 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) { 4231 pci_dev_put(pdev); 4232 return; 4233 } 4234 4235 pci_dev_put(pdev); 4236 4237 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */ 4238 if (vtisochctrl & 1) 4239 return; 4240 4241 /* Drop all bits other than the number of TLB entries */ 4242 vtisochctrl &= 0x1c; 4243 4244 /* If we have the recommended number of TLB entries (16), fine. */ 4245 if (vtisochctrl == 0x10) 4246 return; 4247 4248 /* Zero TLB entries? You get to ride the short bus to school. */ 4249 if (!vtisochctrl) { 4250 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n" 4251 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 4252 dmi_get_system_info(DMI_BIOS_VENDOR), 4253 dmi_get_system_info(DMI_BIOS_VERSION), 4254 dmi_get_system_info(DMI_PRODUCT_VERSION)); 4255 iommu_identity_mapping |= IDENTMAP_AZALIA; 4256 return; 4257 } 4258 4259 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", 4260 vtisochctrl); 4261} 4262
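/*
 * Illustrative note on intel_iommu_device_group() above (a worked example,
 * not compiled code): on a little-endian machine the id union packs as
 *
 *	group = devfn | (bus << 8) | (segment << 16)
 *
 * so a function at 0000:1a:02.0 with no upstream legacy bridge gives
 * devfn = PCI_DEVFN(2, 0) = 0x10, bus = 0x1a, segment = 0, and hence
 * group = 0x00001a10.  Devices that end up with the same value cannot
 * be isolated from one another by the IOMMU.
 */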