/*
 * Handle caching attributes in page tables (PAT)
 *
 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *          Suresh B Siddha <suresh.b.siddha@intel.com>
 *
 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
 */

#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rbtree.h>

#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/io.h>

#include "pat_internal.h"

#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;

static inline void pat_disable(const char *reason)
{
	pat_enabled = 0;
	printk(KERN_INFO "%s\n", reason);
}

static int __init nopat(char *str)
{
	pat_disable("PAT support disabled.");
	return 0;
}
early_param("nopat", nopat);
#else
static inline void pat_disable(const char *reason)
{
	(void)reason;
}
#endif


int pat_debug_enable;

static int __init pat_debug_setup(char *str)
{
	pat_debug_enable = 1;
	return 0;
}
__setup("debugpat", pat_debug_setup);

static u64 __read_mostly boot_pat_state;

enum {
	PAT_UC = 0,		/* uncached */
	PAT_WC = 1,		/* Write combining */
	PAT_WT = 4,		/* Write Through */
	PAT_WP = 5,		/* Write Protected */
	PAT_WB = 6,		/* Write Back (default) */
	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
};

#define PAT(x, y)	((u64)PAT_ ## y << ((x)*8))

void pat_init(void)
{
	u64 pat;
	bool boot_cpu = !boot_pat_state;

	if (!pat_enabled)
		return;

	if (!cpu_has_pat) {
		if (!boot_pat_state) {
			pat_disable("PAT not supported by CPU.");
			return;
		} else {
			/*
			 * If this happens we are on a secondary CPU, but
			 * switched to PAT on the boot CPU. We have no way to
			 * undo PAT.
			 */
			printk(KERN_ERR "PAT enabled, "
			       "but not supported by secondary CPU\n");
			BUG();
		}
	}

	/* Set PWT to Write-Combining. All other bits stay the same */
	/*
	 * PTE encoding used in Linux:
	 *      PAT
	 *      |PCD
	 *      ||PWT
	 *      |||
	 *      000 WB		_PAGE_CACHE_WB
	 *      001 WC		_PAGE_CACHE_WC
	 *      010 UC-		_PAGE_CACHE_UC_MINUS
	 *      011 UC		_PAGE_CACHE_UC
	 * PAT bit unused
	 */
	pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

	/* Boot CPU check */
	if (!boot_pat_state)
		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);

	wrmsrl(MSR_IA32_CR_PAT, pat);

	if (boot_cpu)
		printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
		       smp_processor_id(), boot_pat_state, pat);
}

#undef PAT

static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype accesses */

/*
 * Does intersection of PAT memory type and MTRR memory type and returns
 * the resulting memory type as PAT understands it.
 * (Type in pat and mtrr will not have the same value.)
 * The intersection is based on the "Effective Memory Type" tables in the
 * IA-32 SDM vol 3a.
 */
static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
{
	/*
	 * Look for MTRR hint to get the effective type in case where PAT
	 * request is for WB.
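	 * (e.g. /dev/mem mappings of BIOS or ACPI areas that are covered
	 * by a write-back MTRR).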
	 */
	if (req_type == _PAGE_CACHE_WB) {
		u8 mtrr_type;

		mtrr_type = mtrr_type_lookup(start, end);
		if (mtrr_type != MTRR_TYPE_WRBACK)
			return _PAGE_CACHE_UC_MINUS;

		return _PAGE_CACHE_WB;
	}

	return req_type;
}

struct pagerange_state {
	unsigned long	cur_pfn;
	int		ram;
	int		not_ram;
};

static int
pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
{
	struct pagerange_state *state = arg;

	state->not_ram	|= initial_pfn > state->cur_pfn;
	state->ram	|= total_nr_pages > 0;
	state->cur_pfn	 = initial_pfn + total_nr_pages;

	return state->ram && state->not_ram;
}

static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
	int ret = 0;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct pagerange_state state = {start_pfn, 0, 0};

	/*
	 * For legacy reasons, the physical address range in the legacy ISA
	 * region is tracked as non-RAM. This will allow users of
	 * /dev/mem to map portions of the legacy ISA region, even when
	 * some of those portions are listed (or not even listed) with
	 * different e820 types (RAM/reserved/..)
	 */
	if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
		start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;

	if (start_pfn < end_pfn) {
		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
				&state, pagerange_is_ram_callback);
	}

	return (ret > 0) ? -1 : (state.ram ? 1 : 0);
}

/*
 * For RAM pages, we use page flags to mark the pages with the appropriate
 * type. Here we do two passes:
 * - Find the memtype of all the pages in the range, look for any conflicts
 * - In case of no conflicts, set the new memtype for pages in the range
 */
static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
				  unsigned long *new_type)
{
	struct page *page;
	u64 pfn;

	if (req_type == _PAGE_CACHE_UC) {
		/* We do not support strong UC */
		WARN_ON_ONCE(1);
		req_type = _PAGE_CACHE_UC_MINUS;
	}

	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
		unsigned long type;

		page = pfn_to_page(pfn);
		type = get_page_memtype(page);
		if (type != -1) {
			printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n",
			       start, end - 1, type, req_type);
			if (new_type)
				*new_type = type;

			return -EBUSY;
		}
	}

	if (new_type)
		*new_type = req_type;

	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
		page = pfn_to_page(pfn);
		set_page_memtype(page, req_type);
	}
	return 0;
}

static int free_ram_pages_type(u64 start, u64 end)
{
	struct page *page;
	u64 pfn;

	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
		page = pfn_to_page(pfn);
		set_page_memtype(page, -1);
	}
	return 0;
}

/*
 * req_type typically has one of the following values:
 * - _PAGE_CACHE_WB
 * - _PAGE_CACHE_WC
 * - _PAGE_CACHE_UC_MINUS
 * - _PAGE_CACHE_UC
 *
 * If new_type is NULL, the function will return an error if it cannot
 * reserve the region with req_type. If new_type is non-NULL, the function
 * will return the available type in new_type in case of no error. In case
 * of any error it will return a negative value.
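 *
 * A region successfully reserved here must later be released with
 * free_memtype().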
 */
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
		    unsigned long *new_type)
{
	struct memtype *new;
	unsigned long actual_type;
	int is_range_ram;
	int err = 0;

	BUG_ON(start >= end); /* end is exclusive */

	if (!pat_enabled) {
		/* This is identical to page table setting without PAT */
		if (new_type) {
			if (req_type == _PAGE_CACHE_WC)
				*new_type = _PAGE_CACHE_UC_MINUS;
			else
				*new_type = req_type & _PAGE_CACHE_MASK;
		}
		return 0;
	}

	/* Low ISA region is always mapped WB in page table. No need to track */
	if (x86_platform.is_untracked_pat_range(start, end)) {
		if (new_type)
			*new_type = _PAGE_CACHE_WB;
		return 0;
	}

	/*
	 * Call mtrr_lookup to get the type hint. This is an
	 * optimization for /dev/mem mmap'ers into WB memory (BIOS
	 * tools and ACPI tools). Use WB request for WB memory and use
	 * UC_MINUS otherwise.
	 */
	actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK);

	if (new_type)
		*new_type = actual_type;

	is_range_ram = pat_pagerange_is_ram(start, end);
	if (is_range_ram == 1) {

		err = reserve_ram_pages_type(start, end, req_type, new_type);

		return err;
	} else if (is_range_ram < 0) {
		return -EINVAL;
	}

	new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->start	= start;
	new->end	= end;
	new->type	= actual_type;

	spin_lock(&memtype_lock);

	err = rbt_memtype_check_insert(new, new_type);
	if (err) {
		printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
		       start, end - 1,
		       cattr_name(new->type), cattr_name(req_type));
		kfree(new);
		spin_unlock(&memtype_lock);

		return err;
	}

	spin_unlock(&memtype_lock);

	dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
		start, end - 1, cattr_name(new->type), cattr_name(req_type),
		new_type ? cattr_name(*new_type) : "-");

	return err;
}

int free_memtype(u64 start, u64 end)
{
	int err = -EINVAL;
	int is_range_ram;
	struct memtype *entry;

	if (!pat_enabled)
		return 0;

	/* Low ISA region is always mapped WB. No need to track */
	if (x86_platform.is_untracked_pat_range(start, end))
		return 0;

	is_range_ram = pat_pagerange_is_ram(start, end);
	if (is_range_ram == 1) {

		err = free_ram_pages_type(start, end);

		return err;
	} else if (is_range_ram < 0) {
		return -EINVAL;
	}

	spin_lock(&memtype_lock);
	entry = rbt_memtype_erase(start, end);
	spin_unlock(&memtype_lock);

	if (!entry) {
		printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
		       current->comm, current->pid, start, end - 1);
		return -EINVAL;
	}

	kfree(entry);

	dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);

	return 0;
}


/**
 * lookup_memtype - Looks up the memory type for a physical address
 * @paddr: physical address of which memory type needs to be looked up
 *
 * Only to be called when PAT is enabled
 *
 * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
 * _PAGE_CACHE_UC
 */
static unsigned long lookup_memtype(u64 paddr)
{
	int rettype = _PAGE_CACHE_WB;
	struct memtype *entry;

	if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
		return rettype;

	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
		struct page *page;
		page = pfn_to_page(paddr >> PAGE_SHIFT);
		rettype = get_page_memtype(page);
		/*
		 * -1 from get_page_memtype() implies RAM page is in its
		 * default state and not reserved, and hence of type WB
		 */
		if (rettype == -1)
			rettype = _PAGE_CACHE_WB;

		return rettype;
	}

	spin_lock(&memtype_lock);

	entry = rbt_memtype_lookup(paddr);
	if (entry != NULL)
		rettype = entry->type;
	else
		rettype = _PAGE_CACHE_UC_MINUS;

	spin_unlock(&memtype_lock);
	return rettype;
}

/**
 * io_reserve_memtype - Request a memory type mapping for a region of memory
 * @start: start (physical address) of the region
 * @end: end (physical address) of the region
 * @type: A pointer to memtype, with requested type. On success, requested
 * or any other compatible type that was available for the region is returned
 *
 * On success, returns 0
 * On failure, returns non-zero
 */
int io_reserve_memtype(resource_size_t start, resource_size_t end,
			unsigned long *type)
{
	resource_size_t size = end - start;
	unsigned long req_type = *type;
	unsigned long new_type;
	int ret;

	WARN_ON_ONCE(iomem_map_sanity_check(start, size));

	ret = reserve_memtype(start, end, req_type, &new_type);
	if (ret)
		goto out_err;

	if (!is_new_memtype_allowed(start, size, req_type, new_type))
		goto out_free;

	if (kernel_map_sync_memtype(start, size, new_type) < 0)
		goto out_free;

	*type = new_type;
	return 0;

out_free:
	free_memtype(start, end);
	ret = -EBUSY;
out_err:
	return ret;
}

/**
 * io_free_memtype - Release a memory type mapping for a region of memory
 * @start: start (physical address) of the region
 * @end: end (physical address) of the region
 */
void io_free_memtype(resource_size_t start, resource_size_t end)
{
	free_memtype(start, end);
}

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				unsigned long size, pgprot_t vma_prot)
{
	return vma_prot;
}

#ifdef CONFIG_STRICT_DEVMEM
/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#else
/* This check is needed to avoid cache aliasing when PAT is enabled */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	if (!pat_enabled)
		return 1;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn)) {
			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
			       current->comm, from, to - 1);
			return 0;
		}
		cursor += PAGE_SIZE;
		pfn++;
	}
	return 1;
}
#endif /* CONFIG_STRICT_DEVMEM */

int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
				unsigned long size, pgprot_t *vma_prot)
{
	unsigned long flags = _PAGE_CACHE_WB;

	if (!range_is_allowed(pfn, size))
		return 0;

	if (file->f_flags & O_DSYNC)
		flags = _PAGE_CACHE_UC_MINUS;

#ifdef CONFIG_X86_32
	/*
	 * On the PPro and successors, the MTRRs are used to set
	 * memory types for physical addresses outside main memory,
	 * so blindly setting UC or PWT on those pages is wrong.
	 * For Pentiums and earlier, the surround logic should disable
	 * caching for the high addresses through the KEN pin, but
	 * we maintain the tradition of paranoia in this code.
	 */
	if (!pat_enabled &&
	    !(boot_cpu_has(X86_FEATURE_MTRR) ||
	      boot_cpu_has(X86_FEATURE_K6_MTRR) ||
	      boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
	      boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
	    (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
		flags = _PAGE_CACHE_UC;
	}
#endif

	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
			     flags);
	return 1;
}

/*
 * Change the memory type for the physical address range in kernel identity
 * mapping space if that range is a part of identity map.
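 *
 * Returns 0 on success (including when the range lies outside the identity
 * mapping) and a negative value if changing the attribute fails.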
 */
int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
{
	unsigned long id_sz;

	if (base > __pa(high_memory-1))
		return 0;

	/*
	 * some areas in the middle of the kernel identity range
	 * are not mapped, like the PCI space.
	 */
	if (!page_is_ram(base >> PAGE_SHIFT))
		return 0;

	id_sz = (__pa(high_memory-1) <= base + size) ?
				__pa(high_memory) - base :
				size;

	if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
		printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
			"for [mem %#010Lx-%#010Lx]\n",
			current->comm, current->pid,
			cattr_name(flags),
			base, (unsigned long long)(base + size-1));
		return -EINVAL;
	}
	return 0;
}

/*
 * Internal interface to reserve a range of physical memory with prot.
 * Reserves non-RAM regions only. After a successful reserve_memtype, this
 * func also keeps the identity mapping (if any) in sync with the new prot.
 */
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
				int strict_prot)
{
	int is_ram = 0;
	int ret;
	unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
	unsigned long flags = want_flags;

	is_ram = pat_pagerange_is_ram(paddr, paddr + size);

	/*
	 * reserve_pfn_range() for RAM pages. We do not refcount to keep
	 * track of the number of mappings of RAM pages. We can assert that
	 * the type requested matches the type of the first page in the range.
	 */
	if (is_ram) {
		if (!pat_enabled)
			return 0;

		flags = lookup_memtype(paddr);
		if (want_flags != flags) {
			printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
				current->comm, current->pid,
				cattr_name(want_flags),
				(unsigned long long)paddr,
				(unsigned long long)(paddr + size - 1),
				cattr_name(flags));
			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
					      (~_PAGE_CACHE_MASK)) |
					     flags);
		}
		return 0;
	}

	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
	if (ret)
		return ret;

	if (flags != want_flags) {
		if (strict_prot ||
		    !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
			free_memtype(paddr, paddr + size);
			printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
				" for [mem %#010Lx-%#010Lx], got %s\n",
				current->comm, current->pid,
				cattr_name(want_flags),
				(unsigned long long)paddr,
				(unsigned long long)(paddr + size - 1),
				cattr_name(flags));
			return -EINVAL;
		}
		/*
		 * We allow returning a different type than the one requested
		 * in the non-strict case.
		 */
		*vma_prot = __pgprot((pgprot_val(*vma_prot) &
				      (~_PAGE_CACHE_MASK)) |
				     flags);
	}

	if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
		free_memtype(paddr, paddr + size);
		return -EINVAL;
	}
	return 0;
}

/*
 * Internal interface to free a range of physical memory.
 * Frees non-RAM regions only.
 */
static void free_pfn_range(u64 paddr, unsigned long size)
{
	int is_ram;

	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
	if (is_ram == 0)
		free_memtype(paddr, paddr + size);
}

/*
 * track_pfn_copy is called when a vma covering the pfnmap gets copied
 * through copy_page_range().
 *
 * If the vma has a linear pfn mapping for the entire range, we get the prot
 * from the pte and reserve the entire vma range with a single
 * reserve_pfn_range call.
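 *
 * VM_PAT is only set by track_pfn_remap() when the whole vma was reserved,
 * so follow_phys() on vma->vm_start yields the base of the region that has
 * to be re-reserved for the copy.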
 */
int track_pfn_copy(struct vm_area_struct *vma)
{
	resource_size_t paddr;
	unsigned long prot;
	unsigned long vma_size = vma->vm_end - vma->vm_start;
	pgprot_t pgprot;

	if (vma->vm_flags & VM_PAT) {
		/*
		 * reserve the whole chunk covered by vma. We need the
		 * starting address and protection from pte.
		 */
		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
			WARN_ON_ONCE(1);
			return -EINVAL;
		}
		pgprot = __pgprot(prot);
		return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
	}

	return 0;
}

/*
 * prot is passed in as a parameter for the new mapping. If the vma has a
 * linear pfn mapping for the entire range, reserve the entire vma range
 * with a single reserve_pfn_range call.
 */
int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
		    unsigned long pfn, unsigned long addr, unsigned long size)
{
	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
	unsigned long flags;

	/* reserve the whole chunk starting from paddr */
	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
		int ret;

		ret = reserve_pfn_range(paddr, size, prot, 0);
		if (!ret)
			vma->vm_flags |= VM_PAT;
		return ret;
	}

	if (!pat_enabled)
		return 0;

	/*
	 * For anything smaller than the vma size we set prot based on the
	 * lookup.
	 */
	flags = lookup_memtype(paddr);

	/* Check memtype for the remaining pages */
	while (size > PAGE_SIZE) {
		size -= PAGE_SIZE;
		paddr += PAGE_SIZE;
		if (flags != lookup_memtype(paddr))
			return -EINVAL;
	}

	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
			 flags);

	return 0;
}

/*
 * track_pfn_insert is called when a single pfn is being inserted into a vma;
 * set the pgprot for it based on the tracked memtype of the target address.
 */
int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
		     unsigned long pfn)
{
	unsigned long flags;

	if (!pat_enabled)
		return 0;

	/* Set prot based on lookup */
	flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
			 flags);

	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn, size are zero).
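 * In the latter case the base address and the size are recovered from the
 * vma itself via follow_phys().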
 */
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
		 unsigned long size)
{
	resource_size_t paddr;
	unsigned long prot;

	if (!(vma->vm_flags & VM_PAT))
		return;

	/* free the chunk starting from pfn or the whole chunk */
	paddr = (resource_size_t)pfn << PAGE_SHIFT;
	if (!paddr && !size) {
		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
			WARN_ON_ONCE(1);
			return;
		}

		size = vma->vm_end - vma->vm_start;
	}
	free_pfn_range(paddr, size);
	vma->vm_flags &= ~VM_PAT;
}

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	if (pat_enabled)
		return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
	else
		return pgprot_noncached(prot);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)

static struct memtype *memtype_get_idx(loff_t pos)
{
	struct memtype *print_entry;
	int ret;

	print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!print_entry)
		return NULL;

	spin_lock(&memtype_lock);
	ret = rbt_memtype_copy_nth_element(print_entry, pos);
	spin_unlock(&memtype_lock);

	if (!ret) {
		return print_entry;
	} else {
		kfree(print_entry);
		return NULL;
	}
}

static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos == 0) {
		++*pos;
		seq_printf(seq, "PAT memtype list:\n");
	}

	return memtype_get_idx(*pos);
}

static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return memtype_get_idx(*pos);
}

static void memtype_seq_stop(struct seq_file *seq, void *v)
{
}

static int memtype_seq_show(struct seq_file *seq, void *v)
{
	struct memtype *print_entry = (struct memtype *)v;

	seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
			print_entry->start, print_entry->end);
	kfree(print_entry);

	return 0;
}

static const struct seq_operations memtype_seq_ops = {
	.start = memtype_seq_start,
	.next  = memtype_seq_next,
	.stop  = memtype_seq_stop,
	.show  = memtype_seq_show,
};

static int memtype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &memtype_seq_ops);
}

static const struct file_operations memtype_fops = {
	.open    = memtype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static int __init pat_memtype_list_init(void)
{
	if (pat_enabled) {
		debugfs_create_file("pat_memtype_list", S_IRUSR,
				    arch_debugfs_dir, NULL, &memtype_fops);
	}
	return 0;
}

late_initcall(pat_memtype_list_init);

#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */