arch/s390/mm/pgtable.c at revision ec66ad66a0de87866be347b5ecc83bd46427f53b
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif


unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm)
		update_mm(mm, current);
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm)
		__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		update_mm(mm, current);
}
#endif
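
/*
 * Illustrative sketch (not part of this file): crst_table_upgrade() is
 * driven from the s390 mmap paths in arch/s390/mm/mmap.c when an
 * allocation does not fit below the current ASCE limit, roughly:
 *
 *	addr = standard_get_unmapped_area(...);
 *	if (addr == -ENOMEM && mm->context.asce_limit < (1UL << 53)) {
 *		rc = crst_table_upgrade(mm, 1UL << 53);
 *		if (rc)
 *			return (unsigned long) rc;
 *		addr = standard_get_unmapped_area(...);	// retry
 *	}
 *
 * The exact caller and bounds checks differ between kernel versions;
 * standard_get_unmapped_area() is an illustrative placeholder name.
 */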

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INVALID)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;


	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
	__releases(&gmap->mm->page_table_lock)
	__acquires(&gmap->mm->page_table_lock)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}
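
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * typical gmap life cycle as driven by a hypervisor such as KVM:
 *
 *	struct gmap *gmap;
 *
 *	gmap = gmap_alloc(current->mm);		// create the guest space
 *	if (!gmap)
 *		return -ENOMEM;
 *	rc = gmap_map_segment(gmap, from, to, len);	// back guest memory
 *	gmap_enable(gmap);			// before entering SIE
 *	...					// run the guest
 *	gmap_disable(gmap);			// after leaving SIE
 *	gmap_free(gmap);			// tear everything down
 *
 * Error handling is omitted; the real callers live in arch/s390/kvm and
 * may differ between kernel versions.
 */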

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INVALID;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > TASK_MAX_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = (from + off) | (_SEGMENT_ENTRY_INVALID |
					 _SEGMENT_ENTRY_PROTECT);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
{
	unsigned long *table;

	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);
	return table;
}
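
/*
 * Orientation note for the walk in gmap_table_walk() above: the gmap uses
 * a fixed four-level layout with 2048-entry tables, so a 64-bit guest
 * address decomposes as
 *
 *	bits 63..53 -> region-1 index:  (address >> 53) & 0x7ff
 *	bits 52..42 -> region-2 index:  (address >> 42) & 0x7ff
 *	bits 41..31 -> region-3 index:  (address >> 31) & 0x7ff
 *	bits 30..20 -> segment  index:  (address >> 20) & 0x7ff
 *	bits 19..0  -> byte offset within the 1 MB segment (PMD_SIZE)
 *
 * The same shifts appear in gmap_map_segment(), gmap_unmap_segment() and
 * gmap_discard().
 */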

/**
 * __gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, vmaddr, segment;
	struct gmap_pgtable *mp;
	struct page *page;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return PTR_ERR(segment_ptr);
	/* Convert the gmap address to an mm address. */
	segment = *segment_ptr;
	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_PROTECT) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(address, gmap);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
				unsigned long *segment_ptr, struct gmap *gmap)
{
	unsigned long vmaddr;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct mm_struct *mm;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	mm = gmap->mm;
	vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
	vma = find_vma(mm, vmaddr);
	if (!vma || vma->vm_start > vmaddr)
		return -EFAULT;
	/* Walk the parent mm page table */
	pgd = pgd_offset(mm, vmaddr);
	pud = pud_alloc(mm, pgd, vmaddr);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc(mm, pud, vmaddr);
	if (!pmd)
		return -ENOMEM;
	if (!pmd_present(*pmd) &&
	    __pte_alloc(mm, vma, pmd, vmaddr))
		return -ENOMEM;
	/* pmd now points to a valid segment table entry. */
	rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
	if (!rmap)
		return -ENOMEM;
	/* Link gmap segment table entry location to page table. */
	page = pmd_page(*pmd);
	mp = (struct gmap_pgtable *) page->index;
	rmap->gmap = gmap;
	rmap->entry = segment_ptr;
	rmap->vmaddr = address & PMD_MASK;
	spin_lock(&mm->page_table_lock);
	if (*segment_ptr == segment) {
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
		rmap = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	kfree(rmap);
	return 0;
}

static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
					     _SEGMENT_ENTRY_PROTECT);
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

/*
 * this function is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, segment;
	struct gmap_pgtable *mp;
	struct page *page;
	int rc;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return -EFAULT;
	/* Convert the gmap address to an mm address. */
	while (1) {
		segment = *segment_ptr;
		if (!(segment & _SEGMENT_ENTRY_INVALID)) {
			/* Page table is present */
			page = pfn_to_page(segment >> PAGE_SHIFT);
			mp = (struct gmap_pgtable *) page->index;
			return mp->vmaddr | (address & ~PMD_MASK);
		}
		if (!(segment & _SEGMENT_ENTRY_PROTECT))
			/* Nothing mapped in the gmap address space. */
			break;
		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
		if (rc)
			return rc;
	}
	return -EFAULT;
}

unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(address, gmap);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
	}
	free_swap_and_cache(entry);
}

/**
 * The mm->mmap_sem lock must be held
 */
static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
{
	unsigned long ptev, pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep, pte;

	ptep = get_locked_pte(mm, address, &ptl);
	if (unlikely(!ptep))
		return;
	pte = *ptep;
	if (!pte_swap(pte))
		goto out_pte;
	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	ptev = pte_val(pte);
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
		gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
		pte_clear(mm, address, ptep);
	}
	pgste_set_unlock(ptep, pgste);
out_pte:
	pte_unmap_unlock(*ptep, ptl);
}

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, *segment_ptr;
	unsigned long segment, pgstev, ptev;
	struct gmap_pgtable *mp;
	struct page *page;

	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return;
	segment = *segment_ptr;
	if (segment & _SEGMENT_ENTRY_INVALID)
		return;
	page = pfn_to_page(segment >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	address = mp->vmaddr | (address & ~PMD_MASK);
	/* Page table is present */
	table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
	table = table + ((address >> 12) & 0xff);
	pgstev = table[PTRS_PER_PTE];
	ptev = table[0];
	/* quick check, checked again with locks held */
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
		gmap_zap_unused(gmap->mm, address);
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{

	unsigned long *table, address, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	address = from;
	while (address < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((address >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
			       size, NULL);
		address = (address + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
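
/*
 * Illustrative sketch (hypothetical names, not part of this file): a user
 * of the ipte notifier provides a gmap_notifier block and a callback:
 *
 *	static void my_gmap_notifier(struct gmap *gmap, unsigned long address)
 *	{
 *		// react to the invalidation of a marked pte
 *	}
 *
 *	static struct gmap_notifier my_nb = {
 *		.notifier_call = my_gmap_notifier,
 *	};
 *
 *	gmap_register_ipte_notifier(&my_nb);
 *	rc = gmap_ipte_notify(gmap, start, len);  // mark a guest range
 *	...
 *	gmap_unregister_ipte_notifier(&my_nb);
 *
 * KVM uses this mechanism for its SIE invalidation handling.
 */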

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @start: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_fault(start, gmap);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		if (unlikely(!ptep))
			continue;
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			start += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		spin_unlock(ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);

/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
{
	unsigned long segment_offset;
	struct gmap_notifier *nb;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	segment_offset = segment_offset * (4096 / sizeof(pte_t));
	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(rmap, &mp->mapper, list) {
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(rmap->gmap,
					  rmap->vmaddr + segment_offset);
	}
	spin_unlock(&gmap_notifier_lock);
}

static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	if (!pgtable_page_ctor(page)) {
		kfree(mp);
		__free_page(page);
		return NULL;
	}
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
		    PAGE_SIZE/2);
	return table;
}
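
/*
 * Layout note (for orientation, derived from the code above): a page table
 * page with pgstes is a full 4K page that holds 256 ptes in the lower 2K
 * and the 256 matching pgstes in the upper 2K:
 *
 *	+-------------------+ 0x000
 *	| 256 pte entries   |  <- table[0] .. table[255]
 *	+-------------------+ 0x800
 *	| 256 pgste entries |  <- table[PTRS_PER_PTE + i] belongs to table[i]
 *	+-------------------+ 0x1000
 *
 * This is why __gmap_zap() reads table[PTRS_PER_PTE] to get the pgste of
 * the pte at table[0].
 */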

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
			pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *start_pte, *pte;
	spinlock_t *ptl;
	pgste_t pgste;

	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	pte = start_pte;
	do {
		pgste = pgste_get_lock(pte);
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
		pgste_set_unlock(pte, pgste);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(start_pte, ptl);

	return addr;
}

static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
			pud_t *pud, unsigned long addr, unsigned long end)
{
	unsigned long next;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		next = page_table_reset_pte(mm, pmd, addr, next);
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
			pgd_t *pgd, unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_reset_pmd(mm, pud, addr, next);
	} while (pud++, addr = next, addr != end);

	return addr;
}

void page_table_reset_pgste(struct mm_struct *mm,
			    unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pgd_t *pgd;

	addr = start;
	down_read(&mm->mmap_sem);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_reset_pud(mm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	up_read(&mm->mmap_sem);
}
EXPORT_SYMBOL(page_table_reset_pgste);

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	/* use the mm that was passed in, not current->mm */
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_HC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(*ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
					   unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
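
/*
 * Orientation note (derived from the code above): for fragment tracking,
 * page->_mapcount of a non-pgste page-table page is (ab)used as a bit mask.
 * With FRAG_MASK = 0x03 (64 bit, two 2K tables per 4K page):
 *
 *	bit 0/1       - the 2K fragment is allocated
 *	bit 4/5       - the 2K fragment awaits an RCU grace period
 *	_mapcount -1  - the page is not used as a page table at all
 *	_mapcount 0   - the page is a pgste page table
 *	                (see page_table_with_pgste())
 *
 * Example: after page_table_alloc() hands out the first fragment of a
 * fresh page, _mapcount is 1; freeing it via page_table_free_rcu() clears
 * bit 0 and sets bit 4 (bit << 4) until __tlb_remove_table() finally
 * clears that too.
 */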

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}
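
/*
 * Orientation note (derived from the code above): the low bits of the table
 * pointers queued via tlb_remove_table() encode what is being freed, since
 * page tables are at least 1K aligned and those bits are otherwise unused:
 *
 *	type == 0          - a full CRST table (region or segment table)
 *	type == FRAG_MASK  - a pgste page table (freed as a whole page)
 *	type == bit << 4   - a 1K/2K page table fragment
 *
 * __tlb_remove_table() strips the tag again and dispatches accordingly.
 */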

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
				struct mm_struct *mm, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	unsigned long next, *table, *new;
	struct page *page;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
again:
		if (pmd_none_or_clear_bad(pmd))
			continue;
		table = (unsigned long *) pmd_deref(*pmd);
		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
		if (page_table_with_pgste(page))
			continue;
		/* Allocate new page table with pgstes */
		new = page_table_alloc_pgste(mm, addr);
		if (!new)
			return -ENOMEM;

		spin_lock(&mm->page_table_lock);
		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
			/* Nuke pmd entry pointing to the "short" page table */
			pmdp_flush_lazy(mm, addr, pmd);
			pmd_clear(pmd);
			/* Copy ptes from old table to new table */
			memcpy(new, table, PAGE_SIZE/2);
			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
			/* Establish new table */
			pmd_populate(mm, pmd, (pte_t *) new);
			/* Free old table with rcu, there might be a walker! */
			page_table_free_rcu(tlb, table);
			new = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		if (new) {
			page_table_free_pgste(new);
			goto again;
		}
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
				struct mm_struct *mm, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pud++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
					unsigned long addr, unsigned long end)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pgd++, addr = next, addr != end);

	return 0;
}
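
/*
 * Illustrative sketch (hypothetical call site, not part of this file):
 * s390_enable_sie() below is meant to be called once, early, by a
 * hypervisor before it starts interpreting a guest, e.g. during VM
 * creation in KVM:
 *
 *	rc = s390_enable_sie();
 *	if (rc)
 *		return rc;	// page tables could not be reallocated
 *
 * After a successful call every page table of the process carries pgstes.
 */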

/*
 * switch on pgstes for its userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct mmu_gather tlb;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	down_write(&mm->mmap_sem);
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	/* Reallocate the page tables with pgstes */
	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
		mm->context.has_pgste = 1;
	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
	up_write(&mm->mmap_sem);
	return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/* No need to flush TLB
	 * On s390 reference bits are in storage key and never in TLB */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */