pgtable.c revision a9162f238a84ee05b09ea4b0ebd97fb20448c28c
/*
 * Copyright IBM Corp. 2007,2009
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif

unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;
	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);

unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INV)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
{
	struct page *page;
	unsigned long *new;

	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INV) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV;
	}
out:
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > PGDIR_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
	}
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, vmaddr, segment;
	struct mm_struct *mm;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct vm_area_struct *vma;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	current->thread.gmap_addr = address;
	mm = gmap->mm;
	/* Walk the gmap address space page table */
	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);

	/* Convert the gmap address to an mm address. */
	segment = *table;
	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_RO) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		vma = find_vma(mm, vmaddr);
		if (!vma || vma->vm_start > vmaddr)
			return -EFAULT;

		/* Walk the parent mm page table */
		pgd = pgd_offset(mm, vmaddr);
		pud = pud_alloc(mm, pgd, vmaddr);
		if (!pud)
			return -ENOMEM;
		pmd = pmd_alloc(mm, pud, vmaddr);
		if (!pmd)
			return -ENOMEM;
		if (!pmd_present(*pmd) &&
		    __pte_alloc(mm, vma, pmd, vmaddr))
			return -ENOMEM;
		/* pmd now points to a valid segment table entry. */
		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
		if (!rmap)
			return -ENOMEM;
		/* Link gmap segment table entry location to page table. */
		page = pmd_page(*pmd);
		mp = (struct gmap_pgtable *) page->index;
		rmap->entry = table;
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*table = pmd_val(*pmd) & PAGE_MASK;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(gmap_fault);

void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry =
			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	pgtable_page_ctor(page);
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 3);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	/* Undo the pgtable_page_ctor() done at allocation time. */
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

#else /* CONFIG_PGSTE */

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unmap_notifier(struct mm_struct *mm,
					unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have a switched amode? If not, we cannot do sie */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	/* let's check if we are allowed to replace the mm */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* we copy the mm and let dup_mm create the page tables with pgstes */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	atomic_inc(&mm->context.attach_count);
	atomic_dec(&old_mm->context.attach_count);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */