hugetlb.c revision 9de455b20705f36384a711d4a20bcf7ba1ab180b
/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>

#include <asm/page.h>
#include <asm/pgtable.h>

#include <linux/hugetlb.h>
#include "internal.h"

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
/*
 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
 */
static DEFINE_SPINLOCK(hugetlb_lock);

static void clear_huge_page(struct page *page, unsigned long addr)
{
	int i;

	might_sleep();
	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
		cond_resched();
		clear_user_highpage(page + i, addr);
	}
}

static void copy_huge_page(struct page *dst, struct page *src,
			   unsigned long addr, struct vm_area_struct *vma)
{
	int i;

	might_sleep();
	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
		cond_resched();
		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
	}
}

static void enqueue_huge_page(struct page *page)
{
	int nid = page_to_nid(page);
	list_add(&page->lru, &hugepage_freelists[nid]);
	free_huge_pages++;
	free_huge_pages_node[nid]++;
}

static struct page *dequeue_huge_page(struct vm_area_struct *vma,
				unsigned long address)
{
	int nid = numa_node_id();
	struct page *page = NULL;
	struct zonelist *zonelist = huge_zonelist(vma, address);
	struct zone **z;

	for (z = zonelist->zones; *z; z++) {
		nid = zone_to_nid(*z);
		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
		    !list_empty(&hugepage_freelists[nid]))
			break;
	}

	if (*z) {
		page = list_entry(hugepage_freelists[nid].next,
				  struct page, lru);
		list_del(&page->lru);
		free_huge_pages--;
		free_huge_pages_node[nid]--;
	}
	return page;
}

static void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));

	INIT_LIST_HEAD(&page->lru);

	spin_lock(&hugetlb_lock);
	enqueue_huge_page(page);
	spin_unlock(&hugetlb_lock);
}

static int alloc_fresh_huge_page(void)
{
	static int nid = 0;
	struct page *page;
	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	nid = next_node(nid, node_online_map);
	if (nid == MAX_NUMNODES)
		nid = first_node(node_online_map);
	if (page) {
		set_compound_page_dtor(page, free_huge_page);
		spin_lock(&hugetlb_lock);
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
		spin_unlock(&hugetlb_lock);
		put_page(page); /* free it into the hugepage allocator */
		return 1;
	}
	return 0;
}

static struct page *alloc_huge_page(struct vm_area_struct *vma,
				    unsigned long addr)
{
	struct page *page;

	spin_lock(&hugetlb_lock);
	if (vma->vm_flags & VM_MAYSHARE)
		resv_huge_pages--;
	else if (free_huge_pages <= resv_huge_pages)
		goto fail;

	page = dequeue_huge_page(vma, addr);
	if (!page)
		goto fail;

	spin_unlock(&hugetlb_lock);
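	/*
	 * Huge pages sit on the free lists with a refcount of zero
	 * (free_huge_page() is the compound page destructor), so give
	 * the new owner its reference before returning the page.
	 */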
	set_page_refcounted(page);
	return page;

fail:
	spin_unlock(&hugetlb_lock);
	return NULL;
}

static int __init hugetlb_init(void)
{
	unsigned long i;

	if (HPAGE_SHIFT == 0)
		return 0;

	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&hugepage_freelists[i]);

	for (i = 0; i < max_huge_pages; ++i) {
		if (!alloc_fresh_huge_page())
			break;
	}
	max_huge_pages = free_huge_pages = nr_huge_pages = i;
	printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
	return 0;
}
module_init(hugetlb_init);

static int __init hugetlb_setup(char *s)
{
	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
		max_huge_pages = 0;
	return 1;
}
__setup("hugepages=", hugetlb_setup);

#ifdef CONFIG_SYSCTL
static void update_and_free_page(struct page *page)
{
	int i;
	nr_huge_pages--;
	nr_huge_pages_node[page_to_nid(page)]--;
	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
				1 << PG_private | 1 << PG_writeback);
	}
	page[1].lru.next = NULL;
	set_page_refcounted(page);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

#ifdef CONFIG_HIGHMEM
static void try_to_free_low(unsigned long count)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;
		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			list_del(&page->lru);
			update_and_free_page(page);
			free_huge_pages--;
			free_huge_pages_node[page_to_nid(page)]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
static inline void try_to_free_low(unsigned long count)
{
}
#endif

static unsigned long set_max_huge_pages(unsigned long count)
{
	while (count > nr_huge_pages) {
		if (!alloc_fresh_huge_page())
			return nr_huge_pages;
	}
	if (count >= nr_huge_pages)
		return nr_huge_pages;

	spin_lock(&hugetlb_lock);
	count = max(count, resv_huge_pages);
	try_to_free_low(count);
	while (count < nr_huge_pages) {
		struct page *page = dequeue_huge_page(NULL, 0);
		if (!page)
			break;
		update_and_free_page(page);
	}
	spin_unlock(&hugetlb_lock);
	return nr_huge_pages;
}

int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
	max_huge_pages = set_max_huge_pages(max_huge_pages);
	return 0;
}
#endif /* CONFIG_SYSCTL */

int hugetlb_report_meminfo(char *buf)
{
	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"HugePages_Rsvd:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			nr_huge_pages,
			free_huge_pages,
			resv_huge_pages,
			HPAGE_SIZE/1024);
}

int hugetlb_report_node_meminfo(int nid, char *buf)
{
	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n",
		nid, nr_huge_pages_node[nid],
		nid, free_huge_pages_node[nid]);
}

/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
}

/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 */
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
				unsigned long address, int *unused)
{
	BUG();
	return NULL;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.nopage = hugetlb_nopage,
};

static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
				int writable)
{
	pte_t entry;

	if (writable) {
		entry =
		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	} else {
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	}
	entry = pte_mkyoung(entry);
	entry = pte_mkhuge(entry);

	return entry;
}

static void set_huge_ptep_writable(struct vm_area_struct *vma,
				   unsigned long address, pte_t *ptep)
{
	pte_t entry;

	entry = pte_mkwrite(pte_mkdirty(*ptep));
	ptep_set_access_flags(vma, address, ptep, entry, 1);
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);
}


int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr;
	int cow;

	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		src_pte = huge_pte_offset(src, addr);
		if (!src_pte)
			continue;
		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			goto nomem;
		spin_lock(&dst->page_table_lock);
		spin_lock(&src->page_table_lock);
		if (!pte_none(*src_pte)) {
			if (cow)
				ptep_set_wrprotect(src, addr, src_pte);
			entry = *src_pte;
			ptepage = pte_page(entry);
			get_page(ptepage);
			set_huge_pte_at(dst, addr, dst_pte, entry);
		}
		spin_unlock(&src->page_table_lock);
		spin_unlock(&dst->page_table_lock);
	}
	return 0;

nomem:
	return -ENOMEM;
}

void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *ptep;
	pte_t pte;
	struct page *page;
	struct page *tmp;
	/*
	 * A page gathering list, protected by per file i_mmap_lock. The
	 * lock is used to avoid list corruption from multiple unmapping
	 * of the same page since we are using page->lru.
	 */
	LIST_HEAD(page_list);

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(start & ~HPAGE_MASK);
	BUG_ON(end & ~HPAGE_MASK);

	spin_lock(&mm->page_table_lock);
	for (address = start; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;

		if (huge_pmd_unshare(mm, &address, ptep))
			continue;

		pte = huge_ptep_get_and_clear(mm, address, ptep);
		if (pte_none(pte))
			continue;

		page = pte_page(pte);
		list_add(&page->lru, &page_list);
	}
	spin_unlock(&mm->page_table_lock);
	flush_tlb_range(vma, start, end);
	list_for_each_entry_safe(page, tmp, &page_list, lru) {
		list_del(&page->lru);
		put_page(page);
	}
}

void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end)
{
	/*
	 * It is undesirable to test vma->vm_file as it should be non-null
	 * for a valid hugetlb area.  However, vm_file will be NULL in the error
	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
	 * to clean up. Since no pte has actually been set up, it is safe to
	 * do nothing in this case.
	 */
	if (vma->vm_file) {
		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
		__unmap_hugepage_range(vma, start, end);
		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
	}
}

static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, pte_t pte)
{
	struct page *old_page, *new_page;
	int avoidcopy;

	old_page = pte_page(pte);

	/* If no-one else is actually using this page, avoid the copy
	 * and just make the page writable */
	avoidcopy = (page_count(old_page) == 1);
	if (avoidcopy) {
		set_huge_ptep_writable(vma, address, ptep);
		return VM_FAULT_MINOR;
	}

	page_cache_get(old_page);
	new_page = alloc_huge_page(vma, address);

	if (!new_page) {
		page_cache_release(old_page);
		return VM_FAULT_OOM;
	}

	spin_unlock(&mm->page_table_lock);
	copy_huge_page(new_page, old_page, address, vma);
	spin_lock(&mm->page_table_lock);

	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
	if (likely(pte_same(*ptep, pte))) {
		/* Break COW */
		set_huge_pte_at(mm, address, ptep,
				make_huge_pte(vma, new_page, 1));
		/* Make the old page be freed below */
		new_page = old_page;
	}
	page_cache_release(new_page);
	page_cache_release(old_page);
	return VM_FAULT_MINOR;
}

int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, int write_access)
{
	int ret = VM_FAULT_SIGBUS;
	unsigned long idx;
	unsigned long size;
	struct page *page;
	struct address_space *mapping;
	pte_t new_pte;

	mapping = vma->vm_file->f_mapping;
	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));

	/*
	 * Use page lock to guard against racing truncation
	 * before we get page_table_lock.
	 */
retry:
	page = find_lock_page(mapping, idx);
	if (!page) {
		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
		if (idx >= size)
			goto out;
		if (hugetlb_get_quota(mapping))
			goto out;
		page = alloc_huge_page(vma, address);
		if (!page) {
			hugetlb_put_quota(mapping);
			ret = VM_FAULT_OOM;
			goto out;
		}
		clear_huge_page(page, address);

		if (vma->vm_flags & VM_SHARED) {
			int err;

			err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
			if (err) {
				put_page(page);
				hugetlb_put_quota(mapping);
				if (err == -EEXIST)
					goto retry;
				goto out;
			}
		} else
			lock_page(page);
	}

	spin_lock(&mm->page_table_lock);
	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
	if (idx >= size)
		goto backout;

	ret = VM_FAULT_MINOR;
	if (!pte_none(*ptep))
		goto backout;

	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
				&& (vma->vm_flags & VM_SHARED)));
	set_huge_pte_at(mm, address, ptep, new_pte);

	if (write_access && !(vma->vm_flags & VM_SHARED)) {
		/* Optimization, do the COW without a second fault */
		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
	}

	spin_unlock(&mm->page_table_lock);
	unlock_page(page);
out:
	return ret;

backout:
	spin_unlock(&mm->page_table_lock);
	hugetlb_put_quota(mapping);
	unlock_page(page);
	put_page(page);
	goto out;
}

int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
{
	pte_t *ptep;
	pte_t entry;
	int ret;
	static DEFINE_MUTEX(hugetlb_instantiation_mutex);

	ptep = huge_pte_alloc(mm, address);
	if (!ptep)
		return VM_FAULT_OOM;

	/*
	 * Serialize hugepage allocation and instantiation, so that we don't
	 * get spurious allocation failures if two CPUs race to instantiate
	 * the same page in the page cache.
	 */
	mutex_lock(&hugetlb_instantiation_mutex);
	entry = *ptep;
	if (pte_none(entry)) {
		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
		mutex_unlock(&hugetlb_instantiation_mutex);
		return ret;
	}

	ret = VM_FAULT_MINOR;

	spin_lock(&mm->page_table_lock);
	/* Check for a racing update before calling hugetlb_cow */
	if (likely(pte_same(entry, *ptep)))
		if (write_access && !pte_write(entry))
			ret = hugetlb_cow(mm, vma, address, ptep, entry);
	spin_unlock(&mm->page_table_lock);
	mutex_unlock(&hugetlb_instantiation_mutex);

	return ret;
}

int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i)
{
	unsigned long pfn_offset;
	unsigned long vaddr = *position;
	int remainder = *length;

	spin_lock(&mm->page_table_lock);
	while (vaddr < vma->vm_end && remainder) {
		pte_t *pte;
		struct page *page;

		/*
		 * Some archs (sparc64, sh*) have multiple pte_ts to
		 * each hugepage.  We have to make sure we get the
		 * first, for the page indexing below to work.
		 */
		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);

		if (!pte || pte_none(*pte)) {
			int ret;

			spin_unlock(&mm->page_table_lock);
			ret = hugetlb_fault(mm, vma, vaddr, 0);
			spin_lock(&mm->page_table_lock);
			if (ret == VM_FAULT_MINOR)
				continue;

			remainder = 0;
			if (!i)
				i = -EFAULT;
			break;
		}

		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
		page = pte_page(*pte);
same_page:
		if (pages) {
			get_page(page);
			pages[i] = page + pfn_offset;
		}

		if (vmas)
			vmas[i] = vma;

		vaddr += PAGE_SIZE;
		++pfn_offset;
		--remainder;
		++i;
		if (vaddr < vma->vm_end && remainder &&
				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
			/*
			 * We use pfn_offset to avoid touching the pageframes
			 * of this compound page.
			 */
			goto same_page;
		}
	}
	spin_unlock(&mm->page_table_lock);
	*length = remainder;
	*position = vaddr;

	return i;
}

void hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long start = address;
	pte_t *ptep;
	pte_t pte;

	BUG_ON(address >= end);
	flush_cache_range(vma, address, end);

	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
	spin_lock(&mm->page_table_lock);
	for (; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;
		if (huge_pmd_unshare(mm, &address, ptep))
			continue;
		if (!pte_none(*ptep)) {
			pte = huge_ptep_get_and_clear(mm, address, ptep);
			pte = pte_mkhuge(pte_modify(pte, newprot));
			set_huge_pte_at(mm, address, ptep, pte);
			lazy_mmu_prot_update(pte);
		}
	}
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);

	flush_tlb_range(vma, start, end);
}

struct file_region {
	struct list_head link;
	long from;
	long to;
};

static long region_add(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg, *trg;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;

	/* Check for and consume any regions we now overlap with. */
	nrg = rg;
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			break;

		/* If this area reaches higher then extend our area to
		 * include it completely.  If this is not the first area
		 * which we intend to reuse, free it. */
		if (rg->to > t)
			t = rg->to;
		if (rg != nrg) {
			list_del(&rg->link);
			kfree(rg);
		}
	}
	nrg->from = f;
	nrg->to = t;
	return 0;
}

static long region_chg(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg;
	long chg = 0;

	/* Locate the region we are before or in. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* If we are below the current region then a new region is required.
	 * Subtle, allocate a new region at the position but make it zero
	 * size such that we can guarantee to record the reservation. */
	if (&rg->link == head || t < rg->from) {
		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
		if (nrg == 0)
			return -ENOMEM;
		nrg->from = f;
		nrg->to = f;
		INIT_LIST_HEAD(&nrg->link);
		list_add(&nrg->link, rg->link.prev);

		return t - f;
	}

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;
	chg = t - f;

	/* Check for and consume any regions we now overlap with. */
	list_for_each_entry(rg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			return chg;

		/* We overlap with this area, if it extends further than
		 * us then we must extend ourselves.  Account for its
		 * existing reservation. */
		if (rg->to > t) {
			chg += rg->to - t;
			t = rg->to;
		}
		chg -= rg->to - rg->from;
	}
	return chg;
}

static long region_truncate(struct list_head *head, long end)
{
	struct file_region *rg, *trg;
	long chg = 0;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (end <= rg->to)
			break;
	if (&rg->link == head)
		return 0;

	/* If we are in the middle of a region then adjust it. */
	if (end > rg->from) {
		chg = rg->to - end;
		rg->to = end;
		rg = list_entry(rg->link.next, typeof(*rg), link);
	}

	/* Drop any remaining regions. */
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		chg += rg->to - rg->from;
		list_del(&rg->link);
		kfree(rg);
	}
	return chg;
}

static int hugetlb_acct_memory(long delta)
{
	int ret = -ENOMEM;

	spin_lock(&hugetlb_lock);
	if ((delta + resv_huge_pages) <= free_huge_pages) {
		resv_huge_pages += delta;
		ret = 0;
	}
	spin_unlock(&hugetlb_lock);
	return ret;
}

int hugetlb_reserve_pages(struct inode *inode, long from, long to)
{
	long ret, chg;

	chg = region_chg(&inode->i_mapping->private_list, from, to);
	if (chg < 0)
		return chg;
	ret = hugetlb_acct_memory(chg);
	if (ret < 0)
		return ret;
	region_add(&inode->i_mapping->private_list, from, to);
	return 0;
}

void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
	long chg = region_truncate(&inode->i_mapping->private_list, offset);
	hugetlb_acct_memory(freed - chg);
}