migrate.c revision 742755a1d8ce2b548428f7aacf1758b4bba50080
/*
 * Memory Migration functionality - linux/mm/migration.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter <clameter@sgi.com>
 */

#include <linux/migrate.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>

#include "internal.h"

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * Isolate one page from the LRU lists. If successful, put it onto
 * the indicated list with an elevated page count.
 *
 * Result:
 * -EBUSY: page not on LRU list
 * 0: page removed from LRU list and added to the specified list.
 */
int isolate_lru_page(struct page *page, struct list_head *pagelist)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);

		spin_lock_irq(&zone->lru_lock);
		if (PageLRU(page)) {
			ret = 0;
			get_page(page);
			ClearPageLRU(page);
			if (PageActive(page))
				del_page_from_active_list(zone, page);
			else
				del_page_from_inactive_list(zone, page);
			list_add_tail(&page->lru, pagelist);
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	return ret;
}

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page().
 */
int migrate_prep(void)
{
	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

static inline void move_to_lru(struct page *page)
{
	if (PageActive(page)) {
		/*
		 * lru_cache_add_active checks that
		 * the PG_active bit is off.
		 */
		ClearPageActive(page);
		lru_cache_add_active(page);
	} else {
		lru_cache_add(page);
	}
	put_page(page);
}

/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
int putback_lru_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;
	int count = 0;

	list_for_each_entry_safe(page, page2, l, lru) {
		list_del(&page->lru);
		move_to_lru(page);
		count++;
	}
	return count;
}

static inline int is_swap_pte(pte_t pte)
{
	return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
}

/*
 * Restore a potential migration pte to a working pte entry
 */
static void remove_migration_pte(struct vm_area_struct *vma,
		struct page *old, struct page *new)
{
	struct mm_struct *mm = vma->vm_mm;
	swp_entry_t entry;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	unsigned long addr = page_address_in_vma(new, vma);

	if (addr == -EFAULT)
		return;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return;

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		return;

	ptep = pte_offset_map(pmd, addr);

	if (!is_swap_pte(*ptep)) {
		pte_unmap(ptep);
		return;
	}

	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);

	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
		goto out;

	get_page(new);
	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
	if (is_write_migration_entry(entry))
		pte = pte_mkwrite(pte);
	set_pte_at(mm, addr, ptep, pte);

	if (PageAnon(new))
		page_add_anon_rmap(new, vma, addr);
	else
		page_add_file_rmap(new);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, pte);
	lazy_mmu_prot_update(pte);

out:
	pte_unmap_unlock(ptep, ptl);
}

/*
 * Note that remove_file_migration_ptes will only work on regular mappings;
 * nonlinear mappings do not use migration entries.
 */
static void remove_file_migration_ptes(struct page *old, struct page *new)
{
	struct vm_area_struct *vma;
	struct address_space *mapping = page_mapping(new);
	struct prio_tree_iter iter;
	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

	if (!mapping)
		return;

	spin_lock(&mapping->i_mmap_lock);

	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
		remove_migration_pte(vma, old, new);

	spin_unlock(&mapping->i_mmap_lock);
}

/*
 * Must hold mmap_sem lock on at least one of the vmas containing
 * the page so that the anon_vma cannot vanish.
 */
static void remove_anon_migration_ptes(struct page *old, struct page *new)
{
	struct anon_vma *anon_vma;
	struct vm_area_struct *vma;
	unsigned long mapping;

	mapping = (unsigned long)new->mapping;

	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
		return;

	/*
	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
	 */
	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
	spin_lock(&anon_vma->lock);

	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
		remove_migration_pte(vma, old, new);

	spin_unlock(&anon_vma->lock);
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
static void remove_migration_ptes(struct page *old, struct page *new)
{
	if (PageAnon(new))
		remove_anon_migration_ptes(old, new);
	else
		remove_file_migration_ptes(old, new);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 *
 * This function is called from do_swap_page().
 */
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	pte_t *ptep, pte;
	spinlock_t *ptl;
	swp_entry_t entry;
	struct page *page;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);

	get_page(page);
	pte_unmap_unlock(ptep, ptl);
	wait_on_page_locked(page);
	put_page(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}
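
/*
 * Illustrative sketch (not part of this file): this is roughly how a fault
 * handler such as do_swap_page() recognizes a migration entry and waits for
 * the migration to complete before retrying the fault. The helper name is a
 * placeholder for this sketch only.
 */
#if 0
static void example_wait_if_migrating(struct mm_struct *mm, pmd_t *pmd,
					unsigned long address, pte_t ptent)
{
	swp_entry_t entry;

	if (!is_swap_pte(ptent))
		return;

	entry = pte_to_swp_entry(ptent);
	if (is_migration_entry(entry))
		/* Sleeps until the page under migration is unlocked. */
		migration_entry_wait(mm, pmd, address);
}
#endif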

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate set.
 */
static int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page)
{
	struct page **radix_pointer;

	if (!mapping) {
		/* Anonymous page */
		if (page_count(page) != 1)
			return -EAGAIN;
		return 0;
	}

	write_lock_irq(&mapping->tree_lock);

	radix_pointer = (struct page **)radix_tree_lookup_slot(
						&mapping->page_tree,
						page_index(page));

	if (page_count(page) != 2 + !!PagePrivate(page) ||
			*radix_pointer != page) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 */
	get_page(newpage);
#ifdef CONFIG_SWAP
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}
#endif

	*radix_pointer = newpage;
	__put_page(page);
	write_unlock_irq(&mapping->tree_lock);

	return 0;
}

/*
 * Copy the page to its new location
 */
static void migrate_page_copy(struct page *newpage, struct page *page)
{
	copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (PageActive(page))
		SetPageActive(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		set_page_dirty(newpage);
	}

#ifdef CONFIG_SWAP
	ClearPageSwapCache(page);
#endif
	ClearPageActive(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page->mapping = NULL;

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}

/************************************************************
 *                    Migration functions
 ***********************************************************/

/* Always fail migration. Used for mappings that are not movable */
int fail_migrate_page(struct address_space *mapping,
			struct page *newpage, struct page *page)
{
	return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page);

	if (rc)
		return rc;

	migrate_page_copy(newpage, page);
	return 0;
}
EXPORT_SYMBOL(migrate_page);

/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page)
{
	struct buffer_head *bh, *head;
	int rc;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page);

	head = page_buffers(page);

	rc = migrate_page_move_mapping(mapping, newpage, page);

	if (rc)
		return rc;

	bh = head;
	do {
		get_bh(bh);
		lock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);
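
/*
 * Illustrative sketch (not part of this file): a block-based filesystem
 * typically wires the exported helpers above into its
 * address_space_operations so the VM can relocate its page-cache pages.
 * The example_* names below are placeholders, not real functions.
 */
#if 0
static const struct address_space_operations example_aops = {
	.readpage	= example_readpage,
	.writepage	= example_writepage,
	/* Pages carrying buffer_heads are moved by buffer_migrate_page(). */
	.migratepage	= buffer_migrate_page,
};
#endif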

/*
 * Write back a page to clean its dirty state
 */
static int writeout(struct address_space *mapping, struct page *page)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = 1,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.nonblocking = 1,
		.for_reclaim = 1
	};
	int rc;

	if (!mapping->a_ops->writepage)
		/* No write method for the address space */
		return -EINVAL;

	if (!clear_page_dirty_for_io(page))
		/* Someone else already triggered a write */
		return -EAGAIN;

	/*
	 * A dirty page may imply that the underlying filesystem has
	 * the page on some queue. So the page must be clean for
	 * migration. Writeout may mean we lose the lock and the
	 * page state is no longer what we checked for earlier.
	 * At this point we know that the migration attempt cannot
	 * be successful.
	 */
	remove_migration_ptes(page, page);

	rc = mapping->a_ops->writepage(page, &wbc);
	if (rc < 0)
		/* I/O Error writing */
		return -EIO;

	if (rc != AOP_WRITEPAGE_ACTIVATE)
		/* unlocked. Relock */
		lock_page(page);

	return -EAGAIN;
}

/*
 * Default handling if a filesystem does not provide a migration function.
 */
static int fallback_migrate_page(struct address_space *mapping,
	struct page *newpage, struct page *page)
{
	if (PageDirty(page))
		return writeout(mapping, page);

	/*
	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
	 */
	if (page_has_buffers(page) &&
	    !try_to_release_page(page, GFP_KERNEL))
		return -EAGAIN;

	return migrate_page(mapping, newpage, page);
}

/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 */
static int move_to_new_page(struct page *newpage, struct page *page)
{
	struct address_space *mapping;
	int rc;

	/*
	 * Block others from accessing the page when we get around to
	 * establishing additional references. We are the only one
	 * holding a reference to the new page at this point.
	 */
	if (TestSetPageLocked(newpage))
		BUG();

	/* Prepare mapping for the new page. */
	newpage->index = page->index;
	newpage->mapping = page->mapping;

	mapping = page_mapping(page);
	if (!mapping)
		rc = migrate_page(mapping, newpage, page);
	else if (mapping->a_ops->migratepage)
		/*
		 * Most pages have a mapping and most filesystems
		 * should provide a migration function. Anonymous
		 * pages are part of swap space which also has its
		 * own migration function. This is the most common
		 * path for page migration.
		 */
		rc = mapping->a_ops->migratepage(mapping,
						newpage, page);
	else
		rc = fallback_migrate_page(mapping, newpage, page);

	if (!rc)
		remove_migration_ptes(page, newpage);
	else
		newpage->mapping = NULL;

	unlock_page(newpage);

	return rc;
}

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
			struct page *page, int force)
{
	int rc = 0;
	int *result = NULL;
	struct page *newpage = get_new_page(page, private, &result);

	if (!newpage)
		return -ENOMEM;

	if (page_count(page) == 1)
		/* page was freed from under us. So we are done. */
		goto move_newpage;

	rc = -EAGAIN;
	if (TestSetPageLocked(page)) {
		if (!force)
			goto move_newpage;
		lock_page(page);
	}

	if (PageWriteback(page)) {
		if (!force)
			goto unlock;
		wait_on_page_writeback(page);
	}

	/*
	 * Establish migration ptes or remove ptes
	 */
	if (try_to_unmap(page, 1) != SWAP_FAIL) {
		if (!page_mapped(page))
			rc = move_to_new_page(newpage, page);
	} else
		/* A vma has VM_LOCKED set -> permanent failure */
		rc = -EPERM;

	if (rc)
		remove_migration_ptes(page, page);
unlock:
	unlock_page(page);

	if (rc != -EAGAIN) {
		/*
		 * A page that has been migrated has all references
		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be
		 * restored.
		 */
		list_del(&page->lru);
		move_to_lru(page);
	}

move_newpage:
	/*
	 * Move the new page to the LRU. If migration was not successful
	 * then this will free the page.
	 */
	move_to_lru(newpage);
	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(newpage);
	}
	return rc;
}

/*
 * migrate_pages
 *
 * The function takes one list of pages to migrate and a function
 * that determines from the page to be migrated and the private data
 * the target of the move and allocates the page.
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because the list has become empty
 * or no retryable pages exist anymore. All pages will be
 * returned to the LRU or freed.
 *
 * Return: Number of pages not migrated or error code.
 */
int migrate_pages(struct list_head *from,
		new_page_t get_new_page, unsigned long private)
{
	int retry = 1;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

	for(pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
			cond_resched();

			rc = unmap_and_move(get_new_page, private,
						page, pass > 2);

			switch(rc) {
			case -ENOMEM:
				goto out;
			case -EAGAIN:
				retry++;
				break;
			case 0:
				break;
			default:
				/* Permanent failure */
				nr_failed++;
				break;
			}
		}
	}
	rc = 0;
out:
	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	putback_lru_pages(from);

	if (rc)
		return rc;

	return nr_failed + retry;
}
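
/*
 * Illustrative sketch (not part of this file): a minimal in-kernel user of
 * the interface above. It isolates a single page and asks migrate_pages()
 * to move it to node "nid" via an allocation callback, mirroring the
 * pattern used by do_move_pages() below. The example_* names are
 * placeholders for this sketch only.
 */
#if 0
static struct page *example_new_page(struct page *page, unsigned long nid,
					int **result)
{
	/* Per-page status reporting (*result) is not used in this sketch. */
	return alloc_pages_node((int)nid, GFP_HIGHUSER, 0);
}

static int example_migrate_page_to_node(struct page *page, int nid)
{
	LIST_HEAD(pagelist);

	migrate_prep();			/* drain the pagevecs first */
	if (isolate_lru_page(page, &pagelist))
		return -EBUSY;		/* page was not on the LRU */

	/*
	 * migrate_pages() frees pages it moved and puts the rest back on
	 * the LRU; it returns the number of pages it could not migrate.
	 */
	return migrate_pages(&pagelist, example_new_page, (unsigned long)nid);
}
#endif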

#ifdef CONFIG_NUMA
/*
 * Move a list of individual pages
 */
struct page_to_node {
	unsigned long addr;
	struct page *page;
	int node;
	int status;
};

static struct page *new_page_node(struct page *p, unsigned long private,
		int **result)
{
	struct page_to_node *pm = (struct page_to_node *)private;

	while (pm->node != MAX_NUMNODES && pm->page != p)
		pm++;

	if (pm->node == MAX_NUMNODES)
		return NULL;

	*result = &pm->status;

	return alloc_pages_node(pm->node, GFP_HIGHUSER, 0);
}

/*
 * Move a set of pages as indicated in the pm array. The addr
 * field must be set to the virtual address of the page to be moved
 * and the node number must contain a valid target node.
 */
static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
				int migrate_all)
{
	int err;
	struct page_to_node *pp;
	LIST_HEAD(pagelist);

	down_read(&mm->mmap_sem);

	/*
	 * Build a list of pages to migrate
	 */
	migrate_prep();
	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
		struct vm_area_struct *vma;
		struct page *page;

		/*
		 * A valid page pointer that will not match any of the
		 * pages that will be moved.
		 */
		pp->page = ZERO_PAGE(0);

		err = -EFAULT;
		vma = find_vma(mm, pp->addr);
		if (!vma)
			goto set_status;

		page = follow_page(vma, pp->addr, FOLL_GET);
		err = -ENOENT;
		if (!page)
			goto set_status;

		if (PageReserved(page))		/* Check for zero page */
			goto put_and_set;

		pp->page = page;
		err = page_to_nid(page);

		if (err == pp->node)
			/*
			 * Node already in the right place
			 */
			goto put_and_set;

		err = -EACCES;
		if (page_mapcount(page) > 1 &&
				!migrate_all)
			goto put_and_set;

		err = isolate_lru_page(page, &pagelist);
put_and_set:
		/*
		 * Either remove the duplicate refcount from
		 * isolate_lru_page() or drop the page ref if it was
		 * not isolated.
		 */
		put_page(page);
set_status:
		pp->status = err;
	}

	if (!list_empty(&pagelist))
		err = migrate_pages(&pagelist, new_page_node,
				(unsigned long)pm);
	else
		err = -ENOENT;

	up_read(&mm->mmap_sem);
	return err;
}

/*
 * Determine the nodes of a list of pages. The addr in the pm array
 * must have been set to the virtual address of the page for which we
 * want to determine the node number.
 */
static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
{
	down_read(&mm->mmap_sem);

	for ( ; pm->node != MAX_NUMNODES; pm++) {
		struct vm_area_struct *vma;
		struct page *page;
		int err;

		err = -EFAULT;
		vma = find_vma(mm, pm->addr);
		if (!vma)
			goto set_status;

		page = follow_page(vma, pm->addr, 0);
		err = -ENOENT;
		/* Use PageReserved to check for zero page */
		if (!page || PageReserved(page))
			goto set_status;

		err = page_to_nid(page);
set_status:
		pm->status = err;
	}

	up_read(&mm->mmap_sem);
	return 0;
}

/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
			const void __user * __user *pages,
			const int __user *nodes,
			int __user *status, int flags)
{
	int err = 0;
	int i;
	struct task_struct *task;
	nodemask_t task_nodes;
	struct mm_struct *mm;
	struct page_to_node *pm = NULL;

	/* Check flags */
	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
		return -EINVAL;

	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	/* Find the mm_struct */
	read_lock(&tasklist_lock);
	task = pid ? find_task_by_pid(pid) : current;
	if (!task) {
		read_unlock(&tasklist_lock);
		return -ESRCH;
	}
	mm = get_task_mm(task);
	read_unlock(&tasklist_lock);

	if (!mm)
		return -EINVAL;

	/*
	 * Check if this process has the right to modify the specified
	 * process. The right exists if the process has administrative
	 * capabilities, superuser privileges or the same
	 * userid as the target process.
	 */
	if ((current->euid != task->suid) && (current->euid != task->uid) &&
	    (current->uid != task->suid) && (current->uid != task->uid) &&
	    !capable(CAP_SYS_NICE)) {
		err = -EPERM;
		goto out2;
	}

	task_nodes = cpuset_mems_allowed(task);

	/* Limit nr_pages so that the multiplication cannot overflow */
	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
		err = -E2BIG;
		goto out2;
	}

	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
	if (!pm) {
		err = -ENOMEM;
		goto out2;
	}

	/*
	 * Get parameters from user space and initialize the pm
	 * array. Return various errors if the user did something wrong.
	 */
	for (i = 0; i < nr_pages; i++) {
		const void *p;

		err = -EFAULT;
		if (get_user(p, pages + i))
			goto out;

		pm[i].addr = (unsigned long)p;
		if (nodes) {
			int node;

			if (get_user(node, nodes + i))
				goto out;

			err = -ENODEV;
			if (!node_online(node))
				goto out;

			err = -EACCES;
			if (!node_isset(node, task_nodes))
				goto out;

			pm[i].node = node;
		}
	}
	/* End marker */
	pm[nr_pages].node = MAX_NUMNODES;

	if (nodes)
		err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
	else
		err = do_pages_stat(mm, pm);

	if (err >= 0)
		/* Return status information */
		for (i = 0; i < nr_pages; i++)
			if (put_user(pm[i].status, status + i))
				err = -EFAULT;

out:
	vfree(pm);
out2:
	mmput(mm);
	return err;
}
#endif
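
/*
 * Illustrative userspace sketch (not part of this kernel file): exercising
 * the sys_move_pages() syscall defined above directly via syscall(2). It
 * assumes the installed kernel headers define __NR_move_pages for the
 * target architecture; NUMA-topology and build details are omitted.
 */
#if 0
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	unsigned long nr = 4;
	void *pages[4];
	int nodes[4], status[4];
	unsigned long i;
	char *buf;

	/* Map and touch a few anonymous pages so they actually exist. */
	buf = mmap(NULL, nr * psz, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 1, nr * psz);

	for (i = 0; i < nr; i++) {
		pages[i] = buf + i * psz;
		nodes[i] = 0;		/* ask for node 0 (assumed online) */
	}

	/* pid 0 means the calling process; flags 0 is accepted above. */
	if (syscall(__NR_move_pages, 0, nr, pages, nodes, status, 0) < 0) {
		perror("move_pages");
		return 1;
	}

	/* Passing nodes == NULL instead queries each page's current node. */
	syscall(__NR_move_pages, 0, nr, pages, NULL, status, 0);
	for (i = 0; i < nr; i++)
		printf("page %lu is on node %d\n", i, status[i]);
	return 0;
}
#endif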