/*
 * Lockless get_user_pages_fast for powerpc
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 */
#undef DEBUG

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

#ifdef __HAVE_ARCH_PTE_SPECIAL

/*
 * The performance-critical leaf functions are made noinline, otherwise gcc
 * inlines everything into a single function, which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	pte_t *ptep;

	result = _PAGE_PRESENT|_PAGE_USER;
	if (write)
		result |= _PAGE_RW;
	mask = result | _PAGE_SPECIAL;

	ptep = pte_offset_kernel(&pmd, addr);
	do {
		pte_t pte = *ptep;
		struct page *page;

		if ((pte_val(pte) & mask) != result)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		if (!page_cache_get_speculative(page))
			return 0;
		/*
		 * The PTE may have changed between the initial load and
		 * taking the speculative reference; if so, drop the
		 * reference and fall back to the slow path.
		 */
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(page);
			return 0;
		}
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			return 0;
		if (is_hugepd(pmdp)) {
			if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
					addr, next, write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = *pudp;

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (is_hugepd(pudp)) {
			if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
					addr, next, write, pages, nr))
				return 0;
		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}
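
/*
 * get_user_pages_fast() - pin user pages in memory without taking mmap_sem
 * @start:	starting user address
 * @nr_pages:	number of pages from @start to pin
 * @write:	whether the pages will be written to
 * @pages:	array that receives pointers to the pinned pages
 *
 * Walks the page tables locklessly with IRQs disabled; on any snag it
 * falls back to get_user_pages() under mmap_sem for the remainder.
 * Returns the number of pages pinned (possibly fewer than requested),
 * or a negative errno if nothing could be pinned.
 */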
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__,
		 start, nr_pages, write ? "write" : "read");

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					start, len)))
		goto slow_irqon;

	pr_devel("  aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr' to avoid large irq-off latency. This
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch
	 * size will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		pr_devel("  %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				goto slow;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();
slow_irqon:
		pr_devel("  slow path ! nr = %d\n", nr);

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
				     (end - start) >> PAGE_SHIFT,
				     write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/*
		 * Have to be a bit careful with return values: if the fast
		 * path pinned some pages, report those even when the slow
		 * path fails outright.
		 */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}

#endif /* __HAVE_ARCH_PTE_SPECIAL */
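
/*
 * Usage sketch (not part of the original file): a minimal, hypothetical
 * in-kernel caller, in the style of the direct-IO users mentioned above.
 * The helper name example_pin_user_buffer() is made up for illustration;
 * the pattern is simply get_user_pages_fast() followed by put_page() on
 * each page it pinned. Kept under "#if 0" so it is never compiled.
 */
#if 0
static int example_pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page **pages)
{
	int i, pinned;

	/* Pin for read-only access (write == 0). */
	pinned = get_user_pages_fast(uaddr, nr_pages, 0, pages);
	if (pinned < 0)
		return pinned;

	/* ... use the pages here, e.g. map them with kmap() ... */

	/* Drop the references taken by get_user_pages_fast(). */
	for (i = 0; i < pinned; i++)
		put_page(pages[i]);

	return (pinned == nr_pages) ? 0 : -EFAULT;
}
#endif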