pagewalk.c revision dd78553b5e7a0b34c0b60478d04ee16d8d8f4fa7
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;

	pte = pte_offset_map(pmd, addr);
	for (;;) {
		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		addr += PAGE_SIZE;
		if (addr == end)
			break;
		pte++;
	}

	pte_unmap(pte);
	return err;
}

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	int err = 0;

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		/*
		 * This implies that each ->pmd_entry() handler needs to
		 * know about pmd_trans_huge() pmds (an example handler is
		 * sketched after this function).
		 */
		if (walk->pmd_entry)
			err = walk->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to.
		 */
		if (!walk->pte_entry)
			continue;

		split_huge_page_pmd(walk->mm, pmd);
		if (pmd_none_or_clear_bad(pmd))
			goto again;
		err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}
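/*
 * Illustrative sketch, not part of pagewalk.c at this revision: a
 * hypothetical ->pmd_entry() handler showing the pmd_trans_huge()
 * awareness referred to above. The handler name and the use of
 * walk->private as a counter are assumptions made for this example;
 * a real handler would also need appropriate locking (typically
 * walk->mm->page_table_lock) before trusting the pmd_trans_huge()
 * result.
 */
static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk)
{
	unsigned long *nr_huge = walk->private;

	/*
	 * A trans_huge pmd maps its whole pmd-sized range with a single
	 * huge page; account for it here and return so the walker leaves
	 * it intact (it only splits when ->pte_entry is also set).
	 */
	if (pmd_trans_huge(*pmd)) {
		(*nr_huge)++;
		return 0;
	}

	/* A regular pmd: nothing to account for at this level. */
	return 0;
}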
static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	int err = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pud_entry)
			err = walk->pud_entry(pud, addr, next, walk);
		if (!err && (walk->pmd_entry || walk->pte_entry))
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}
#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(struct vm_area_struct *vma,
			      unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	pte_t *pte;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask);
		if (pte && walk->hugetlb_entry)
			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
		if (err)
			return err;
	} while (addr = next, addr != end);

	return 0;
}

static struct vm_area_struct *hugetlb_vma(unsigned long addr,
					  struct mm_walk *walk)
{
	struct vm_area_struct *vma;

	/* We don't need a vma lookup at all. */
	if (!walk->hugetlb_entry)
		return NULL;

	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
	vma = find_vma(walk->mm, addr);
	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
		return vma;

	return NULL;
}

#else /* CONFIG_HUGETLB_PAGE */
static struct vm_area_struct *hugetlb_vma(unsigned long addr,
					  struct mm_walk *walk)
{
	return NULL;
}

static int walk_hugetlb_range(struct vm_area_struct *vma,
			      unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */

/**
 * walk_page_range - walk a memory map's page tables with a callback
 * @addr: starting address
 * @end: ending address
 * @walk: set of callbacks to invoke for each level of the tree; the
 *	memory map to walk is taken from walk->mm
 *
 * Recursively walk the page table for the memory area in a VMA,
 * calling supplied callbacks. Callbacks are called in-order (first
 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
 *
 * Each callback receives an entry pointer and the start and end of the
 * associated range, and a copy of the original mm_walk for access to
 * the ->private or ->mm fields.
 *
 * Usually no locks are taken, but splitting a transparent huge page may
 * take the page table lock, and the bottom-level iterator will map PTE
 * directories from highmem if necessary.
 *
 * If any callback returns a non-zero value, the walk is aborted and
 * the return value is propagated back to the caller. Otherwise 0 is returned.
 *
 * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
 * is !NULL. (An illustrative caller is sketched after this function.)
 */
int walk_page_range(unsigned long addr, unsigned long end,
		    struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	if (addr >= end)
		return err;

	if (!walk->mm)
		return -EINVAL;

	pgd = pgd_offset(walk->mm, addr);
	do {
		struct vm_area_struct *vma;

		next = pgd_addr_end(addr, end);

		/*
		 * Handle hugetlb vmas individually because the page table
		 * walk for a hugetlb page depends on the architecture and
		 * we can't handle it in the same manner as non-huge pages.
		 */
		vma = hugetlb_vma(addr, walk);
		if (vma) {
			if (vma->vm_end < next)
				next = vma->vm_end;
			/*
			 * A hugepage is very tightly coupled with its vma,
			 * so walk through hugetlb entries within a given vma.
			 */
			err = walk_hugetlb_range(vma, addr, next, walk);
			if (err)
				break;
			pgd = pgd_offset(walk->mm, next);
			continue;
		}

		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			pgd++;
			continue;
		}
		if (walk->pgd_entry)
			err = walk->pgd_entry(pgd, addr, next, walk);
		if (!err &&
		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
			err = walk_pud_range(pgd, addr, next, walk);
		if (err)
			break;
		pgd++;
	} while (addr = next, addr != end);

	return err;
}
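/*
 * Illustrative usage sketch, not part of pagewalk.c at this revision: a
 * minimal, hypothetical caller that drives walk_page_range() with a
 * ->pte_entry callback counting present PTEs. The names count_pte,
 * count_walk and count_present_ptes are inventions for this example
 * only.
 */
static int count_pte(pte_t *pte, unsigned long addr, unsigned long end,
		     struct mm_walk *walk)
{
	unsigned long *nr_present = walk->private;

	if (pte_present(*pte))
		(*nr_present)++;
	return 0;	/* returning non-zero would abort the walk */
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start,
					unsigned long end)
{
	unsigned long nr_present = 0;
	struct mm_walk count_walk = {
		.pte_entry	= count_pte,
		.mm		= mm,
		.private	= &nr_present,
	};

	/*
	 * Hold mmap_sem for read so the vmas and page tables stay
	 * stable while we walk them.
	 */
	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &count_walk);
	up_read(&mm->mmap_sem);

	return nr_present;
}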