truncate.c revision 887ed2f3aecde2ff24e06666932dc5f144745044
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	akpm@zip.com.au
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/buffer_head.h>	/* grr. try_to_release_page,
				   do_invalidatepage */


/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: the byte offset within the page of the truncation point
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned long offset)
{
	void (*invalidatepage)(struct page *, unsigned long);
	invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
	if (!invalidatepage)
		invalidatepage = block_invalidatepage;
#endif
	if (invalidatepage)
		(*invalidatepage)(page, offset);
}
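
/*
 * Illustrative sketch (not part of this file): a block-backed filesystem
 * normally routes the callback above through its address_space_operations.
 * A hypothetical "myfs" that uses buffer_heads could simply point
 * ->invalidatepage at block_invalidatepage, which is also what
 * do_invalidatepage() falls back to when the hook is NULL:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.readpage	= myfs_readpage,
 *		.writepage	= myfs_writepage,
 *		.invalidatepage	= block_invalidatepage,
 *	};
 *
 * myfs_readpage/myfs_writepage are assumed names used only for the sketch;
 * block_invalidatepage is the generic buffer_head helper.
 */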

static inline void truncate_partial_page(struct page *page, unsigned partial)
{
	memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
	if (PagePrivate(page))
		do_invalidatepage(page, partial);
}

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes anonymous.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_nopage().
 *
 * We need to bale out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_inode_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void
truncate_complete_page(struct address_space *mapping, struct page *page)
{
	if (page->mapping != mapping)
		return;

	if (PagePrivate(page))
		do_invalidatepage(page, 0);

	clear_page_dirty(page);
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	remove_from_page_cache(page);
	page_cache_release(page);	/* pagecache ref */
}

/*
 * This is for invalidate_inode_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
	int ret;

	if (page->mapping != mapping)
		return 0;

	if (PagePrivate(page) && !try_to_release_page(page, 0))
		return 0;

	ret = remove_mapping(mapping, page);
	ClearPageUptodate(page);

	return ret;
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by
 * start and end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate
 *
 * Truncate the page cache, removing the pages that are between
 * the specified offsets (and zeroing out the partial page
 * if lstart is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	pgoff_t end;
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t next;
	int i;

	if (mapping->nrpages == 0)
		return;

	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
	end = (lend >> PAGE_CACHE_SHIFT);

	pagevec_init(&pvec, 0);
	next = start;
	while (next <= end &&
	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;

			if (page_index > end) {
				next = page_index;
				break;
			}

			if (page_index > next)
				next = page_index;
			next++;
			if (TestSetPageLocked(page))
				continue;
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (partial) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			wait_on_page_writeback(page);
			truncate_partial_page(page, partial);
			unlock_page(page);
			page_cache_release(page);
		}
	}

	next = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}
		if (pvec.pages[0]->index > end) {
			pagevec_release(&pvec);
			break;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end)
				break;
			lock_page(page);
			wait_on_page_writeback(page);
			if (page->index > next)
				next = page->index;
			next++;
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}
}
EXPORT_SYMBOL(truncate_inode_pages_range);
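
/*
 * Usage sketch (illustrative, not part of this file): to drop the pagecache
 * backing bytes [64K, 128K) of a hypothetical inode, a caller passes the
 * first byte to remove and the *last* byte of the range; the BUG_ON above
 * requires lend to be the final byte of a page:
 *
 *	loff_t holebegin = 64 * 1024;
 *	loff_t holeend   = 128 * 1024;
 *
 *	truncate_inode_pages_range(inode->i_mapping, holebegin, holeend - 1);
 *
 * "inode", "holebegin" and "holeend" are assumed names for the sketch.
 */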

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);
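
/*
 * Usage sketch (illustrative, not part of this file): the usual caller is a
 * truncate path that has already updated i_size under inode->i_mutex and
 * unmapped the affected range, roughly in the vmtruncate() style:
 *
 *	i_size_write(inode, newsize);
 *	unmap_mapping_range(mapping, newsize + PAGE_SIZE - 1, 0, 1);
 *	truncate_inode_pages(mapping, newsize);
 *
 * The exact ordering and locking are the caller's business; this only shows
 * where truncate_inode_pages() sits in such a sequence.
 */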

/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
				pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next = start;
	unsigned long ret = 0;
	int i;

	pagevec_init(&pvec, 0);
	while (next <= end &&
			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t index;
			int lock_failed;

			lock_failed = TestSetPageLocked(page);

			/*
			 * We really shouldn't be looking at the ->index of an
			 * unlocked page.  But we're not allowed to lock these
			 * pages.  So we rely upon nobody altering the ->index
			 * of this (pinned-by-us) page.
			 */
			index = page->index;
			if (index > next)
				next = index;
			next++;
			if (lock_failed)
				continue;

			if (PageDirty(page) || PageWriteback(page))
				goto unlock;
			if (page_mapped(page))
				goto unlock;
			ret += invalidate_complete_page(mapping, page);
unlock:
			unlock_page(page);
			if (next > end)
				break;
		}
		pagevec_release(&pvec);
	}
	return ret;
}

unsigned long invalidate_inode_pages(struct address_space *mapping)
{
	return invalidate_mapping_pages(mapping, 0, ~0UL);
}
EXPORT_SYMBOL(invalidate_inode_pages);
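
/*
 * Usage sketch (illustrative, not part of this file): a "drop what you
 * safely can" caller, e.g. a POSIX_FADV_DONTNEED-style path, computes page
 * indices from byte offsets and accepts that dirty, locked, mapped or
 * writeback pages will simply be skipped:
 *
 *	pgoff_t first = offset >> PAGE_CACHE_SHIFT;
 *	pgoff_t last  = (offset + len - 1) >> PAGE_CACHE_SHIFT;
 *
 *	invalidate_mapping_pages(mapping, first, last);
 *
 * "offset" and "len" are assumed byte-range variables used only for the
 * sketch; the return value is the number of pages actually invalidated.
 */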

/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_list() has a temp ref on them, or because they're transiently sitting
 * in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
	if (page->mapping != mapping)
		return 0;

	if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
		return 0;

	write_lock_irq(&mapping->tree_lock);
	if (PageDirty(page))
		goto failed;

	BUG_ON(PagePrivate(page));
	__remove_from_page_cache(page);
	write_unlock_irq(&mapping->tree_lock);
	ClearPageUptodate(page);
	page_cache_release(page);	/* pagecache ref */
	return 1;
failed:
	write_unlock_irq(&mapping->tree_lock);
	return 0;
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EIO if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next;
	int i;
	int ret = 0;
	int did_range_unmap = 0;
	int wrapped = 0;

	pagevec_init(&pvec, 0);
	next = start;
	while (next <= end && !ret && !wrapped &&
		pagevec_lookup(&pvec, mapping, next,
			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index;
			int was_dirty;

			lock_page(page);
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			page_index = page->index;
			next = page_index + 1;
			if (next == 0)
				wrapped = 1;
			if (page_index > end) {
				unlock_page(page);
				break;
			}
			wait_on_page_writeback(page);
			while (page_mapped(page)) {
				if (!did_range_unmap) {
					/*
					 * Zap the rest of the file in one hit.
					 */
					unmap_mapping_range(mapping,
					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
					   (loff_t)(end - page_index + 1)
							<< PAGE_CACHE_SHIFT,
					    0);
					did_range_unmap = 1;
				} else {
					/*
					 * Just zap this page
					 */
					unmap_mapping_range(mapping,
					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
					  PAGE_CACHE_SIZE, 0);
				}
			}
			was_dirty = test_clear_page_dirty(page);
			if (!invalidate_complete_page2(mapping, page)) {
				if (was_dirty)
					set_page_dirty(page);
				ret = -EIO;
			}
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	WARN_ON_ONCE(ret);
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EIO if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
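
/*
 * Usage sketch (illustrative, not part of this file): the classic caller is
 * a direct-IO write path that must keep the pagecache coherent with data
 * that bypassed it, invalidating the byte range it is about to write (or has
 * just written):
 *
 *	pgoff_t first = offset >> PAGE_CACHE_SHIFT;
 *	pgoff_t last  = (offset + count - 1) >> PAGE_CACHE_SHIFT;
 *	int err;
 *
 *	err = invalidate_inode_pages2_range(mapping, first, last);
 *	if (err)
 *		return err;
 *
 * "offset" and "count" are assumed byte-range variables for the sketch only;
 * -EIO here means some pages could not be invalidated.
 */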