shmem.c revision bde05d1ccd512696b09db9dd2e5f33ad19152605
1/*
2 * Resizable virtual memory filesystem for Linux.
3 *
4 * Copyright (C) 2000 Linus Torvalds.
5 *		 2000 Transmeta Corp.
6 *		 2000-2001 Christoph Rohland
7 *		 2000-2001 SAP AG
8 *		 2002 Red Hat Inc.
9 * Copyright (C) 2002-2011 Hugh Dickins.
10 * Copyright (C) 2011 Google Inc.
11 * Copyright (C) 2002-2005 VERITAS Software Corporation.
12 * Copyright (C) 2004 Andi Kleen, SuSE Labs
13 *
14 * Extended attribute support for tmpfs:
15 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
16 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
17 *
18 * tiny-shmem:
19 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
20 *
21 * This file is released under the GPL.
22 */
23
24#include <linux/fs.h>
25#include <linux/init.h>
26#include <linux/vfs.h>
27#include <linux/mount.h>
28#include <linux/pagemap.h>
29#include <linux/file.h>
30#include <linux/mm.h>
31#include <linux/export.h>
32#include <linux/swap.h>
33
34static struct vfsmount *shm_mnt;
35
36#ifdef CONFIG_SHMEM
37/*
38 * This virtual memory filesystem is heavily based on the ramfs. It
39 * extends ramfs with the ability to use swap and honor resource limits,
40 * which makes it a completely usable filesystem.
41 */
42
43#include <linux/xattr.h>
44#include <linux/exportfs.h>
45#include <linux/posix_acl.h>
46#include <linux/generic_acl.h>
47#include <linux/mman.h>
48#include <linux/string.h>
49#include <linux/slab.h>
50#include <linux/backing-dev.h>
51#include <linux/shmem_fs.h>
52#include <linux/writeback.h>
53#include <linux/blkdev.h>
54#include <linux/pagevec.h>
55#include <linux/percpu_counter.h>
56#include <linux/splice.h>
57#include <linux/security.h>
58#include <linux/swapops.h>
59#include <linux/mempolicy.h>
60#include <linux/namei.h>
61#include <linux/ctype.h>
62#include <linux/migrate.h>
63#include <linux/highmem.h>
64#include <linux/seq_file.h>
65#include <linux/magic.h>
66
67#include <asm/uaccess.h>
68#include <asm/pgtable.h>
69
70#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
71#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
72
73/* Pretend that each entry is of this size in directory's i_size */
74#define BOGO_DIRENT_SIZE 20
75
76/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
77#define SHORT_SYMLINK_LEN 128
78
79struct shmem_xattr {
80	struct list_head list;	/* anchored by shmem_inode_info->xattr_list */
81	char *name;		/* xattr name */
82	size_t size;
83	char value[0];
84};
85
86/* Flag allocation requirements to shmem_getpage */
87enum sgp_type {
88	SGP_READ,	/* don't exceed i_size, don't allocate page */
89	SGP_CACHE,	/* don't exceed i_size, may allocate page */
90	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
91	SGP_WRITE,	/* may exceed i_size, may allocate page */
92};
93
94#ifdef CONFIG_TMPFS
95static unsigned long shmem_default_max_blocks(void)
96{
97	return totalram_pages / 2;
98}
99
100static unsigned long shmem_default_max_inodes(void)
101{
102	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
103}
104#endif
105
106static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
107static int shmem_replace_page(struct page **pagep, gfp_t gfp,
108				struct shmem_inode_info *info, pgoff_t index);
109static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
110	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
111
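/*
 * Convenience wrapper: call shmem_getpage_gfp() with the mapping's default
 * gfp mask; used by the fault, read, write and symlink paths below.
 */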
112static inline int shmem_getpage(struct inode *inode, pgoff_t index,
113	struct page **pagep, enum sgp_type sgp, int *fault_type)
114{
115	return shmem_getpage_gfp(inode, index, pagep, sgp,
116			mapping_gfp_mask(inode->i_mapping), fault_type);
117}
118
119static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
120{
121	return sb->s_fs_info;
122}
123
124/*
125 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
126 * for shared memory and for shared anonymous (/dev/zero) mappings
127 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
128 * consistent with the pre-accounting of private mappings ...
129 */
130static inline int shmem_acct_size(unsigned long flags, loff_t size)
131{
132	return (flags & VM_NORESERVE) ?
133		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
134}
135
136static inline void shmem_unacct_size(unsigned long flags, loff_t size)
137{
138	if (!(flags & VM_NORESERVE))
139		vm_unacct_memory(VM_ACCT(size));
140}
141
142/*
143 * ... whereas tmpfs objects are accounted incrementally as
144 * pages are allocated, in order to allow huge sparse files.
145 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
146 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
147 */
148static inline int shmem_acct_block(unsigned long flags)
149{
150	return (flags & VM_NORESERVE) ?
151		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
152}
153
154static inline void shmem_unacct_blocks(unsigned long flags, long pages)
155{
156	if (flags & VM_NORESERVE)
157		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
158}
159
160static const struct super_operations shmem_ops;
161static const struct address_space_operations shmem_aops;
162static const struct file_operations shmem_file_operations;
163static const struct inode_operations shmem_inode_operations;
164static const struct inode_operations shmem_dir_inode_operations;
165static const struct inode_operations shmem_special_inode_operations;
166static const struct vm_operations_struct shmem_vm_ops;
167
168static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
169	.ra_pages	= 0,	/* No readahead */
170	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
171};
172
173static LIST_HEAD(shmem_swaplist);
174static DEFINE_MUTEX(shmem_swaplist_mutex);
175
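/*
 * Reserve one inode against sbinfo->free_inodes when the mount has a finite
 * inode limit; shmem_free_inode() gives the reservation back.
 */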
176static int shmem_reserve_inode(struct super_block *sb)
177{
178	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
179	if (sbinfo->max_inodes) {
180		spin_lock(&sbinfo->stat_lock);
181		if (!sbinfo->free_inodes) {
182			spin_unlock(&sbinfo->stat_lock);
183			return -ENOSPC;
184		}
185		sbinfo->free_inodes--;
186		spin_unlock(&sbinfo->stat_lock);
187	}
188	return 0;
189}
190
191static void shmem_free_inode(struct super_block *sb)
192{
193	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
194	if (sbinfo->max_inodes) {
195		spin_lock(&sbinfo->stat_lock);
196		sbinfo->free_inodes++;
197		spin_unlock(&sbinfo->stat_lock);
198	}
199}
200
201/**
202 * shmem_recalc_inode - recalculate the block usage of an inode
203 * @inode: inode to recalc
204 *
205 * We have to calculate the free blocks since the mm can drop
206 * undirtied hole pages behind our back.
207 *
208 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
209 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
210 *
211 * It has to be called with the spinlock held.
212 */
213static void shmem_recalc_inode(struct inode *inode)
214{
215	struct shmem_inode_info *info = SHMEM_I(inode);
216	long freed;
217
218	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
219	if (freed > 0) {
220		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
221		if (sbinfo->max_blocks)
222			percpu_counter_add(&sbinfo->used_blocks, -freed);
223		info->alloced -= freed;
224		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
225		shmem_unacct_blocks(info->flags, freed);
226	}
227}
228
229/*
230 * Replace item expected in radix tree by a new item, while holding tree lock.
231 */
232static int shmem_radix_tree_replace(struct address_space *mapping,
233			pgoff_t index, void *expected, void *replacement)
234{
235	void **pslot;
236	void *item = NULL;
237
238	VM_BUG_ON(!expected);
239	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
240	if (pslot)
241		item = radix_tree_deref_slot_protected(pslot,
242							&mapping->tree_lock);
243	if (item != expected)
244		return -ENOENT;
245	if (replacement)
246		radix_tree_replace_slot(pslot, replacement);
247	else
248		radix_tree_delete(&mapping->page_tree, index);
249	return 0;
250}
251
252/*
253 * Like add_to_page_cache_locked, but error if expected item has gone.
254 */
255static int shmem_add_to_page_cache(struct page *page,
256				   struct address_space *mapping,
257				   pgoff_t index, gfp_t gfp, void *expected)
258{
259	int error = 0;
260
261	VM_BUG_ON(!PageLocked(page));
262	VM_BUG_ON(!PageSwapBacked(page));
263
264	if (!expected)
265		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
266	if (!error) {
267		page_cache_get(page);
268		page->mapping = mapping;
269		page->index = index;
270
271		spin_lock_irq(&mapping->tree_lock);
272		if (!expected)
273			error = radix_tree_insert(&mapping->page_tree,
274							index, page);
275		else
276			error = shmem_radix_tree_replace(mapping, index,
277							expected, page);
278		if (!error) {
279			mapping->nrpages++;
280			__inc_zone_page_state(page, NR_FILE_PAGES);
281			__inc_zone_page_state(page, NR_SHMEM);
282			spin_unlock_irq(&mapping->tree_lock);
283		} else {
284			page->mapping = NULL;
285			spin_unlock_irq(&mapping->tree_lock);
286			page_cache_release(page);
287		}
288		if (!expected)
289			radix_tree_preload_end();
290	}
291	if (error)
292		mem_cgroup_uncharge_cache_page(page);
293	return error;
294}
295
296/*
297 * Like delete_from_page_cache, but substitutes swap for page.
298 */
299static void shmem_delete_from_page_cache(struct page *page, void *radswap)
300{
301	struct address_space *mapping = page->mapping;
302	int error;
303
304	spin_lock_irq(&mapping->tree_lock);
305	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
306	page->mapping = NULL;
307	mapping->nrpages--;
308	__dec_zone_page_state(page, NR_FILE_PAGES);
309	__dec_zone_page_state(page, NR_SHMEM);
310	spin_unlock_irq(&mapping->tree_lock);
311	page_cache_release(page);
312	BUG_ON(error);
313}
314
315/*
316 * Like find_get_pages, but collecting swap entries as well as pages.
317 */
318static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
319					pgoff_t start, unsigned int nr_pages,
320					struct page **pages, pgoff_t *indices)
321{
322	unsigned int i;
323	unsigned int ret;
324	unsigned int nr_found;
325
326	rcu_read_lock();
327restart:
328	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
329				(void ***)pages, indices, start, nr_pages);
330	ret = 0;
331	for (i = 0; i < nr_found; i++) {
332		struct page *page;
333repeat:
334		page = radix_tree_deref_slot((void **)pages[i]);
335		if (unlikely(!page))
336			continue;
337		if (radix_tree_exception(page)) {
338			if (radix_tree_deref_retry(page))
339				goto restart;
340			/*
341			 * Otherwise, we must be storing a swap entry
342			 * here as an exceptional entry: so return it
343			 * without attempting to raise page count.
344			 */
345			goto export;
346		}
347		if (!page_cache_get_speculative(page))
348			goto repeat;
349
350		/* Has the page moved? */
351		if (unlikely(page != *((void **)pages[i]))) {
352			page_cache_release(page);
353			goto repeat;
354		}
355export:
356		indices[ret] = indices[i];
357		pages[ret] = page;
358		ret++;
359	}
360	if (unlikely(!ret && nr_found))
361		goto restart;
362	rcu_read_unlock();
363	return ret;
364}
365
366/*
367 * Remove swap entry from radix tree, free the swap and its page cache.
368 */
369static int shmem_free_swap(struct address_space *mapping,
370			   pgoff_t index, void *radswap)
371{
372	int error;
373
374	spin_lock_irq(&mapping->tree_lock);
375	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
376	spin_unlock_irq(&mapping->tree_lock);
377	if (!error)
378		free_swap_and_cache(radix_to_swp_entry(radswap));
379	return error;
380}
381
382/*
383 * Pagevec may contain swap entries, so shuffle up pages before releasing.
384 */
385static void shmem_deswap_pagevec(struct pagevec *pvec)
386{
387	int i, j;
388
389	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
390		struct page *page = pvec->pages[i];
391		if (!radix_tree_exceptional_entry(page))
392			pvec->pages[j++] = page;
393	}
394	pvec->nr = j;
395}
396
397/*
398 * SysV IPC SHM_UNLOCK restores Unevictable pages to their evictable lists.
399 */
400void shmem_unlock_mapping(struct address_space *mapping)
401{
402	struct pagevec pvec;
403	pgoff_t indices[PAGEVEC_SIZE];
404	pgoff_t index = 0;
405
406	pagevec_init(&pvec, 0);
407	/*
408	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
409	 */
410	while (!mapping_unevictable(mapping)) {
411		/*
412		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
413		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
414		 */
415		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
416					PAGEVEC_SIZE, pvec.pages, indices);
417		if (!pvec.nr)
418			break;
419		index = indices[pvec.nr - 1] + 1;
420		shmem_deswap_pagevec(&pvec);
421		check_move_unevictable_pages(pvec.pages, pvec.nr);
422		pagevec_release(&pvec);
423		cond_resched();
424	}
425}
426
427/*
428 * Remove range of pages and swap entries from radix tree, and free them.
429 */
430void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
431{
432	struct address_space *mapping = inode->i_mapping;
433	struct shmem_inode_info *info = SHMEM_I(inode);
434	pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
435	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
436	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
437	struct pagevec pvec;
438	pgoff_t indices[PAGEVEC_SIZE];
439	long nr_swaps_freed = 0;
440	pgoff_t index;
441	int i;
442
443	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
444
445	pagevec_init(&pvec, 0);
446	index = start;
447	while (index <= end) {
448		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
449			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
450							pvec.pages, indices);
451		if (!pvec.nr)
452			break;
453		mem_cgroup_uncharge_start();
454		for (i = 0; i < pagevec_count(&pvec); i++) {
455			struct page *page = pvec.pages[i];
456
457			index = indices[i];
458			if (index > end)
459				break;
460
461			if (radix_tree_exceptional_entry(page)) {
462				nr_swaps_freed += !shmem_free_swap(mapping,
463								index, page);
464				continue;
465			}
466
467			if (!trylock_page(page))
468				continue;
469			if (page->mapping == mapping) {
470				VM_BUG_ON(PageWriteback(page));
471				truncate_inode_page(mapping, page);
472			}
473			unlock_page(page);
474		}
475		shmem_deswap_pagevec(&pvec);
476		pagevec_release(&pvec);
477		mem_cgroup_uncharge_end();
478		cond_resched();
479		index++;
480	}
481
482	if (partial) {
483		struct page *page = NULL;
484		shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
485		if (page) {
486			zero_user_segment(page, partial, PAGE_CACHE_SIZE);
487			set_page_dirty(page);
488			unlock_page(page);
489			page_cache_release(page);
490		}
491	}
492
493	index = start;
494	for ( ; ; ) {
495		cond_resched();
496		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
497			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
498							pvec.pages, indices);
499		if (!pvec.nr) {
500			if (index == start)
501				break;
502			index = start;
503			continue;
504		}
505		if (index == start && indices[0] > end) {
506			shmem_deswap_pagevec(&pvec);
507			pagevec_release(&pvec);
508			break;
509		}
510		mem_cgroup_uncharge_start();
511		for (i = 0; i < pagevec_count(&pvec); i++) {
512			struct page *page = pvec.pages[i];
513
514			index = indices[i];
515			if (index > end)
516				break;
517
518			if (radix_tree_exceptional_entry(page)) {
519				nr_swaps_freed += !shmem_free_swap(mapping,
520								index, page);
521				continue;
522			}
523
524			lock_page(page);
525			if (page->mapping == mapping) {
526				VM_BUG_ON(PageWriteback(page));
527				truncate_inode_page(mapping, page);
528			}
529			unlock_page(page);
530		}
531		shmem_deswap_pagevec(&pvec);
532		pagevec_release(&pvec);
533		mem_cgroup_uncharge_end();
534		index++;
535	}
536
537	spin_lock(&info->lock);
538	info->swapped -= nr_swaps_freed;
539	shmem_recalc_inode(inode);
540	spin_unlock(&info->lock);
541
542	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
543}
544EXPORT_SYMBOL_GPL(shmem_truncate_range);
545
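/*
 * Handle attribute changes.  A size change updates i_size, unmaps the
 * affected range, truncates pages and swap beyond the new size, then unmaps
 * again to catch racily COWed private pages; mode changes fall through to
 * generic_acl_chmod() when POSIX ACLs are configured.
 */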
546static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
547{
548	struct inode *inode = dentry->d_inode;
549	int error;
550
551	error = inode_change_ok(inode, attr);
552	if (error)
553		return error;
554
555	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
556		loff_t oldsize = inode->i_size;
557		loff_t newsize = attr->ia_size;
558
559		if (newsize != oldsize) {
560			i_size_write(inode, newsize);
561			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
562		}
563		if (newsize < oldsize) {
564			loff_t holebegin = round_up(newsize, PAGE_SIZE);
565			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
566			shmem_truncate_range(inode, newsize, (loff_t)-1);
567			/* unmap again to remove racily COWed private pages */
568			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
569		}
570	}
571
572	setattr_copy(inode, attr);
573#ifdef CONFIG_TMPFS_POSIX_ACL
574	if (attr->ia_valid & ATTR_MODE)
575		error = generic_acl_chmod(inode);
576#endif
577	return error;
578}
579
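/*
 * Final inode teardown: unaccount the size, truncate away all pages and swap,
 * drop the inode from the swaplist, free any short-symlink buffer and xattrs,
 * and return the reserved inode to the superblock.
 */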
580static void shmem_evict_inode(struct inode *inode)
581{
582	struct shmem_inode_info *info = SHMEM_I(inode);
583	struct shmem_xattr *xattr, *nxattr;
584
585	if (inode->i_mapping->a_ops == &shmem_aops) {
586		shmem_unacct_size(info->flags, inode->i_size);
587		inode->i_size = 0;
588		shmem_truncate_range(inode, 0, (loff_t)-1);
589		if (!list_empty(&info->swaplist)) {
590			mutex_lock(&shmem_swaplist_mutex);
591			list_del_init(&info->swaplist);
592			mutex_unlock(&shmem_swaplist_mutex);
593		}
594	} else
595		kfree(info->symlink);
596
597	list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
598		kfree(xattr->name);
599		kfree(xattr);
600	}
601	BUG_ON(inode->i_blocks);
602	shmem_free_inode(inode->i_sb);
603	clear_inode(inode);
604}
605
606/*
607 * If swap found in inode, free it and move page from swapcache to filecache.
608 */
609static int shmem_unuse_inode(struct shmem_inode_info *info,
610			     swp_entry_t swap, struct page **pagep)
611{
612	struct address_space *mapping = info->vfs_inode.i_mapping;
613	void *radswap;
614	pgoff_t index;
615	gfp_t gfp;
616	int error = 0;
617
618	radswap = swp_to_radix_entry(swap);
619	index = radix_tree_locate_item(&mapping->page_tree, radswap);
620	if (index == -1)
621		return 0;
622
623	/*
624	 * Move _head_ to start search for next from here.
625	 * But be careful: shmem_evict_inode checks list_empty without taking
626	 * mutex, and there's an instant in list_move_tail when info->swaplist
627	 * would appear empty, if it were the only one on shmem_swaplist.
628	 */
629	if (shmem_swaplist.next != &info->swaplist)
630		list_move_tail(&shmem_swaplist, &info->swaplist);
631
632	gfp = mapping_gfp_mask(mapping);
633	if (shmem_should_replace_page(*pagep, gfp)) {
634		mutex_unlock(&shmem_swaplist_mutex);
635		error = shmem_replace_page(pagep, gfp, info, index);
636		mutex_lock(&shmem_swaplist_mutex);
637		/*
638		 * We needed to drop mutex to make that restrictive page
639		 * allocation; but the inode might already be freed by now,
640		 * and we cannot refer to inode or mapping or info to check.
641		 * However, we do hold page lock on the PageSwapCache page,
642		 * so can check if that still has our reference remaining.
643		 */
644		if (!page_swapcount(*pagep))
645			error = -ENOENT;
646	}
647
648	/*
649	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
650	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
651	 * beneath us (pagelock doesn't help until the page is in pagecache).
652	 */
653	if (!error)
654		error = shmem_add_to_page_cache(*pagep, mapping, index,
655						GFP_NOWAIT, radswap);
656	if (error != -ENOMEM) {
657		/*
658		 * Truncation and eviction use free_swap_and_cache(), which
659		 * only does trylock page: if we raced, best clean up here.
660		 */
661		delete_from_swap_cache(*pagep);
662		set_page_dirty(*pagep);
663		if (!error) {
664			spin_lock(&info->lock);
665			info->swapped--;
666			spin_unlock(&info->lock);
667			swap_free(swap);
668		}
669		error = 1;	/* not an error, but entry was found */
670	}
671	return error;
672}
673
674/*
675 * Search through swapped inodes to find and replace swap by page.
676 */
677int shmem_unuse(swp_entry_t swap, struct page *page)
678{
679	struct list_head *this, *next;
680	struct shmem_inode_info *info;
681	int found = 0;
682	int error = 0;
683
684	/*
685	 * There's a faint possibility that swap page was replaced before
686	 * caller locked it: it will come back later with the right page.
687	 */
688	if (unlikely(!PageSwapCache(page)))
689		goto out;
690
691	/*
692	 * Charge page using GFP_KERNEL while we can wait, before taking
693	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
694	 * Charged back to the user (not to caller) when swap account is used.
695	 */
696	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
697	if (error)
698		goto out;
699	/* No radix_tree_preload: swap entry keeps a place for page in tree */
700
701	mutex_lock(&shmem_swaplist_mutex);
702	list_for_each_safe(this, next, &shmem_swaplist) {
703		info = list_entry(this, struct shmem_inode_info, swaplist);
704		if (info->swapped)
705			found = shmem_unuse_inode(info, swap, &page);
706		else
707			list_del_init(&info->swaplist);
708		cond_resched();
709		if (found)
710			break;
711	}
712	mutex_unlock(&shmem_swaplist_mutex);
713
714	if (found < 0)
715		error = found;
716out:
717	unlock_page(page);
718	page_cache_release(page);
719	return error;
720}
721
722/*
723 * Move the page from the page cache to the swap cache.
724 */
725static int shmem_writepage(struct page *page, struct writeback_control *wbc)
726{
727	struct shmem_inode_info *info;
728	struct address_space *mapping;
729	struct inode *inode;
730	swp_entry_t swap;
731	pgoff_t index;
732
733	BUG_ON(!PageLocked(page));
734	mapping = page->mapping;
735	index = page->index;
736	inode = mapping->host;
737	info = SHMEM_I(inode);
738	if (info->flags & VM_LOCKED)
739		goto redirty;
740	if (!total_swap_pages)
741		goto redirty;
742
743	/*
744	 * shmem_backing_dev_info's capabilities prevent regular writeback or
745	 * sync from ever calling shmem_writepage; but a stacking filesystem
746	 * might use ->writepage of its underlying filesystem, in which case
747	 * tmpfs should write out to swap only in response to memory pressure,
748	 * and not for the writeback threads or sync.
749	 */
750	if (!wbc->for_reclaim) {
751		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
752		goto redirty;
753	}
754	swap = get_swap_page();
755	if (!swap.val)
756		goto redirty;
757
758	/*
759	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
760	 * if it's not already there.  Do it now before the page is
761	 * moved to swap cache, when its pagelock no longer protects
762	 * the inode from eviction.  But don't unlock the mutex until
763	 * we've incremented swapped, because shmem_unuse_inode() will
764	 * prune a !swapped inode from the swaplist under this mutex.
765	 */
766	mutex_lock(&shmem_swaplist_mutex);
767	if (list_empty(&info->swaplist))
768		list_add_tail(&info->swaplist, &shmem_swaplist);
769
770	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
771		swap_shmem_alloc(swap);
772		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
773
774		spin_lock(&info->lock);
775		info->swapped++;
776		shmem_recalc_inode(inode);
777		spin_unlock(&info->lock);
778
779		mutex_unlock(&shmem_swaplist_mutex);
780		BUG_ON(page_mapped(page));
781		swap_writepage(page, wbc);
782		return 0;
783	}
784
785	mutex_unlock(&shmem_swaplist_mutex);
786	swapcache_free(swap, NULL);
787redirty:
788	set_page_dirty(page);
789	if (wbc->for_reclaim)
790		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
791	unlock_page(page);
792	return 0;
793}
794
795#ifdef CONFIG_NUMA
796#ifdef CONFIG_TMPFS
797static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
798{
799	char buffer[64];
800
801	if (!mpol || mpol->mode == MPOL_DEFAULT)
802		return;		/* show nothing */
803
804	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
805
806	seq_printf(seq, ",mpol=%s", buffer);
807}
808
809static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
810{
811	struct mempolicy *mpol = NULL;
812	if (sbinfo->mpol) {
813		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
814		mpol = sbinfo->mpol;
815		mpol_get(mpol);
816		spin_unlock(&sbinfo->stat_lock);
817	}
818	return mpol;
819}
820#endif /* CONFIG_TMPFS */
821
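/*
 * Swap in through a pseudo vma carrying the inode's shared mempolicy, so the
 * pages read ahead are allocated according to that policy.
 */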
822static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
823			struct shmem_inode_info *info, pgoff_t index)
824{
825	struct mempolicy mpol, *spol;
826	struct vm_area_struct pvma;
827
828	spol = mpol_cond_copy(&mpol,
829			mpol_shared_policy_lookup(&info->policy, index));
830
831	/* Create a pseudo vma that just contains the policy */
832	pvma.vm_start = 0;
833	pvma.vm_pgoff = index;
834	pvma.vm_ops = NULL;
835	pvma.vm_policy = spol;
836	return swapin_readahead(swap, gfp, &pvma, 0);
837}
838
839static struct page *shmem_alloc_page(gfp_t gfp,
840			struct shmem_inode_info *info, pgoff_t index)
841{
842	struct vm_area_struct pvma;
843
844	/* Create a pseudo vma that just contains the policy */
845	pvma.vm_start = 0;
846	pvma.vm_pgoff = index;
847	pvma.vm_ops = NULL;
848	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
849
850	/*
851	 * alloc_page_vma() will drop the shared policy reference
852	 */
853	return alloc_page_vma(gfp, &pvma, 0);
854}
855#else /* !CONFIG_NUMA */
856#ifdef CONFIG_TMPFS
857static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
858{
859}
860#endif /* CONFIG_TMPFS */
861
862static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
863			struct shmem_inode_info *info, pgoff_t index)
864{
865	return swapin_readahead(swap, gfp, NULL, 0);
866}
867
868static inline struct page *shmem_alloc_page(gfp_t gfp,
869			struct shmem_inode_info *info, pgoff_t index)
870{
871	return alloc_page(gfp);
872}
873#endif /* CONFIG_NUMA */
874
875#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
876static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
877{
878	return NULL;
879}
880#endif
881
882/*
883 * When a page is moved from swapcache to shmem filecache (either by the
884 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
885 * shmem_unuse_inode()), it may have been read in earlier from swap, in
886 * ignorance of the mapping it belongs to.  If that mapping has special
887 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
888 * we may need to copy to a suitable page before moving to filecache.
889 *
890 * In a future release, this may well be extended to respect cpuset and
891 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
892 * but for now it is a simple matter of zone.
893 */
894static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
895{
896	return page_zonenum(page) > gfp_zone(gfp);
897}
898
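/*
 * Allocate a replacement page in a zone allowed by gfp, copy the old page's
 * contents and swapcache state across, then substitute it for the old page
 * in the swapcache radix tree and in memcg accounting.
 */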
899static int shmem_replace_page(struct page **pagep, gfp_t gfp,
900				struct shmem_inode_info *info, pgoff_t index)
901{
902	struct page *oldpage, *newpage;
903	struct address_space *swap_mapping;
904	pgoff_t swap_index;
905	int error;
906
907	oldpage = *pagep;
908	swap_index = page_private(oldpage);
909	swap_mapping = page_mapping(oldpage);
910
911	/*
912	 * We have arrived here because our zones are constrained, so don't
913	 * limit chance of success by further cpuset and node constraints.
914	 */
915	gfp &= ~GFP_CONSTRAINT_MASK;
916	newpage = shmem_alloc_page(gfp, info, index);
917	if (!newpage)
918		return -ENOMEM;
919	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
920
921	*pagep = newpage;
922	page_cache_get(newpage);
923	copy_highpage(newpage, oldpage);
924
925	VM_BUG_ON(!PageLocked(oldpage));
926	__set_page_locked(newpage);
927	VM_BUG_ON(!PageUptodate(oldpage));
928	SetPageUptodate(newpage);
929	VM_BUG_ON(!PageSwapBacked(oldpage));
930	SetPageSwapBacked(newpage);
931	VM_BUG_ON(!swap_index);
932	set_page_private(newpage, swap_index);
933	VM_BUG_ON(!PageSwapCache(oldpage));
934	SetPageSwapCache(newpage);
935
936	/*
937	 * Our caller will very soon move newpage out of swapcache, but it's
938	 * a nice clean interface for us to replace oldpage by newpage there.
939	 */
940	spin_lock_irq(&swap_mapping->tree_lock);
941	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
942								   newpage);
943	__inc_zone_page_state(newpage, NR_FILE_PAGES);
944	__dec_zone_page_state(oldpage, NR_FILE_PAGES);
945	spin_unlock_irq(&swap_mapping->tree_lock);
946	BUG_ON(error);
947
948	mem_cgroup_replace_page_cache(oldpage, newpage);
949	lru_cache_add_anon(newpage);
950
951	ClearPageSwapCache(oldpage);
952	set_page_private(oldpage, 0);
953
954	unlock_page(oldpage);
955	page_cache_release(oldpage);
956	page_cache_release(oldpage);
957	return 0;
958}
959
960/*
961 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
962 *
963 * If we allocate a new one we do not mark it dirty. That's up to the
964 * vm. If we swap it in we mark it dirty, and also free the swap
965 * entry, since a page cannot live in both the swap and page cache.
966 */
967static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
968	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
969{
970	struct address_space *mapping = inode->i_mapping;
971	struct shmem_inode_info *info;
972	struct shmem_sb_info *sbinfo;
973	struct page *page;
974	swp_entry_t swap;
975	int error;
976	int once = 0;
977
978	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
979		return -EFBIG;
980repeat:
981	swap.val = 0;
982	page = find_lock_page(mapping, index);
983	if (radix_tree_exceptional_entry(page)) {
984		swap = radix_to_swp_entry(page);
985		page = NULL;
986	}
987
988	if (sgp != SGP_WRITE &&
989	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
990		error = -EINVAL;
991		goto failed;
992	}
993
994	if (page || (sgp == SGP_READ && !swap.val)) {
995		/*
996		 * Once we can get the page lock, it must be uptodate:
997		 * if there were an error in reading back from swap,
998		 * the page would not be inserted into the filecache.
999		 */
1000		BUG_ON(page && !PageUptodate(page));
1001		*pagep = page;
1002		return 0;
1003	}
1004
1005	/*
1006	 * Fast cache lookup did not find it:
1007	 * bring it back from swap or allocate.
1008	 */
1009	info = SHMEM_I(inode);
1010	sbinfo = SHMEM_SB(inode->i_sb);
1011
1012	if (swap.val) {
1013		/* Look it up and read it in.. */
1014		page = lookup_swap_cache(swap);
1015		if (!page) {
1016			/* here we actually do the io */
1017			if (fault_type)
1018				*fault_type |= VM_FAULT_MAJOR;
1019			page = shmem_swapin(swap, gfp, info, index);
1020			if (!page) {
1021				error = -ENOMEM;
1022				goto failed;
1023			}
1024		}
1025
1026		/* We have to do this with page locked to prevent races */
1027		lock_page(page);
1028		if (!PageSwapCache(page) || page->mapping) {
1029			error = -EEXIST;	/* try again */
1030			goto failed;
1031		}
1032		if (!PageUptodate(page)) {
1033			error = -EIO;
1034			goto failed;
1035		}
1036		wait_on_page_writeback(page);
1037
1038		if (shmem_should_replace_page(page, gfp)) {
1039			error = shmem_replace_page(&page, gfp, info, index);
1040			if (error)
1041				goto failed;
1042		}
1043
1044		error = mem_cgroup_cache_charge(page, current->mm,
1045						gfp & GFP_RECLAIM_MASK);
1046		if (!error)
1047			error = shmem_add_to_page_cache(page, mapping, index,
1048						gfp, swp_to_radix_entry(swap));
1049		if (error)
1050			goto failed;
1051
1052		spin_lock(&info->lock);
1053		info->swapped--;
1054		shmem_recalc_inode(inode);
1055		spin_unlock(&info->lock);
1056
1057		delete_from_swap_cache(page);
1058		set_page_dirty(page);
1059		swap_free(swap);
1060
1061	} else {
1062		if (shmem_acct_block(info->flags)) {
1063			error = -ENOSPC;
1064			goto failed;
1065		}
1066		if (sbinfo->max_blocks) {
1067			if (percpu_counter_compare(&sbinfo->used_blocks,
1068						sbinfo->max_blocks) >= 0) {
1069				error = -ENOSPC;
1070				goto unacct;
1071			}
1072			percpu_counter_inc(&sbinfo->used_blocks);
1073		}
1074
1075		page = shmem_alloc_page(gfp, info, index);
1076		if (!page) {
1077			error = -ENOMEM;
1078			goto decused;
1079		}
1080
1081		SetPageSwapBacked(page);
1082		__set_page_locked(page);
1083		error = mem_cgroup_cache_charge(page, current->mm,
1084						gfp & GFP_RECLAIM_MASK);
1085		if (!error)
1086			error = shmem_add_to_page_cache(page, mapping, index,
1087						gfp, NULL);
1088		if (error)
1089			goto decused;
1090		lru_cache_add_anon(page);
1091
1092		spin_lock(&info->lock);
1093		info->alloced++;
1094		inode->i_blocks += BLOCKS_PER_PAGE;
1095		shmem_recalc_inode(inode);
1096		spin_unlock(&info->lock);
1097
1098		clear_highpage(page);
1099		flush_dcache_page(page);
1100		SetPageUptodate(page);
1101		if (sgp == SGP_DIRTY)
1102			set_page_dirty(page);
1103	}
1104
1105	/* Perhaps the file has been truncated since we checked */
1106	if (sgp != SGP_WRITE &&
1107	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1108		error = -EINVAL;
1109		goto trunc;
1110	}
1111	*pagep = page;
1112	return 0;
1113
1114	/*
1115	 * Error recovery.
1116	 */
1117trunc:
1118	ClearPageDirty(page);
1119	delete_from_page_cache(page);
1120	spin_lock(&info->lock);
1121	info->alloced--;
1122	inode->i_blocks -= BLOCKS_PER_PAGE;
1123	spin_unlock(&info->lock);
1124decused:
1125	if (sbinfo->max_blocks)
1126		percpu_counter_add(&sbinfo->used_blocks, -1);
1127unacct:
1128	shmem_unacct_blocks(info->flags, 1);
1129failed:
1130	if (swap.val && error != -EINVAL) {
1131		struct page *test = find_get_page(mapping, index);
1132		if (test && !radix_tree_exceptional_entry(test))
1133			page_cache_release(test);
1134		/* Have another try if the entry has changed */
1135		if (test != swp_to_radix_entry(swap))
1136			error = -EEXIST;
1137	}
1138	if (page) {
1139		unlock_page(page);
1140		page_cache_release(page);
1141	}
1142	if (error == -ENOSPC && !once++) {
1143		info = SHMEM_I(inode);
1144		spin_lock(&info->lock);
1145		shmem_recalc_inode(inode);
1146		spin_unlock(&info->lock);
1147		goto repeat;
1148	}
1149	if (error == -EEXIST)
1150		goto repeat;
1151	return error;
1152}
1153
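/*
 * Fault handler: find or allocate the page with shmem_getpage() and return
 * it locked; a swapin that had to do I/O is counted as a major fault.
 */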
1154static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1155{
1156	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1157	int error;
1158	int ret = VM_FAULT_LOCKED;
1159
1160	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
1161	if (error)
1162		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1163
1164	if (ret & VM_FAULT_MAJOR) {
1165		count_vm_event(PGMAJFAULT);
1166		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
1167	}
1168	return ret;
1169}
1170
1171#ifdef CONFIG_NUMA
1172static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1173{
1174	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1175	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1176}
1177
1178static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1179					  unsigned long addr)
1180{
1181	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1182	pgoff_t index;
1183
1184	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1185	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1186}
1187#endif
1188
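/*
 * SHM_LOCK/SHM_UNLOCK support: account the size with user_shm_lock(), set or
 * clear VM_LOCKED in the inode flags, and mark the mapping unevictable (or
 * evictable again) so reclaim treats its pages accordingly.
 */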
1189int shmem_lock(struct file *file, int lock, struct user_struct *user)
1190{
1191	struct inode *inode = file->f_path.dentry->d_inode;
1192	struct shmem_inode_info *info = SHMEM_I(inode);
1193	int retval = -ENOMEM;
1194
1195	spin_lock(&info->lock);
1196	if (lock && !(info->flags & VM_LOCKED)) {
1197		if (!user_shm_lock(inode->i_size, user))
1198			goto out_nomem;
1199		info->flags |= VM_LOCKED;
1200		mapping_set_unevictable(file->f_mapping);
1201	}
1202	if (!lock && (info->flags & VM_LOCKED) && user) {
1203		user_shm_unlock(inode->i_size, user);
1204		info->flags &= ~VM_LOCKED;
1205		mapping_clear_unevictable(file->f_mapping);
1206	}
1207	retval = 0;
1208
1209out_nomem:
1210	spin_unlock(&info->lock);
1211	return retval;
1212}
1213
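/* mmap just installs shmem_vm_ops; pages arrive lazily via shmem_fault() */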
1214static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1215{
1216	file_accessed(file);
1217	vma->vm_ops = &shmem_vm_ops;
1218	vma->vm_flags |= VM_CAN_NONLINEAR;
1219	return 0;
1220}
1221
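/*
 * Allocate and initialize a new shmem/tmpfs inode: reserve it against the
 * inode limit, then set up operations, mempolicy and link count according to
 * the file type (regular, directory, symlink or special).
 */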
1222static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1223				     umode_t mode, dev_t dev, unsigned long flags)
1224{
1225	struct inode *inode;
1226	struct shmem_inode_info *info;
1227	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1228
1229	if (shmem_reserve_inode(sb))
1230		return NULL;
1231
1232	inode = new_inode(sb);
1233	if (inode) {
1234		inode->i_ino = get_next_ino();
1235		inode_init_owner(inode, dir, mode);
1236		inode->i_blocks = 0;
1237		inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1238		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1239		inode->i_generation = get_seconds();
1240		info = SHMEM_I(inode);
1241		memset(info, 0, (char *)inode - (char *)info);
1242		spin_lock_init(&info->lock);
1243		info->flags = flags & VM_NORESERVE;
1244		INIT_LIST_HEAD(&info->swaplist);
1245		INIT_LIST_HEAD(&info->xattr_list);
1246		cache_no_acl(inode);
1247
1248		switch (mode & S_IFMT) {
1249		default:
1250			inode->i_op = &shmem_special_inode_operations;
1251			init_special_inode(inode, mode, dev);
1252			break;
1253		case S_IFREG:
1254			inode->i_mapping->a_ops = &shmem_aops;
1255			inode->i_op = &shmem_inode_operations;
1256			inode->i_fop = &shmem_file_operations;
1257			mpol_shared_policy_init(&info->policy,
1258						 shmem_get_sbmpol(sbinfo));
1259			break;
1260		case S_IFDIR:
1261			inc_nlink(inode);
1262			/* Some things misbehave if size == 0 on a directory */
1263			inode->i_size = 2 * BOGO_DIRENT_SIZE;
1264			inode->i_op = &shmem_dir_inode_operations;
1265			inode->i_fop = &simple_dir_operations;
1266			break;
1267		case S_IFLNK:
1268			/*
1269			 * Must not load anything in the rbtree,
1270			 * mpol_free_shared_policy will not be called.
1271			 */
1272			mpol_shared_policy_init(&info->policy, NULL);
1273			break;
1274		}
1275	} else
1276		shmem_free_inode(sb);
1277	return inode;
1278}
1279
1280#ifdef CONFIG_TMPFS
1281static const struct inode_operations shmem_symlink_inode_operations;
1282static const struct inode_operations shmem_short_symlink_operations;
1283
1284#ifdef CONFIG_TMPFS_XATTR
1285static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
1286#else
1287#define shmem_initxattrs NULL
1288#endif
1289
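/*
 * Write path: shmem_write_begin() just pins the page with SGP_WRITE (which
 * may extend beyond i_size); shmem_write_end() updates i_size, dirties the
 * page and drops the reference taken by shmem_getpage().
 */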
1290static int
1291shmem_write_begin(struct file *file, struct address_space *mapping,
1292			loff_t pos, unsigned len, unsigned flags,
1293			struct page **pagep, void **fsdata)
1294{
1295	struct inode *inode = mapping->host;
1296	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1297	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1298}
1299
1300static int
1301shmem_write_end(struct file *file, struct address_space *mapping,
1302			loff_t pos, unsigned len, unsigned copied,
1303			struct page *page, void *fsdata)
1304{
1305	struct inode *inode = mapping->host;
1306
1307	if (pos + copied > inode->i_size)
1308		i_size_write(inode, pos + copied);
1309
1310	set_page_dirty(page);
1311	unlock_page(page);
1312	page_cache_release(page);
1313
1314	return copied;
1315}
1316
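/*
 * Core of read(): walk the file page by page with shmem_getpage(),
 * substituting ZERO_PAGE for holes (SGP_READ does not allocate), and feed
 * each chunk to the actor, which copies it to the user buffer.
 */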
1317static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1318{
1319	struct inode *inode = filp->f_path.dentry->d_inode;
1320	struct address_space *mapping = inode->i_mapping;
1321	pgoff_t index;
1322	unsigned long offset;
1323	enum sgp_type sgp = SGP_READ;
1324
1325	/*
1326	 * Might this read be for a stacking filesystem?  Then when reading
1327	 * holes of a sparse file, we actually need to allocate those pages,
1328	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
1329	 */
1330	if (segment_eq(get_fs(), KERNEL_DS))
1331		sgp = SGP_DIRTY;
1332
1333	index = *ppos >> PAGE_CACHE_SHIFT;
1334	offset = *ppos & ~PAGE_CACHE_MASK;
1335
1336	for (;;) {
1337		struct page *page = NULL;
1338		pgoff_t end_index;
1339		unsigned long nr, ret;
1340		loff_t i_size = i_size_read(inode);
1341
1342		end_index = i_size >> PAGE_CACHE_SHIFT;
1343		if (index > end_index)
1344			break;
1345		if (index == end_index) {
1346			nr = i_size & ~PAGE_CACHE_MASK;
1347			if (nr <= offset)
1348				break;
1349		}
1350
1351		desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
1352		if (desc->error) {
1353			if (desc->error == -EINVAL)
1354				desc->error = 0;
1355			break;
1356		}
1357		if (page)
1358			unlock_page(page);
1359
1360		/*
1361		 * We must re-check i_size after getting the page, since reads
1362		 * (unlike writes) are called without i_mutex protection against truncate
1363		 */
1364		nr = PAGE_CACHE_SIZE;
1365		i_size = i_size_read(inode);
1366		end_index = i_size >> PAGE_CACHE_SHIFT;
1367		if (index == end_index) {
1368			nr = i_size & ~PAGE_CACHE_MASK;
1369			if (nr <= offset) {
1370				if (page)
1371					page_cache_release(page);
1372				break;
1373			}
1374		}
1375		nr -= offset;
1376
1377		if (page) {
1378			/*
1379			 * If users can be writing to this page using arbitrary
1380			 * virtual addresses, take care about potential aliasing
1381			 * before reading the page on the kernel side.
1382			 */
1383			if (mapping_writably_mapped(mapping))
1384				flush_dcache_page(page);
1385			/*
1386			 * Mark the page accessed if we read the beginning.
1387			 */
1388			if (!offset)
1389				mark_page_accessed(page);
1390		} else {
1391			page = ZERO_PAGE(0);
1392			page_cache_get(page);
1393		}
1394
1395		/*
1396		 * Ok, we have the page, and it's up-to-date, so
1397		 * now we can copy it to user space...
1398		 *
1399		 * The actor routine returns how many bytes were actually used..
1400		 * NOTE! This may not be the same as how much of a user buffer
1401		 * we filled up (we may be padding etc), so we can only update
1402		 * "pos" here (the actor routine has to update the user buffer
1403		 * pointers and the remaining count).
1404		 */
1405		ret = actor(desc, page, offset, nr);
1406		offset += ret;
1407		index += offset >> PAGE_CACHE_SHIFT;
1408		offset &= ~PAGE_CACHE_MASK;
1409
1410		page_cache_release(page);
1411		if (ret != nr || !desc->count)
1412			break;
1413
1414		cond_resched();
1415	}
1416
1417	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1418	file_accessed(filp);
1419}
1420
1421static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1422		const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1423{
1424	struct file *filp = iocb->ki_filp;
1425	ssize_t retval;
1426	unsigned long seg;
1427	size_t count;
1428	loff_t *ppos = &iocb->ki_pos;
1429
1430	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1431	if (retval)
1432		return retval;
1433
1434	for (seg = 0; seg < nr_segs; seg++) {
1435		read_descriptor_t desc;
1436
1437		desc.written = 0;
1438		desc.arg.buf = iov[seg].iov_base;
1439		desc.count = iov[seg].iov_len;
1440		if (desc.count == 0)
1441			continue;
1442		desc.error = 0;
1443		do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1444		retval += desc.written;
1445		if (desc.error) {
1446			retval = retval ?: desc.error;
1447			break;
1448		}
1449		if (desc.count > 0)
1450			break;
1451	}
1452	return retval;
1453}
1454
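/*
 * splice() from tmpfs: gather up to a pipe-full of pages, using
 * shmem_getpage(SGP_CACHE) to fill in any holes, then hand them to
 * splice_to_pipe() with page_cache_pipe_buf_ops.
 */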
1455static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1456				struct pipe_inode_info *pipe, size_t len,
1457				unsigned int flags)
1458{
1459	struct address_space *mapping = in->f_mapping;
1460	struct inode *inode = mapping->host;
1461	unsigned int loff, nr_pages, req_pages;
1462	struct page *pages[PIPE_DEF_BUFFERS];
1463	struct partial_page partial[PIPE_DEF_BUFFERS];
1464	struct page *page;
1465	pgoff_t index, end_index;
1466	loff_t isize, left;
1467	int error, page_nr;
1468	struct splice_pipe_desc spd = {
1469		.pages = pages,
1470		.partial = partial,
1471		.flags = flags,
1472		.ops = &page_cache_pipe_buf_ops,
1473		.spd_release = spd_release_page,
1474	};
1475
1476	isize = i_size_read(inode);
1477	if (unlikely(*ppos >= isize))
1478		return 0;
1479
1480	left = isize - *ppos;
1481	if (unlikely(left < len))
1482		len = left;
1483
1484	if (splice_grow_spd(pipe, &spd))
1485		return -ENOMEM;
1486
1487	index = *ppos >> PAGE_CACHE_SHIFT;
1488	loff = *ppos & ~PAGE_CACHE_MASK;
1489	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1490	nr_pages = min(req_pages, pipe->buffers);
1491
1492	spd.nr_pages = find_get_pages_contig(mapping, index,
1493						nr_pages, spd.pages);
1494	index += spd.nr_pages;
1495	error = 0;
1496
1497	while (spd.nr_pages < nr_pages) {
1498		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
1499		if (error)
1500			break;
1501		unlock_page(page);
1502		spd.pages[spd.nr_pages++] = page;
1503		index++;
1504	}
1505
1506	index = *ppos >> PAGE_CACHE_SHIFT;
1507	nr_pages = spd.nr_pages;
1508	spd.nr_pages = 0;
1509
1510	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
1511		unsigned int this_len;
1512
1513		if (!len)
1514			break;
1515
1516		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
1517		page = spd.pages[page_nr];
1518
1519		if (!PageUptodate(page) || page->mapping != mapping) {
1520			error = shmem_getpage(inode, index, &page,
1521							SGP_CACHE, NULL);
1522			if (error)
1523				break;
1524			unlock_page(page);
1525			page_cache_release(spd.pages[page_nr]);
1526			spd.pages[page_nr] = page;
1527		}
1528
1529		isize = i_size_read(inode);
1530		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1531		if (unlikely(!isize || index > end_index))
1532			break;
1533
1534		if (end_index == index) {
1535			unsigned int plen;
1536
1537			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
1538			if (plen <= loff)
1539				break;
1540
1541			this_len = min(this_len, plen - loff);
1542			len = this_len;
1543		}
1544
1545		spd.partial[page_nr].offset = loff;
1546		spd.partial[page_nr].len = this_len;
1547		len -= this_len;
1548		loff = 0;
1549		spd.nr_pages++;
1550		index++;
1551	}
1552
1553	while (page_nr < nr_pages)
1554		page_cache_release(spd.pages[page_nr++]);
1555
1556	if (spd.nr_pages)
1557		error = splice_to_pipe(pipe, &spd);
1558
1559	splice_shrink_spd(pipe, &spd);
1560
1561	if (error > 0) {
1562		*ppos += error;
1563		file_accessed(in);
1564	}
1565	return error;
1566}
1567
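/*
 * Report the mount's limits and what remains of them; unlimited mounts leave
 * the block and inode fields zero, as simple_statfs() does.
 */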
1568static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1569{
1570	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
1571
1572	buf->f_type = TMPFS_MAGIC;
1573	buf->f_bsize = PAGE_CACHE_SIZE;
1574	buf->f_namelen = NAME_MAX;
1575	if (sbinfo->max_blocks) {
1576		buf->f_blocks = sbinfo->max_blocks;
1577		buf->f_bavail =
1578		buf->f_bfree  = sbinfo->max_blocks -
1579				percpu_counter_sum(&sbinfo->used_blocks);
1580	}
1581	if (sbinfo->max_inodes) {
1582		buf->f_files = sbinfo->max_inodes;
1583		buf->f_ffree = sbinfo->free_inodes;
1584	}
1585	/* else leave those fields 0 like simple_statfs */
1586	return 0;
1587}
1588
1589/*
1590 * File creation. Allocate an inode, and we're done..
1591 */
1592static int
1593shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
1594{
1595	struct inode *inode;
1596	int error = -ENOSPC;
1597
1598	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1599	if (inode) {
1600		error = security_inode_init_security(inode, dir,
1601						     &dentry->d_name,
1602						     shmem_initxattrs, NULL);
1603		if (error) {
1604			if (error != -EOPNOTSUPP) {
1605				iput(inode);
1606				return error;
1607			}
1608		}
1609#ifdef CONFIG_TMPFS_POSIX_ACL
1610		error = generic_acl_init(inode, dir);
1611		if (error) {
1612			iput(inode);
1613			return error;
1614		}
1615#else
1616		error = 0;
1617#endif
1618		dir->i_size += BOGO_DIRENT_SIZE;
1619		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1620		d_instantiate(dentry, inode);
1621		dget(dentry); /* Extra count - pin the dentry in core */
1622	}
1623	return error;
1624}
1625
1626static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1627{
1628	int error;
1629
1630	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1631		return error;
1632	inc_nlink(dir);
1633	return 0;
1634}
1635
1636static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1637		struct nameidata *nd)
1638{
1639	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1640}
1641
1642/*
1643 * Link a file..
1644 */
1645static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1646{
1647	struct inode *inode = old_dentry->d_inode;
1648	int ret;
1649
1650	/*
1651	 * No ordinary (disk based) filesystem counts links as inodes;
1652	 * but each new link needs a new dentry, pinning lowmem, and
1653	 * tmpfs dentries cannot be pruned until they are unlinked.
1654	 */
1655	ret = shmem_reserve_inode(inode->i_sb);
1656	if (ret)
1657		goto out;
1658
1659	dir->i_size += BOGO_DIRENT_SIZE;
1660	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1661	inc_nlink(inode);
1662	ihold(inode);	/* New dentry reference */
1663	dget(dentry);		/* Extra pinning count for the created dentry */
1664	d_instantiate(dentry, inode);
1665out:
1666	return ret;
1667}
1668
1669static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1670{
1671	struct inode *inode = dentry->d_inode;
1672
1673	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
1674		shmem_free_inode(inode->i_sb);
1675
1676	dir->i_size -= BOGO_DIRENT_SIZE;
1677	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1678	drop_nlink(inode);
1679	dput(dentry);	/* Undo the count from "create" - this does all the work */
1680	return 0;
1681}
1682
1683static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1684{
1685	if (!simple_empty(dentry))
1686		return -ENOTEMPTY;
1687
1688	drop_nlink(dentry->d_inode);
1689	drop_nlink(dir);
1690	return shmem_unlink(dir, dentry);
1691}
1692
1693/*
1694 * The VFS layer already does all the dentry stuff for rename;
1695 * we just have to decrement the usage count for the target if
1696 * it exists so that the VFS layer correctly frees it when it
1697 * gets overwritten.
1698 */
1699static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1700{
1701	struct inode *inode = old_dentry->d_inode;
1702	int they_are_dirs = S_ISDIR(inode->i_mode);
1703
1704	if (!simple_empty(new_dentry))
1705		return -ENOTEMPTY;
1706
1707	if (new_dentry->d_inode) {
1708		(void) shmem_unlink(new_dir, new_dentry);
1709		if (they_are_dirs)
1710			drop_nlink(old_dir);
1711	} else if (they_are_dirs) {
1712		drop_nlink(old_dir);
1713		inc_nlink(new_dir);
1714	}
1715
1716	old_dir->i_size -= BOGO_DIRENT_SIZE;
1717	new_dir->i_size += BOGO_DIRENT_SIZE;
1718	old_dir->i_ctime = old_dir->i_mtime =
1719	new_dir->i_ctime = new_dir->i_mtime =
1720	inode->i_ctime = CURRENT_TIME;
1721	return 0;
1722}
1723
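/*
 * Symlink targets up to SHORT_SYMLINK_LEN are kept in a kmalloc'ed buffer;
 * longer targets are written into page 0 of the inode's mapping, so they can
 * be swapped out like ordinary data.
 */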
1724static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1725{
1726	int error;
1727	int len;
1728	struct inode *inode;
1729	struct page *page;
1730	char *kaddr;
1731	struct shmem_inode_info *info;
1732
1733	len = strlen(symname) + 1;
1734	if (len > PAGE_CACHE_SIZE)
1735		return -ENAMETOOLONG;
1736
1737	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
1738	if (!inode)
1739		return -ENOSPC;
1740
1741	error = security_inode_init_security(inode, dir, &dentry->d_name,
1742					     shmem_initxattrs, NULL);
1743	if (error) {
1744		if (error != -EOPNOTSUPP) {
1745			iput(inode);
1746			return error;
1747		}
1748		error = 0;
1749	}
1750
1751	info = SHMEM_I(inode);
1752	inode->i_size = len-1;
1753	if (len <= SHORT_SYMLINK_LEN) {
1754		info->symlink = kmemdup(symname, len, GFP_KERNEL);
1755		if (!info->symlink) {
1756			iput(inode);
1757			return -ENOMEM;
1758		}
1759		inode->i_op = &shmem_short_symlink_operations;
1760	} else {
1761		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
1762		if (error) {
1763			iput(inode);
1764			return error;
1765		}
1766		inode->i_mapping->a_ops = &shmem_aops;
1767		inode->i_op = &shmem_symlink_inode_operations;
1768		kaddr = kmap_atomic(page);
1769		memcpy(kaddr, symname, len);
1770		kunmap_atomic(kaddr);
1771		set_page_dirty(page);
1772		unlock_page(page);
1773		page_cache_release(page);
1774	}
1775	dir->i_size += BOGO_DIRENT_SIZE;
1776	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1777	d_instantiate(dentry, inode);
1778	dget(dentry);
1779	return 0;
1780}
1781
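/*
 * Short symlinks hand their kmalloc'ed string straight to nd_set_link();
 * page-backed symlinks kmap page 0 here and kunmap it in shmem_put_link().
 */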
1782static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
1783{
1784	nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
1785	return NULL;
1786}
1787
1788static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1789{
1790	struct page *page = NULL;
1791	int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1792	nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
1793	if (page)
1794		unlock_page(page);
1795	return page;
1796}
1797
1798static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
1799{
1800	if (!IS_ERR(nd_get_link(nd))) {
1801		struct page *page = cookie;
1802		kunmap(page);
1803		mark_page_accessed(page);
1804		page_cache_release(page);
1805	}
1806}
1807
1808#ifdef CONFIG_TMPFS_XATTR
1809/*
1810 * Superblocks without xattr inode operations may get some security.* xattr
1811 * support from the LSM "for free". As soon as we have any other xattrs
1812 * like ACLs, we also need to implement the security.* handlers at
1813 * filesystem level, though.
1814 */
1815
1816/*
1817 * Allocate new xattr and copy in the value; but leave the name to callers.
1818 */
1819static struct shmem_xattr *shmem_xattr_alloc(const void *value, size_t size)
1820{
1821	struct shmem_xattr *new_xattr;
1822	size_t len;
1823
1824	/* wrap around? */
1825	len = sizeof(*new_xattr) + size;
1826	if (len <= sizeof(*new_xattr))
1827		return NULL;
1828
1829	new_xattr = kmalloc(len, GFP_KERNEL);
1830	if (!new_xattr)
1831		return NULL;
1832
1833	new_xattr->size = size;
1834	memcpy(new_xattr->value, value, size);
1835	return new_xattr;
1836}
1837
1838/*
1839 * Callback for security_inode_init_security() for acquiring xattrs.
1840 */
1841static int shmem_initxattrs(struct inode *inode,
1842			    const struct xattr *xattr_array,
1843			    void *fs_info)
1844{
1845	struct shmem_inode_info *info = SHMEM_I(inode);
1846	const struct xattr *xattr;
1847	struct shmem_xattr *new_xattr;
1848	size_t len;
1849
1850	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
1851		new_xattr = shmem_xattr_alloc(xattr->value, xattr->value_len);
1852		if (!new_xattr)
1853			return -ENOMEM;
1854
1855		len = strlen(xattr->name) + 1;
1856		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
1857					  GFP_KERNEL);
1858		if (!new_xattr->name) {
1859			kfree(new_xattr);
1860			return -ENOMEM;
1861		}
1862
1863		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
1864		       XATTR_SECURITY_PREFIX_LEN);
1865		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
1866		       xattr->name, len);
1867
1868		spin_lock(&info->lock);
1869		list_add(&new_xattr->list, &info->xattr_list);
1870		spin_unlock(&info->lock);
1871	}
1872
1873	return 0;
1874}
1875
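/*
 * Look the name up in the inode's in-memory xattr list under info->lock:
 * returns the value size, -ERANGE if the caller's buffer is too small, or
 * -ENODATA if no such attribute exists.
 */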
1876static int shmem_xattr_get(struct dentry *dentry, const char *name,
1877			   void *buffer, size_t size)
1878{
1879	struct shmem_inode_info *info;
1880	struct shmem_xattr *xattr;
1881	int ret = -ENODATA;
1882
1883	info = SHMEM_I(dentry->d_inode);
1884
1885	spin_lock(&info->lock);
1886	list_for_each_entry(xattr, &info->xattr_list, list) {
1887		if (strcmp(name, xattr->name))
1888			continue;
1889
1890		ret = xattr->size;
1891		if (buffer) {
1892			if (size < xattr->size)
1893				ret = -ERANGE;
1894			else
1895				memcpy(buffer, xattr->value, xattr->size);
1896		}
1897		break;
1898	}
1899	spin_unlock(&info->lock);
1900	return ret;
1901}
1902
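/*
 * Set, replace or (with value == NULL) remove an xattr on the in-memory
 * list, honoring XATTR_CREATE and XATTR_REPLACE; the replaced or unused
 * entry is freed after dropping info->lock.
 */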
1903static int shmem_xattr_set(struct inode *inode, const char *name,
1904			   const void *value, size_t size, int flags)
1905{
1906	struct shmem_inode_info *info = SHMEM_I(inode);
1907	struct shmem_xattr *xattr;
1908	struct shmem_xattr *new_xattr = NULL;
1909	int err = 0;
1910
1911	/* value == NULL means remove */
1912	if (value) {
1913		new_xattr = shmem_xattr_alloc(value, size);
1914		if (!new_xattr)
1915			return -ENOMEM;
1916
1917		new_xattr->name = kstrdup(name, GFP_KERNEL);
1918		if (!new_xattr->name) {
1919			kfree(new_xattr);
1920			return -ENOMEM;
1921		}
1922	}
1923
1924	spin_lock(&info->lock);
1925	list_for_each_entry(xattr, &info->xattr_list, list) {
1926		if (!strcmp(name, xattr->name)) {
1927			if (flags & XATTR_CREATE) {
1928				xattr = new_xattr;
1929				err = -EEXIST;
1930			} else if (new_xattr) {
1931				list_replace(&xattr->list, &new_xattr->list);
1932			} else {
1933				list_del(&xattr->list);
1934			}
1935			goto out;
1936		}
1937	}
1938	if (flags & XATTR_REPLACE) {
1939		xattr = new_xattr;
1940		err = -ENODATA;
1941	} else {
1942		list_add(&new_xattr->list, &info->xattr_list);
1943		xattr = NULL;
1944	}
1945out:
1946	spin_unlock(&info->lock);
1947	if (xattr)
1948		kfree(xattr->name);
1949	kfree(xattr);
1950	return err;
1951}
1952
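/*
 * The flag handling above follows setxattr(2) semantics, e.g.:
 *
 *	shmem_xattr_set(inode, "security.foo", buf, len, XATTR_CREATE)
 *		fails with -EEXIST if "security.foo" already exists;
 *	shmem_xattr_set(inode, "security.foo", buf, len, XATTR_REPLACE)
 *		fails with -ENODATA if it does not exist yet;
 *	shmem_xattr_set(inode, "security.foo", NULL, 0, 0)
 *		removes the attribute (value == NULL means remove).
 *
 * At "out", whatever xattr points to is freed: the old entry after a
 * replace or delete, or the unused new_xattr when the operation fails.
 */
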
1953static const struct xattr_handler *shmem_xattr_handlers[] = {
1954#ifdef CONFIG_TMPFS_POSIX_ACL
1955	&generic_acl_access_handler,
1956	&generic_acl_default_handler,
1957#endif
1958	NULL
1959};
1960
1961static int shmem_xattr_validate(const char *name)
1962{
1963	struct { const char *prefix; size_t len; } arr[] = {
1964		{ XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
1965		{ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
1966	};
1967	int i;
1968
1969	for (i = 0; i < ARRAY_SIZE(arr); i++) {
1970		size_t preflen = arr[i].len;
1971		if (strncmp(name, arr[i].prefix, preflen) == 0) {
1972			if (!name[preflen])
1973				return -EINVAL;
1974			return 0;
1975		}
1976	}
1977	return -EOPNOTSUPP;
1978}
1979
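/*
 * So, for example:
 *	shmem_xattr_validate("security.selinux")	returns 0,
 *	shmem_xattr_validate("trusted.overlay")		returns 0,
 *	shmem_xattr_validate("security.")		returns -EINVAL,
 *	shmem_xattr_validate("user.mime_type")		returns -EOPNOTSUPP.
 * Only the security.* and trusted.* namespaces are stored in the
 * xattr_list; system.* requests are routed via sb->s_xattr by the
 * callers below.
 */
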
1980static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
1981			      void *buffer, size_t size)
1982{
1983	int err;
1984
1985	/*
1986	 * If this is a request for a synthetic attribute in the system.*
1987	 * namespace, use the generic infrastructure to resolve a handler
1988	 * for it via sb->s_xattr.
1989	 */
1990	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1991		return generic_getxattr(dentry, name, buffer, size);
1992
1993	err = shmem_xattr_validate(name);
1994	if (err)
1995		return err;
1996
1997	return shmem_xattr_get(dentry, name, buffer, size);
1998}
1999
2000static int shmem_setxattr(struct dentry *dentry, const char *name,
2001			  const void *value, size_t size, int flags)
2002{
2003	int err;
2004
2005	/*
2006	 * If this is a request for a synthetic attribute in the system.*
2007	 * namespace, use the generic infrastructure to resolve a handler
2008	 * for it via sb->s_xattr.
2009	 */
2010	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2011		return generic_setxattr(dentry, name, value, size, flags);
2012
2013	err = shmem_xattr_validate(name);
2014	if (err)
2015		return err;
2016
2017	if (size == 0)
2018		value = "";  /* empty EA, do not remove */
2019
2020	return shmem_xattr_set(dentry->d_inode, name, value, size, flags);
2021
2022}
2023
2024static int shmem_removexattr(struct dentry *dentry, const char *name)
2025{
2026	int err;
2027
2028	/*
2029	 * If this is a request for a synthetic attribute in the system.*
2030	 * namespace, use the generic infrastructure to resolve a handler
2031	 * for it via sb->s_xattr.
2032	 */
2033	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2034		return generic_removexattr(dentry, name);
2035
2036	err = shmem_xattr_validate(name);
2037	if (err)
2038		return err;
2039
2040	return shmem_xattr_set(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
2041}
2042
2043static bool xattr_is_trusted(const char *name)
2044{
2045	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
2046}
2047
2048static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2049{
2050	bool trusted = capable(CAP_SYS_ADMIN);
2051	struct shmem_xattr *xattr;
2052	struct shmem_inode_info *info;
2053	size_t used = 0;
2054
2055	info = SHMEM_I(dentry->d_inode);
2056
2057	spin_lock(&info->lock);
2058	list_for_each_entry(xattr, &info->xattr_list, list) {
2059		size_t len;
2060
2061		/* skip "trusted." attributes for unprivileged callers */
2062		if (!trusted && xattr_is_trusted(xattr->name))
2063			continue;
2064
2065		len = strlen(xattr->name) + 1;
2066		used += len;
2067		if (buffer) {
2068			if (size < used) {
2069				used = -ERANGE;
2070				break;
2071			}
2072			memcpy(buffer, xattr->name, len);
2073			buffer += len;
2074		}
2075	}
2076	spin_unlock(&info->lock);
2077
2078	return used;
2079}
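
/*
 * The result uses the usual listxattr(2) format: names, each
 * NUL-terminated, packed back to back, e.g. for a CAP_SYS_ADMIN caller
 *
 *	"security.selinux\0trusted.foo\0"	(used == 29)
 *
 * Unprivileged callers simply do not see the trusted.* names.  A NULL
 * buffer returns just the size needed, and a too-small buffer yields
 * -ERANGE, as with the generic syscall.
 */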
2080#endif /* CONFIG_TMPFS_XATTR */
2081
2082static const struct inode_operations shmem_short_symlink_operations = {
2083	.readlink	= generic_readlink,
2084	.follow_link	= shmem_follow_short_symlink,
2085#ifdef CONFIG_TMPFS_XATTR
2086	.setxattr	= shmem_setxattr,
2087	.getxattr	= shmem_getxattr,
2088	.listxattr	= shmem_listxattr,
2089	.removexattr	= shmem_removexattr,
2090#endif
2091};
2092
2093static const struct inode_operations shmem_symlink_inode_operations = {
2094	.readlink	= generic_readlink,
2095	.follow_link	= shmem_follow_link,
2096	.put_link	= shmem_put_link,
2097#ifdef CONFIG_TMPFS_XATTR
2098	.setxattr	= shmem_setxattr,
2099	.getxattr	= shmem_getxattr,
2100	.listxattr	= shmem_listxattr,
2101	.removexattr	= shmem_removexattr,
2102#endif
2103};
2104
2105static struct dentry *shmem_get_parent(struct dentry *child)
2106{
2107	return ERR_PTR(-ESTALE);
2108}
2109
2110static int shmem_match(struct inode *ino, void *vfh)
2111{
2112	__u32 *fh = vfh;
2113	__u64 inum = fh[2];
2114	inum = (inum << 32) | fh[1];
2115	return ino->i_ino == inum && fh[0] == ino->i_generation;
2116}
2117
2118static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2119		struct fid *fid, int fh_len, int fh_type)
2120{
2121	struct inode *inode;
2122	struct dentry *dentry = NULL;
2123	u64 inum = fid->raw[2];
2124	inum = (inum << 32) | fid->raw[1];
2125
2126	if (fh_len < 3)
2127		return NULL;
2128
2129	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2130			shmem_match, fid->raw);
2131	if (inode) {
2132		dentry = d_find_alias(inode);
2133		iput(inode);
2134	}
2135
2136	return dentry;
2137}
2138
2139static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2140				int connectable)
2141{
2142	struct inode *inode = dentry->d_inode;
2143
2144	if (*len < 3) {
2145		*len = 3;
2146		return 255;
2147	}
2148
2149	if (inode_unhashed(inode)) {
2150		/* Unfortunately insert_inode_hash is not idempotent,
2151		 * so as we hash inodes here rather than at creation
2152		 * time, we need a lock to ensure we only try
2153		 * to do it once
2154		 */
2155		static DEFINE_SPINLOCK(lock);
2156		spin_lock(&lock);
2157		if (inode_unhashed(inode))
2158			__insert_inode_hash(inode,
2159					    inode->i_ino + inode->i_generation);
2160		spin_unlock(&lock);
2161	}
2162
2163	fh[0] = inode->i_generation;
2164	fh[1] = inode->i_ino;
2165	fh[2] = ((__u64)inode->i_ino) >> 32;
2166
2167	*len = 3;
2168	return 1;
2169}
2170
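/*
 * The resulting exportfs file handle is three 32-bit words; for an
 * inode with i_ino 0x12345 and i_generation 7 it would be
 *
 *	fh[0] = 7		(i_generation)
 *	fh[1] = 0x00012345	(low 32 bits of i_ino)
 *	fh[2] = 0		(high 32 bits of i_ino)
 *
 * shmem_fh_to_dentry() reassembles the 64-bit inode number from
 * fh[2]:fh[1], and shmem_match() uses fh[0] to reject stale handles.
 */
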
2171static const struct export_operations shmem_export_ops = {
2172	.get_parent     = shmem_get_parent,
2173	.encode_fh      = shmem_encode_fh,
2174	.fh_to_dentry	= shmem_fh_to_dentry,
2175};
2176
2177static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
2178			       bool remount)
2179{
2180	char *this_char, *value, *rest;
2181	uid_t uid;
2182	gid_t gid;
2183
2184	while (options != NULL) {
2185		this_char = options;
2186		for (;;) {
2187			/*
2188			 * NUL-terminate this option: unfortunately,
2189			 * mount options form a comma-separated list,
2190			 * but mpol's nodelist may also contain commas.
2191			 */
2192			options = strchr(options, ',');
2193			if (options == NULL)
2194				break;
2195			options++;
2196			if (!isdigit(*options)) {
2197				options[-1] = '\0';
2198				break;
2199			}
2200		}
2201		if (!*this_char)
2202			continue;
2203		if ((value = strchr(this_char,'=')) != NULL) {
2204			*value++ = 0;
2205		} else {
2206			printk(KERN_ERR
2207			    "tmpfs: No value for mount option '%s'\n",
2208			    this_char);
2209			return 1;
2210		}
2211
2212		if (!strcmp(this_char,"size")) {
2213			unsigned long long size;
2214			size = memparse(value,&rest);
2215			if (*rest == '%') {
2216				size <<= PAGE_SHIFT;
2217				size *= totalram_pages;
2218				do_div(size, 100);
2219				rest++;
2220			}
2221			if (*rest)
2222				goto bad_val;
2223			sbinfo->max_blocks =
2224				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
2225		} else if (!strcmp(this_char,"nr_blocks")) {
2226			sbinfo->max_blocks = memparse(value, &rest);
2227			if (*rest)
2228				goto bad_val;
2229		} else if (!strcmp(this_char,"nr_inodes")) {
2230			sbinfo->max_inodes = memparse(value, &rest);
2231			if (*rest)
2232				goto bad_val;
2233		} else if (!strcmp(this_char,"mode")) {
2234			if (remount)
2235				continue;
2236			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
2237			if (*rest)
2238				goto bad_val;
2239		} else if (!strcmp(this_char,"uid")) {
2240			if (remount)
2241				continue;
2242			uid = simple_strtoul(value, &rest, 0);
2243			if (*rest)
2244				goto bad_val;
2245			sbinfo->uid = make_kuid(current_user_ns(), uid);
2246			if (!uid_valid(sbinfo->uid))
2247				goto bad_val;
2248		} else if (!strcmp(this_char,"gid")) {
2249			if (remount)
2250				continue;
2251			gid = simple_strtoul(value, &rest, 0);
2252			if (*rest)
2253				goto bad_val;
2254			sbinfo->gid = make_kgid(current_user_ns(), gid);
2255			if (!gid_valid(sbinfo->gid))
2256				goto bad_val;
2257		} else if (!strcmp(this_char,"mpol")) {
2258			if (mpol_parse_str(value, &sbinfo->mpol, 1))
2259				goto bad_val;
2260		} else {
2261			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
2262			       this_char);
2263			return 1;
2264		}
2265	}
2266	return 0;
2267
2268bad_val:
2269	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
2270	       value, this_char);
2271	return 1;
2272
2273}
2274
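/*
 * Typical option strings handled here come from mounts such as
 *
 *	mount -t tmpfs -o size=50%,nr_inodes=100k,mode=1777 tmpfs /mnt
 *	mount -t tmpfs -o size=1g,uid=1000,gid=1000 tmpfs /mnt
 *
 * "size" accepts a byte count (k/m/g suffixes via memparse) or a
 * percentage of total RAM (totalram_pages); the isdigit() peek in the
 * scanner is what keeps an mpol nodelist such as "mpol=bind:0,2" from
 * being split at its internal comma.
 */
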
2275static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2276{
2277	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2278	struct shmem_sb_info config = *sbinfo;
2279	unsigned long inodes;
2280	int error = -EINVAL;
2281
2282	if (shmem_parse_options(data, &config, true))
2283		return error;
2284
2285	spin_lock(&sbinfo->stat_lock);
2286	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
2287	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
2288		goto out;
2289	if (config.max_inodes < inodes)
2290		goto out;
2291	/*
2292	 * Those tests disallow limited->unlimited while any are in use;
2293	 * but we must separately disallow unlimited->limited, because
2294	 * in that case we have no record of how much is already in use.
2295	 */
2296	if (config.max_blocks && !sbinfo->max_blocks)
2297		goto out;
2298	if (config.max_inodes && !sbinfo->max_inodes)
2299		goto out;
2300
2301	error = 0;
2302	sbinfo->max_blocks  = config.max_blocks;
2303	sbinfo->max_inodes  = config.max_inodes;
2304	sbinfo->free_inodes = config.max_inodes - inodes;
2305
2306	mpol_put(sbinfo->mpol);
2307	sbinfo->mpol        = config.mpol;	/* transfers initial ref */
2308out:
2309	spin_unlock(&sbinfo->stat_lock);
2310	return error;
2311}
2312
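/*
 * For instance, a tmpfs mounted with "size=2g" can later be grown or
 * shrunk by "mount -o remount,size=4g /mnt", provided the new limit is
 * not below what is already in use; but remounting an unlimited
 * instance (nr_blocks=0) to a limited one is refused with -EINVAL,
 * since used_blocks was never accounted while it was unlimited.
 */
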
2313static int shmem_show_options(struct seq_file *seq, struct dentry *root)
2314{
2315	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
2316
2317	if (sbinfo->max_blocks != shmem_default_max_blocks())
2318		seq_printf(seq, ",size=%luk",
2319			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
2320	if (sbinfo->max_inodes != shmem_default_max_inodes())
2321		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
2322	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
2323		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
2324	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
2325		seq_printf(seq, ",uid=%u",
2326				from_kuid_munged(&init_user_ns, sbinfo->uid));
2327	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
2328		seq_printf(seq, ",gid=%u",
2329				from_kgid_munged(&init_user_ns, sbinfo->gid));
2330	shmem_show_mpol(seq, sbinfo->mpol);
2331	return 0;
2332}
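
/*
 * Only options differing from the defaults are emitted, so a tmpfs
 * mounted with "size=200m,uid=1000" shows up in /proc/mounts as
 * something like (with 4K pages)
 *
 *	tmpfs /mnt tmpfs rw,relatime,size=204800k,uid=1000 0 0
 *
 * while a plain "mount -t tmpfs tmpfs /mnt" adds nothing beyond the
 * generic mount flags.
 */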
2333#endif /* CONFIG_TMPFS */
2334
2335static void shmem_put_super(struct super_block *sb)
2336{
2337	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2338
2339	percpu_counter_destroy(&sbinfo->used_blocks);
2340	kfree(sbinfo);
2341	sb->s_fs_info = NULL;
2342}
2343
2344int shmem_fill_super(struct super_block *sb, void *data, int silent)
2345{
2346	struct inode *inode;
2347	struct shmem_sb_info *sbinfo;
2348	int err = -ENOMEM;
2349
2350	/* Round up to L1_CACHE_BYTES to resist false sharing */
2351	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
2352				L1_CACHE_BYTES), GFP_KERNEL);
2353	if (!sbinfo)
2354		return -ENOMEM;
2355
2356	sbinfo->mode = S_IRWXUGO | S_ISVTX;
2357	sbinfo->uid = current_fsuid();
2358	sbinfo->gid = current_fsgid();
2359	sb->s_fs_info = sbinfo;
2360
2361#ifdef CONFIG_TMPFS
2362	/*
2363	 * By default we only allow half of the physical RAM per
2364	 * tmpfs instance, limiting inodes to one per page of lowmem;
2365	 * but the internal instance is left unlimited.
2366	 */
2367	if (!(sb->s_flags & MS_NOUSER)) {
2368		sbinfo->max_blocks = shmem_default_max_blocks();
2369		sbinfo->max_inodes = shmem_default_max_inodes();
2370		if (shmem_parse_options(data, sbinfo, false)) {
2371			err = -EINVAL;
2372			goto failed;
2373		}
2374	}
2375	sb->s_export_op = &shmem_export_ops;
2376#else
2377	sb->s_flags |= MS_NOUSER;
2378#endif
2379
2380	spin_lock_init(&sbinfo->stat_lock);
2381	if (percpu_counter_init(&sbinfo->used_blocks, 0))
2382		goto failed;
2383	sbinfo->free_inodes = sbinfo->max_inodes;
2384
2385	sb->s_maxbytes = MAX_LFS_FILESIZE;
2386	sb->s_blocksize = PAGE_CACHE_SIZE;
2387	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2388	sb->s_magic = TMPFS_MAGIC;
2389	sb->s_op = &shmem_ops;
2390	sb->s_time_gran = 1;
2391#ifdef CONFIG_TMPFS_XATTR
2392	sb->s_xattr = shmem_xattr_handlers;
2393#endif
2394#ifdef CONFIG_TMPFS_POSIX_ACL
2395	sb->s_flags |= MS_POSIXACL;
2396#endif
2397
2398	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
2399	if (!inode)
2400		goto failed;
2401	inode->i_uid = sbinfo->uid;
2402	inode->i_gid = sbinfo->gid;
2403	sb->s_root = d_make_root(inode);
2404	if (!sb->s_root)
2405		goto failed;
2406	return 0;
2407
2408failed:
2409	shmem_put_super(sb);
2410	return err;
2411}
2412
2413static struct kmem_cache *shmem_inode_cachep;
2414
2415static struct inode *shmem_alloc_inode(struct super_block *sb)
2416{
2417	struct shmem_inode_info *info;
2418	info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
2419	if (!info)
2420		return NULL;
2421	return &info->vfs_inode;
2422}
2423
2424static void shmem_destroy_callback(struct rcu_head *head)
2425{
2426	struct inode *inode = container_of(head, struct inode, i_rcu);
2427	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2428}
2429
2430static void shmem_destroy_inode(struct inode *inode)
2431{
2432	if (S_ISREG(inode->i_mode))
2433		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2434	call_rcu(&inode->i_rcu, shmem_destroy_callback);
2435}
2436
2437static void shmem_init_inode(void *foo)
2438{
2439	struct shmem_inode_info *info = foo;
2440	inode_init_once(&info->vfs_inode);
2441}
2442
2443static int shmem_init_inodecache(void)
2444{
2445	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
2446				sizeof(struct shmem_inode_info),
2447				0, SLAB_PANIC, shmem_init_inode);
2448	return 0;
2449}
2450
2451static void shmem_destroy_inodecache(void)
2452{
2453	kmem_cache_destroy(shmem_inode_cachep);
2454}
2455
2456static const struct address_space_operations shmem_aops = {
2457	.writepage	= shmem_writepage,
2458	.set_page_dirty	= __set_page_dirty_no_writeback,
2459#ifdef CONFIG_TMPFS
2460	.write_begin	= shmem_write_begin,
2461	.write_end	= shmem_write_end,
2462#endif
2463	.migratepage	= migrate_page,
2464	.error_remove_page = generic_error_remove_page,
2465};
2466
2467static const struct file_operations shmem_file_operations = {
2468	.mmap		= shmem_mmap,
2469#ifdef CONFIG_TMPFS
2470	.llseek		= generic_file_llseek,
2471	.read		= do_sync_read,
2472	.write		= do_sync_write,
2473	.aio_read	= shmem_file_aio_read,
2474	.aio_write	= generic_file_aio_write,
2475	.fsync		= noop_fsync,
2476	.splice_read	= shmem_file_splice_read,
2477	.splice_write	= generic_file_splice_write,
2478#endif
2479};
2480
2481static const struct inode_operations shmem_inode_operations = {
2482	.setattr	= shmem_setattr,
2483	.truncate_range	= shmem_truncate_range,
2484#ifdef CONFIG_TMPFS_XATTR
2485	.setxattr	= shmem_setxattr,
2486	.getxattr	= shmem_getxattr,
2487	.listxattr	= shmem_listxattr,
2488	.removexattr	= shmem_removexattr,
2489#endif
2490};
2491
2492static const struct inode_operations shmem_dir_inode_operations = {
2493#ifdef CONFIG_TMPFS
2494	.create		= shmem_create,
2495	.lookup		= simple_lookup,
2496	.link		= shmem_link,
2497	.unlink		= shmem_unlink,
2498	.symlink	= shmem_symlink,
2499	.mkdir		= shmem_mkdir,
2500	.rmdir		= shmem_rmdir,
2501	.mknod		= shmem_mknod,
2502	.rename		= shmem_rename,
2503#endif
2504#ifdef CONFIG_TMPFS_XATTR
2505	.setxattr	= shmem_setxattr,
2506	.getxattr	= shmem_getxattr,
2507	.listxattr	= shmem_listxattr,
2508	.removexattr	= shmem_removexattr,
2509#endif
2510#ifdef CONFIG_TMPFS_POSIX_ACL
2511	.setattr	= shmem_setattr,
2512#endif
2513};
2514
2515static const struct inode_operations shmem_special_inode_operations = {
2516#ifdef CONFIG_TMPFS_XATTR
2517	.setxattr	= shmem_setxattr,
2518	.getxattr	= shmem_getxattr,
2519	.listxattr	= shmem_listxattr,
2520	.removexattr	= shmem_removexattr,
2521#endif
2522#ifdef CONFIG_TMPFS_POSIX_ACL
2523	.setattr	= shmem_setattr,
2524#endif
2525};
2526
2527static const struct super_operations shmem_ops = {
2528	.alloc_inode	= shmem_alloc_inode,
2529	.destroy_inode	= shmem_destroy_inode,
2530#ifdef CONFIG_TMPFS
2531	.statfs		= shmem_statfs,
2532	.remount_fs	= shmem_remount_fs,
2533	.show_options	= shmem_show_options,
2534#endif
2535	.evict_inode	= shmem_evict_inode,
2536	.drop_inode	= generic_delete_inode,
2537	.put_super	= shmem_put_super,
2538};
2539
2540static const struct vm_operations_struct shmem_vm_ops = {
2541	.fault		= shmem_fault,
2542#ifdef CONFIG_NUMA
2543	.set_policy     = shmem_set_policy,
2544	.get_policy     = shmem_get_policy,
2545#endif
2546};
2547
2548static struct dentry *shmem_mount(struct file_system_type *fs_type,
2549	int flags, const char *dev_name, void *data)
2550{
2551	return mount_nodev(fs_type, flags, data, shmem_fill_super);
2552}
2553
2554static struct file_system_type shmem_fs_type = {
2555	.owner		= THIS_MODULE,
2556	.name		= "tmpfs",
2557	.mount		= shmem_mount,
2558	.kill_sb	= kill_litter_super,
2559};
2560
2561int __init shmem_init(void)
2562{
2563	int error;
2564
2565	error = bdi_init(&shmem_backing_dev_info);
2566	if (error)
2567		goto out4;
2568
2569	error = shmem_init_inodecache();
2570	if (error)
2571		goto out3;
2572
2573	error = register_filesystem(&shmem_fs_type);
2574	if (error) {
2575		printk(KERN_ERR "Could not register tmpfs\n");
2576		goto out2;
2577	}
2578
2579	shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
2580				 shmem_fs_type.name, NULL);
2581	if (IS_ERR(shm_mnt)) {
2582		error = PTR_ERR(shm_mnt);
2583		printk(KERN_ERR "Could not kern_mount tmpfs\n");
2584		goto out1;
2585	}
2586	return 0;
2587
2588out1:
2589	unregister_filesystem(&shmem_fs_type);
2590out2:
2591	shmem_destroy_inodecache();
2592out3:
2593	bdi_destroy(&shmem_backing_dev_info);
2594out4:
2595	shm_mnt = ERR_PTR(error);
2596	return error;
2597}
2598
2599#else /* !CONFIG_SHMEM */
2600
2601/*
2602 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
2603 *
2604 * This is intended for small systems where the benefits of the full
2605 * shmem code (swap-backed and resource-limited) are outweighed by
2606 * its complexity. On systems without swap this code should be
2607 * effectively equivalent, but much lighter weight.
2608 */
2609
2610#include <linux/ramfs.h>
2611
2612static struct file_system_type shmem_fs_type = {
2613	.name		= "tmpfs",
2614	.mount		= ramfs_mount,
2615	.kill_sb	= kill_litter_super,
2616};
2617
2618int __init shmem_init(void)
2619{
2620	BUG_ON(register_filesystem(&shmem_fs_type) != 0);
2621
2622	shm_mnt = kern_mount(&shmem_fs_type);
2623	BUG_ON(IS_ERR(shm_mnt));
2624
2625	return 0;
2626}
2627
2628int shmem_unuse(swp_entry_t swap, struct page *page)
2629{
2630	return 0;
2631}
2632
2633int shmem_lock(struct file *file, int lock, struct user_struct *user)
2634{
2635	return 0;
2636}
2637
2638void shmem_unlock_mapping(struct address_space *mapping)
2639{
2640}
2641
2642void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
2643{
2644	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
2645}
2646EXPORT_SYMBOL_GPL(shmem_truncate_range);
2647
2648#define shmem_vm_ops				generic_file_vm_ops
2649#define shmem_file_operations			ramfs_file_operations
2650#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
2651#define shmem_acct_size(flags, size)		0
2652#define shmem_unacct_size(flags, size)		do {} while (0)
2653
2654#endif /* CONFIG_SHMEM */
2655
2656/* common code */
2657
2658/**
2659 * shmem_file_setup - get an unlinked file living in tmpfs
2660 * @name: name for dentry (to be seen in /proc/<pid>/maps)
2661 * @size: size to be set for the file
2662 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
2663 */
2664struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
2665{
2666	int error;
2667	struct file *file;
2668	struct inode *inode;
2669	struct path path;
2670	struct dentry *root;
2671	struct qstr this;
2672
2673	if (IS_ERR(shm_mnt))
2674		return (void *)shm_mnt;
2675
2676	if (size < 0 || size > MAX_LFS_FILESIZE)
2677		return ERR_PTR(-EINVAL);
2678
2679	if (shmem_acct_size(flags, size))
2680		return ERR_PTR(-ENOMEM);
2681
2682	error = -ENOMEM;
2683	this.name = name;
2684	this.len = strlen(name);
2685	this.hash = 0; /* will go */
2686	root = shm_mnt->mnt_root;
2687	path.dentry = d_alloc(root, &this);
2688	if (!path.dentry)
2689		goto put_memory;
2690	path.mnt = mntget(shm_mnt);
2691
2692	error = -ENOSPC;
2693	inode = shmem_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
2694	if (!inode)
2695		goto put_dentry;
2696
2697	d_instantiate(path.dentry, inode);
2698	inode->i_size = size;
2699	clear_nlink(inode);	/* It is unlinked */
2700#ifndef CONFIG_MMU
2701	error = ramfs_nommu_expand_for_mapping(inode, size);
2702	if (error)
2703		goto put_dentry;
2704#endif
2705
2706	error = -ENFILE;
2707	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
2708		  &shmem_file_operations);
2709	if (!file)
2710		goto put_dentry;
2711
2712	return file;
2713
2714put_dentry:
2715	path_put(&path);
2716put_memory:
2717	shmem_unacct_size(flags, size);
2718	return ERR_PTR(error);
2719}
2720EXPORT_SYMBOL_GPL(shmem_file_setup);
2721
2722/**
2723 * shmem_zero_setup - setup a shared anonymous mapping
2724 * @vma: the vma to be mmapped, as prepared by do_mmap_pgoff
2725 */
2726int shmem_zero_setup(struct vm_area_struct *vma)
2727{
2728	struct file *file;
2729	loff_t size = vma->vm_end - vma->vm_start;
2730
2731	file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2732	if (IS_ERR(file))
2733		return PTR_ERR(file);
2734
2735	if (vma->vm_file)
2736		fput(vma->vm_file);
2737	vma->vm_file = file;
2738	vma->vm_ops = &shmem_vm_ops;
2739	vma->vm_flags |= VM_CAN_NONLINEAR;
2740	return 0;
2741}
2742
2743/**
2744 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
2745 * @mapping:	the page's address_space
2746 * @index:	the page index
2747 * @gfp:	the page allocator flags to use if allocating
2748 *
2749 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
2750 * with any new page allocations done using the specified allocation flags.
2751 * But read_cache_page_gfp() uses the ->readpage() method, which does not
2752 * suit tmpfs, since it may have pages in swapcache, and needs to find those
2753 * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
2754 *
2755 * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
2756 * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
2757 */
2758struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
2759					 pgoff_t index, gfp_t gfp)
2760{
2761#ifdef CONFIG_SHMEM
2762	struct inode *inode = mapping->host;
2763	struct page *page;
2764	int error;
2765
2766	BUG_ON(mapping->a_ops != &shmem_aops);
2767	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
2768	if (error)
2769		page = ERR_PTR(error);
2770	else
2771		unlock_page(page);
2772	return page;
2773#else
2774	/*
2775	 * The tiny !SHMEM case uses ramfs without swap
2776	 */
2777	return read_cache_page_gfp(mapping, index, gfp);
2778#endif
2779}
2780EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
2781
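/*
 * A GEM-style caller, as mentioned above, would use it along the
 * lines of
 *
 *	gfp_t gfp = mapping_gfp_mask(mapping) |
 *		    __GFP_NORETRY | __GFP_NOWARN;
 *	struct page *page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *
 * which hands back an uptodate, unlocked page, or an ERR_PTR on
 * failure.
 */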