nommu.c revision 365e9c87a982c03d0af3886e29d877f581b59611
1/*
2 *  linux/mm/nommu.c
3 *
4 *  Replacement code for mm functions to support CPUs that don't
5 *  have any form of memory management unit (thus no virtual memory).
6 *
7 *  See Documentation/nommu-mmap.txt
8 *
9 *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
10 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
13 */
14
15#include <linux/mm.h>
16#include <linux/mman.h>
17#include <linux/swap.h>
18#include <linux/file.h>
19#include <linux/highmem.h>
20#include <linux/pagemap.h>
21#include <linux/slab.h>
22#include <linux/vmalloc.h>
23#include <linux/ptrace.h>
24#include <linux/blkdev.h>
25#include <linux/backing-dev.h>
26#include <linux/mount.h>
27#include <linux/personality.h>
28#include <linux/security.h>
29#include <linux/syscalls.h>
30
31#include <asm/uaccess.h>
32#include <asm/tlb.h>
33#include <asm/tlbflush.h>
34
35void *high_memory;
36struct page *mem_map;
37unsigned long max_mapnr;
38unsigned long num_physpages;
39unsigned long askedalloc, realalloc;
40atomic_t vm_committed_space = ATOMIC_INIT(0);
41int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
42int sysctl_overcommit_ratio = 50; /* default is 50% */
43int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
44int heap_stack_gap = 0;
45
46EXPORT_SYMBOL(mem_map);
47EXPORT_SYMBOL(sysctl_max_map_count);
48EXPORT_SYMBOL(sysctl_overcommit_memory);
49EXPORT_SYMBOL(sysctl_overcommit_ratio);
50EXPORT_SYMBOL(vm_committed_space);
51EXPORT_SYMBOL(__vm_enough_memory);
52
53/* list of shareable VMAs */
54struct rb_root nommu_vma_tree = RB_ROOT;
55DECLARE_RWSEM(nommu_vma_sem);
56
57struct vm_operations_struct generic_file_vm_ops = {
58};
59
60EXPORT_SYMBOL(vmalloc);
61EXPORT_SYMBOL(vfree);
62EXPORT_SYMBOL(vmalloc_to_page);
63EXPORT_SYMBOL(vmalloc_32);
64
65/*
66 * Handle all mappings that got truncated by a "truncate()"
67 * system call.
68 *
69 * NOTE! We have to be ready to update the memory sharing
70 * between the file and the memory map for a potential last
71 * incomplete page.  Ugly, but necessary.
72 */
73int vmtruncate(struct inode *inode, loff_t offset)
74{
75	struct address_space *mapping = inode->i_mapping;
76	unsigned long limit;
77
78	if (inode->i_size < offset)
79		goto do_expand;
80	i_size_write(inode, offset);
81
82	truncate_inode_pages(mapping, offset);
83	goto out_truncate;
84
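	/*
	 * expanding the file: the new size has to respect both the caller's
	 * RLIMIT_FSIZE limit and the filesystem's maximum file size
	 */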
85do_expand:
86	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
87	if (limit != RLIM_INFINITY && offset > limit)
88		goto out_sig;
89	if (offset > inode->i_sb->s_maxbytes)
90		goto out;
91	i_size_write(inode, offset);
92
93out_truncate:
94	if (inode->i_op && inode->i_op->truncate)
95		inode->i_op->truncate(inode);
96	return 0;
97out_sig:
98	send_sig(SIGXFSZ, current, 0);
99out:
100	return -EFBIG;
101}
102
103EXPORT_SYMBOL(vmtruncate);
104
105/*
106 * Return the total memory allocated for this pointer, not
107 * just what the caller asked for.
108 *
109 * Doesn't have to be accurate, i.e. may have races.
110 */
111unsigned int kobjsize(const void *objp)
112{
113	struct page *page;
114
115	if (!objp || !((page = virt_to_page(objp))))
116		return 0;
117
118	if (PageSlab(page))
119		return ksize(objp);
120
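	/*
	 * not a slab object, so this is assumed to be a page-allocator
	 * allocation with its order stashed in page->index (hence the
	 * MAX_ORDER sanity check below)
	 */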
121	BUG_ON(page->index < 0);
122	BUG_ON(page->index >= MAX_ORDER);
123
124	return (PAGE_SIZE << page->index);
125}
126
127/*
128 * The nommu dodgy version :-)
129 */
130int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
131	unsigned long start, int len, int write, int force,
132	struct page **pages, struct vm_area_struct **vmas)
133{
134	int i;
135	static struct vm_area_struct dummy_vma;
136
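	/*
	 * without an MMU, user addresses are physical addresses, so each page
	 * can be found with a plain virt_to_page(); a single static dummy VMA
	 * is handed back because no real per-address VMA lookup is done here
	 */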
137	for (i = 0; i < len; i++) {
138		if (pages) {
139			pages[i] = virt_to_page(start);
140			if (pages[i])
141				page_cache_get(pages[i]);
142		}
143		if (vmas)
144			vmas[i] = &dummy_vma;
145		start += PAGE_SIZE;
146	}
147	return i;
148}
149
150EXPORT_SYMBOL(get_user_pages);
151
152DEFINE_RWLOCK(vmlist_lock);
153struct vm_struct *vmlist;
154
155void vfree(void *addr)
156{
157	kfree(addr);
158}
159
160void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
161{
162	/*
163	 * kmalloc() can't provide highmem pages, so mask off __GFP_HIGHMEM
164	 */
165	return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
166}
167
168struct page * vmalloc_to_page(void *addr)
169{
170	return virt_to_page(addr);
171}
172
173unsigned long vmalloc_to_pfn(void *addr)
174{
175	return page_to_pfn(virt_to_page(addr));
176}
177
178
179long vread(char *buf, char *addr, unsigned long count)
180{
181	memcpy(buf, addr, count);
182	return count;
183}
184
185long vwrite(char *buf, char *addr, unsigned long count)
186{
187	/* Don't allow overflow */
188	if ((unsigned long) addr + count < count)
189		count = -(unsigned long) addr;
190
191	memcpy(addr, buf, count);
192	return(count);
193}
194
195/*
196 *	vmalloc  -  allocate virtually contiguous memory
197 *
198 *	@size:		allocation size
199 *
200 *	Allocate enough pages to cover @size from the page level
201 *	allocator and map them into contiguous kernel virtual space.
202 *
203 *	For tight control over page level allocator and protection flags
204 *	use __vmalloc() instead.
205 */
206void *vmalloc(unsigned long size)
207{
208	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
209}
210
211/*
212 *	vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
213 *
214 *	@size:		allocation size
215 *
216 *	Allocate enough 32bit PA addressable pages to cover @size from the
217 *	page level allocator and map them into contiguous kernel virtual space.
218 */
219void *vmalloc_32(unsigned long size)
220{
221	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
222}
223
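/*
 * vmap() and vunmap() have no sensible nommu implementation, so hitting
 * either of them is treated as a hard error
 */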
224void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
225{
226	BUG();
227	return NULL;
228}
229
230void vunmap(void *addr)
231{
232	BUG();
233}
234
235/*
236 *  sys_brk() for the most part doesn't need the global kernel
237 *  lock, except when an application is doing something nasty
238 *  like trying to un-brk an area that has already been mapped
239 *  to a regular file.  In this case, the unmapping will need
240 *  to invoke file system routines that need the global lock.
241 */
242asmlinkage unsigned long sys_brk(unsigned long brk)
243{
244	struct mm_struct *mm = current->mm;
245
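	/*
	 * the brk area cannot be grown on demand here; anything outside the
	 * range bounded by start_brk and context.end_brk (presumably reserved
	 * up front by the binary loader) is simply refused
	 */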
246	if (brk < mm->start_brk || brk > mm->context.end_brk)
247		return mm->brk;
248
249	if (mm->brk == brk)
250		return mm->brk;
251
252	/*
253	 * Always allow shrinking brk
254	 */
255	if (brk <= mm->brk) {
256		mm->brk = brk;
257		return brk;
258	}
259
260	/*
261	 * Ok, looks good - let it rip.
262	 */
263	return mm->brk = brk;
264}
265
266#ifdef DEBUG
267static void show_process_blocks(void)
268{
269	struct vm_list_struct *vml;
270
271	printk("Process blocks %d:", current->pid);
272
273	for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
274		printk(" %p: %p", vml, vml->vma);
275		if (vml->vma)
276			printk(" (%d @%lx #%d)",
277			       kobjsize((void *) vml->vma->vm_start),
278			       vml->vma->vm_start,
279			       atomic_read(&vml->vma->vm_usage));
280		printk(vml->next ? " ->" : ".\n");
281	}
282}
283#endif /* DEBUG */
284
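/*
 * look up a VMA in the tree of shareable VMAs by its exact start address
 */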
285static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
286{
287	struct vm_area_struct *vma;
288	struct rb_node *n = nommu_vma_tree.rb_node;
289
290	while (n) {
291		vma = rb_entry(n, struct vm_area_struct, vm_rb);
292
293		if (start < vma->vm_start)
294			n = n->rb_left;
295		else if (start > vma->vm_start)
296			n = n->rb_right;
297		else
298			return vma;
299	}
300
301	return NULL;
302}
303
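/*
 * add a VMA to the tree of shareable VMAs and, if it is file-backed, hang it
 * off the backing file's i_mmap tree as well
 */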
304static void add_nommu_vma(struct vm_area_struct *vma)
305{
306	struct vm_area_struct *pvma;
307	struct address_space *mapping;
308	struct rb_node **p = &nommu_vma_tree.rb_node;
309	struct rb_node *parent = NULL;
310
311	/* add the VMA to the mapping */
312	if (vma->vm_file) {
313		mapping = vma->vm_file->f_mapping;
314
315		flush_dcache_mmap_lock(mapping);
316		vma_prio_tree_insert(vma, &mapping->i_mmap);
317		flush_dcache_mmap_unlock(mapping);
318	}
319
320	/* add the VMA to the master list */
321	while (*p) {
322		parent = *p;
323		pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
324
325		if (vma->vm_start < pvma->vm_start) {
326			p = &(*p)->rb_left;
327		}
328		else if (vma->vm_start > pvma->vm_start) {
329			p = &(*p)->rb_right;
330		}
331		else {
332			/* mappings are at the same address - this can only
333			 * happen for shared-mem chardevs and shared file
334			 * mappings backed by ramfs/tmpfs */
335			BUG_ON(!(pvma->vm_flags & VM_SHARED));
336
337			if (vma < pvma)
338				p = &(*p)->rb_left;
339			else if (vma > pvma)
340				p = &(*p)->rb_right;
341			else
342				BUG();
343		}
344	}
345
346	rb_link_node(&vma->vm_rb, parent, p);
347	rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
348}
349
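/*
 * remove a VMA from the tree of shareable VMAs and from its backing file's
 * i_mmap tree
 */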
350static void delete_nommu_vma(struct vm_area_struct *vma)
351{
352	struct address_space *mapping;
353
354	/* remove the VMA from the mapping */
355	if (vma->vm_file) {
356		mapping = vma->vm_file->f_mapping;
357
358		flush_dcache_mmap_lock(mapping);
359		vma_prio_tree_remove(vma, &mapping->i_mmap);
360		flush_dcache_mmap_unlock(mapping);
361	}
362
363	/* remove from the master list */
364	rb_erase(&vma->vm_rb, &nommu_vma_tree);
365}
366
367/*
368 * determine whether a mapping should be permitted and, if so, what sort of
369 * mapping we're capable of supporting
370 */
371static int validate_mmap_request(struct file *file,
372				 unsigned long addr,
373				 unsigned long len,
374				 unsigned long prot,
375				 unsigned long flags,
376				 unsigned long pgoff,
377				 unsigned long *_capabilities)
378{
379	unsigned long capabilities;
380	unsigned long reqprot = prot;
381	int ret;
382
383	/* do the simple checks first */
384	if (flags & MAP_FIXED || addr) {
385		printk(KERN_DEBUG
386		       "%d: Can't do fixed-address/overlay mmap of RAM\n",
387		       current->pid);
388		return -EINVAL;
389	}
390
391	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
392	    (flags & MAP_TYPE) != MAP_SHARED)
393		return -EINVAL;
394
395	if (PAGE_ALIGN(len) == 0)
396		return addr;
397
398	if (len > TASK_SIZE)
399		return -EINVAL;
400
401	/* offset overflow? */
402	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
403		return -EINVAL;
404
405	if (file) {
406		/* validate file mapping requests */
407		struct address_space *mapping;
408
409		/* files must support mmap */
410		if (!file->f_op || !file->f_op->mmap)
411			return -ENODEV;
412
413		/* work out if what we've got could possibly be shared
414		 * - we support chardevs that provide their own "memory"
415		 * - we support files/blockdevs that are memory backed
416		 */
417		mapping = file->f_mapping;
418		if (!mapping)
419			mapping = file->f_dentry->d_inode->i_mapping;
420
421		capabilities = 0;
422		if (mapping && mapping->backing_dev_info)
423			capabilities = mapping->backing_dev_info->capabilities;
424
425		if (!capabilities) {
426			/* no explicit capabilities set, so assume some
427			 * defaults */
428			switch (file->f_dentry->d_inode->i_mode & S_IFMT) {
429			case S_IFREG:
430			case S_IFBLK:
431				capabilities = BDI_CAP_MAP_COPY;
432				break;
433
434			case S_IFCHR:
435				capabilities =
436					BDI_CAP_MAP_DIRECT |
437					BDI_CAP_READ_MAP |
438					BDI_CAP_WRITE_MAP;
439				break;
440
441			default:
442				return -EINVAL;
443			}
444		}
445
446		/* eliminate any capabilities that we can't support on this
447		 * device */
448		if (!file->f_op->get_unmapped_area)
449			capabilities &= ~BDI_CAP_MAP_DIRECT;
450		if (!file->f_op->read)
451			capabilities &= ~BDI_CAP_MAP_COPY;
452
453		if (flags & MAP_SHARED) {
454			/* do checks for writing, appending and locking */
455			if ((prot & PROT_WRITE) &&
456			    !(file->f_mode & FMODE_WRITE))
457				return -EACCES;
458
459			if (IS_APPEND(file->f_dentry->d_inode) &&
460			    (file->f_mode & FMODE_WRITE))
461				return -EACCES;
462
463			if (locks_verify_locked(file->f_dentry->d_inode))
464				return -EAGAIN;
465
466			if (!(capabilities & BDI_CAP_MAP_DIRECT))
467				return -ENODEV;
468
469			if (((prot & PROT_READ)  && !(capabilities & BDI_CAP_READ_MAP))  ||
470			    ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
471			    ((prot & PROT_EXEC)  && !(capabilities & BDI_CAP_EXEC_MAP))
472			    ) {
473				printk("MAP_SHARED not completely supported on !MMU\n");
474				return -EINVAL;
475			}
476
477			/* we mustn't privatise shared mappings */
478			capabilities &= ~BDI_CAP_MAP_COPY;
479		}
480		else {
481			/* we're going to read the file into private memory we
482			 * allocate */
483			if (!(capabilities & BDI_CAP_MAP_COPY))
484				return -ENODEV;
485
486			/* we don't permit a private writable mapping to be
487			 * shared with the backing device */
488			if (prot & PROT_WRITE)
489				capabilities &= ~BDI_CAP_MAP_DIRECT;
490		}
491
492		/* handle executable mappings and implied executable
493		 * mappings */
494		if (file->f_vfsmnt->mnt_flags & MNT_NOEXEC) {
495			if (prot & PROT_EXEC)
496				return -EPERM;
497		}
498		else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
499			/* handle implication of PROT_EXEC by PROT_READ */
500			if (current->personality & READ_IMPLIES_EXEC) {
501				if (capabilities & BDI_CAP_EXEC_MAP)
502					prot |= PROT_EXEC;
503			}
504		}
505		else if ((prot & PROT_READ) &&
506			 (prot & PROT_EXEC) &&
507			 !(capabilities & BDI_CAP_EXEC_MAP)
508			 ) {
509			/* backing file is not executable, try to copy */
510			capabilities &= ~BDI_CAP_MAP_DIRECT;
511		}
512	}
513	else {
514		/* anonymous mappings are always memory backed and can be
515		 * privately mapped
516		 */
517		capabilities = BDI_CAP_MAP_COPY;
518
519		/* handle PROT_EXEC implication by PROT_READ */
520		if ((prot & PROT_READ) &&
521		    (current->personality & READ_IMPLIES_EXEC))
522			prot |= PROT_EXEC;
523	}
524
525	/* allow the security API to have its say */
526	ret = security_file_mmap(file, reqprot, prot, flags);
527	if (ret < 0)
528		return ret;
529
530	/* looks okay */
531	*_capabilities = capabilities;
532	return 0;
533}
534
535/*
536 * we've determined that we can make the mapping, now translate what we
537 * now know into VMA flags
538 */
539static unsigned long determine_vm_flags(struct file *file,
540					unsigned long prot,
541					unsigned long flags,
542					unsigned long capabilities)
543{
544	unsigned long vm_flags;
545
546	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
547	vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
548	/* vm_flags |= mm->def_flags; */
549
550	if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
551		/* attempt to share read-only copies of mapped file chunks */
552		if (file && !(prot & PROT_WRITE))
553			vm_flags |= VM_MAYSHARE;
554	}
555	else {
556		/* overlay a shareable mapping on the backing device or inode
557		 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
558		 * romfs/cramfs */
559		if (flags & MAP_SHARED)
560			vm_flags |= VM_MAYSHARE | VM_SHARED;
561		else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
562			vm_flags |= VM_MAYSHARE;
563	}
564
565	/* refuse to let anyone share private mappings with this process if
566	 * it's being traced - otherwise breakpoints set in it may interfere
567	 * with another untraced process
568	 */
569	if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
570		vm_flags &= ~VM_MAYSHARE;
571
572	return vm_flags;
573}
574
575/*
576 * set up a shared mapping on a file
577 */
578static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
579{
580	int ret;
581
582	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
583	if (ret != -ENOSYS)
584		return ret;
585
586	/* getting an ENOSYS error indicates that direct mmap isn't
587	 * possible (as opposed to tried but failed) so we'll fall
588	 * through to making a private copy of the data and mapping
589	 * that if we can */
590	return -ENODEV;
591}
592
593/*
594 * set up a private mapping or an anonymous shared mapping
595 */
596static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
597{
598	void *base;
599	int ret;
600
601	/* invoke the file's mapping function so that it can keep track of
602	 * shared mappings on devices or memory
603	 * - VM_MAYSHARE will be set if it may attempt to share
604	 */
605	if (vma->vm_file) {
606		ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
607		if (ret != -ENOSYS) {
608			/* shouldn't return success if we're not sharing */
609			BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
610			return ret; /* success or a real error */
611		}
612
613		/* getting an ENOSYS error indicates that direct mmap isn't
614		 * possible (as opposed to tried but failed) so we'll try to
615		 * make a private copy of the data and map that instead */
616	}
617
618	/* allocate some memory to hold the mapping
619	 * - note that this may not return a page-aligned address if the object
620	 *   we're allocating is smaller than a page
621	 */
622	base = kmalloc(len, GFP_KERNEL);
623	if (!base)
624		goto enomem;
625
626	vma->vm_start = (unsigned long) base;
627	vma->vm_end = vma->vm_start + len;
628	vma->vm_flags |= VM_MAPPED_COPY;
629
630#ifdef WARN_ON_SLACK
631	if (len + WARN_ON_SLACK <= kobjsize(base))
632		printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
633		       len, current->pid, kobjsize(base) - len);
634#endif
635
636	if (vma->vm_file) {
637		/* read the contents of a file into the copy */
638		mm_segment_t old_fs;
639		loff_t fpos;
640
641		fpos = vma->vm_pgoff;
642		fpos <<= PAGE_SHIFT;
643
644		old_fs = get_fs();
645		set_fs(KERNEL_DS);
646		ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
647		set_fs(old_fs);
648
649		if (ret < 0)
650			goto error_free;
651
652		/* clear the last little bit */
653		if (ret < len)
654			memset(base + ret, 0, len - ret);
655
656	} else {
657		/* if it's an anonymous mapping, then just clear it */
658		memset(base, 0, len);
659	}
660
661	return 0;
662
663error_free:
664	kfree(base);
665	vma->vm_start = 0;
666	return ret;
667
668enomem:
669	printk("Allocation of length %lu from process %d failed\n",
670	       len, current->pid);
671	show_free_areas();
672	return -ENOMEM;
673}
674
675/*
676 * handle mapping creation for uClinux
677 */
678unsigned long do_mmap_pgoff(struct file *file,
679			    unsigned long addr,
680			    unsigned long len,
681			    unsigned long prot,
682			    unsigned long flags,
683			    unsigned long pgoff)
684{
685	struct vm_list_struct *vml = NULL;
686	struct vm_area_struct *vma = NULL;
687	struct rb_node *rb;
688	unsigned long capabilities, vm_flags;
689	void *result;
690	int ret;
691
692	/* decide whether we should attempt the mapping, and if so what sort of
693	 * mapping */
694	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
695				    &capabilities);
696	if (ret < 0)
697		return ret;
698
699	/* we've determined that we can make the mapping, now translate what we
700	 * now know into VMA flags */
701	vm_flags = determine_vm_flags(file, prot, flags, capabilities);
702
703	/* we're going to need to record the mapping if it works */
704	vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
705	if (!vml)
706		goto error_getting_vml;
707	memset(vml, 0, sizeof(*vml));
708
709	down_write(&nommu_vma_sem);
710
711	/* if we want to share, we need to check for VMAs created by other
712	 * mmap() calls that overlap with our proposed mapping
713	 * - we can only share with an exact match on most regular files
714	 * - shared mappings on character devices and memory backed files are
715	 *   permitted to overlap inexactly as far as we are concerned, because
716	 *   in these cases sharing is handled in the driver or filesystem
717	 *   rather than here
718	 */
719	if (vm_flags & VM_MAYSHARE) {
720		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
721		unsigned long vmpglen;
722
723		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
724			vma = rb_entry(rb, struct vm_area_struct, vm_rb);
725
726			if (!(vma->vm_flags & VM_MAYSHARE))
727				continue;
728
729			/* search for overlapping mappings on the same file */
730			if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
731				continue;
732
733			if (vma->vm_pgoff >= pgoff + pglen)
734				continue;
735
736			vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
737			vmpglen >>= PAGE_SHIFT;
738			if (pgoff >= vma->vm_pgoff + vmpglen)
739				continue;
740
741			/* handle inexactly overlapping matches between mappings */
742			if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
743				if (!(capabilities & BDI_CAP_MAP_DIRECT))
744					goto sharing_violation;
745				continue;
746			}
747
748			/* we've found a VMA we can share */
749			atomic_inc(&vma->vm_usage);
750
751			vml->vma = vma;
752			result = (void *) vma->vm_start;
753			goto shared;
754		}
755
756		vma = NULL;
757
758		/* obtain the address at which to make a shared mapping
759		 * - this is the hook for quasi-memory character devices to
760		 *   tell us the location of a shared mapping
761		 */
762		if (file && file->f_op->get_unmapped_area) {
763			addr = file->f_op->get_unmapped_area(file, addr, len,
764							     pgoff, flags);
765			if (IS_ERR((void *) addr)) {
766				ret = addr;
767				if (ret != (unsigned long) -ENOSYS)
768					goto error;
769
770				/* the driver refused to tell us where to site
771				 * the mapping so we'll have to attempt to copy
772				 * it */
773				ret = (unsigned long) -ENODEV;
774				if (!(capabilities & BDI_CAP_MAP_COPY))
775					goto error;
776
777				capabilities &= ~BDI_CAP_MAP_DIRECT;
778			}
779		}
780	}
781
782	/* we're going to need a VMA struct as well */
783	vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
784	if (!vma)
785		goto error_getting_vma;
786
787	memset(vma, 0, sizeof(*vma));
788	INIT_LIST_HEAD(&vma->anon_vma_node);
789	atomic_set(&vma->vm_usage, 1);
790	if (file)
791		get_file(file);
792	vma->vm_file	= file;
793	vma->vm_flags	= vm_flags;
794	vma->vm_start	= addr;
795	vma->vm_end	= addr + len;
796	vma->vm_pgoff	= pgoff;
797
798	vml->vma = vma;
799
800	/* set up the mapping */
801	if (file && vma->vm_flags & VM_SHARED)
802		ret = do_mmap_shared_file(vma, len);
803	else
804		ret = do_mmap_private(vma, len);
805	if (ret < 0)
806		goto error;
807
808	/* okay... we have a mapping; now we have to register it */
809	result = (void *) vma->vm_start;
810
811	if (vma->vm_flags & VM_MAPPED_COPY) {
812		realalloc += kobjsize(result);
813		askedalloc += len;
814	}
815
816	realalloc += kobjsize(vma);
817	askedalloc += sizeof(*vma);
818
819	current->mm->total_vm += len >> PAGE_SHIFT;
820
821	add_nommu_vma(vma);
822
823 shared:
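	/*
	 * whether the VMA was newly created or shared with an existing one,
	 * record it on this process's mapping list and account for the node
	 */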
824	realalloc += kobjsize(vml);
825	askedalloc += sizeof(*vml);
826
827	vml->next = current->mm->context.vmlist;
828	current->mm->context.vmlist = vml;
829
830	up_write(&nommu_vma_sem);
831
832	if (prot & PROT_EXEC)
833		flush_icache_range((unsigned long) result,
834				   (unsigned long) result + len);
835
836#ifdef DEBUG
837	printk("do_mmap:\n");
838	show_process_blocks();
839#endif
840
841	return (unsigned long) result;
842
843 error:
844	up_write(&nommu_vma_sem);
845	kfree(vml);
846	if (vma) {
847		fput(vma->vm_file);
848		kfree(vma);
849	}
850	return ret;
851
852 sharing_violation:
853	up_write(&nommu_vma_sem);
854	printk("Attempt to share mismatched mappings\n");
855	kfree(vml);
856	return -EINVAL;
857
858 error_getting_vma:
859	up_write(&nommu_vma_sem);
860	kfree(vml);
861	printk("Allocation of vma for %lu byte allocation from process %d failed\n",
862	       len, current->pid);
863	show_free_areas();
864	return -ENOMEM;
865
866 error_getting_vml:
867	printk("Allocation of vml for %lu byte allocation from process %d failed\n",
868	       len, current->pid);
869	show_free_areas();
870	return -ENOMEM;
871}
872
873/*
874 * handle mapping disposal for uClinux
875 */
876static void put_vma(struct vm_area_struct *vma)
877{
878	if (vma) {
879		down_write(&nommu_vma_sem);
880
881		if (atomic_dec_and_test(&vma->vm_usage)) {
882			delete_nommu_vma(vma);
883
884			if (vma->vm_ops && vma->vm_ops->close)
885				vma->vm_ops->close(vma);
886
887			/* IO memory and memory shared directly out of the pagecache from
888			 * ramfs/tmpfs mustn't be released here */
889			if (vma->vm_flags & VM_MAPPED_COPY) {
890				realalloc -= kobjsize((void *) vma->vm_start);
891				askedalloc -= vma->vm_end - vma->vm_start;
892				kfree((void *) vma->vm_start);
893			}
894
895			realalloc -= kobjsize(vma);
896			askedalloc -= sizeof(*vma);
897
898			if (vma->vm_file)
899				fput(vma->vm_file);
900			kfree(vma);
901		}
902
903		up_write(&nommu_vma_sem);
904	}
905}
906
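/*
 * release a mapping - on nommu only whole mappings can be unmapped, so the
 * region given must exactly match one on the caller's vmlist (a len of zero
 * is accepted as "whatever mapping starts at addr")
 */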
907int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
908{
909	struct vm_list_struct *vml, **parent;
910	unsigned long end = addr + len;
911
912#ifdef DEBUG
913	printk("do_munmap:\n");
914#endif
915
916	for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next)
917		if ((*parent)->vma->vm_start == addr &&
918		    ((len == 0) || ((*parent)->vma->vm_end == end)))
919			goto found;
920
921	printk("munmap of non-mmaped memory by process %d (%s): %p\n",
922	       current->pid, current->comm, (void *) addr);
923	return -EINVAL;
924
925 found:
926	vml = *parent;
927
928	put_vma(vml->vma);
929
930	*parent = vml->next;
931	realalloc -= kobjsize(vml);
932	askedalloc -= sizeof(*vml);
933	kfree(vml);
934
935	update_hiwater_vm(mm);
936	mm->total_vm -= len >> PAGE_SHIFT;
937
938#ifdef DEBUG
939	show_process_blocks();
940#endif
941
942	return 0;
943}
944
945/* Release all mmaps. */
946void exit_mmap(struct mm_struct * mm)
947{
948	struct vm_list_struct *tmp;
949
950	if (mm) {
951#ifdef DEBUG
952		printk("Exit_mmap:\n");
953#endif
954
955		mm->total_vm = 0;
956
957		while ((tmp = mm->context.vmlist)) {
958			mm->context.vmlist = tmp->next;
959			put_vma(tmp->vma);
960
961			realalloc -= kobjsize(tmp);
962			askedalloc -= sizeof(*tmp);
963			kfree(tmp);
964		}
965
966#ifdef DEBUG
967		show_process_blocks();
968#endif
969	}
970}
971
972asmlinkage long sys_munmap(unsigned long addr, size_t len)
973{
974	int ret;
975	struct mm_struct *mm = current->mm;
976
977	down_write(&mm->mmap_sem);
978	ret = do_munmap(mm, addr, len);
979	up_write(&mm->mmap_sem);
980	return ret;
981}
982
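/*
 * do_brk() has no nommu implementation: anonymous memory can't be conjured
 * up at an arbitrary address, so it always fails
 */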
983unsigned long do_brk(unsigned long addr, unsigned long len)
984{
985	return -ENOMEM;
986}
987
988/*
989 * Expand (or shrink) an existing mapping, potentially moving it at the
990 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
991 *
992 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
993 * This option implies MREMAP_MAYMOVE.
994 *
995 * on uClinux, we only permit changing a mapping's size, and only as long as it
996 * stays within the kmalloc() hole allocated by do_mmap_pgoff() and isn't shareable
997 */
998unsigned long do_mremap(unsigned long addr,
999			unsigned long old_len, unsigned long new_len,
1000			unsigned long flags, unsigned long new_addr)
1001{
1002	struct vm_list_struct *vml = NULL;
1003
1004	/* insanity checks first */
1005	if (new_len == 0)
1006		return (unsigned long) -EINVAL;
1007
1008	if (flags & MREMAP_FIXED && new_addr != addr)
1009		return (unsigned long) -EINVAL;
1010
1011	for (vml = current->mm->context.vmlist; vml; vml = vml->next)
1012		if (vml->vma->vm_start == addr)
1013			goto found;
1014
1015	return (unsigned long) -EINVAL;
1016
1017 found:
1018	if (vml->vma->vm_end != vml->vma->vm_start + old_len)
1019		return (unsigned long) -EFAULT;
1020
1021	if (vml->vma->vm_flags & VM_MAYSHARE)
1022		return (unsigned long) -EPERM;
1023
1024	if (new_len > kobjsize((void *) addr))
1025		return (unsigned long) -ENOMEM;
1026
1027	/* all checks complete - do it */
1028	vml->vma->vm_end = vml->vma->vm_start + new_len;
1029
1030	askedalloc -= old_len;
1031	askedalloc += new_len;
1032
1033	return vml->vma->vm_start;
1034}
1035
1036/*
1037 * Look up the VMA that covers addr (vm_start <= addr < vm_end), NULL if none
1038 */
1039struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1040{
1041	struct vm_list_struct *vml;
1042
1043	for (vml = mm->context.vmlist; vml; vml = vml->next)
1044		if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end)
1045			return vml->vma;
1046
1047	return NULL;
1048}
1049
1050EXPORT_SYMBOL(find_vma);
1051
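/*
 * there are no page tables to walk without an MMU, so follow_page() has
 * nothing to return; find_extend_vma() likewise has nothing to extend
 */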
1052struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write)
1053{
1054	return NULL;
1055}
1056
1057struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
1058{
1059	return NULL;
1060}
1061
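/*
 * "remapping" just records the physical address (taken from vm_pgoff) as the
 * start of the VMA; the from/size/prot arguments are effectively ignored
 */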
1062int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
1063		unsigned long to, unsigned long size, pgprot_t prot)
1064{
1065	vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
1066	return 0;
1067}
1068
1069void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1070{
1071}
1072
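/*
 * no free-area searching is done for direct mappings on nommu - the backing
 * device or filesystem decides where they live (see do_mmap_pgoff()) - so
 * the generic hook simply fails
 */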
1073unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1074	unsigned long len, unsigned long pgoff, unsigned long flags)
1075{
1076	return -ENOMEM;
1077}
1078
1079void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1080{
1081}
1082
1083void unmap_mapping_range(struct address_space *mapping,
1084			 loff_t const holebegin, loff_t const holelen,
1085			 int even_cows)
1086{
1087}
1088
1089/*
1090 * Check that a process has enough memory to allocate a new virtual
1091 * mapping. 0 means there is enough memory for the allocation to
1092 * succeed and -ENOMEM implies there is not.
1093 *
1094 * We currently support three overcommit policies, which are set via the
1095 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
1096 *
1097 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
1098 * Additional code 2002 Jul 20 by Robert Love.
1099 *
1100 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
1101 *
1102 * Note this is a helper function intended to be used by LSMs which
1103 * wish to use this logic.
1104 */
1105int __vm_enough_memory(long pages, int cap_sys_admin)
1106{
1107	unsigned long free, allowed;
1108
1109	vm_acct_memory(pages);
1110
1111	/*
1112	 * Sometimes we want to use more memory than we have
1113	 */
1114	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1115		return 0;
1116
1117	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1118		unsigned long n;
1119
1120		free = get_page_cache_size();
1121		free += nr_swap_pages;
1122
1123		/*
1124		 * Any slabs which are created with the
1125		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1126		 * which are reclaimable, under pressure.  The dentry
1127		 * cache and most inode caches should fall into this category.
1128		 */
1129		free += atomic_read(&slab_reclaim_pages);
1130
1131		/*
1132		 * Leave the last 3% for root
1133		 */
1134		if (!cap_sys_admin)
1135			free -= free / 32;
1136
1137		if (free > pages)
1138			return 0;
1139
1140		/*
1141		 * nr_free_pages() is very expensive on large systems,
1142		 * only call if we're about to fail.
1143		 */
1144		n = nr_free_pages();
1145		if (!cap_sys_admin)
1146			n -= n / 32;
1147		free += n;
1148
1149		if (free > pages)
1150			return 0;
1151		vm_unacct_memory(pages);
1152		return -ENOMEM;
1153	}
1154
1155	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1156	/*
1157	 * Leave the last 3% for root
1158	 */
1159	if (!cap_sys_admin)
1160		allowed -= allowed / 32;
1161	allowed += total_swap_pages;
1162
1163	/* Don't let a single process grow too big:
1164	   leave 3% of the size of this process for other processes */
1165	allowed -= current->mm->total_vm / 32;
1166
1167	/*
1168	 * cast `allowed' as a signed long because vm_committed_space
1169	 * sometimes has a negative value
1170	 */
1171	if (atomic_read(&vm_committed_space) < (long)allowed)
1172		return 0;
1173
1174	vm_unacct_memory(pages);
1175
1176	return -ENOMEM;
1177}
1178
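/*
 * there is no gate (vsyscall) area without an MMU, so no address can ever
 * fall inside one
 */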
1179int in_gate_area_no_task(unsigned long addr)
1180{
1181	return 0;
1182}
1183