pgtable.c revision 6252d702c5311ce916caf75ed82e5c8245171c92
/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

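/*
 * A crst (region or segment) table occupies 1 << ALLOC_ORDER pages.
 * A single 4K page is split into TABLES_PER_PAGE page table fragments
 * of 256 entries each; the low bits of page->flags covered by FRAG_MASK
 * record which fragments are in use, and SECOND_HALVES marks the
 * fragments reserved as shadow tables when the mm runs in noexec mode.
 */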
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL
#else
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL
#endif

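/*
 * Allocate a region or segment table of 1 << ALLOC_ORDER pages. With
 * noexec a second table of the same size is allocated as a shadow and
 * its physical address is stashed in page->index. The page is linked
 * into mm->context.crst_list and the table is returned by its physical
 * address, which the callers use directly as a pointer.
 */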
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->page_table_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->page_table_lock);
	return (unsigned long *) page_to_phys(page);
}

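/*
 * Release a region or segment table allocated by crst_table_alloc(),
 * together with its shadow table if one exists, after unlinking the
 * page from mm->context.crst_list.
 */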
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->page_table_lock);
	list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
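/*
 * Grow the address space to at least "limit" bytes by adding new
 * top-level region tables. Each pass allocates a fresh crst table and,
 * under page_table_lock, links the old top level below it via
 * pgd_populate(), raising asce_limit from 2^31 to 2^42 (region-third
 * table) or from 2^42 to 2^53 (region-second table) and adjusting the
 * asce bits to match. If the limit is already satisfied the unused
 * table is freed; otherwise the loop repeats until asce_limit covers
 * the requested limit. update_mm() then reloads the new ASCE for the
 * current task.
 */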
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

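/*
 * Shrink the address space back to "limit" bytes by peeling off
 * top-level region tables. After flushing the TLB for the mm, each
 * iteration frees the current top level, makes the table it points to
 * the new pgd and lowers asce_limit/asce_bits accordingly, until the
 * limit is reached. update_mm() reloads the reduced ASCE.
 */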
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 */
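/*
 * Allocate one page table fragment of 256 entries. A 4K page holds
 * TABLES_PER_PAGE such fragments; the first page on
 * mm->context.pgtable_list with a free fragment is reused, otherwise a
 * new page is allocated and cleared to empty entries. With noexec two
 * adjacent fragments are claimed at once, the second serving as the
 * shadow table. Fully used pages are moved to the tail of the list.
 */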
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = mm->context.noexec ? 3UL : 1UL;
	spin_lock(&mm->page_table_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		spin_unlock(&mm->page_table_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->page_table_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->page_table_lock);
	return table;
}

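/*
 * Return a page table fragment to its 4K page. The fragment's bit(s)
 * in page->flags are cleared; if other fragments of the page are still
 * in use the page is moved to the front of pgtable_list so it is found
 * first on the next allocation, otherwise the page is unlinked and
 * freed.
 */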
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = mm->context.noexec ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->page_table_lock);
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}

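/*
 * Turn off noexec handling for this mm: free all shadow region and
 * segment tables, release the page table halves reserved for shadow
 * page tables by clearing their SECOND_HALVES bits, and reload the
 * context with noexec disabled.
 */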
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
	struct page *page;

	spin_lock(&mm->page_table_lock);
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
			free_pages((unsigned long) page->index, ALLOC_ORDER);
			page->index = 0;
		}
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->page_table_lock);
	mm->context.noexec = 0;
	update_mm(mm, tsk);
}