pgtable.c revision a9162f238a84ee05b09ea4b0ebd97fb20448c28c
/*
 *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif

unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;
	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);
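
/*
 * Illustrative note (editor's addition, not part of the original file):
 * the "vmalloc=" early parameter sets the size of the vmalloc area, which
 * is carved out below VMALLOC_END.  memparse() accepts the usual K/M/G
 * suffixes, so booting with e.g.
 *
 *	vmalloc=1G
 *
 * moves VMALLOC_START down to VMALLOC_END - 1G, rounded down to a page
 * boundary.
 */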

unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif
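
/*
 * Illustrative sketch (editor's addition, not part of the original file,
 * never compiled): crst_table_upgrade()/crst_table_downgrade() move the
 * address space limit along the ladder 2 GB (1UL << 31, segment table)
 * -> 4 TB (1UL << 42, region-3 table) -> 8 PB (1UL << 53, region-2 table),
 * allocating or freeing one crst table per step.  A hypothetical 64-bit
 * caller that needs room for a mapping above the current limit could do:
 */
#if 0
static int example_grow_address_space(struct mm_struct *mm,
				      unsigned long addr, unsigned long len)
{
	if (addr + len <= mm->context.asce_limit)
		return 0;			/* already big enough */
	return crst_table_upgrade(mm, addr + len);
}
#endif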

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INV)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			       unsigned long *table, unsigned long init)
{
	struct page *page;
	unsigned long *new;

	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INV) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV;
	}
out:
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);
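
/*
 * Worked example (editor's addition, not part of the original file):
 * the guest table walk above splits a guest address into four 11-bit
 * indices, one per crst table level, each selecting one of 2048 eight-byte
 * entries in a 16 KB table:
 *
 *	region-1 index = (addr >> 53) & 0x7ff
 *	region-2 index = (addr >> 42) & 0x7ff
 *	region-3 index = (addr >> 31) & 0x7ff
 *	segment  index = (addr >> 20) & 0x7ff
 *
 * For addr = 0x80100000 (2 GB + 1 MB) the indices are 0, 0, 1, 1: entry 1
 * of the region-3 table leads to the segment table, whose entry 1 covers
 * the 1 MB segment starting at 2 GB + 1 MB.
 */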

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > PGDIR_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
	}
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, vmaddr, segment;
	struct mm_struct *mm;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct vm_area_struct *vma;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	current->thread.gmap_addr = address;
	mm = gmap->mm;
	/* Walk the gmap address space page table */
	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);

	/* Convert the gmap address to an mm address. */
	segment = *table;
	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_RO) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		vma = find_vma(mm, vmaddr);
		if (!vma || vma->vm_start > vmaddr)
			return -EFAULT;

		/* Walk the parent mm page table */
		pgd = pgd_offset(mm, vmaddr);
		pud = pud_alloc(mm, pgd, vmaddr);
		if (!pud)
			return -ENOMEM;
		pmd = pmd_alloc(mm, pud, vmaddr);
		if (!pmd)
			return -ENOMEM;
		if (!pmd_present(*pmd) &&
		    __pte_alloc(mm, vma, pmd, vmaddr))
			return -ENOMEM;
		/* pmd now points to a valid segment table entry. */
		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
		if (!rmap)
			return -ENOMEM;
		/* Link gmap segment table entry location to page table. */
		page = pmd_page(*pmd);
		mp = (struct gmap_pgtable *) page->index;
		rmap->entry = table;
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*table = pmd_val(*pmd) & PAGE_MASK;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(gmap_fault);
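
#if 0	/* editor's illustrative sketch, not part of the original file */
/*
 * Minimal example of the gmap life cycle as a hypervisor (e.g. KVM) might
 * use it.  "from", "guest_addr" and "size" are hypothetical, PMD_SIZE
 * aligned values supplied by the caller; error handling is reduced to the
 * bare minimum.
 */
static int example_gmap_lifecycle(struct mm_struct *mm, unsigned long from,
				  unsigned long guest_addr, unsigned long size)
{
	struct gmap *gmap;
	int rc;

	gmap = gmap_alloc(mm);		/* new guest address space */
	if (!gmap)
		return -ENOMEM;
	rc = gmap_map_segment(gmap, from, guest_addr, size);
	if (rc)
		goto out_free;
	gmap_enable(gmap);		/* make it the current guest space */
	/*
	 * ... run the guest; translation faults on guest addresses are
	 * resolved with gmap_fault() ...
	 */
	gmap_disable(gmap);
	gmap_unmap_segment(gmap, guest_addr, size);
out_free:
	gmap_free(gmap);
	return rc;
}
#endif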

void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry =
			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	pgtable_page_ctor(page);
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 3);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);	/* pairs with the ctor in page_table_alloc_pgste() */
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

#else /* CONFIG_PGSTE */

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unmap_notifier(struct mm_struct *mm,
					  unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
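
/*
 * Worked example (editor's addition, not part of the original file):
 * page->_mapcount is reused as the fragment bitmap for the 1K/2K page
 * tables above.  The low bits (FRAG_MASK) mark fragments handed out by
 * page_table_alloc(); the same bits shifted left by 4 mark fragments whose
 * release is deferred through the RCU path below.  E.g. on 64 bit a 4 KB
 * page whose second 2 KB half is the only one in use has _mapcount == 0x02;
 * page_table_free() on that half xors the bit away, sees the mask drop to
 * 0 and releases the whole page.
 */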

#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif
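
/*
 * Illustrative note (editor's addition, not part of the original file):
 * page table fragments are at least 1 KB aligned, so page_table_free_rcu()
 * above can use the low bits of the pointer it hands to tlb_remove_table()
 * as a type tag:
 *
 *	0		treated as a crst table, freed with free_pages()
 *	bit << 4	1K/2K fragment, freed by __page_table_free_rcu()
 *	FRAG_MASK	pgste page table, freed by page_table_free_pgste()
 *
 * __tlb_remove_table() strips the tag again once the mmu_gather/RCU
 * machinery has decided that no other CPU can still be walking the table,
 * and dispatches on it as shown above.
 */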

/*
 * switch on pgstes for the userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Has the addressing mode been switched? If not, we cannot do SIE. */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we already have pgstes? If yes, we are done. */
	if (mm_has_pgste(tsk->mm))
		return 0;

	/* Let's check whether we are allowed to replace the mm. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* Copy the mm and let dup_mm create the page tables with pgstes. */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again whether something happened in the meantime. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* Ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	atomic_inc(&mm->context.attach_count);
	atomic_dec(&old_mm->context.attach_count);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
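
#if 0	/* editor's illustrative sketch, not part of the original file */
/*
 * Hypothetical caller of s390_enable_sie(): a virtualisation module (KVM
 * is the intended user, per the comment above) switches the current
 * process to pgste-backed page tables before it starts using the gmap
 * interface.  The function name is made up for illustration.
 */
static int example_prepare_for_guest(void)
{
	int rc;

	rc = s390_enable_sie();		/* re-create the mm with pgstes */
	if (rc)
		return rc;
	/* current->mm now has pgstes; gmap_alloc(current->mm) may follow. */
	return 0;
}
#endif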

#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
