/* ioremap.c, revision cb8ab687c32331fb548c613ae74df574bb0908c1 */
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#ifdef CONFIG_X86_64

unsigned long __phys_addr(unsigned long x)
{
	if (x >= __START_KERNEL_map)
		return x - __START_KERNEL_map + phys_base;
	return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
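
/*
 * Worked example (illustration only, not part of the original file): for a
 * direct-mapped address such as PAGE_OFFSET + 0x2000, __phys_addr() simply
 * subtracts PAGE_OFFSET and returns 0x2000; for an address at or above
 * __START_KERNEL_map it instead subtracts __START_KERNEL_map and adds
 * phys_base, matching the (possibly relocated) kernel text mapping.
 */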

static inline int phys_addr_valid(unsigned long addr)
{
	return addr < (1UL << boot_cpu_data.x86_phys_bits);
}

#else

static inline int phys_addr_valid(unsigned long addr)
{
	return 1;
}

#endif

int page_is_ram(unsigned long pagenr)
{
	resource_size_t addr, end;
	int i;

	/*
	 * A special case is the first 4KB of memory: this is a BIOS-owned
	 * area, not kernel RAM, but it is generally not listed as such in
	 * the E820 table.
	 */
	if (pagenr == 0)
		return 0;

	/*
	 * Second special case: some BIOSes report the PC BIOS area
	 * (640k-1MB) as RAM even though it is not.
	 */
	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
		    pagenr < (BIOS_END >> PAGE_SHIFT))
		return 0;

	for (i = 0; i < e820.nr_map; i++) {
		/*
		 * Not usable memory:
		 */
		if (e820.map[i].type != E820_RAM)
			continue;
		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;

		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}
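
/*
 * Example (illustration only, not part of the original file): a minimal
 * sketch of checking a physical address with page_is_ram(). The helper name
 * is made up; the VGA window address in the comment is just an example of a
 * range the function rejects.
 */
static inline int example_phys_is_ram(resource_size_t paddr)
{
	/* e.g. 0x000A0000 (legacy VGA/BIOS area) returns 0 here. */
	return page_is_ram(paddr >> PAGE_SHIFT);
}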

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			       unsigned long prot_val)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (prot_val) {
	case _PAGE_CACHE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}
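
/*
 * Example (illustration only, not part of the original file): a hedged
 * sketch of driving the attribute fixup above for a range that is also
 * covered by the kernel's direct mapping. The helper name is made up, and
 * the range is assumed to be page-aligned and present in the direct map.
 */
static inline int example_make_range_uncached(resource_size_t phys,
					      unsigned long size)
{
	unsigned long vaddr = (unsigned long)__va(phys);

	/* Switch the direct-mapping alias of this range to uncached. */
	return ioremap_change_attr(vaddr, size, _PAGE_CACHE_UC);
}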

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. Needed when the kernel wants to access high addresses
 * directly.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
		unsigned long size, unsigned long prot_val, void *caller)
{
	unsigned long pfn, offset, vaddr;
	resource_size_t last_addr;
	struct vm_struct *area;
	unsigned long new_prot_val;
	pgprot_t prot;
	int retval;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't remap the low PCI/ISA area, it's always mapped..
	 */
	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
		return (__force void __iomem *)phys_to_virt(phys_addr);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	for (pfn = phys_addr >> PAGE_SHIFT;
				(pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
				pfn++) {

		int is_ram = page_is_ram(pfn);

		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
			return NULL;
		WARN_ON_ONCE(is_ram);
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = reserve_memtype(phys_addr, phys_addr + size,
						prot_val, &new_prot_val);
	if (retval) {
		pr_debug("Warning: reserve_memtype returned %d\n", retval);
		return NULL;
	}

	if (prot_val != new_prot_val) {
		/*
		 * Do not fall back to certain memory types for certain
		 * requested types:
		 * - request is uc-, return cannot be write-back
		 * - request is uc-, return cannot be write-combine
		 * - request is write-combine, return cannot be write-back
		 */
		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
		     (new_prot_val == _PAGE_CACHE_WB ||
		      new_prot_val == _PAGE_CACHE_WC)) ||
		    (prot_val == _PAGE_CACHE_WC &&
		     new_prot_val == _PAGE_CACHE_WB)) {
			pr_debug(
		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				prot_val, new_prot_val);
			free_memtype(phys_addr, phys_addr + size);
			return NULL;
		}
		prot_val = new_prot_val;
	}

	switch (prot_val) {
	case _PAGE_CACHE_UC:
	default:
		prot = PAGE_KERNEL_NOCACHE;
		break;
	case _PAGE_CACHE_UC_MINUS:
		prot = PAGE_KERNEL_UC_MINUS;
		break;
	case _PAGE_CACHE_WC:
		prot = PAGE_KERNEL_WC;
		break;
	case _PAGE_CACHE_WB:
		prot = PAGE_KERNEL;
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;
	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
		free_memtype(phys_addr, phys_addr + size);
		free_vm_area(area);
		return NULL;
	}

	if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
		free_memtype(phys_addr, phys_addr + size);
		vunmap(area->addr);
		return NULL;
	}

	return (void __iomem *) (vaddr + offset);
}
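
/*
 * Worked example (illustration only, not part of the original file): for a
 * hypothetical request of phys_addr = 0x12345678 and size = 0x10, the code
 * above computes offset = 0x678, rounds phys_addr down to 0x12345000,
 * rounds size up to 0x1000, maps that single page, and returns
 * vaddr + 0x678 so the caller gets a pointer to the exact byte it asked for.
 */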

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr: bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncacheable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses; in particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS.
	 */
	unsigned long val = _PAGE_CACHE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, val,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
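
/*
 * Usage sketch (illustration only, not part of the original file): mapping
 * a hypothetical device register window uncached and reading a register.
 * The base address 0xfebf0000, the 0x04 offset and the helper name are all
 * made up for illustration.
 */
static inline u32 example_read_device_status(void)
{
	void __iomem *regs;
	u32 status;

	regs = ioremap_nocache(0xfebf0000, PAGE_SIZE);
	if (!regs)
		return 0;

	status = readl(regs + 0x04);	/* hypothetical STATUS register */
	iounmap(regs);

	return status;
}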

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
	if (pat_wc_enabled)
		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
					__builtin_return_address(0));
	else
		return ioremap_nocache(phys_addr, size);
}
EXPORT_SYMBOL(ioremap_wc);
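
/*
 * Usage sketch (illustration only, not part of the original file): mapping
 * a hypothetical framebuffer write-combined and clearing it. The physical
 * address, length and helper name are made up; on non-PAT systems this
 * transparently falls back to an uncached mapping, as ioremap_wc() above
 * shows.
 */
static inline void example_clear_framebuffer(void)
{
	void __iomem *fb;

	fb = ioremap_wc(0xd0000000, 4 * 1024 * 1024);
	if (!fb)
		return;

	memset_io(fb, 0, 4 * 1024 * 1024);	/* streamed, write-combined */
	iounmap(fb);
}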

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
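
/*
 * Usage sketch (illustration only, not part of the original file): mapping
 * a hypothetical firmware table write-back cached and copying it out. The
 * physical address and the helper name are made up for illustration.
 */
static inline void example_copy_firmware_table(void *dst, size_t len)
{
	void __iomem *table;

	table = ioremap_cache(0xffff0000, len);
	if (!table)
		return;

	memcpy_fromio(dst, table, len);
	iounmap(table);
}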

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space. So handle that here.
	 */
	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	read_lock(&vmlist_lock);
	for (p = vmlist; p; p = p->next) {
		if (p->addr == addr)
			break;
	}
	read_unlock(&vmlist_lock);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(unsigned long phys)
{
	void *addr;
	unsigned long start = phys & PAGE_MASK;

	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
	if (page_is_ram(start >> PAGE_SHIFT))
		return __va(phys);

	addr = (void *)ioremap(start, PAGE_SIZE);
	if (addr)
		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));

	return addr;
}

void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
	if (page_is_ram(phys >> PAGE_SHIFT))
		return;

	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
	return;
}
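
/*
 * Usage sketch (illustration only, not part of the original file): a
 * /dev/mem style read path pairs the two helpers above around a temporary
 * kernel mapping of one page. The helper name is made up.
 */
static inline unsigned char example_peek_phys_byte(unsigned long phys)
{
	unsigned char val = 0;
	void *ptr = xlate_dev_mem_ptr(phys);

	if (ptr) {
		val = *(unsigned char *)ptr;
		unxlate_dev_mem_ptr(phys, ptr);
	}
	return val;
}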

#ifdef CONFIG_X86_32

int __initdata early_ioremap_debug;

static int __init early_ioremap_debug_setup(char *str)
{
	early_ioremap_debug = 1;

	return 0;
}
early_param("early_ioremap_debug", early_ioremap_debug_setup);

static __initdata int after_paging_init;
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
		__section(.bss.page_aligned);

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3());
	pgd_t *pgd = &base[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_init()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init early_ioremap_clear(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_clear()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	pmd_clear(pmd);
	paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
	__flush_tlb_all();
}

void __init early_ioremap_reset(void)
{
	enum fixed_addresses idx;
	unsigned long addr, phys;
	pte_t *pte;

	after_paging_init = 1;
	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
		addr = fix_to_virt(idx);
		pte = early_ioremap_pte(addr);
		if (pte_present(*pte)) {
			phys = pte_val(*pte) & PAGE_MASK;
			set_fixmap(idx, phys);
		}
	}
}

static void __init __early_set_fixmap(enum fixed_addresses idx,
				   unsigned long phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);
	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(NULL, addr, pte);
	__flush_tlb_one(addr);
}

static inline void __init early_set_fixmap(enum fixed_addresses idx,
					unsigned long phys)
{
	if (after_paging_init)
		set_fixmap(idx, phys);
	else
		__early_set_fixmap(idx, phys, PAGE_KERNEL);
}

static inline void __init early_clear_fixmap(enum fixed_addresses idx)
{
	if (after_paging_init)
		clear_fixmap(idx);
	else
		__early_set_fixmap(idx, 0, __pgprot(0));
}


int __initdata early_ioremap_nested;

static int __init check_early_ioremap_leak(void)
{
	if (!early_ioremap_nested)
		return 0;

	printk(KERN_WARNING
	       "Debug warning: early ioremap leak of %d areas detected.\n",
	       early_ioremap_nested);
	printk(KERN_WARNING
	       "please boot with early_ioremap_debug and report the dmesg.\n");
	WARN_ON(1);

	return 1;
}
late_initcall(check_early_ioremap_leak);

void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
{
	unsigned long offset, last_addr;
	unsigned int nrpages, nesting;
	enum fixed_addresses idx0, idx;

	WARN_ON(system_state != SYSTEM_BOOTING);

	nesting = early_ioremap_nested;
	if (early_ioremap_debug) {
		printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
		       phys_addr, size, nesting);
		dump_stack();
	}

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr) {
		WARN_ON(1);
		return NULL;
	}

	if (nesting >= FIX_BTMAPS_NESTING) {
		WARN_ON(1);
		return NULL;
	}
	early_ioremap_nested++;
	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr) - phys_addr;

	/*
	 * Mappings have to fit in the FIX_BTMAP area.
	 */
	nrpages = size >> PAGE_SHIFT;
	if (nrpages > NR_FIX_BTMAPS) {
		WARN_ON(1);
		return NULL;
	}

	/*
	 * Ok, go for it..
	 */
	idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
	idx = idx0;
	while (nrpages > 0) {
		early_set_fixmap(idx, phys_addr);
		phys_addr += PAGE_SIZE;
		--idx;
		--nrpages;
	}
	if (early_ioremap_debug)
		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));

	return (void *) (offset + fix_to_virt(idx0));
}

void __init early_iounmap(void *addr, unsigned long size)
{
	unsigned long virt_addr;
	unsigned long offset;
	unsigned int nrpages;
	enum fixed_addresses idx;
	unsigned int nesting;

	nesting = --early_ioremap_nested;
	WARN_ON(nesting < 0);

	if (early_ioremap_debug) {
		printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
		       size, nesting);
		dump_stack();
	}

	virt_addr = (unsigned long)addr;
	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
		WARN_ON(1);
		return;
	}
	offset = virt_addr & ~PAGE_MASK;
	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;

	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
	while (nrpages > 0) {
		early_clear_fixmap(idx);
		--idx;
		--nrpages;
	}
}
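
/*
 * Usage sketch (illustration only, not part of the original file): during
 * early boot a caller maps a small firmware region through the fixmap slots
 * above, reads it, and drops the temporary mapping again. The physical
 * address and the helper name are made up.
 */
static inline u32 __init example_early_peek(void)
{
	u32 val = 0;
	void *p = early_ioremap(0x000f0000, sizeof(u32));

	if (p) {
		val = *(u32 *)p;
		early_iounmap(p, sizeof(u32));
	}
	return val;
}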

void __this_fixmap_does_not_exist(void)
{
	WARN_ON(1);
}

#endif /* CONFIG_X86_32 */