1/*
2 * linux/arch/arm/mm/proc-xsc3.S
3 *
4 * Original Author: Matthew Gilbert
5 * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
6 *
7 * Copyright 2004 (C) Intel Corp.
8 * Copyright 2005 (C) MontaVista Software, Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
15 * an extension to Intel's original XScale core that adds the following
16 * features:
17 *
18 * - ARMv6 Supersections
19 * - Low Locality Reference pages (replaces mini-cache)
20 * - 36-bit addressing
21 * - L2 cache
22 * - Cache coherency if chipset supports it
23 *
24 * Based on original XScale code by Nicolas Pitre.
25 */
26
27#include <linux/linkage.h>
28#include <linux/init.h>
29#include <asm/assembler.h>
30#include <asm/hwcap.h>
31#include <asm/pgtable.h>
32#include <asm/pgtable-hwdef.h>
33#include <asm/page.h>
34#include <asm/ptrace.h>
35#include "proc-macros.S"
36
/*
 * Size threshold, in bytes, for range-based flushing.  When the area
 * is this size or larger, xsc3_flush_user_cache_range gives up walking
 * it line by line and flushes the whole cache instead.
 */
#define MAX_AREA_SIZE	32768

/*
 * The cache line size of the L1 I, L1 D and unified L2 cache.
 * The by-address maintenance loops below advance by this many bytes.
 */
#define CACHELINESIZE	32

/*
 * The size of the L1 D cache.
 * Not referenced by the code visible in this file; the clean_d_cache
 * macro hard-codes its own loop bounds.
 */
#define CACHESIZE	32768
52
/*
 * cpwait_ret lr, rd
 *
 * This macro is used to wait for a CP15 write and is needed when we
 * have to ensure that the last operation to the coprocessor was
 * completed before continuing with operation.  It then returns by
 * loading pc.
 *
 *	lr - register holding the return address
 *	rd - scratch register, overwritten by the CP15 read
 */
	.macro	cpwait_ret, lr, rd
	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
						@ flush instruction pipeline
						@ (the shifted operand is 0, so
						@ pc gets the return address)
	.endm
63
/*
 * clean_d_cache rd, rs
 *
 * This macro cleans and invalidates the entire L1 D cache.
 *
 * The loop counter starts at 0x1fe0.  The inner loop adds 0x40000000
 * until the addition carries out, which gives four operations per
 * pass.  The outer loop then subtracts 0x20 and repeats until the
 * result goes negative, which gives 256 passes.  That is 1024
 * operations in total; at 32 bytes per line this matches the
 * 32768-byte CACHESIZE.
 *
 *	rd - scratch register used as the loop counter (clobbered)
 *	rs - accepted but not used by the macro body
 *
 * The condition flags are clobbered.
 */

 	.macro  clean_d_cache rd, rs
	mov	\rd, #0x1f00
	orr	\rd, \rd, #0x00e0		@ counter = 0x1fe0
1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
	adds	\rd, \rd, #0x40000000		@ step the top two bits
	bcc	1b				@ until they wrap around
	subs	\rd, \rd, #0x20			@ step the low field down
	bpl	1b				@ until it goes negative
	.endm
77
	.text

/*
 * cpu_xsc3_proc_init()
 *
 * Nothing too exciting at the moment: the routine returns to the
 * caller without touching any register.
 */
ENTRY(cpu_xsc3_proc_init)
	mov	pc, lr				@ return
87
/*
 * cpu_xsc3_proc_fin()
 *
 * Read the CP15 control register, clear the I, Z, C and A bits and
 * write it back, which disables the caches.  Clobbers r0.
 */
ENTRY(cpu_xsc3_proc_fin)
	mrc	p15, 0, r0, c1, c0, 0		@ r0 = ctrl register
	bic	r0, r0, #0x0006			@ .............CA.
	bic	r0, r0, #0x1800			@ ...IZ...........
	mcr	p15, 0, r0, c1, c0, 0		@ write back, caches disabled
	mov	pc, lr
97
/*
 * cpu_xsc3_reset(loc)
 *
 * Perform a soft reset of the system.  Put the CPU into the
 * same state as it would be if it had been reset, and branch
 * to what would be the reset vector.
 *
 * loc: location to jump to for soft reset (taken from r0)
 *
 * Does not return to the caller.  r1 is used as scratch.  The code
 * is emitted into the .idmap.text section.
 *
 * NOTE(review): ip is handed to the two invalidate operations without
 * being initialised in this routine - presumably the operand value is
 * ignored by the core; confirm against the core manual.
 */
	.align	5
	.pushsection	.idmap.text, "ax"
ENTRY(cpu_xsc3_reset)
	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r1			@ reset CPSR
	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
	bic	r1, r1, #0x3900			@ ..VIZ..S........
	bic	r1, r1, #0x0086			@ ........B....CA.
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	bic	r1, r1, #0x0001			@ ...............M
	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
	@ CAUTION: MMU turned off from this point.  We count on the pipeline
	@ already containing those two last instructions to survive.
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	mov	pc, r0				@ jump to loc
ENDPROC(cpu_xsc3_reset)
	.popsection
125
/*
 * cpu_xsc3_do_idle()
 *
 * Cause the processor to idle
 *
 * For now we do nothing but go to idle mode for every case
 *
 * XScale supports clock switching, but using idle mode support
 * allows external hardware to react to system state changes.
 *
 * Clobbers r0.
 */
	.align	5

ENTRY(cpu_xsc3_do_idle)
	mov	r0, #1				@ value written to CP14 c7
	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
	mov	pc, lr
142
143/* ================================= CACHE ================================ */
144
/*
 *	flush_icache_all()
 *
 *	Unconditionally invalidate the entire icache.
 *
 *	Clobbers r0.
 */
ENTRY(xsc3_flush_icache_all)
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
	mov	pc, lr
ENDPROC(xsc3_flush_icache_all)
155
/*
 *	flush_user_cache_all()
 *
 *	Invalidate all cache entries in a particular address
 *	space.  This entry point has no code of its own and falls
 *	through into flush_kern_cache_all.
 */
ENTRY(xsc3_flush_user_cache_all)
	/* FALLTHROUGH */

/*
 *	flush_kern_cache_all()
 *
 *	Clean and invalidate the entire cache.
 *
 *	__flush_whole_cache is the shared tail and is also branched to
 *	by xsc3_flush_user_cache_range.  On entry to it r2 holds the
 *	flags tested against VM_EXEC and ip holds 0.
 *
 *	Clobbers r0 and the flags; the two entry points additionally
 *	overwrite r2 and ip.
 */
ENTRY(xsc3_flush_kern_cache_all)
	mov	r2, #VM_EXEC			@ always take the I cache path
	mov	ip, #0
__flush_whole_cache:
	@ r1 is named as the second macro argument but the macro does not use it
	clean_d_cache r0, r1
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mov	pc, lr
180
/*
 *	flush_user_cache_range(start, end, vm_flags)
 *
 *	Clean and invalidate a range of cache entries in the specified
 *	address space.
 *
 *	- start    - start address (may not be aligned)
 *	- end      - end address (exclusive, may not be aligned)
 *	- vm_flags - flags of the mapping; only VM_EXEC is examined
 *
 *	Ranges of MAX_AREA_SIZE bytes or more are handed to
 *	__flush_whole_cache.  Otherwise the range is walked one cache
 *	line at a time, starting from the line that contains 'start',
 *	so that an unaligned start cannot cause the last line of the
 *	range to be skipped.
 *
 *	Clobbers r0, r3, ip and the flags.
 */
	.align	5
ENTRY(xsc3_flush_user_cache_range)
	mov	ip, #0
	sub	r3, r1, r0			@ calculate total size
	cmp	r3, #MAX_AREA_SIZE
	bhs	__flush_whole_cache

	bic	r0, r0, #CACHELINESIZE - 1	@ begin at the first line boundary
1:	tst	r2, #VM_EXEC
	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mov	pc, lr
209
/*
 *	coherent_kern_range(start, end)
 *
 *	Ensure coherency between the I cache and the D cache in the
 *	region described by start, end.  If you have non-snooping
 *	Harvard caches, you need to implement this function.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	Note: single I-cache line invalidation isn't used here since
 *	it also trashes the mini I-cache used by JTAG debuggers.
 *
 *	The kern and user entry points share one body.  The start
 *	address is rounded down to a cache line, each D line in the
 *	range is cleaned, and then the whole I cache is invalidated.
 *	r0 is 0 on return; the flags are clobbered.
 */
ENTRY(xsc3_coherent_kern_range)
/* FALLTHROUGH */
ENTRY(xsc3_coherent_user_range)
	bic	r0, r0, #CACHELINESIZE - 1	@ align start down to a line
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	mov	pc, lr
236
/*
 *	flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure no D cache aliasing occurs, either with itself or
 *	the I cache.
 *
 *	- addr	- kernel address
 *	- size	- region size
 *
 *	Every D line overlapping [addr, addr + size) is cleaned and
 *	invalidated, then the whole I cache is invalidated.  The walk
 *	starts at the cache line containing addr, as the other range
 *	loops in this file do, so an unaligned addr cannot cause the
 *	last line of the region to be skipped.
 *
 *	r0 is 0 on return; r1 and the flags are clobbered.
 */
ENTRY(xsc3_flush_kern_dcache_area)
	add	r1, r0, r1			@ r1 = end address
	bic	r0, r0, #CACHELINESIZE - 1	@ begin at the first line boundary
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
	mov	pc, lr
257
/*
 *	dma_inv_range(start, end)
 *
 *	Invalidate (discard) the specified virtual address range.
 *	May not write back any entries.  If 'start' or 'end'
 *	are not cache line aligned, those lines must be written
 *	back.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	Not exported with ENTRY; reached from xsc3_dma_map_area.
 *	Clobbers r0 and the flags.
 */
xsc3_dma_inv_range:
	tst	r0, #CACHELINESIZE - 1		@ start inside a line?
	bic	r0, r0, #CACHELINESIZE - 1	@ align; flags still from tst
	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	tst	r1, #CACHELINESIZE - 1		@ end inside a line?
	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr
281
/*
 *	dma_clean_range(start, end)
 *
 *	Clean the specified virtual address range.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	Not exported with ENTRY; reached from xsc3_dma_map_area.
 *	Clobbers r0 and the flags.
 */
xsc3_dma_clean_range:
	bic	r0, r0, #CACHELINESIZE - 1	@ align start down to a line
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr
298
/*
 *	dma_flush_range(start, end)
 *
 *	Clean and invalidate the specified virtual address range.
 *
 *	- start  - virtual start address
 *	- end	 - virtual end address
 *
 *	Clobbers r0 and the flags.
 */
ENTRY(xsc3_dma_flush_range)
	bic	r0, r0, #CACHELINESIZE - 1	@ align start down to a line
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
	add	r0, r0, #CACHELINESIZE
	cmp	r0, r1
	blo	1b
	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
	mov	pc, lr
315
/*
 *	dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 *
 *	Converts (start, size) into (start, end) and tail-branches to
 *	one of the range helpers depending on how dir compares with
 *	DMA_TO_DEVICE: above selects invalidate, equal selects clean,
 *	below selects clean and invalidate.
 */
ENTRY(xsc3_dma_map_area)
	add	r1, r0, r1			@ r1 = end address
	cmp	r2, #DMA_TO_DEVICE
	bhi	xsc3_dma_inv_range		@ dir above DMA_TO_DEVICE
	beq	xsc3_dma_clean_range		@ dir equals DMA_TO_DEVICE
	b	xsc3_dma_flush_range		@ dir below DMA_TO_DEVICE
ENDPROC(xsc3_dma_map_area)
329
/*
 *	dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 *
 *	No cache maintenance is performed here; the routine returns
 *	immediately and leaves all registers untouched.
 */
ENTRY(xsc3_dma_unmap_area)
	mov	pc, lr
ENDPROC(xsc3_dma_unmap_area)
339
	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	@ NOTE(review): presumably the macro collects the xsc3_* cache entry
	@ points defined above by name - confirm in proc-macros.S
	define_cache_functions xsc3
342
/*
 * cpu_xsc3_dcache_clean_area
 *
 * Clean L1 D cache lines, starting at the address in r0 and counting
 * the byte length in r1 down by CACHELINESIZE per line.
 *
 * The loop body runs before the length is tested, so one line is
 * cleaned even when r1 is 0.  r0 is not rounded down to a line
 * boundary first, so a region that starts part-way into a line and
 * crosses into the next one may leave its final line uncleaned.
 * NOTE(review): presumably callers pass line-aligned regions - verify.
 *
 * Clobbers r0, r1 and the flags.
 */
ENTRY(cpu_xsc3_dcache_clean_area)
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	add	r0, r0, #CACHELINESIZE
	subs	r1, r1, #CACHELINESIZE
	bhi	1b				@ loop while bytes remain
	mov	pc, lr
349
350/* =============================== PageTable ============================== */
351
/*
 * cpu_xsc3_switch_mm(pgd)
 *
 * Set the translation base pointer to be as described by pgd.
 *
 * pgd: new page tables (taken from r0)
 *
 * The whole L1 D cache is cleaned and invalidated and the L1 I cache
 * is invalidated before the new table pointer is loaded; the TLBs are
 * invalidated afterwards.  Clobbers r0, r1, ip and the flags.
 *
 * NOTE(review): ip is handed to the invalidate and barrier operations
 * without being initialised in this routine - presumably the operand
 * value is ignored by the core; confirm against the core manual.
 */
	.align	5
ENTRY(cpu_xsc3_switch_mm)
	@ r2 is named as the second macro argument but the macro does not use it
	clean_d_cache r1, r2
	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	orr	r0, r0, #0x18			@ cache the page table in L2
	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	@ wait for the CP15 writes, then return through lr (ip is scratch)
	cpwait_ret lr, ip
369
/*
 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
 *
 * Set a PTE and flush it out
 *
 * cpu_xsc3_mt_table is the lookup table used by that routine.  It has
 * sixteen word-sized entries.  The routine masks the pte with
 * L_PTE_MT_MASK and uses the result directly as a byte offset into
 * this table; the word found there is OR-ed into the value being
 * built after bits 0x0c have been cleared.
 */
cpu_xsc3_mt_table:
	.long	0x00						@ L_PTE_MT_UNCACHED
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
	.long	0x00						@ unused
	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
	.long	0x00						@ unused
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
	.long	0x00						@ unused
	.long	0x00						@ unused
	.long	0x00						@ unused
392
	.align	5
/*
 * Body of cpu_xsc3_set_pte_ext.  r1 carries the pte on entry to the
 * code between the two helper macros, and r2 carries the value that
 * the memory type bits are merged into.
 * NOTE(review): the prologue and epilogue macros are not defined in
 * this file - presumably they come from proc-macros.S and build and
 * store the hardware entry; confirm there.
 * Clobbers ip and the flags in the visible part.
 */
ENTRY(cpu_xsc3_set_pte_ext)
	xscale_set_pte_ext_prologue

	tst	r1, #L_PTE_SHARED		@ shared?
	and	r1, r1, #L_PTE_MT_MASK		@ r1 = byte offset into the table
	adr	ip, cpu_xsc3_mt_table
	ldr	ip, [ip, r1]			@ ip = memory type bits
	@ the flags below are still those set by the tst above
	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
	bic	r2, r2, #0x0c			@ clear old C,B bits
	orr	r2, r2, ip

	xscale_set_pte_ext_epilogue
	mov	pc, lr

	.ltorg
	.align
410
/*
 * cpu_xsc3_suspend_size is the number of bytes cpu_xsc3_do_suspend
 * writes: six coprocessor registers of four bytes each.
 */
.globl	cpu_xsc3_suspend_size
.equ	cpu_xsc3_suspend_size, 4 * 6
#ifdef CONFIG_PM_SLEEP
/*
 * cpu_xsc3_do_suspend
 *
 * Read six CP14/CP15 registers and store them, in order, to the
 * buffer whose address is in r0.  r4 - r9 are saved on the stack and
 * restored before returning.
 */
ENTRY(cpu_xsc3_do_suspend)
	stmdb	sp!, {r4 - r9, lr}
	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
	bic	r4, r4, #2		@ clear frequency change bit
	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
	mrc	p15, 0, r6, c13, c0, 0	@ PID
	mrc	p15, 0, r7, c3, c0, 0	@ domain ID
	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
	mrc	p15, 0, r9, c1, c0, 0	@ control reg
	stmia	r0, {r4 - r9}		@ store cp regs
	ldmfd	sp!, {r4 - r9, pc}	@ restore and return
ENDPROC(cpu_xsc3_do_suspend)
426
/*
 * cpu_xsc3_do_resume
 *
 * Reload the six words that cpu_xsc3_do_suspend stored, from the
 * buffer whose address is in r0, and write them back to their
 * coprocessor registers after invalidating the caches and TLBs.
 * r1 is OR-ed with 0x18 and written to the translation table base
 * register.  The saved control register value is not written here;
 * it is passed on in r0 to cpu_resume_mmu, which this routine
 * branches to instead of returning.
 *
 * r4 - r9 and ip are overwritten without being saved.
 */
ENTRY(cpu_xsc3_do_resume)
	ldmia	r0, {r4 - r9}		@ load cp regs
	mov	ip, #0
	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
	mcr	p15, 0, ip, c7, c5, 4	@ flush prefetch buffer
	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
	mcr	p14, 0, r4, c6, c0, 0	@ clock configuration, turbo mode.
	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
	mcr	p15, 0, r6, c13, c0, 0	@ PID
	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
	orr	r1, r1, #0x18		@ cache the page table in L2
	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
	mov	r0, r9			@ control register
	b	cpu_resume_mmu
ENDPROC(cpu_xsc3_do_resume)
#endif
445
	__CPUINIT

/*
 * __xsc3_setup
 *
 * Early CPU setup, reached through the branch stored in the proc_info
 * records at the end of this file.  Invalidates the L1 caches and
 * TLBs, loads the page table pointer, enables CP6 access, programs
 * the auxiliary control register and computes the control register
 * value.
 *
 * In:	r4 = page table pointer (0x18 is OR-ed into it here)
 *	lr = return address
 * Out:	r0 = control register value with the xsc3_crval clear mask
 *	     removed and the set bits added; it is not written to CP15
 *	     in this routine
 *
 * Clobbers r5 and r6, and the flags when CONFIG_CACHE_XSC3L2 is set.
 *
 * NOTE(review): ip is handed to the invalidate and barrier operations
 * without being initialised in this routine - presumably the operand
 * value is ignored by the core; confirm against the core manual.
 */
	.type	__xsc3_setup, #function
__xsc3_setup:
	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
	msr	cpsr_c, r0
	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
	orr	r4, r4, #0x18			@ cache the page table in L2
	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer

	mov	r0, #1 << 6			@ cp6 access for early sched_clock
	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register

	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
	and	r0, r0, #2			@ preserve P bit setting
	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg

	adr	r5, xsc3_crval
	ldmia	r5, {r5, r6}			@ r5 = bits to clear, r6 = bits to set

#ifdef CONFIG_CACHE_XSC3L2
	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
	ands	r0, r0, #0xf8
	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
#endif

	mrc	p15, 0, r0, c1, c0, 0		@ get control register
	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
						@ ...I Z..S .... .... (uc)
	mov	pc, lr

	.size	__xsc3_setup, . - __xsc3_setup
483
	/*
	 * Control register masks consumed by __xsc3_setup, which loads
	 * two words from here: the bits to clear, then the bits to set.
	 * NOTE(review): presumably the crval macro emits mmuset or ucset
	 * as the second word depending on the kernel configuration -
	 * confirm in proc-macros.S.
	 */
	.type	xsc3_crval, #object
xsc3_crval:
	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
487
	__INITDATA

	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
	@ The object is referenced below as xsc3_processor_functions.
	@ NOTE(review): presumably the macro picks up the cpu_xsc3_* entry
	@ points above by name, with suspend=1 selecting the suspend and
	@ resume hooks - confirm in proc-macros.S
	define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
492
	.section ".rodata"

	@ Name strings referenced by the proc_info records below.
	string	cpu_arch_name, "armv5te"
	string	cpu_elf_name, "v5"
	string	cpu_xsc3_name, "XScale-V3 based processor"

	.align
500
501	.section ".proc.info.init", #alloc, #execinstr
502
503.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
504	.type	__\name\()_proc_info,#object
505__\name\()_proc_info:
506	.long	\cpu_val
507	.long	\cpu_mask
508	.long	PMD_TYPE_SECT | \
509		PMD_SECT_BUFFERABLE | \
510		PMD_SECT_CACHEABLE | \
511		PMD_SECT_AP_WRITE | \
512		PMD_SECT_AP_READ
513	.long	PMD_TYPE_SECT | \
514		PMD_SECT_AP_WRITE | \
515		PMD_SECT_AP_READ
516	b	__xsc3_setup
517	.long	cpu_arch_name
518	.long	cpu_elf_name
519	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
520	.long	cpu_xsc3_name
521	.long	xsc3_processor_functions
522	.long	v4wbi_tlb_fns
523	.long	xsc3_mc_user_fns
524	.long	xsc3_cache_fns
525	.size	__\name\()_proc_info, . - __\name\()_proc_info
526.endm
527
	@ Two records that differ only in the first value word.
	xsc3_proc_info xsc3, 0x69056000, 0xffffe000

/* Note: PXA935 changed its implementor ID from Intel to Marvell */
	xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000
532