head.S revision 0e056f20f18d0efa5da920f3cf8532adc56d5779
/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#ifdef CONFIG_CPU_V6
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include <mach/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb
		addruart \rb
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer
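
		/*
		 * The boot loader enters this code with r1 holding the
		 * machine/architecture ID and r2 the ATAGS pointer; both
		 * are stashed in registers this code preserves so they
		 * can be handed to the kernel unchanged at call_kernel.
		 */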

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
 ARM(		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}	)
 THUMB(		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip}	)
 THUMB(		ldr	sp, [r0, #28]				)
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.
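
		/*
		 * "adr r0, LC0" gave us the run-time address of LC0,
		 * while the first word of LC0 holds its link-time
		 * address, so r0 is now the delta between where we are
		 * running and where we were linked.
		 */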

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
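
		/*
		 * The loop above clears four words per iteration, which
		 * assumes the linker script pads the BSS region to a
		 * multiple of 16 bytes.
		 */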

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		sub	r3, sp, r5		@ > compressed kernel size
		add	r0, r4, r3, lsl #2	@ allow for 4x expansion
		cmp	r0, r5
		bls	wont_overwrite
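
		/*
		 * r3 slightly overstates the compressed image size (it
		 * also covers the stack), and the lsl #2 allows for up
		 * to 4:1 expansion during decompression, so this check
		 * errs on the safe side.
		 */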

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127 + 128	@ alignment + stack
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r12,r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r12, r14}	@ copy relocation code
		stmia	r1!, {r9 - r12, r14}
		ldmia	r2!, {r9 - r12, r14}
		stmia	r1!, {r9 - r12, r14}
		cmp	r2, r3
		blo	1b
		mov	sp, r1
		add	sp, sp, #128		@ relocate the stack

		bl	cache_clean_flush
 ARM(		add	pc, r5, r0		) @ call relocation code
 THUMB(		add	r12, r5, r0		)
 THUMB(		mov	pc, r12			) @ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = atags pointer
 *  r9 = run-time address of "start"  (???)
 * On exit,
 *  r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn
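
/*
 * r3 is the byte offset of the method to invoke within a proc_types
 * entry: each entry is five words (CPU ID value, CPU ID mask, then
 * the 'on', 'off' and 'flush' instructions at offsets 8, 12 and 16).
 */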

/*
 * Initialise the highest priority protection region, PR7,
 * to cover the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)
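
/*
 * Each entry written above is a first-level section descriptor:
 * bits [1:0] = 10 select a 1MB section, bit 4 is the old
 * "should be one" bit, AP (bits [11:10]) = 11 allows read/write,
 * and C/B (bits [3:2]) mark it cacheable and bufferable; hence
 * 0x12 | 3 << 10, plus 0x0c for RAM and 0x1e for the flash mapping.
 */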

__armv4_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__armv7_mmu_cache_on:
		mov	r12, lr
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12
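
/*
 * ID_MMFR0[3:0] is the VMSA support field: it reads as zero on
 * MPU-only parts, so the page table setup, TLB flush, MMU enable
 * bit, and the TTBR and DACR writes are all skipped there and
 * only the caches are enabled.
 */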

__fa526_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif
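
/*
 * "sub pc, lr, r0, lsr #32" is simply a return (LSR #32 yields
 * zero), but making the jump depend on the value read back from
 * the control register ensures the MMU enable has taken effect
 * before any further instructions execute.
 */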

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r12,r14 = corrupted
 */
		.align	5
reloc_start:	add	r9, r5, r0
		sub	r9, r9, #128		@ do not copy the stack
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r12, r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r12, r14}
		.endr

		cmp	r5, r9
		blo	1b
		mov	sp, r1
		add	sp, sp, #128		@ relocate the stack
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel
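
/*
 * The kernel must be entered with the MMU off and the caches clean
 * and disabled, r0 = 0, r1 = architecture number and r2 = the ATAGS
 * pointer, which is exactly what call_kernel sets up above.
 */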

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r6, c0, c0	@ get processor ID
#else
		ldr	r6, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		W(b)	__arm6_mmu_cache_off	@ works, but slow
		W(b)	__arm6_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		W(b)	__arm7_mmu_cache_off
		W(b)	__arm7_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056930
		.word	0xff0ffff0		@ PXA935
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush
		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr

__fa526_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask off the bits for current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find maximum way number
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract maximum set (index) number
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
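
/*
 * The hierarchical path above is the standard ARMv7 set/way walk:
 * CLIDR gives the level of coherency, CCSIDR (selected through
 * CSSELR) gives each level's line length, associativity and set
 * count, and a clean & invalidate by set/way is issued for every
 * set and way at every level up to LoC.
 */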

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr	r3, [r1]		) @ s/w flush D cache
 THUMB(		add	r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
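
/*
 * ARMv4 has no clean-by-set/way operation, so the loop above cleans
 * the D-cache by reading a cache-sized region of memory, forcing
 * every dirty line to be evicted.
 */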

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b
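
		/*
		 * Each nibble is converted to ASCII the usual way:
		 * add '0', plus another 7 when the nibble is 10 or
		 * more so that 10 maps to 'A'.
		 */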

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
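
		/*
		 * The 0x20000 countdown after each character is a
		 * crude busy-wait that gives the UART time to drain,
		 * since this code cannot rely on any driver state.
		 */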
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg
reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096