/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5			@ power-of-two form: 2^5 = 32-byte boundary

/*
 * memset - fill a memory region with one byte value
 *
 * C prototype: void *memset(void *s, int c, size_t n)
 *
 * In:	r0 = s, start of the region (any alignment)
 *	r1 = c, fill value; only its low 8 bits are used
 *	r2 = n, number of bytes to write
 * Out:	r0 = s (never modified; ip is the moving write pointer)
 * Scratch: r1, r2, r3, ip and the flags.  Whichever of r4-r8 and lr a
 *	bulk loop uses are pushed first and popped again before returning.
 *
 * Outline: store single bytes until ip is word aligned (label 6),
 * replicate the fill byte into all four bytes of r1 (label 1), write
 * the bulk with multi-register stores, then finish with progressively
 * smaller stores selected by the low bits of the count (labels 4, 5).
 *
 * NOTE(review): every count test is a signed one (blt/bge/bgt), so the
 * routine assumes n fits in a positive 32-bit signed value.
 */
ENTRY(memset)
	and	r1, r1, #255		@ c = (unsigned char)c; the replication
					@ at 1: needs bits 8-31 to be clear
	ands	r3, r0, #3		@ r3 = s & 3, NE when s is unaligned
	mov	ip, r0			@ write via ip, keep r0 as return value
	bne	6f			@ unaligned: store the head bytes first
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ r1 = 0x0000bbbb (bb = fill byte)
	orr	r1, r1, r1, lsl #16	@ r1 = 0xbbbbbbbb
	mov	r3, r1			@ second copy for multi-word stores
	cmp	r2, #16
	blt	4f			@ under 16 bytes: tail code only

/*
 * CALGN() is not defined in this file (presumably it comes from
 * <asm/assembler.h>).  If CALGN(1) expands to nothing the test below
 * reads "! +0" and the simple loop is built; if it expands to its
 * argument the test reads "! 1+0" and the cache-line variant is built.
 */
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64		@ GE: at least 64 bytes were left
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b			@ more than 0 left: go round again
	ldmeqfd	sp!, {r8, pc}		@ exactly 0 left: restore and return
					@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on.
 * r2 is 64 too small here, but subtracting 64 leaves bits 0-5 alone.
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}	@ 32 bytes
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}		@ lr is the return address again

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only worth aligning for n > 96
	tstgt	ip, #31			@ ... and when ip is not 32-byte aligned
	ble	3f			@ n <= 96, or already aligned: skip

/*
 * r8 = bytes up to the next 32-byte boundary.  ip is word aligned, so
 * this is a multiple of 4.  The shift moves bit 4 (16 bytes) into the
 * carry, bit 3 (8 bytes) into N and bit 2 (4 bytes) into bit 30.
 */
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8		@ these bytes are accounted for here
	movs	r8, r8, lsl #(32 - 4)
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)
	mov	r8, r1			@ r8 holds the fill pattern again
					@ (mov leaves the flags for strne)
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64		@ GE: at least 64 bytes were left
	stmgeia	ip!, {r1, r3-r8, lr}	@ 2 x 32 bytes
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b			@ more than 0 left: go round again
	ldmeqfd	sp!, {r4-r8, pc}	@ exactly 0 left: restore and return

	tst	r2, #32			@ bits 0-5 of r2 are still valid
	stmneia	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmneia	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}	@ lr is the return address again

#endif

/*
 * Fewer than 16 bytes remain (going by bits 0-3 of r2) and ip is still
 * word aligned.
 */
4:	tst	r2, #8
	stmneia	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1		@ 2 bytes
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1		@ 1 byte
	mov	pc, lr			@ return, r0 still holds s

/*
 * Unaligned start: r3 = s & 3, which is 1, 2 or 3 here.  Store 4 - r3
 * bytes so that ip reaches a word boundary, then rejoin at 1.
 */
6:	subs	r2, r2, #4		@ do we have enough
	blt	5b			@ bytes to align with?  if not, bits
					@ 0-1 of r2 still give the byte count
	cmp	r3, #2
	strltb	r1, [ip], #1		@ r3 == 1: three bytes in total
	strleb	r1, [ip], #1		@ r3 <= 2: at least two bytes
	strb	r1, [ip], #1		@ always at least one byte
	add	r2, r2, r3		@ (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)