1/***************************************************************************
2 Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
3
4 Licensed under the Apache License, Version 2.0 (the "License"); you
5 may not use this file except in compliance with the License.  You may
6 obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 implied.  See the License for the specific language governing
14 permissions and limitations under the License.
15 ***************************************************************************/
16
17	.code 32
18	.fpu neon
19	.align 4
20	.globl	memset32_neon
21	.func
22
23	/* r0 = buffer, r1 = value, r2 = times to write */
24memset32_neon:
25	cmp		r2, #1
26	streq		r1, [r0], #4
27	bxeq		lr
28
29	cmp		r2, #4
30	bgt		memset32_neon_start
31	cmp		r2, #0
32	bxeq		lr
33memset32_neon_small:
34	str		r1, [r0], #4
35	subs		r2, r2, #1
36	bne		memset32_neon_small
37	bx		lr
38memset32_neon_start:
39	cmp		r2, #16
40	blt		memset32_dropthru
41	vdup.32		q0, r1
42	vmov		q1, q0
43	cmp		r2, #32
44	blt		memset32_16
45	cmp		r2, #64
46	blt		memset32_32
47	cmp		r2, #128
48	blt		memset32_64
49memset32_128:
50	movs		r12, r2, lsr #7
51memset32_loop128:
52	subs		r12, r12, #1
53	vst1.64		{q0, q1}, [r0]!
54	vst1.64		{q0, q1}, [r0]!
55	vst1.64		{q0, q1}, [r0]!
56	vst1.64		{q0, q1}, [r0]!
57	vst1.64		{q0, q1}, [r0]!
58	vst1.64		{q0, q1}, [r0]!
59	vst1.64		{q0, q1}, [r0]!
60	vst1.64		{q0, q1}, [r0]!
61	vst1.64		{q0, q1}, [r0]!
62	vst1.64		{q0, q1}, [r0]!
63	vst1.64		{q0, q1}, [r0]!
64	vst1.64		{q0, q1}, [r0]!
65	vst1.64		{q0, q1}, [r0]!
66	vst1.64		{q0, q1}, [r0]!
67	vst1.64		{q0, q1}, [r0]!
68	vst1.64		{q0, q1}, [r0]!
69	bne		memset32_loop128
70	ands		r2, r2, #0x7f
71	bxeq		lr
72memset32_64:
73	movs		r12, r2, lsr #6
74	beq		memset32_32
75	vst1.64		{q0, q1}, [r0]!
76	vst1.64		{q0, q1}, [r0]!
77	vst1.64		{q0, q1}, [r0]!
78	vst1.64		{q0, q1}, [r0]!
79	vst1.64		{q0, q1}, [r0]!
80	vst1.64		{q0, q1}, [r0]!
81	vst1.64		{q0, q1}, [r0]!
82	vst1.64		{q0, q1}, [r0]!
83	ands		r2, r2, #0x3f
84	bxeq		lr
85memset32_32:
86	movs		r12, r2, lsr #5
87	beq		memset32_16
88	vst1.64		{q0, q1}, [r0]!
89	vst1.64		{q0, q1}, [r0]!
90	vst1.64		{q0, q1}, [r0]!
91	vst1.64		{q0, q1}, [r0]!
92	ands		r2, r2, #0x1f
93	bxeq		lr
94memset32_16:
95	movs		r12, r2, lsr #4
96	beq		memset32_dropthru
97	and		r2, r2, #0xf
98	vst1.64		{q0, q1}, [r0]!
99	vst1.64		{q0, q1}, [r0]!
100memset32_dropthru:
101	rsb		r2, r2, #15
102	add		pc, pc, r2, lsl #2
103	nop
104	str		r1, [r0, #56]
105	str		r1, [r0, #52]
106	str		r1, [r0, #48]
107	str		r1, [r0, #44]
108	str		r1, [r0, #40]
109	str		r1, [r0, #36]
110	str		r1, [r0, #32]
111	str		r1, [r0, #28]
112	str		r1, [r0, #24]
113	str		r1, [r0, #20]
114	str		r1, [r0, #16]
115	str		r1, [r0, #12]
116	str		r1, [r0, #8]
117	str		r1, [r0, #4]
118	str		r1, [r0, #0]
119	bx		lr
120
121	.endfunc
122	.end
123