@ R4R8First_v7.s revision 2857b47a2731579772c76d46285660972c0ba23d
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16
17@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
18@	File:		R4R8First_v7.s
19@
20@	Content:	Radix8First and Radix4First function armv7 assemble
21@
22@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
23
	.section .text
	.global	Radix8First
	.fnstart

@------------------------------------------------------------------------------
@ Radix8First(int *buf, int num)
@
@ First-pass radix-8 FFT butterflies, computed in place with NEON.
@ In:   r0 = buf  -- interleaved (re, im) 32-bit samples; one group is
@                    8 complex values = 16 words = 64 bytes
@       r1 = num  -- number of radix-8 groups; num == 0 returns immediately
@ Out:  buf rewritten in place; r0 is left pointing past the processed data
@ Uses: q0-q9 as data scratch, q15 holds the sqrt(1/2) twiddle factor.
@       Butterfly outputs are pre-scaled by >>1 (see the VSHR block) to keep
@       the S32 additions from overflowing.
@------------------------------------------------------------------------------
Radix8First:
	stmdb     		sp!, {r4 - r11, lr}
	.save	  		{r4 - r11, lr}
	fstmfdd   		sp!, {d8 - d15}			@ d8-d15 are callee-saved under AAPCS
	.vsave	  		{d8 - d15}

	ldr       		r3, SQRT1_2				@ sqrt(1/2) in Q30, from literal below
	cmp       		r1, #0					@ num == 0?

	VDUP.I32  		Q15, r3					@ broadcast twiddle to all four lanes
	beq       		Radix8First_END

Radix8First_LOOP:
	@ Load one group: buf[0..7] -> d0-d3, buf[8..15] -> d8-d11 (r0 += 64).
	VLD1.I32			{d0, d1, d2, d3},	[r0]!
	VLD1.I32			{d8, d9, d10, d11},	[r0]!

	@ Radix-4 butterfly on the first half (buf[0..7]).
	VADD.S32			d4, d0, d1		@ r0 = buf[0] + buf[2]@ i0 = buf[1] + buf[3]@
	VSUB.S32			d5, d0, d1		@ r1 = buf[0] - buf[2]@ i1 = buf[1] - buf[3]@
	VSUB.S32			d7, d2, d3		@ r2 = buf[4] - buf[6]@ i2 = buf[5] - buf[7]@
	VADD.S32			d6, d2, d3		@ r3 = buf[4] + buf[6]@ i3 = buf[5] + buf[7]@
	VREV64.I32			d7, d7			@ swap (r2, i2) lanes so cross terms pair up

	VADD.S32			Q0, Q2, Q3		@ r4 = (r0 + r2)@ i4 = (i0 + i2)@ i6 = (i1 + r3)@ r7 = (r1 + i3)@
	VSUB.S32			Q1, Q2, Q3		@ r5 = (r0 - r2)@ i5 = (i0 - i2)@ r6 = (r1 - i3)@ i7 = (i1 - r3)@

	VREV64.I32			d3, d3

	@ Radix-4 butterfly on the second half (buf[8..15]).
	VADD.S32			d4, d8, d9		@ r0 = buf[ 8] + buf[10]@ i0 = buf[ 9] + buf[11]@
	VSUB.S32			d7, d10, d11	@ r1 = buf[12] - buf[14]@ i1 = buf[13] - buf[15]@
	VADD.S32			d6, d10, d11	@ r2 = buf[12] + buf[14]@ i2 = buf[13] + buf[15]@
	VREV64.I32			d7, d7
	VSUB.S32			d5, d8, d9		@ r3 = buf[ 8] - buf[10]@ i3 = buf[ 9] - buf[11]@

	VTRN.32				d1, d3

	VADD.S32			Q4, Q2, Q3		@ t0 = (r0 + r2) >> 1@ t1 = (i0 + i2) >> 1@ i0 = i1 + r3@ r2 = r1 + i3@
	VSUB.S32			Q5, Q2, Q3		@ t2 = (r0 - r2) >> 1@ t3 = (i0 - i2) >> 1@ r0 = r1 - i3@ i2 = i1 - r3@

	VREV64.I32			d3, d3

	@ Pre-scale the butterfly outputs by >>1 for overflow headroom.
	VSHR.S32			d8, d8, #1
	VSHR.S32			Q0, Q0, #1
	VREV64.I32			d10, d10
	VTRN.32				d11, d9
	VSHR.S32			Q1, Q1, #1
	VSHR.S32			d10, d10, #1
	VREV64.I32			d9, d9

	sub       			r0, r0, #0x40	@ rewind 64 bytes: results overwrite the group just loaded

	@ Combine the two radix-4 halves into the final radix-8 outputs.
	VADD.S32			d12, d0, d8
	VSUB.S32			d16, d0, d8
	VADD.S32			d14, d2, d10
	VSUB.S32			d18, d2, d10

	VSUB.S32			d4, d11, d9
	VADD.S32			d5, d11, d9

	VREV64.I32			d18, d18

	@ VQDMULH with the Q30 constant yields (x * sqrt(1/2)) >> 1, matching
	@ the >>1 scaling already applied to the other terms above.
	VQDMULH.S32			Q3, Q2, Q15
	VTRN.32				d14, d18
	VTRN.32				d6, d7
	VREV64.I32			d18, d18

	VSUB.S32			d15, d3, d6
	VREV64.I32			d7, d7
	VADD.S32			d19, d3, d6
	VADD.S32			d13, d1, d7
	VSUB.S32			d17, d1, d7

	VREV64.I32			d17, d17
	VTRN.32				d13, d17
	VREV64.I32			d17, d17

	subs       			r1, r1, #1		@ --num

	@ Store the 16 transformed words and advance r0 to the next group.
	VST1.I32			{d12, d13, d14, d15}, [r0]!
	VST1.I32			{d16, d17, d18, d19}, [r0]!
	bne       			Radix8First_LOOP

Radix8First_END:
	fldmfdd   sp!, {d8 - d15}
	ldmia     sp!, {r4 - r11, pc}
SQRT1_2:
	.word      0x2d413ccd				@ round(sqrt(1/2) * 2^30) = 759250125

	@ENDP  @ |Radix8First|
	.fnend
117
	.section .text
	.global	Radix4First
	.fnstart

@------------------------------------------------------------------------------
@ Radix4First(int *buf, int num)
@
@ First-pass radix-4 FFT butterflies, computed in place with NEON.
@ In:   r0 = buf  -- interleaved (re, im) 32-bit samples; one group is
@                    4 complex values = 8 words = 32 bytes
@       r1 = num  -- number of radix-4 groups; num == 0 returns immediately
@ Out:  buf rewritten in place; r0 is left pointing past the processed data
@------------------------------------------------------------------------------
Radix4First:
	stmdb     	sp!, {r4 - r11, lr}
	.save	  	{r4 - r11, lr}
	fstmfdd   	sp!, {d8 - d15}			@ d8-d15 are callee-saved under AAPCS
	.vsave	  	{d8 - d15}

	cmp       	r1, #0					@ num == 0?
	beq       	Radix4First_END

Radix4First_LOOP:
	@ Load one group (no writeback -- the VST1 below advances r0).
	VLD1.I32			{d0, d1, d2, d3}, [r0]

	@ NOTE(review): the original sum/difference comments on the d7/d6 lines
	@ were swapped; corrected here (VSUB produces differences, VADD sums).
	VADD.S32			d4, d0, d1							@ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
	VSUB.S32			d5, d0, d1							@ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
	VSUB.S32			d7, d2, d3							@ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
	VADD.S32			d6, d2, d3							@ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@

	VREV64.I32		d7, d7									@ swap lanes so cross terms pair up

	VADD.S32			Q4, Q2, Q3							@ combine sums/differences into outputs
	VSUB.S32			Q5, Q2, Q3

	VREV64.I32		d11, d11
	VTRN.32				d9, d11
	subs       		r1, r1, #1							@ --num
	VREV64.I32		d11, d11
	VST1.I32			{d8, d9, d10, d11}, [r0]!	@ store back, r0 += 32
	
	bne       		Radix4First_LOOP

Radix4First_END:
	fldmfdd   		sp!, {d8 - d15}
	ldmia    		sp!, {r4 - r11, pc}

	@ENDP  @ |Radix4First|
	.fnend
158