R4R8First_v7.s revision 891abc0ee089f2ba5b92dcc014e5efc2ef07f01e
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16
17@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
18@	File:		R4R8First_v7.s
19@
20@	Content:	Radix8First and Radix4First functions, ARMv7 (NEON) assembly
21@
22@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
23
24	.section .text
25	.global	Radix8First
26
@------------------------------------------------------------------------------
@ Radix8First
@   Presumed C prototype: void Radix8First(int *buf, int num) -- TODO confirm
@   against the caller.
@
@ In:    r0 = pointer to 32-bit signed words, transformed in place,
@             16 words (64 bytes) per iteration
@        r1 = number of 16-word groups (0 returns without touching memory)
@ Out:   r0 = address just past the last group processed, r1 = 0
@ Uses:  r3, flags, d0-d7, d16-d19, q15 (not preserved)
@        d8-d15 are used as scratch but saved and restored here, because the
@        AAPCS makes them callee-saved.
@
@ Lane notation in the comments below: [lane0, lane1].
@ Names for words 0..7 of the group:
@   r0 = buf[0]+buf[2]   i0 = buf[1]+buf[3]   r1 = buf[0]-buf[2]   i1 = buf[1]-buf[3]
@   r2 = buf[4]+buf[6]   i2 = buf[5]+buf[7]   r3 = buf[4]-buf[6]   i3 = buf[5]-buf[7]
@   r4 = (r0+r2)>>1  i4 = (i0+i2)>>1  r5 = (r0-r2)>>1  i5 = (i0-i2)>>1
@   r6 = (r1-i3)>>1  i6 = (i1+r3)>>1  r7 = (r1+i3)>>1  i7 = (i1-r3)>>1
@ Names for words 8..15 (same pattern, offset by 8):
@   u0 = buf[8]+buf[10]  v0 = buf[9]+buf[11]  u1 = buf[8]-buf[10]  v1 = buf[9]-buf[11]
@   u2 = buf[12]+buf[14] v2 = buf[13]+buf[15] u3 = buf[12]-buf[14] v3 = buf[13]-buf[15]
@   t0 = (u0+u2)>>1  t1 = (v0+v2)>>1  t2 = (u0-u2)>>1  t3 = (v0-v2)>>1
@   a = u1-v3   b = v1+u3   c = u1+v3   e = v1-u3
@   m0 = K*(a-b)  m1 = K*(a+b)  m2 = K*(c-e)  m3 = K*(c+e)   (K*x = VQDMULH by SQRT1_2)
@------------------------------------------------------------------------------
Radix8First:
	stmdb		sp!, {r4 - r11, lr}
	VPUSH		{d8 - d15}			@ callee-saved NEON registers, overwritten below

	ldr		r3, SQRT1_2
	cmp		r1, #0

	VDUP.I32	Q15, r3				@ multiplier in all four lanes, flags from cmp untouched
	beq		Radix8First_END

Radix8First_LOOP:
	VLD1.I32	{d0, d1, d2, d3},	[r0]!		@ buf[0..7]
	VLD1.I32	{d8, d9, d10, d11},	[r0]!		@ buf[8..15], r0 is now 64 bytes past the group start

	VADD.S32	d4, d0, d1			@ [r0, i0]
	VSUB.S32	d5, d0, d1			@ [r1, i1]
	VSUB.S32	d7, d2, d3			@ [r3, i3]
	VADD.S32	d6, d2, d3			@ [r2, i2]
	VREV64.I32	d7, d7				@ [i3, r3]

	VADD.S32	Q0, Q2, Q3			@ d0 = [r0+r2, i0+i2]   d1 = [r1+i3, i1+r3]
	VSUB.S32	Q1, Q2, Q3			@ d2 = [r0-r2, i0-i2]   d3 = [r1-i3, i1-r3]

	VREV64.I32	d3, d3				@ d3 = [i1-r3, r1-i3]

	VADD.S32	d4, d8, d9			@ [u0, v0]
	VSUB.S32	d7, d10, d11			@ [u3, v3]
	VADD.S32	d6, d10, d11			@ [u2, v2]
	VREV64.I32	d7, d7				@ [v3, u3]
	VSUB.S32	d5, d8, d9			@ [u1, v1]

	VTRN.32		d1, d3				@ d1 = [r1+i3, i1-r3]   d3 = [i1+r3, r1-i3]

	VADD.S32	Q4, Q2, Q3			@ d8  = [u0+u2, v0+v2]  d9  = [c, b]
	VSUB.S32	Q5, Q2, Q3			@ d10 = [u0-u2, v0-v2]  d11 = [a, e]

	VREV64.I32	d3, d3				@ d3 = [r1-i3, i1+r3]

	VSHR.S32	d8, d8, #1			@ d8 = [t0, t1]
	VSHR.S32	Q0, Q0, #1			@ d0 = [r4, i4]   d1 = [r7, i7]
	VREV64.I32	d10, d10			@ [v0-v2, u0-u2]
	VTRN.32		d11, d9				@ d11 = [a, c]    d9 = [e, b]
	VSHR.S32	Q1, Q1, #1			@ d2 = [r5, i5]   d3 = [r6, i6]
	VSHR.S32	d10, d10, #1			@ d10 = [t3, t2]
	VREV64.I32	d9, d9				@ d9 = [b, e]

	sub		r0, r0, #0x40			@ back to the group start for the in-place store

	VADD.S32	d12, d0, d8			@ [r4+t0, i4+t1]  -> buf[0],  buf[1]
	VSUB.S32	d16, d0, d8			@ [r4-t0, i4-t1]  -> buf[8],  buf[9]
	VADD.S32	d14, d2, d10			@ [r5+t3, i5+t2]
	VSUB.S32	d18, d2, d10			@ [r5-t3, i5-t2]

	VSUB.S32	d4, d11, d9			@ [a-b, c-e]
	VADD.S32	d5, d11, d9			@ [a+b, c+e]

	VREV64.I32	d18, d18			@ [i5-t2, r5-t3]

	VQDMULH.S32	Q3, Q2, Q15			@ d6 = [m0, m2]   d7 = [m1, m3]
	VTRN.32		d14, d18			@ d14 = [r5+t3, i5-t2] -> buf[4], buf[5]   d18 = [i5+t2, r5-t3]
	VTRN.32		d6, d7				@ d6 = [m0, m1]   d7 = [m2, m3]
	VREV64.I32	d18, d18			@ [r5-t3, i5+t2]  -> buf[12], buf[13]

	VSUB.S32	d15, d3, d6			@ [r6-m0, i6-m1]  -> buf[6],  buf[7]
	VREV64.I32	d7, d7				@ [m3, m2]
	VADD.S32	d19, d3, d6			@ [r6+m0, i6+m1]  -> buf[14], buf[15]
	VADD.S32	d13, d1, d7			@ [r7+m3, i7+m2]
	VSUB.S32	d17, d1, d7			@ [r7-m3, i7-m2]

	VREV64.I32	d17, d17			@ [i7-m2, r7-m3]
	VTRN.32		d13, d17			@ d13 = [r7+m3, i7-m2] -> buf[2], buf[3]   d17 = [i7+m2, r7-m3]
	VREV64.I32	d17, d17			@ [r7-m3, i7+m2]  -> buf[10], buf[11]

	subs		r1, r1, #1			@ sets Z for the bne below, the NEON stores leave flags alone

	VST1.I32	{d12, d13, d14, d15}, [r0]!	@ buf[0..7]
	VST1.I32	{d16, d17, d18, d19}, [r0]!	@ buf[8..15], r0 now points at the next group
	bne		Radix8First_LOOP

Radix8First_END:
	VPOP		{d8 - d15}			@ undo the VPUSH on both the normal and the num == 0 path
	ldmia		sp!, {r4 - r11, pc}

@ VQDMULH multiplier. 0x2d413ccd / 2^31 is about 0.353553, i.e. sqrt(1/2) / 2.
@ NOTE(review): the extra factor of 1/2 presumably matches the >>1 scaling
@ applied to the other terms -- confirm against the C reference.
SQRT1_2:
	.word      0x2d413ccd
109
110	@ENDP  @ |Radix8First|
111
112	.section .text
113	.global	Radix4First
114
@------------------------------------------------------------------------------
@ Radix4First
@   Presumed C prototype: void Radix4First(int *buf, int num) -- TODO confirm
@   against the caller.
@
@ In:    r0 = pointer to 32-bit signed words, transformed in place,
@             8 words (32 bytes) per iteration
@        r1 = number of 8-word groups (0 returns without touching memory)
@ Out:   r0 = address just past the last group processed, r1 = 0
@ Uses:  flags, d0-d7 (not preserved)
@        d8-d11 are used as scratch but saved and restored here, because the
@        AAPCS makes d8-d15 callee-saved.
@
@ Lane notation: [lane0, lane1]. Names used in the comments:
@   r0 = buf[0]+buf[2]   r1 = buf[1]+buf[3]   r2 = buf[0]-buf[2]   r3 = buf[1]-buf[3]
@   r4 = buf[4]+buf[6]   r5 = buf[5]+buf[7]   r6 = buf[4]-buf[6]   r7 = buf[5]-buf[7]
@------------------------------------------------------------------------------
Radix4First:
	stmdb		sp!, {r4 - r11, lr}
	VPUSH		{d8 - d11}			@ callee-saved NEON registers, overwritten via Q4/Q5

	cmp		r1, #0
	beq		Radix4First_END

Radix4First_LOOP:
	VLD1.I32	{d0, d1, d2, d3}, [r0]		@ buf[0..7], no writeback: the store advances r0

	VADD.S32	d4, d0, d1			@ [r0, r1]
	VSUB.S32	d5, d0, d1			@ [r2, r3]
	VSUB.S32	d7, d2, d3			@ [r6, r7]
	VADD.S32	d6, d2, d3			@ [r4, r5]

	VREV64.I32	d7, d7				@ [r7, r6]

	VADD.S32	Q4, Q2, Q3			@ d8  = [r0+r4, r1+r5]   d9  = [r2+r7, r3+r6]
	VSUB.S32	Q5, Q2, Q3			@ d10 = [r0-r4, r1-r5]   d11 = [r2-r7, r3-r6]

	VREV64.I32	d11, d11			@ [r3-r6, r2-r7]
	VTRN.32		d9, d11				@ d9 = [r2+r7, r3-r6]    d11 = [r3+r6, r2-r7]
	subs		r1, r1, #1			@ sets Z for the bne below, the NEON ops leave flags alone
	VREV64.I32	d11, d11			@ [r2-r7, r3+r6]
	VST1.I32	{d8, d9, d10, d11}, [r0]!	@ buf[0..7] written back, r0 moves to the next group

	bne		Radix4First_LOOP

Radix4First_END:
	VPOP		{d8 - d11}			@ undo the VPUSH on both the normal and the num == 0 path
	ldmia		sp!, {r4 - r11, pc}
144
145	@ENDP  @ |Radix4First|
146	.end
147