@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/


.text
.p2align 2

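@ ixheaacd_esbr_cos_sin_mod_loop1: NEON cos/sin modulation loop.
@ Register interface as suggested by the address arithmetic below (the names are
@ descriptive guesses inferred from this file, not taken from the C prototype):
@   r0 - input subband buffer, read forwards in 32-bit words
@   r1 - length parameter: r4/r5 start r1*8 bytes past r0/r3, and LOOP1 runs r1/4 times
@   r2 - cos/sin twiddle table, consumed sequentially as pairs of 32-bit words
@   r3 - output buffer, written forwards
@   r4 - backward-walking mirror pointer into the input buffer ("psubband1")
@   r5 - backward-walking mirror pointer into the output buffer ("psubband1_t")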
    .global ixheaacd_esbr_cos_sin_mod_loop1
ixheaacd_esbr_cos_sin_mod_loop1:

    STMFD           sp!, {r4-r12, r14}
    VPUSH           {D8-D11}
@generating load addresses
    ADD             r4, r0, r1, lsl #3  @psubband1
    SUB             r4, r4, #4
    ADD             r5, r3, r1, lsl #3  @psubband1_t
    SUB             r5, r5, #8
    MOV             r6, r1, ASR #2
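@ r6 = r1 / 4: each pass through LOOP1 below consumes four twiddle pairs from r2,
@ so the loop iterates r1/4 times.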

LOOP1:
@first part
    vld1.32         {d0}, [r2]!
    vrev64.32       d1, d0
    vld1.32         {d2[0]}, [r0]!
    ADD             r7, r0, #252
    vld1.32         {d2[1]}, [r7]
    vld1.32         {d3[0]}, [r4]
    ADD             r7, r4, #256
    vld1.32         {d3[1]}, [r7]
    SUB             r4, r4, #4

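@ First part: d0 = {w0, w1} is one twiddle pair, d1 = {w1, w0} its reverse; d2
@ pairs the word at r0 with the word 256 bytes (64 words) beyond it, and d3 does
@ the same for the backward pointer r4.  The widening multiplies below build
@ 64-bit products that are combined as (d1*d2 + d0*d3) and, with saturation,
@ (d1*d3 - d0*d2); VSHRN #32 then keeps the high 32 bits of each result before
@ the interleaved stores to r3 and r3 + 248 bytes.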
    VMULL.S32       q2, d0, d2          @qsub 2nd
    VMULL.S32       q3, d0, d3          @add 2nd
    VMULL.S32       q4, d1, d2          @add 1st
    VMULL.S32       q5, d1, d3          @qsub 1st

    vadd.I64        q0, q4, q3
    VQSUB.S64       Q1, Q5, Q2

    VSHRN.I64       D0, Q0, #32
    VSHRN.I64       D2, Q1, #32
    VMOV.32         D3, D0
    VST2.32         {D0[0], D2[0]}, [R3]!
    ADD             r7, r3, #248
    VST2.32         {D2[1], D3[1]}, [R7]

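@ Second part: same load pattern, but the 64-bit products are combined as
@ (d1*d3 + d0*d2) and, with saturation, (d1*d2 - d0*d3), and the results go
@ through the mirrored output pointer r5 (and r5 + 256 bytes), which is then
@ stepped back by 8 bytes.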
@second part
    vld1.32         {d0}, [r2]!
    vrev64.32       d1, d0
    vld1.32         {d2[0]}, [r0]!
    ADD             R7, R0, #252
    vld1.32         {d2[1]}, [r7]
    vld1.32         {d3[0]}, [r4]
    ADD             R7, R4, #256
    vld1.32         {d3[1]}, [r7]
    SUB             r4, r4, #4

    VMULL.S32       q2, d0, d2          @add 2nd
    VMULL.S32       q3, d0, d3          @sub 2nd
    VMULL.S32       q4, d1, d2          @sub 1st
    VMULL.S32       q5, d1, d3          @add 1st

    VADD.I64        Q0, Q5, Q2
    VQSUB.S64       Q1, Q4, Q3

    VSHRN.I64       D0, Q0, #32
    VSHRN.I64       D2, Q1, #32
    VMOV.32         D3, D0
    VST2.32         {D0[0], D2[0]}, [R5]
    ADD             R7, R5, #256
    VST2.32         {D2[1], D3[1]}, [R7]
    SUB             r5, r5, #8
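@ Third and fourth parts: the same two combine/store patterns as above, repeated
@ for the next two twiddle pairs, so one LOOP1 iteration covers four pairs in total.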
@Third part
    vld1.32         {d0}, [r2]!
    vrev64.32       d1, d0
    vld1.32         {d2[0]}, [r0]!
    ADD             r7, r0, #252
    vld1.32         {d2[1]}, [r7]
    vld1.32         {d3[0]}, [r4]
    ADD             r7, r4, #256
    vld1.32         {d3[1]}, [r7]
    SUB             r4, r4, #4

    VMULL.S32       q2, d0, d2          @qsub 2nd
    VMULL.S32       q3, d0, d3          @add 2nd
    VMULL.S32       q4, d1, d2          @add 1st
    VMULL.S32       q5, d1, d3          @qsub 1st

    vadd.I64        q0, q4, q3
    VQSUB.S64       Q1, Q5, Q2

    VSHRN.I64       D0, Q0, #32
    VSHRN.I64       D2, Q1, #32
    VMOV.32         D3, D0
    VST2.32         {D0[0], D2[0]}, [R3]!
    ADD             r7, r3, #248
    VST2.32         {D2[1], D3[1]}, [R7]

@Fourth part
    vld1.32         {d0}, [r2]!
    vrev64.32       d1, d0
    vld1.32         {d2[0]}, [r0]!
    ADD             R7, R0, #252
    vld1.32         {d2[1]}, [r7]
    vld1.32         {d3[0]}, [r4]
    ADD             R7, R4, #256
    vld1.32         {d3[1]}, [r7]
    SUB             r4, r4, #4

    VMULL.S32       q2, d0, d2          @add 2nd
    VMULL.S32       q3, d0, d3          @sub 2nd
    VMULL.S32       q4, d1, d2          @sub 1st
    VMULL.S32       q5, d1, d3          @add 1st

    VADD.I64        Q0, Q5, Q2
    VQSUB.S64       Q1, Q4, Q3

    VSHRN.I64       D0, Q0, #32
    VSHRN.I64       D2, Q1, #32
    VMOV.32         D3, D0
    VST2.32         {D0[0], D2[0]}, [R5]
    ADD             R7, R5, #256
    SUBS            R6, R6, #1
    VST2.32         {D2[1], D3[1]}, [R7]
    SUB             r5, r5, #8

    BGT             LOOP1
    VPOP            {D8-D11}            @restore callee-saved NEON registers
    LDMFD           sp!, {r4-r12, r15}  @return: pc is loaded from the stacked lr