1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12@//
13
14@//
15@// File Name:  armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   7767
18@// Last Modified Date:       Thu, 27 Sep 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute the first radix-4 stage of the FFT of an N-point complex signal
26@//
27
28
29
30@// Include standard headers
31
32#include "dl/api/arm/armCOMM_s.h"
33#include "dl/api/arm/omxtypes_s.h"
34
35@// Import symbols required from other files
36@// (For example tables)
37
38
39
40
41@// Set debugging level
42@//DEBUG_ON    SETL {TRUE}
43
44
45
46@// Guarding implementation by the processor name
47
48
49
50@// Guarding implementation by the processor name
51
52
53@//Input Registers
@// Core-register aliases the FFTSTAGE macro in this file expects to be live on entry.
54
@// pSrc: address operand of every VLD2 in the macro (post-incremented by each load)
55#define pSrc		r0
@// pDst: address operand of every VST2 in the macro (post-incremented by each store)
56#define pDst		r2
@// pTwiddle: declared here but not referenced by the FFTSTAGE macro in this file
57#define pTwiddle	r1
@// pPingPongBuf: copied into pDst as the last instruction of the macro
58#define pPingPongBuf	r5
@// subFFTNum: read on entry to derive the strides, then overwritten with subFFTNum >> 2
59#define subFFTNum	r6
@// subFFTSize: overwritten with the constant 4 by the macro (entry value is not read)
60#define subFFTSize	r7
61
62
63@//Output Registers
64
65
66@//Local Scratch Registers
@// All of these are written by the FFTSTAGE macro before they are read.
67
@// grpSize and setCount are two names for the same register (r3):
@// it first holds subFFTNum >> 2 and is then counted down by the set loop.
68#define grpSize		r3
69@// Reuse grpSize as setCount
70#define setCount	r3
@// pointStep and outPointStep are two names for the same register (r4):
@// the same byte stride is used for the loads and for the stores.
71#define pointStep	r4
72#define outPointStep	r4
@// setStep = 16 - 3*pointStep: post-increment applied by the last load/store of a set
73#define setStep		r8
@// step1 = 2*pointStep
74#define step1		r9
@// step3 = -pointStep
75#define step3		r10
76
77@// Neon Registers
@//
@// Naming used by the FFTSTAGE macro in this file:
@//   X = values loaded by VLD2, Y = results of the first add/sub level,
@//   Z = results of the second add/sub level (the values stored by VST2).
@//   The r / i suffix marks the first / second register of each VLD2 or VST2
@//   pair, i.e. the de-interleaved even / odd 32-bit words (presumably the
@//   real / imaginary parts - confirm against the caller data layout).
@//   Each qXn / qYn / qZn alias is the quadword register that overlays the
@//   matching d..rn / d..in doubleword pair (Q0 = D0:D1, Q1 = D2:D3, ...),
@//   so one Q-register operation processes the r and i halves together.
78
79#define dXr0	D0.S32
80#define dXi0	D1.S32
81#define dXr1	D2.S32
82#define dXi1	D3.S32
83#define dXr2	D4.S32
84#define dXi2	D5.S32
85#define dXr3	D6.S32
86#define dXi3	D7.S32
87#define dYr0	D8.S32
88#define dYi0	D9.S32
89#define dYr1	D10.S32
90#define dYi1	D11.S32
91#define dYr2	D12.S32
92#define dYi2	D13.S32
93#define dYr3	D14.S32
94#define dYi3	D15.S32
95#define qX0	Q0.S32
96#define qX1	Q1.S32
97#define qX2	Q2.S32
98#define qX3	Q3.S32
99#define qY0	Q4.S32
100#define qY1	Q5.S32
101#define qY2	Q6.S32
102#define qY3	Q7.S32
103#define dZr0	D16.S32
104#define dZi0	D17.S32
105#define dZr1	D18.S32
106#define dZi1	D19.S32
107#define dZr2	D20.S32
108#define dZi2	D21.S32
109#define dZr3	D22.S32
110#define dZi3	D23.S32
111#define qZ0	Q8.S32
112#define qZ1	Q9.S32
113#define qZ2	Q10.S32
114#define qZ3	Q11.S32
115
116
@// FFTSTAGE scaled, inverse, name
@//
@// Expands to the body of one first-stage radix-4 pass: four 16-byte blocks,
@// pointStep bytes apart, are loaded with VLD2, combined by two levels of
@// add/sub, and the four results are stored with VST2, again pointStep apart.
@//
@// Macro arguments (compared as strings with .ifeqs):
@//   scaled  - "TRUE" selects the halving forms VHADD/VHSUB for both add/sub
@//             levels; any other value selects VADD/VSUB.
@//   inverse - "TRUE" exchanges which of the two cross-term results
@//             (Yr2-Yi3, Yi2+Yr3) and (Yr2+Yi3, Yi2-Yr3) is stored second and
@//             which is stored fourth; the first (Y0+Y1) and third (Y0-Y1)
@//             stores are the same in both variants.
@//   name    - suffix appended to the loop label so every expansion gets a
@//             distinct label.
@//
@// Registers read on entry : pSrc, pDst, pPingPongBuf, subFFTNum.
@// Registers on exit       : subFFTNum = entry subFFTNum >> 2, subFFTSize = 4,
@//                           pSrc = final pDst - pointStep, pDst = pPingPongBuf.
@// Also written            : r3, r4, r8, r9, r10, Q0-Q11 and the condition flags.
@//
@// The loop is software pipelined: the loads for the following set are issued
@// between the arithmetic for the current set, and each X register is reloaded
@// only after the last instruction that reads it for the current set.
@//
@// NOTE(review): the loads for a following set are still issued in the final
@// iteration; by the stride arithmetic the last of them appears to touch the
@// 16 bytes just past the fourth input block - confirm callers tolerate this.
@// NOTE(review): the loop counts down by 2 and pSrc is rewound by exactly
@// pointStep afterwards, which looks correct only when the entry subFFTNum is
@// a multiple of 8 - TODO confirm with the callers.
117        .macro FFTSTAGE scaled, inverse, name
118
119        @// Define stack arguments
120
121        @// pT0+1 increments pT0 by 8 bytes
122        @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
123        @// Note: outPointStep = pointStep for firststage
124
        @// pointStep (a byte stride) = 2 * subFFTNum
125        MOV     pointStep,subFFTNum,LSL #1
126
127
128        @// Update pSubFFTSize and pSubFFTNum regs
129        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
130        MOV     subFFTSize,#4                                 @// subFFTSize = 4 once the first stage is done
131
132        @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
133        LSR     grpSize,subFFTNum,#2
134        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
135        MOV     subFFTNum,grpSize
136
137
138        @// Calculate the step of input data for the next set
139        @//MOV     setStep,pointStep,LSL #1
        @// grpSize << 4 equals 2*pointStep here (grpSize = subFFTNum/4, pointStep = 2*subFFTNum)
140        MOV     setStep,grpSize,LSL #4
141        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
142        ADD     setStep,setStep,pointStep                   @// setStep = 3*pointStep
143        RSB     setStep,setStep,#16                         @// setStep = - 3*pointStep+16
144
145        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
146        MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep
147
        @// Y0 = X0 + X2 for the first set is formed ahead of the loop; the loop
        @// body recomputes it for the following set just before its last store.
148        .ifeqs "\scaled", "TRUE"
149            VHADD    qY0,qX0,qX2
150        .else
151            VADD    qY0,qX0,qX2
152        .endif
153
154        RSB     step3,pointStep,#0                          @// step3 = -pointStep
155
156        @// grp = 0 a special case since all the twiddle factors are 1
157        @// Loop on the sets : 2 sets at a time
158
159grpZeroSetLoop\name :
160
161
162
163        @// Decrement setcount
164        SUBS    setCount,setCount,#2                    @// decrement the set loop counter
165
        @// Load order inside the loop is data[0], data[2], data[1], data[3]:
        @// the post-increments are +step1, +step3, +step1, +setStep, a net +16 bytes.
166        .ifeqs "\scaled", "TRUE"
167
168            @// finish first stage of 4 point FFT
169
170            VHSUB    qY2,qX0,qX2
171
172            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
173            VHADD    qY1,qX1,qX3
174            VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
175            VHSUB    qY3,qX1,qX3
176
177
178            @// finish second stage of 4 point FFT
179
180            .ifeqs "\inverse", "TRUE"
181
182                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
183                VHADD    qZ0,qY0,qY1
184
185                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
186                VHSUB    dZr3,dYr2,dYi3
187
188                VST2    {dZr0,dZi0},[pDst :128],outPointStep
189                VHADD    dZi3,dYi2,dYr3
190
191                VHSUB    qZ1,qY0,qY1
192                VST2    {dZr3,dZi3},[pDst :128],outPointStep
193
194                VHADD    dZr2,dYr2,dYi3
195                VST2    {dZr1,dZi1},[pDst :128],outPointStep
196                VHSUB    dZi2,dYi2,dYr3
197
198                VHADD    qY0,qX0,qX2                     @// u0 for next iteration
199                VST2    {dZr2,dZi2},[pDst :128],setStep
200
201
202            .else
203
204                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
205                VHADD    qZ0,qY0,qY1
206
207                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
208                VHADD    dZr2,dYr2,dYi3
209
210                VST2    {dZr0,dZi0},[pDst :128],outPointStep
211                VHSUB    dZi2,dYi2,dYr3
212
213                VHSUB    qZ1,qY0,qY1
214                VST2    {dZr2,dZi2},[pDst :128],outPointStep
215
216                VHSUB    dZr3,dYr2,dYi3
217                VST2    {dZr1,dZi1},[pDst :128],outPointStep
218                VHADD    dZi3,dYi2,dYr3
219
220                VHADD    qY0,qX0,qX2                     @// u0 for next iteration
221                VST2    {dZr3,dZi3},[pDst :128],setStep
222
223            .endif
224
225
226
227        .else
228
229            @// finish first stage of 4 point FFT
230
231
232            VSUB    qY2,qX0,qX2
233
234            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
235            VADD    qY1,qX1,qX3
236            VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
237            VSUB    qY3,qX1,qX3
238
239
240            @// finish second stage of 4 point FFT
241
242            .ifeqs "\inverse", "TRUE"
243
244                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
245                VADD    qZ0,qY0,qY1
246
247                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
248                VSUB    dZr3,dYr2,dYi3
249
250                VST2    {dZr0,dZi0},[pDst :128],outPointStep
251                VADD    dZi3,dYi2,dYr3
252
253                VSUB    qZ1,qY0,qY1
254                VST2    {dZr3,dZi3},[pDst :128],outPointStep
255
256                VADD    dZr2,dYr2,dYi3
257                VST2    {dZr1,dZi1},[pDst :128],outPointStep
258                VSUB    dZi2,dYi2,dYr3
259
260                VADD    qY0,qX0,qX2                     @// u0 for next iteration
261                VST2    {dZr2,dZi2},[pDst :128],setStep
262
263
264            .else
265
266                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
267                VADD    qZ0,qY0,qY1
268
269                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
270                VADD    dZr2,dYr2,dYi3
271
272                VST2    {dZr0,dZi0},[pDst :128],outPointStep
273                VSUB    dZi2,dYi2,dYr3
274
275                VSUB    qZ1,qY0,qY1
276                VST2    {dZr2,dZi2},[pDst :128],outPointStep
277
278                VSUB    dZr3,dYr2,dYi3
279                VST2    {dZr1,dZi1},[pDst :128],outPointStep
280                VADD    dZi3,dYi2,dYr3
281
282                VADD    qY0,qX0,qX2                     @// u0 for next iteration
283                VST2    {dZr3,dZi3},[pDst :128],setStep
284
285            .endif
286
287        .endif
288
        @// The flags tested here are the ones set by the SUBS at the top of the
        @// loop; none of the NEON loads, stores or arithmetic in between is a
        @// flag-setting instruction.
289        BGT     grpZeroSetLoop\name
290
291        @// reset pSrc to pDst for the next stage
292        SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep (each loop pass moved pDst a net 16 bytes)
293        MOV     pDst,pPingPongBuf
294
295
296        .endm
297
298
299
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
@// Body is the FFTSTAGE expansion with scaled = "FALSE" (VADD/VSUB) and
@// inverse = "FALSE"; the loop label gets the suffix fwd.
@// Register interface: see the register aliases and FFTSTAGE earlier in this file.
@// NOTE(review): M_START / M_END come from armCOMM_s.h; the trailing r4 is
@// presumably the register-save range for the prologue - confirm there, since
@// FFTSTAGE also writes r6-r10 and Q4-Q7 (D8-D15).
300        M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
301        FFTSTAGE "FALSE","FALSE",fwd
302        M_END
303
304
305
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
@// Body is the FFTSTAGE expansion with scaled = "FALSE" (VADD/VSUB) and
@// inverse = "TRUE" (second and fourth stored results exchanged relative to
@// the forward variant); the loop label gets the suffix inv.
@// NOTE(review): M_START / M_END come from armCOMM_s.h; the trailing r4 is
@// presumably the register-save range for the prologue - confirm there, since
@// FFTSTAGE also writes r6-r10 and Q4-Q7 (D8-D15).
306        M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
307        FFTSTAGE "FALSE","TRUE",inv
308        M_END
309
310
@// armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
@// Body is the FFTSTAGE expansion with scaled = "TRUE" (halving VHADD/VHSUB
@// in both add/sub levels) and inverse = "FALSE"; the loop label gets the
@// suffix fwdsfs.
@// NOTE(review): M_START / M_END come from armCOMM_s.h; the trailing r4 is
@// presumably the register-save range for the prologue - confirm there, since
@// FFTSTAGE also writes r6-r10 and Q4-Q7 (D8-D15).
311        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
312        FFTSTAGE "TRUE","FALSE",fwdsfs
313        M_END
314
315
@// armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
@// Body is the FFTSTAGE expansion with scaled = "TRUE" (halving VHADD/VHSUB
@// in both add/sub levels) and inverse = "TRUE" (second and fourth stored
@// results exchanged relative to the forward variant); the loop label gets
@// the suffix invsfs.
@// NOTE(review): M_START / M_END come from armCOMM_s.h; the trailing r4 is
@// presumably the register-save range for the prologue - confirm there, since
@// FFTSTAGE also writes r6-r10 and Q4-Q7 (D8-D15).
316        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
317        FFTSTAGE "TRUE","TRUE",invsfs
318        M_END
319
320	.end
321