1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12
13@//
14@//
15@// File Name:  armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   7761
18@// Last Modified Date:       Wed, 26 Sep 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute a first stage Radix 4 FFT stage for a N point complex signal
26@//
27@//
28
29
30@// Include standard headers
31
32#include "dl/api/arm/armCOMM_s.h"
33#include "dl/api/arm/omxtypes_s.h"
34
35@// Import symbols required from other files
36@// (For example tables)
37
38
39
40
41@// Set debugging level
42@//DEBUG_ON    SETL {TRUE}
43
44
45
46@// Guarding implementation by the processor name
47
48
49
50@// Guarding implementation by the processor name
51
52
53@//Input Registers
54
55#define pSrc                            r0
56#define pDst                            r2
57#define pTwiddle                        r1
58#define pPingPongBuf                    r5
59#define subFFTNum                       r6
60#define subFFTSize                      r7
61
62
63@//Output Registers
64
65
66@//Local Scratch Registers
67
68#define grpSize                         r3
69@// Reuse grpSize as setCount
70#define setCount                        r3
71#define pointStep                       r4
72#define outPointStep                    r4
73#define setStep                         r8
74#define step1                           r9
75#define step3                           r10
76
77@// Neon Registers
78
79#define dXr0                            D0.S16
80#define dXi0                            D1.S16
81#define dXr1                            D2.S16
82#define dXi1                            D3.S16
83#define dXr2                            D4.S16
84#define dXi2                            D5.S16
85#define dXr3                            D6.S16
86#define dXi3                            D7.S16
87#define dYr0                            D8.S16
88#define dYi0                            D9.S16
89#define dYr1                            D10.S16
90#define dYi1                            D11.S16
91#define dYr2                            D12.S16
92#define dYi2                            D13.S16
93#define dYr3                            D14.S16
94#define dYi3                            D15.S16
95#define dZr0                            D16.S16
96#define dZi0                            D17.S16
97#define dZr1                            D18.S16
98#define dZi1                            D19.S16
99#define dZr2                            D20.S16
100#define dZi2                            D21.S16
101#define dZr3                            D22.S16
102#define dZi3                            D23.S16
103#define qY0                             Q4.S16
104#define qY2                             Q6.S16
105#define qX0                             Q0.S16
106#define qX2                             Q2.S16
107
108#define qY1                             Q5.S16
109#define qY3                             Q7.S16
110#define qX1                             Q1.S16
111#define qX3                             Q3.S16
112#define qZ0                             Q8.S16
113#define qZ1                             Q9.S16
114
115
116        .macro FFTSTAGE scaled, inverse, name
117
118        @// Define stack arguments
119
120        MOV     pointStep,subFFTNum
121        @// Update pSubFFTSize and pSubFFTNum regs
122
123
124        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
125        @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
126        LSR     grpSize,subFFTNum,#2
127        MOV     subFFTNum,grpSize
128
129
130        @// pT0+1 increments pT0 by 4 bytes
131        @// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
132        @// Note: outPointStep = pointStep for firststage
133        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
134
135
136        @// Calculate the step of input data for the next set
137        @//MOV     setStep,pointStep,LSL #1
138        MOV     setStep,grpSize,LSL #3
139        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
140        MOV     step1,setStep
141        ADD     setStep,setStep,pointStep             @// setStep = 3*pointStep
142        RSB     setStep,setStep,#16                   @// setStep = - 3*pointStep+16
143
144
145        MOV     subFFTSize,#4                         @// subFFTSize = 1 for the first stage
146
147
148        .ifeqs  "\scaled", "TRUE"
149            VHADD    qY0,qX0,qX2             @// u0
150        .else
151            VADD   qY0,qX0,qX2               @// u0
152        .endif
153        RSB     step3,pointStep,#0
154
155        @// grp = 0 a special case since all the twiddle factors are 1
156        @// Loop on the sets: 4 sets at a time
157
158grpZeroSetLoop\name:
159
160        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]
161
162        .ifeqs "\scaled", "TRUE"
163
164            @// finish first stage of 4 point FFT
165
166            VHSUB    qY2,qX0,qX2             @// u1
167            SUBS    setCount,setCount,#4                    @// decrement the set loop counter
168
169            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
170            VHADD    qY1,qX1,qX3             @// u2
171            VLD2    {dXr2,dXi2},[pSrc :128],step3
172            VHSUB    qY3,qX1,qX3             @// u3
173
174
175
176            @// finish second stage of 4 point FFT
177
178            VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
179            VHADD    qZ0,qY0,qY1             @// y0
180
181            .ifeqs  "\inverse", "TRUE"
182
183                VHSUB    dZr3,dYr2,dYi3                  @// y3
184                VHADD    dZi3,dYi2,dYr3
185                VST2    {dZr0,dZi0},[pDst :128],outPointStep
186
187                VHSUB    qZ1,qY0,qY1                     @// y2
188                VST2    {dZr3,dZi3},[pDst :128],outPointStep
189
190                VHADD    dZr2,dYr2,dYi3                  @// y1
191                VST2    {dZr1,dZi1},[pDst :128],outPointStep
192                VHSUB    dZi2,dYi2,dYr3
193
194                VHADD    qY0,qX0,qX2                     @// u0 (next loop)
195                VST2    {dZr2,dZi2},[pDst :128],setStep
196
197
198            .else
199
200                VHADD    dZr2,dYr2,dYi3                  @// y1
201                VHSUB    dZi2,dYi2,dYr3
202
203                VST2    {dZr0,dZi0},[pDst :128],outPointStep
204                VHSUB    qZ1,qY0,qY1                     @// y2
205
206                VST2    {dZr2,dZi2},[pDst :128],outPointStep
207                VHSUB    dZr3,dYr2,dYi3                  @// y3
208                VHADD    dZi3,dYi2,dYr3
209                VST2    {dZr1,dZi1},[pDst :128],outPointStep
210                VHADD    qY0,qX0,qX2                     @// u0 (next loop)
211                VST2    {dZr3,dZi3},[pDst :128],setStep
212
213            .endif
214
215
216        .else
217
218            @// finish first stage of 4 point FFT
219
220            VSUB    qY2,qX0,qX2             @// u1
221            SUBS    setCount,setCount,#4                    @// decrement the set loop counter
222
223            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
224            VADD    qY1,qX1,qX3             @// u2
225            VLD2    {dXr2,dXi2},[pSrc :128],step3
226            VSUB    qY3,qX1,qX3             @// u3
227
228
229
230            @// finish second stage of 4 point FFT
231
232            VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
233            VADD    qZ0,qY0,qY1             @// y0
234
235            .ifeqs  "\inverse", "TRUE"
236
237                VSUB    dZr3,dYr2,dYi3                  @// y3
238                VADD    dZi3,dYi2,dYr3
239                VST2    {dZr0,dZi0},[pDst :128],outPointStep
240
241                VSUB    qZ1,qY0,qY1                     @// y2
242                VST2    {dZr3,dZi3},[pDst :128],outPointStep
243
244                VADD    dZr2,dYr2,dYi3                  @// y1
245                VST2    {dZr1,dZi1},[pDst :128],outPointStep
246                VSUB    dZi2,dYi2,dYr3
247
248                VADD    qY0,qX0,qX2                     @// u0 (next loop)
249                VST2    {dZr2,dZi2},[pDst :128],setStep
250
251
252            .else
253
254                VADD    dZr2,dYr2,dYi3                  @// y1
255                VSUB    dZi2,dYi2,dYr3
256
257                VST2    {dZr0,dZi0},[pDst :128],outPointStep
258                VSUB    qZ1,qY0,qY1                     @// y2
259
260                VST2    {dZr2,dZi2},[pDst :128],outPointStep
261                VSUB    dZr3,dYr2,dYi3                  @// y3
262                VADD    dZi3,dYi2,dYr3
263                VST2    {dZr1,dZi1},[pDst :128],outPointStep
264                VADD    qY0,qX0,qX2                     @// u0 (next loop)
265                VST2    {dZr3,dZi3},[pDst :128],setStep
266
267            .endif
268
269
270        .endif
271
272        BGT     grpZeroSetLoop\name
273
274
275        @// reset pSrc to pDst for the next stage
276        SUB     pSrc,pDst,pointStep                     @// pDst -= grpSize
277        MOV     pDst,pPingPongBuf
278
279
280        .endm
281
282
283
284        M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
285        FFTSTAGE "FALSE","FALSE",FWD
286        M_END
287
288
289
290        M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
291        FFTSTAGE "FALSE","TRUE",INV
292        M_END
293
294
295        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
296        FFTSTAGE "TRUE","FALSE",FWDSFS
297        M_END
298
299
300        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
301        FFTSTAGE "TRUE","TRUE",INVSFS
302        M_END
303
304
305
306
307
308    .end
309