1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12@//
13
14@//
15@// File Name:  omxSP_FFTInv_CToC_SC32_Sfs_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   6675
18@// Last Modified Date:       Fri, 06 Jul 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute an inverse FFT for a complex signal
26@//
27
28
29@// Include standard headers
30
31#include "dl/api/arm/armCOMM_s.h"
32#include "dl/api/arm/omxtypes_s.h"
33
34@// Import symbols required from other files
35@// (For example tables)
36
37        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
38        .extern  armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
39        .extern  armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
40        .extern  armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
41        .extern  armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
42        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
43        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
44        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
45        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
46        .extern  armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
47
48@// Set debugging level
49@//DEBUG_ON    SETL {TRUE}
50
51
52
53@// Guarding implementation by the processor name
54
55
56
57      @// Guarding implementation by the processor name
58
59@// Import symbols required from other files
60@// (For example tables)
61        .extern  armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
62        .extern  armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
63        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
64        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
65
66
67@// Input registers (the four arguments of omxSP_FFTInv_CToC_SC32_Sfs)
68
69#define pSrc	r0
70#define pDst	r1
71#define pFFTSpec	r2
72#define scale	r3
73
74
75@// Output registers
76#define result	r0
77
78@// Local scratch registers
79@// Several names share one physical register: r1 = pDst/argTwiddle, r2 = pFFTSpec/argDst/diffMinusOne, r3 = scale/round, r4 = argScale/tmpOrder/pTwiddle/x0r, r5 = pOut/x0i, r6 = subFFTNum/N
80#define argTwiddle	r1
81#define argDst	r2
82#define argScale	r4
83#define tmpOrder	r4
84#define pTwiddle	r4
85#define pOut	r5
86#define subFFTSize	r7
87#define subFFTNum	r6
88#define N	r6
89#define order	r14
90#define diff	r9
91@// Total number of radix stages required to complete the FFT
92#define count	r8
93#define x0r	r4
94#define x0i	r5
95#define diffMinusOne	r2
96#define round	r3
97@// NOTE: order is r14 (the link register), so its value is overwritten by every BL; anything needed after a call must be copied out first
98@// Neon registers
99@// D0 and D1 are each used as two signed 32-bit lanes
100#define dX0	D0.S32
101#define dShift	D1.S32
102
103
104
105    @// Allocate stack memory required by the function: diffOnStack keeps the right shift still owed after the FFT stages (M_ALLOC4 comes from armCOMM_s.h; presumably one 4-byte slot - TODO confirm)
106        M_ALLOC4        diffOnStack, 4
107    @// omxSP_FFTInv_CToC_SC32_Sfs - in: r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale; out: r0 = OMX_Sts_NoErr on every path
108    @// Write function header (M_START comes from armCOMM_s.h; r11 and d15 presumably name the highest registers it saves - TODO confirm)
109        M_START     omxSP_FFTInv_CToC_SC32_Sfs,r11,d15
110
111@ Structure offsets for the FFTSpec (byte offsets from pFFTSpec)
112	.set	ARMsFFTSpec_N, 0
113	.set	ARMsFFTSpec_pBitRev, 4
114	.set	ARMsFFTSpec_pTwiddle, 8
115	.set	ARMsFFTSpec_pBuf, 12
116    @// Only N, pTwiddle and pBuf are read below; pBitRev is not used in this routine
117        @// Define stack arguments
118
119        @// Read the size from structure and take log
120        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
121
122        @// Read other structure parameters
123        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
124        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
125
126        CLZ     order,N                             @// N = 2^order
127        RSB     order,order,#31                     @// order = 31 - clz(N), the index of the highest set bit of N
128        MOV     subFFTSize,#1
129        @//MOV     subFFTNum,N                      (not needed: subFFTNum and N are both r6)
130
131        ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N
132
133        CMP     order,#3
134        BGT     orderGreaterthan3                   @// order > 3
135
136        CMP     order,#1
137        BGE     orderGreaterthan0                   @// order > 0
138        M_STR   scale, diffOnStack,LT               @// order = 0: no stage runs, the whole scale is applied in FFTEnd
139        VLD1    dX0,[pSrc]                          @// copy one D register (two 32-bit lanes) from pSrc ...
140        VST1    dX0,[pDst]                          @// ... to pDst
141        MOV     pSrc,pDst                           @// FFTEnd scales in place through pSrc
142        BLT     FFTEnd                              @// flags are still those of CMP order,#1; LT always holds here because BGE was not taken
143
144orderGreaterthan0:
145        @// set the buffers appropriately for various orders (1 <= order <= 3 here)
146        CMP     order,#2                            @// these flags are consumed by every conditional instruction down to BLLT
147        MOVNE   argDst,pDst
148        MOVEQ   argDst,pOut
149        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
150        MOV     argTwiddle,pTwiddle
151        @// Store the scale factor and scale at the end
152        SUB     diff,scale,order                    @// scale already had order added above, so diff is the scale passed in by the caller
153        M_STR   diff, diffOnStack
154        BGE     orderGreaterthan1                   @// order = 2 or 3
155        BLLT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
156        B       FFTEnd
157
158orderGreaterthan1:
159        MOV     tmpOrder,order                          @// tmpOrder = RN 4; order is r14 (LR) and is overwritten by the BL below
160        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
161        CMP     tmpOrder,#2                             @// r4 is read after the call, so the first stage routine is expected to preserve it
162        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe     @// order = 3 only: one middle stage
163        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe  @// last stage
164        B       FFTEnd
165
166
167orderGreaterthan3:
168        @// check scale = 0 or scale = order
169        SUBS    diff, scale, order                 @// scale > order
170        MOVGT   scale,order                        @// clamp scale to order; the excess stays in diff
171        BGE     specialScaleCase                   @// scale = 0 or scale = order
172        CMP     scale,#0
173        BEQ     specialScaleCase
174        B       generalScaleCase
175
176specialScaleCase:	                                    @//  scale = 0 or scale = order  and order >= 2
177        @// Bit 1 of order picks the buffer written by the first stage (presumably so that the last stage ends in pDst - TODO confirm)
178        TST     order, #2                           @// Set input args to fft stages
179        MOVNE   argDst,pDst
180        MOVEQ   argDst,pOut
181        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
182        MOV     argTwiddle,pTwiddle
183
184        CMP      diff,#0
185        M_STR    diff, diffOnStack
186        BGE      scaleEqualsOrder                   @// uses the flags of CMP diff,#0; diff >= 0 takes the scaled (Sfs) stages
187
188        @//check for even or odd order
189        @// NOTE: The following combination of BL's would work fine even though the first
190        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
191        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
192
193        TST     order,#0x00000001
194        BLEQ    armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
195        BLNE    armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
196
197        CMP        subFFTNum,#4
198        BLT     FFTEnd
199
200
201unscaledRadix4Loop:
202        BEQ        lastStageUnscaledRadix4
203         BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
204         CMP        subFFTNum,#4
205         B        unscaledRadix4Loop
206
207lastStageUnscaledRadix4:
208        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
209        B        FFTEnd
210
211
212scaleEqualsOrder:
213        @//check for even or odd order
214        @// NOTE: The following combination of BL's would work fine even though the first
215        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
216        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
217
218        TST     order,#0x00000001
219        BLEQ    armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
220        BLNE    armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
221
222        CMP        subFFTNum,#4
223        BLT     FFTEnd
224
225
226scaledRadix4Loop:
227        BEQ        lastStageScaledRadix4
228         BL        armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
229         CMP        subFFTNum,#4
230         B        scaledRadix4Loop
231
232lastStageScaledRadix4:
233        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
234        B        FFTEnd
235
236generalScaleCase:	                                        @// 0 < scale < order and order >= 2
237        @// Determine the correct destination buffer
238        SUB     diff,order,scale                @// diff = number of stages left after the scaled ones
239        TST     diff,#0x01
240        ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
241        MOVNE   count,order
242        TST     count,#0x01                     @// Is count even or odd ?
243
244        MOVNE   argDst,pDst                     @// Set input args to fft stages
245        MOVEQ   argDst,pOut
246        MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
247        MOV     argTwiddle,pTwiddle
248
249        M_STR   diff, diffOnStack
250
251        MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
252        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
253        SUBS    argScale,argScale,#1
254
255scaledRadix2Loop:
256        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
257        SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
258        BGT     scaledRadix2Loop
259
260
261        M_LDR   diff, diffOnStack
262        @//check for even or odd order
263        TST     diff,#0x00000001
264        BEQ     generalUnscaledRadix4Loop
265        B       unscaledRadix2Loop
266
267generalUnscaledRadix4Loop:
268        CMP        subFFTNum,#4
269         BEQ        generalLastStageUnscaledRadix4
270         BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
271         B        generalUnscaledRadix4Loop
272
273generalLastStageUnscaledRadix4:
274        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
275        B        End                            @// goes to End, not FFTEnd: no post-scaling pass on this path
276
277
278unscaledRadix2Loop:
279        CMP        subFFTNum,#2
280         BEQ        generalLastStageUnscaledRadix2
281         BL        armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
282         B        unscaledRadix2Loop
283
284generalLastStageUnscaledRadix2:
285        BL      armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
286        B        End                            @// goes to End, not FFTEnd: no post-scaling pass on this path
287
288
289FFTEnd:	                                              @// Does only the scaling
290
291        M_LDR   diff, diffOnStack
292        CMP     diff,#0
293        BLE     End                                 @// nothing left to shift
294
295        RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
296        VDUP    dShift,diff                         @// broadcast -diff to both lanes
297
298scaleFFTData:	                                        @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
299        VLD1    {dX0},[pSrc]            @// pSrc contains pDst pointer
300        SUBS    subFFTSize,subFFTSize,#1 @// loop counter; these flags feed the BGT below
301        VRSHL   dX0,dShift              @// negative shift count = rounding right shift of both lanes
302        VST1    {dX0},[pSrc]!           @// store back in place and advance pSrc to the next D register
303
304        BGT     scaleFFTData
305
306
307End:
308        @// Set return value
309        MOV     result, #OMX_Sts_NoErr
310
311        @// Write function tail
312        M_END
313
314	.end
315