1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
11@//  to support float instead of SC32.
12@//
13
14@//
15@// Description:
16@// Compute the first stage of a radix-2 DIT, in-order, out-of-place FFT
17@// for an N-point complex signal.
18@//
19@//
20
21
22@// Include standard headers
23
24#include "dl/api/arm/armCOMM_s.h"
25#include "dl/api/arm/omxtypes_s.h"
26
27
28@// Import symbols required from other files
29@// (For example tables)
30
31
32
33
34@// Set debugging level
35@//DEBUG_ON    SETL {TRUE}
36
37
38
39@// Guarding implementation by the processor name
40
41
42
43@// Guarding implementation by the processor name
44
45
@// ---------------------------------------------------------------------
@// Register map
@//
@// Names are listed in register-number order inside each group. Where two
@// names share a register they are aliases for the same storage; the
@// second name only documents a different role for the same value.
@// ---------------------------------------------------------------------

@// Input registers
@// (pTwiddle is declared here but not referenced by the stage macro
@//  visible in this file.)

#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7


@// Output registers
@// (none declared)


@// Local scratch registers

#define pointStep       r3
#define outPointStep    r3      /* alias of pointStep */
#define grpSize         r4
#define setCount        r4      /* alias of grpSize   */
#define step            r8
#define dstStep         r8      /* alias of step      */


@// NEON registers, all typed as F32 lanes of a 64-bit D register

#define dX0     D0.F32
#define dX1     D1.F32
#define dY0     D2.F32
#define dY1     D3.F32
74
75
76        .MACRO FFTSTAGE scaled, inverse, name
77
78        @// Define stack arguments
79
80
81        @// update subFFTSize and subFFTNum into RN6 and RN7 for the next stage
82
83
84        MOV        subFFTSize,#2
85        LSR        grpSize,subFFTNum,#1
86        MOV        subFFTNum,grpSize
87
88
89        @// pT0+1 increments pT0 by 8 bytes
90        @// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
91        @// Note: outPointStep = pointStep for firststage
92        @// Note: setCount = grpSize/2 (reuse the updated grpSize for setCount)
93
94        MOV        pointStep,grpSize,LSL #3
95        RSB        step,pointStep,#8
96
97
98        @// Loop on the sets for grp zero
99
100grpZeroSetLoop\name :
101
102        VLD1    dX0,[pSrc],pointStep
103        VLD1    dX1,[pSrc],step                   @// step = -pointStep + 8
104        SUBS    setCount,setCount,#1
105
106        VADD    dY0,dX0,dX1
107        VSUB    dY1,dX0,dX1
108
109        VST1    dY0,[pDst],outPointStep
110        @// dstStep =  step = -pointStep + 8
111        VST1    dY1,[pDst],dstStep
112
113        BGT     grpZeroSetLoop\name
114
115
116        @// reset pSrc to pDst for the next stage
117        SUB     pSrc,pDst,pointStep                     @// pDst -= 2*grpSize
118        MOV     pDst,pPingPongBuf
119
120        .endm
121
122
123
@// Forward-transform entry point for the first radix-2 stage.
@// The body is a single expansion of FFTSTAGE with the label suffix fwd,
@// so its loop label is grpZeroSetLoopfwd. FFTSTAGE does not reference its
@// first two arguments, so the "FALSE","FALSE" values passed here do not
@// change the emitted instructions.
@// NOTE(review): M_START and M_END are not defined in this file; they
@// presumably come from armCOMM_s.h and emit the function prologue and
@// epilogue, with r4 presumably naming the registers to preserve - confirm.
@// Register interface: see the #define table and FFTSTAGE above.
124        M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
125        FFTSTAGE "FALSE","FALSE",fwd
126        M_END
127
128
129
@// Inverse-transform entry point for the first radix-2 stage.
@// The body is a single expansion of FFTSTAGE with the label suffix inv,
@// so its loop label is grpZeroSetLoopinv. FFTSTAGE does not reference its
@// inverse argument, so passing "TRUE" here yields the same instruction
@// sequence as the forward entry point, apart from the label name.
@// NOTE(review): M_START and M_END are not defined in this file; they
@// presumably come from armCOMM_s.h and emit the function prologue and
@// epilogue, with r4 presumably naming the registers to preserve - confirm.
130        M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
131        FFTSTAGE "FALSE","TRUE",inv
132        M_END
133
134	.end
135