1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
11@//  to support float instead of SC32.
12@//
13
14@//
15@// Description:
@// Compute a radix-2 FFT stage for an N-point complex signal
17@//
18@//
19
20
21@// Include standard headers
22
23#include "dl/api/arm/armCOMM_s.h"
24#include "dl/api/arm/omxtypes_s.h"
25
26@//        M_VARIANTS ARM1136JS
27
28@// Import symbols required from other files
29@// (For example tables)
30
31
32
33
34@// Set debugging level
35@//DEBUG_ON    SETL {TRUE}
36
37
38
39@// Guarding implementation by the processor name
40
41@//    IF  ARM1136JS
42
@// Register aliases
@//
@// These names are expanded by the C preprocessor before the assembler
@// runs, so any comment placed on a #define line uses the /* ... */ form.

@//Input Registers

#define pSrc            r0                   /* pointer the stage loads its inputs from */
#define pDst            r2                   /* pointer the stage stores its outputs to */
#define pTwiddle        r1                   /* not referenced anywhere in this file */
#define pPingPongBuf    r5                   /* copied into pDst when the stage finishes */
#define subFFTNum       r6                   /* halved by the stage */
#define subFFTSize      r7                   /* set to 2 by the stage */


@//Output Registers


@//Local Scratch Registers
@//
@// grpSize and setCount deliberately name the same register (r14), as do
@// outPointStep and pointStep (r12): the stage reuses the halved group
@// size as its loop counter and uses one stride for loads and stores.

#define pDstBuf         r3                   /* Temporarily hold pingpong buffer ptr; not referenced in this file */
#define grpSize         r14
#define outPointStep    r12
#define setCount        r14
#define pointStep       r12

@// Real and Imaginary parts
@// x0, x1 are the two inputs, y0, y1 the two outputs; each pair occupies
@// consecutive single-precision registers so it can be moved with one
@// vldm or vstm.
#define x0r s0
#define x0i s1
#define x1r s2
#define x1i s3
#define y1r s4
#define y1i s5
#define y0r s6
#define y0i s7
73
74
75
@// FFTSTAGE scaled, inverse, name
@//
@// Body of the first radix-2 stage for complex single-precision data,
@// written with scalar VFP instructions. Each loop iteration reads two
@// complex values that lie pointStep bytes apart, and writes their sum and
@// their difference, again pointStep bytes apart:
@//
@//     x0 = [pSrc]                 x1 = [pSrc + pointStep]
@//     [pDst]             = x0 + x1
@//     [pDst + pointStep] = x0 - x1
@//
@// Macro arguments:
@//     scaled, inverse   not referenced by the body
@//     name              suffix appended to the loop label so that every
@//                       expansion gets its own label
@//
@// Registers on entry:
@//     pSrc              first input value
@//     pDst              first output value
@//     pPingPongBuf      value to hand back in pDst
@//     subFFTNum         count that is halved to give the iteration count
@//
@// Registers on exit:
@//     subFFTSize        2
@//     subFFTNum         entry value shifted right by one
@//     pSrc              entry value of pDst (when at least one full pass
@//                       of subFFTNum / 2 iterations ran)
@//     pDst              pPingPongBuf
@//
@// Clobbers: r12, r14, s0-s7 and the condition flags.
@//
@// NOTE(review): the loop body runs before the counter is tested, so an
@// entry value of subFFTNum below 2 would still perform one butterfly.
@// Presumably callers guarantee subFFTNum is at least 2 - confirm.

        .macro FFTSTAGE scaled, inverse, name

        @// After this stage every sub-FFT has size 2. Halve the group
        @// count right away and publish it in subFFTNum; the halved value
        @// also stays in grpSize for the loop below.

        mov     subFFTSize, #2
        lsr     grpSize, subFFTNum, #1
        mov     subFFTNum, grpSize

        @// One complex float occupies 8 bytes, so the two inputs of a
        @// butterfly are pointStep = 8 * grpSize bytes apart.
        @// Note: outPointStep = pointStep for the first stage (both are r12)
        @// Note: setCount is the same register as grpSize (r14), so the
        @//       loop runs grpSize times.
        lsl     pointStep, grpSize, #3



        @// Loop on the sets for grp zero

grpZeroSetLoop\name:

        @// {x1r,x1i} = [pSrc + pointStep]; pSrc is restored afterwards
        add      pSrc, pSrc, pointStep
        vldm.f32 pSrc, {x1r, x1i}
        sub      pSrc, pSrc, pointStep
        @// {x0r,x0i} = [pSrc], then pSrc advances by 8 bytes
        vldm.f32 pSrc!, {x0r, x0i}

        subs    setCount, setCount, #1          @// decrement the loop counter



        @// y1 = x0 - x1
        vsub.f32     y1r, x0r, x1r
        vsub.f32     y1i, x0i, x1i

        @// y0 = x0 + x1
        vadd.f32     y0r, x0r, x1r
        vadd.f32     y0i, x0i, x1i

        @// [pDst + outPointStep] = {y1r,y1i}; pDst is restored afterwards
        add     pDst, pDst, outPointStep
        vstm    pDst, {y1r, y1i}
        sub     pDst, pDst, outPointStep
        @// [pDst] = {y0r,y0i}, then pDst advances by 8 bytes
        vstm    pDst!, {y0r, y0i}

        @// Branches on the flags written by the subs above; the add, sub
        @// and VFP instructions in between carry no flag-setting suffix.
        bgt     grpZeroSetLoop\name


        @// Rewind to the start of the data just written and hand it back
        @// in pSrc: pSrc = pDst - pointStep. Then point pDst at the
        @// ping-pong buffer.
        sub     pSrc, pDst, pointStep
        mov     pDst, pPingPongBuf

        .endm
127
128
@// Entry points
@//
@// The forward and inverse variants expand the same stage body: FFTSTAGE
@// does not reference its first two arguments, and the third one (FWD or
@// INV) only keeps the loop labels of the two expansions distinct.
@//
@// NOTE(review): M_START and M_END are defined in armCOMM_s.h, which is not
@// visible here. The r4 argument presumably selects the registers that are
@// saved around the body - confirm against that header. The stage body
@// writes r14, so it relies on the return address being preserved there.

        M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END

        M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


@//    ENDIF                                                           @//ARM1136JS


@// Guarding implementation by the processor name



    .end
146