1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12
13@//
14@//
15@// File Name:  armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   6740
18@// Last Modified Date:       Wed, 18 Jul 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute a Radix 2 FFT stage for a N point complex signal
26@//
27@//
28
29
30@// Include standard headers
31
32#include "dl/api/arm/armCOMM_s.h"
33#include "dl/api/arm/omxtypes_s.h"
34
35
36@// Import symbols required from other files
37@// (For example tables)
38
39
40
41
42@// Set debugging level
43@//DEBUG_ON    SETL {TRUE}
44
45
46
47
48@// Guarding implementation by the processor name
49
50
51@//Input Registers
@// Preprocessor aliases for the ARM core registers that FFTSTAGE reads on
@// entry. FFTSTAGE also rewrites all five before it finishes.
52
@// pSrc: read pointer, advanced by the post-incrementing VLD1 loads.
53#define pSrc                            r0
@// pDst: write pointer, advanced by outPointStep and dstStep after each store.
54#define pDst                            r2
@// pTwiddle: twiddle read pointer, advanced by twStep after each twiddle load.
55#define pTwiddle                        r1
@// subFFTNum: halved by FFTSTAGE (LSR by 1).
56#define subFFTNum                       r6
@// subFFTSize: doubled by FFTSTAGE (LSL by 1).
57#define subFFTSize                      r7
58
59
60@//Output Registers
61
62
63@//Local Scratch Registers
@// Preprocessor aliases for core registers that FFTSTAGE overwrites as
@// temporaries.
64
@// outPointStep: SMULBB product of grpCount and subFFTNum; used as a byte
@// stride for the stores and for rewinding the pointers after the loop.
65#define outPointStep                    r3
@// grpCount: loop counter, starts at 2*subFFTSize and drops by 4 per pass.
66#define grpCount                        r4
@// dstStep: 8 - outPointStep, the second store stride in each store pair.
67#define dstStep                         r5
@// twStep: subFFTNum*2, the post-increment applied to pTwiddle per load.
68#define twStep                          r8
@// pTmp: shares r4 with grpCount; it is only written after the loop has
@// finished, when grpCount is no longer needed.
69#define pTmp                            r4
70
71@// Neon Registers
@// Each alias names a NEON register together with the element type used
@// when the alias appears as an instruction operand.
72
@// Twiddle registers. D0 and D1 each have two aliases: the S32 view is used
@// by the replicating twiddle loads, the S16 view by VUZP and the multiplies.
73#define dW1S32                          D0.S32
74#define dW2S32                          D1.S32
75#define dW1                             D0.S16
76#define dW2                             D1.S16
77
@// Input data loaded from pSrc. dX1 and dX3 are overwritten in place with
@// the twiddle products before the butterflies.
78#define dX0                             D2.S16
79#define dX1                             D3.S16
80#define dX2                             D4.S16
81#define dX3                             D5.S16
@// Butterfly results, stored to pDst.
82#define dY0                             D6.S16
83#define dY1                             D7.S16
84#define dY2                             D8.S16
85#define dY3                             D9.S16
@// 32-bit accumulators for the widening multiplies. Q5 overlays D10-D11 and
@// Q6 overlays D12-D13.
@// NOTE(review): D8-D13 are callee-saved under the AAPCS and no save or
@// restore of them is visible in this file; presumably the callers of these
@// routines preserve them. Confirm against armCOMM_s.h and the callers.
86#define qT0                             Q5.S32
87#define qT1                             Q6.S32
88
89
90        .macro FFTSTAGE scaled, inverse, name
        @// FFTSTAGE: body of one radix-2 FFT stage over 16-bit data.
        @//
        @// Macro arguments:
        @//   scaled   the string TRUE selects halving butterflies (VHSUB/VHADD);
        @//            any other value selects plain VSUB/VADD.
        @//   inverse  the string TRUE selects the VMLAL-real / VMLSL-imag form
        @//            of the twiddle product; any other value selects the
        @//            VMLSL-real / VMLAL-imag form.
        @//   name     suffix appended to the loop label so that every expansion
        @//            of the macro gets a distinct label.
        @//
        @// Register inputs : pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
        @// Register outputs: subFFTSize is doubled, subFFTNum is halved, and
        @//                   pSrc, pDst and pTwiddle are rewritten after the loop.
        @// Overwrites      : outPointStep, grpCount/pTmp, dstStep, twStep,
        @//                   D0-D9, Q5, Q6 and the condition flags.
91
92        @// Define stack arguments
93
94
95        @// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
96
97
        @// grpCount = 2 * subFFTSize
98        LSL     grpCount,subFFTSize,#1
99
100
101        @// update subFFTSize for the next stage
102        MOV     subFFTSize,grpCount
103
104        @// pOut0+1 increments pOut0 by 8 bytes
105        @// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
        @// SMULBB multiplies the signed bottom halfwords of its two operands, so
        @// only the low 16 bits of grpCount and subFFTNum take part.
106        SMULBB  outPointStep,grpCount,subFFTNum
        @// twStep = 2 * subFFTNum, computed before subFFTNum is halved below.
107        MOV     twStep,subFFTNum,LSL #1
108        LSR     subFFTNum,subFFTNum,#1                      @//grpSize
109
110
        @// dstStep = 8 - outPointStep. A store that steps by outPointStep followed
        @// by one that steps by dstStep moves pDst forward by 8 bytes in total.
111        RSB      dstStep,outPointStep,#8
112
113
114        @// Note: pointStep is 8 in this case: so no need of an extra reg
115        @// Loop on the groups: 2 groups at a time
116
117grpLoop\name:
118
        @// Each load reads one 32-bit element, replicates it across the D
        @// register, and then advances pTwiddle by twStep.
119        VLD1     dW1S32[],[pTwiddle],twStep                @//[wi | wr]
120        VLD1     dW2S32[],[pTwiddle],twStep                @// second twiddle, same layout
121
122        @// Process the sets for each grp:  2 sets at a time (no set looping required)
123
124        VLD1    dX0,[pSrc]!            @// point0: of set0,set1 of grp0
125        VLD1    dX1,[pSrc]!            @// point1: of set0,set1 of grp0
126        VLD1    dX2,[pSrc]!            @// point0: of set0,set1 of grp1
127        VLD1    dX3,[pSrc]!            @// point1: of set0,set1 of grp1
128
        @// The flags set by SUBS are consumed by the BGT at the bottom of the
        @// loop; every instruction in between is a NEON instruction.
129        SUBS    grpCount,grpCount,#4              @// decrement the loop counter
        @// VUZP de-interleaves the 16-bit lanes of a register pair: the
        @// even-numbered lanes end up in the first operand and the odd-numbered
        @// lanes in the second. Going by the comments in this macro, that leaves
        @// the real parts in dW1/dX1 and the imaginary parts in dW2/dX3.
130        VUZP    dW1,dW2
131        VUZP    dX1,dX3
132
        @// Widening 16x16 -> 32 bit multiply-accumulate of the point1 data by
        @// the twiddles. The two branches differ only in which accumulate is
        @// an add (VMLAL) and which is a subtract (VMLSL).
133        .ifeqs  "\inverse", "TRUE"
134            VMULL   qT0,dX1,dW1
135            VMLAL   qT0,dX3,dW2                       @// real part
136            VMULL   qT1,dX3,dW1
137            VMLSL   qT1,dX1,dW2                       @// imag part
138
139        .else
140            VMULL   qT0,dX1,dW1
141            VMLSL   qT0,dX3,dW2                       @// real part
142            VMULL   qT1,dX3,dW1
143            VMLAL   qT1,dX1,dW2                       @// imag part
144
145        .endif
146
        @// Rounding shift right by 15 and narrow back to 16 bits; the products
        @// overwrite dX1 and dX3.
147        VRSHRN  dX1,qT0,#15
148        VRSHRN  dX3,qT1,#15
149
        @// Re-interleave so dX1 and dX3 have the same lane layout as dX0 and dX2.
150        VZIP    dX1,dX3
151
152
        @// Butterflies: dY0/dY2 take the difference and dY1/dY3 the sum. The
        @// scaled branch uses the halving forms of the same operations.
153        .ifeqs "\scaled", "TRUE"
154
155            VHSUB    dY0,dX0,dX1
156            VHADD    dY1,dX0,dX1
157            VHSUB    dY2,dX2,dX3
158            VHADD    dY3,dX2,dX3
159
160        .else
161
162            VSUB    dY0,dX0,dX1
163            VADD    dY1,dX0,dX1
164            VSUB    dY2,dX2,dX3
165            VADD    dY3,dX2,dX3
166
167
168
169        .endif
170
        @// Each pair of stores writes at pDst and at pDst + outPointStep and
        @// leaves pDst 8 bytes past where the pair started.
171        VST1    dY0,[pDst],outPointStep             @// point0: of set0,set1 of grp0
172        VST1    dY1,[pDst],dstStep                  @// dstStep = -outPointStep + 8
173        VST1    dY2,[pDst],outPointStep             @// point0: of set0,set1 of grp1
174        VST1    dY3,[pDst],dstStep                  @// point1: of set0,set1 of grp1
175
176
        @// Loop while the SUBS result above was greater than zero.
177        BGT     grpLoop\name
178
179
180        @// Reset and Swap pSrc and pDst for the next stage
        @// pTmp reuses r4, which is free now that the loop has ended.
        @// New pDst = old pSrc - 2*outPointStep; new pSrc = old pDst - outPointStep.
181        MOV     pTmp,pDst
182        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 2*size; pSrc -= 4*size bytes
183        SUB     pSrc,pTmp,outPointStep
184
185        @// Reset pTwiddle for the next stage
186        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 2*size bytes
187
188        .endm
189
190
191
192        M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
        @// Forward, unscaled variant. FFTSTAGE is expanded with scaled=FALSE and
        @// inverse=FALSE: the butterflies use VSUB/VADD and the twiddle product
        @// uses VMLSL for the real part and VMLAL for the imaginary part.
        @// FWD is the loop label suffix for this expansion.
        @// NOTE(review): M_START and M_END are defined outside this file; the
        @// r4 argument presumably names the core registers to preserve. Confirm
        @// in armCOMM_s.h.
193        FFTSTAGE "FALSE","FALSE",FWD
194        M_END
195
196
197
198        M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
        @// Inverse, unscaled variant. FFTSTAGE is expanded with scaled=FALSE and
        @// inverse=TRUE: the butterflies use VSUB/VADD and the twiddle product
        @// uses VMLAL for the real part and VMLSL for the imaginary part.
        @// INV is the loop label suffix for this expansion.
        @// NOTE(review): M_START and M_END are defined outside this file; the
        @// r4 argument presumably names the core registers to preserve. Confirm
        @// in armCOMM_s.h.
199        FFTSTAGE "FALSE","TRUE",INV
200        M_END
201
202
203
204        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
        @// Forward, scaled variant. FFTSTAGE is expanded with scaled=TRUE and
        @// inverse=FALSE: the butterflies use the halving VHSUB/VHADD and the
        @// twiddle product uses VMLSL for the real part and VMLAL for the
        @// imaginary part. FWDSFS is the loop label suffix for this expansion.
        @// NOTE(review): M_START and M_END are defined outside this file; the
        @// r4 argument presumably names the core registers to preserve. Confirm
        @// in armCOMM_s.h.
205        FFTSTAGE "TRUE","FALSE",FWDSFS
206        M_END
207
208
209
210        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
        @// Inverse, scaled variant. FFTSTAGE is expanded with scaled=TRUE and
        @// inverse=TRUE: the butterflies use the halving VHSUB/VHADD and the
        @// twiddle product uses VMLAL for the real part and VMLSL for the
        @// imaginary part. INVSFS is the loop label suffix for this expansion.
        @// NOTE(review): M_START and M_END are defined outside this file; the
        @// r4 argument presumably names the core registers to preserve. Confirm
        @// in armCOMM_s.h.
211        FFTSTAGE "TRUE","TRUE",INVSFS
212        M_END
213
214
215
216    .end
217