1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12
13@//
14@//
15@// File Name:  armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   5892
18@// Last Modified Date:       Thu, 07 Jun 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute a Radix 2 FFT stage for a N point complex signal
26@//
27@//
28
29
30@// Include standard headers
31
32#include "dl/api/arm/armCOMM_s.h"
33#include "dl/api/arm/omxtypes_s.h"
34
35
36@// Import symbols required from other files
37@// (For example tables)
38
39
40
41@// Set debugging level
42@//DEBUG_ON    SETL {TRUE}
43
44
45
46@// Guarding implementation by the processor name
47
48
49
50
51    @// Guarding implementation by the processor name
52
53
@// Input registers
@//
@// Core registers that carry the stage arguments into FFTSTAGE, listed in
@// register order:
@//   pSrc       (r0) : read pointer, consumed with post-incrementing VLD2
@//   pTwiddle   (r1) : twiddle read pointer, consumed with post-incrementing VLD1
@//   pDst       (r2) : write pointer, consumed with post-incrementing VST2
@//   subFFTNum  (r6) : halved by the stage before use
@//   subFFTSize (r7) : doubled by the stage and left for the next one
@//
@// Keep comments off the #define lines themselves so that nothing but the
@// register name can end up in the expansion.

#define pSrc                            r0
#define pTwiddle                        r1
#define pDst                            r2
#define subFFTNum                       r6
#define subFFTSize                      r7
61
62
63@//Output Registers
64
65
@// Local scratch registers
@//
@// All of these are overwritten by FFTSTAGE, listed in register order:
@//   outPointStep (r3)  : byte distance between the two outputs of a butterfly
@//   pointStep    (r4)  : byte distance between the two inputs of a butterfly
@//   grpCount     (r5)  : outer (group) loop counter, decremented by 2
@//   setCount     (r8)  : inner (set) loop counter, decremented by 4
@//   pTmp         (r9)  : holds pDst while pSrc and pDst are exchanged
@//   step         (r10) : 16 - pointStep, post-increment of the second load
@//   dstStep      (r11) : 16 - outPointStep, post-increment of the second store

#define outPointStep                    r3
#define pointStep                       r4
#define grpCount                        r5
#define setCount                        r8
#define pTmp                            r9
#define step                            r10
#define dstStep                         r11
75
@// NEON registers
@//
@//   dW        (D0)     : twiddle; only lanes [0] and [1] are referenced
@//   dX0, dX1  (D2, D3) : first input of the butterfly, real and imaginary parts
@//   dX2, dX3  (D4, D5) : second input, real and imaginary parts; later reused
@//                        for the narrowed twiddle product
@//   qT0, qT1  (Q3, Q4) : 32-bit accumulators for the twiddle product
@//   dY0..dY3  (D6..D9) : butterfly outputs
@//
@// Q3 is the register pair D6:D7 and Q4 is D8:D9, so qT0/qT1 share storage
@// with dY0..dY3. FFTSTAGE narrows qT0/qT1 into dX2/dX3 before it writes any
@// dY register, which is what makes the sharing safe.

#define dW                              D0.S16

#define dX0                             D2.S16
#define dX1                             D3.S16
#define dX2                             D4.S16
#define dX3                             D5.S16

#define qT0                             Q3.S32
#define qT1                             Q4.S32

#define dY0                             D6.S16
#define dY1                             D7.S16
#define dY2                             D8.S16
#define dY3                             D9.S16
89
90
91
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the body of one radix-2 FFT stage over complex data made of signed
@// 16-bit real/imaginary pairs (4 bytes per point).
@//
@//   scaled  : "TRUE" selects the halving forms VHADD/VHSUB for the butterfly;
@//             any other value selects plain VADD/VSUB.
@//   inverse : "TRUE" multiplies the second input by the conjugate of the
@//             twiddle; any other value multiplies by the twiddle as loaded.
@//   name    : suffix appended to the two loop labels so that every expansion
@//             of the macro gets its own labels.
@//
@// Registers read on entry : pSrc, pDst, pTwiddle, subFFTNum, subFFTSize
@// Registers on exit       : subFFTNum halved, subFFTSize doubled, pSrc and
@//                           pDst exchanged (see the end of the macro),
@//                           pTwiddle moved back by outPointStep bytes
@// Also overwritten        : outPointStep, pointStep, grpCount, setCount,
@//                           step, dstStep, pTmp, the condition flags,
@//                           D0 and D2-D9
@//
@// NOTE(review): every pass of the inner loop handles four points and the
@// counter starts at grpSize, so this looks like it expects grpSize to be a
@// multiple of 4 -- confirm against the callers.
        .macro FFTSTAGE scaled, inverse, name

        @// ---- Stage geometry ------------------------------------------------
        @// The counts are updated in place so the same registers can be handed
        @// to the next stage.

        LSL     grpCount,subFFTSize,#1              @// grpCount = 2 * subFFTSize
        LSR     subFFTNum,subFFTNum,#1              @// grpSize  = subFFTNum / 2
        MOV     subFFTSize,grpCount                 @// subFFTSize for the next stage

        LSL     pointStep,subFFTNum,#1              @// 2 * grpSize

        @// outPointStep = grpCount * (2 * grpSize), in bytes.
        @// SMULBB multiplies the signed low halfwords of its operands only.
        SMULBB  outPointStep,grpCount,pointStep

        LSL     pointStep,pointStep,#1              @// 4 * grpSize = grpSize points, in bytes

        @// Each VLD2/VST2 below moves 16 bytes. The second access of every
        @// pair undoes the large stride and moves on by those 16 bytes.
        RSB     dstStep,outPointStep,#16            @// dstStep = 16 - outPointStep
        RSB     step,pointStep,#16                  @// step    = 16 - pointStep

        @// ---- Outer loop: one twiddle per group ------------------------------

nextGroup\name:

        VLD1    dW,[pTwiddle],pointStep             @// dW[0] = wr, dW[1] = wi
        LSR     setCount,pointStep,#2               @// setCount = grpSize

        @// ---- Inner loop: four butterflies per pass --------------------------

nextSet\name:

        @// De-interleaving loads: real parts land in the even register of each
        @// pair, imaginary parts in the odd one.
        VLD2    {dX0,dX1},[pSrc],pointStep          @// first input:  dX0 = re, dX1 = im
        VLD2    {dX2,dX3},[pSrc],step               @// second input: dX2 = re, dX3 = im

        @// The flags set here are consumed by the BGT after the stores; no
        @// instruction in between changes them.
        SUBS    setCount,setCount,#4

        @// Twiddle product. Both variants start with the wr terms and differ
        @// only in the sign of the wi terms.
        VMULL   qT0,dX2,dW[0]                       @// re * wr
        VMULL   qT1,dX3,dW[0]                       @// im * wr

        .ifnes  "\inverse", "TRUE"
            VMLSL   qT0,dX3,dW[1]                   @// real part: re*wr - im*wi
            VMLAL   qT1,dX2,dW[1]                   @// imag part: im*wr + re*wi
        .else
            VMLAL   qT0,dX3,dW[1]                   @// real part: re*wr + im*wi
            VMLSL   qT1,dX2,dW[1]                   @// imag part: im*wr - re*wi
        .endif

        @// Round and narrow the 32-bit products by 15 bits back to 16 bits.
        @// This must happen before any dY register is written because
        @// qT0/qT1 occupy the same D registers as dY0..dY3.
        VRSHRN  dX2,qT0,#15
        VRSHRN  dX3,qT1,#15

        @// Butterfly: dY2/dY3 = first + product, dY0/dY1 = first - product.
        .ifnes  "\scaled", "TRUE"
            VADD    dY2,dX0,dX2
            VADD    dY3,dX1,dX3
            VSUB    dY0,dX0,dX2
            VSUB    dY1,dX1,dX3
        .else
            VHADD   dY2,dX0,dX2
            VHADD   dY3,dX1,dX3
            VHSUB   dY0,dX0,dX2
            VHSUB   dY1,dX1,dX3
        .endif

        @// Re-interleaving stores: the differences go to pDst, the sums go
        @// outPointStep bytes further on.
        VST2    {dY0,dY1},[pDst],outPointStep
        VST2    {dY2,dY3},[pDst],dstStep            @// net advance of pDst: 16 bytes

        BGT     nextSet\name

        @// The inner loop moved pSrc across the first inputs of this group;
        @// skip the second inputs as well to reach the next group.
        ADD     pSrc,pSrc,pointStep
        SUBS    grpCount,grpCount,#2
        BGT     nextGroup\name

        @// ---- Prepare the pointers for the next stage ------------------------

        @// pTwiddle moves back by outPointStep bytes.
        SUB     pTwiddle,pTwiddle,outPointStep

        @// Exchange the buffers: the new pDst is pSrc moved back by
        @// 2*outPointStep bytes, the new pSrc is the old pDst moved back by
        @// outPointStep bytes.
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1
        SUB     pSrc,pTmp,outPointStep

        .endm
190
191
192
@// armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
@//
@// Forward radix-2 stage without scaling: FFTSTAGE is expanded with
@// scaled = "FALSE" and inverse = "FALSE"; FWD is the loop-label suffix.
@// M_START and M_END are defined outside this file.
@// NOTE(review): the stage body overwrites r0-r11, D0 and D2-D9, while only
@// r4 is named here -- confirm what M_START preserves and what the callers
@// of this "unsafe" entry point expect.
        M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END
196
197
198
@// armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
@//
@// Inverse radix-2 stage without scaling: FFTSTAGE is expanded with
@// scaled = "FALSE" and inverse = "TRUE"; INV is the loop-label suffix.
@// M_START and M_END are defined outside this file.
@// NOTE(review): the stage body overwrites r0-r11, D0 and D2-D9, while only
@// r4 is named here -- confirm what M_START preserves and what the callers
@// of this "unsafe" entry point expect.
        M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END
202
203
204
@// armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
@//
@// Forward radix-2 stage with scaling: FFTSTAGE is expanded with
@// scaled = "TRUE" (halving add/sub in the butterfly) and inverse = "FALSE";
@// FWDSFS is the loop-label suffix.
@// M_START and M_END are defined outside this file.
@// NOTE(review): the stage body overwrites r0-r11, D0 and D2-D9, while only
@// r4 is named here -- confirm what M_START preserves and what the callers
@// of this "unsafe" entry point expect.
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END
208
209
210
@// armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
@//
@// Inverse radix-2 stage with scaling: FFTSTAGE is expanded with
@// scaled = "TRUE" (halving add/sub in the butterfly) and inverse = "TRUE";
@// INVSFS is the loop-label suffix.
@// M_START and M_END are defined outside this file.
@// NOTE(review): the stage body overwrites r0-r11, D0 and D2-D9, while only
@// r4 is named here -- confirm what M_START preserves and what the callers
@// of this "unsafe" entry point expect.
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END
214
215
216
217
218
219    .end
220