1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12
13@//
14@//
15@// File Name:  armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   6741
18@// Last Modified Date:       Wed, 18 Jul 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute a Radix 2 FFT stage for an N-point complex signal
26@//
27@//
28
29
30@// Include standard headers
31
32#include "dl/api/arm/armCOMM_s.h"
33#include "dl/api/arm/omxtypes_s.h"
34
35
36@// Import symbols required from other files
37@// (For example tables)
38
39
40
41
42@// Set debugging level
43@//DEBUG_ON    SETL {TRUE}
44
45
46@// Guarding implementation by the processor name
47
48
49
50
51
52
53
54@// Guarding implementation by the processor name
55
56
57@//Input Registers
@// Core-register aliases used by the FFTSTAGE macro below. The macro reads
@// pSrc, pTwiddle, pDst and subFFTSize; subFFTNum is only written (set to 1).
@// Keep comments on their own lines here: these are C-preprocessor defines,
@// so anything left on the define line becomes part of the expansion.
58
59#define pSrc                            r0
60#define pDst                            r2
61#define pTwiddle                        r1
62#define subFFTNum                       r6
63#define subFFTSize                      r7
64
65
66@//Output Registers
67
68
69@//Local Scratch Registers
@// grpCount and pTmp both name r4. In FFTSTAGE grpCount is the loop counter
@// and pTmp is used only after that loop has finished, so the two uses do
@// not overlap.
70
71
72#define outPointStep                    r3
73#define grpCount                        r4
74#define dstStep                         r5
75#define pTmp                            r4
76#define step                            r8
77
78@// Neon Registers
@// D0-D9 are addressed as vectors of signed 16-bit lanes: twiddle (dW*),
@// input (dX*) and output (dY*) real/imaginary parts. Q5 and Q6 (which
@// overlay D10-D13) are addressed as signed 32-bit lanes and hold the
@// widened products.
79
80#define dWr                             D0.S16
81#define dWi                             D1.S16
82#define dXr0                            D2.S16
83#define dXi0                            D3.S16
84#define dXr1                            D4.S16
85#define dXi1                            D5.S16
86#define dYr0                            D6.S16
87#define dYi0                            D7.S16
88#define dYr1                            D8.S16
89#define dYi1                            D9.S16
90#define qT0                             Q5.S32
91#define qT1                             Q6.S32
92
93
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one radix-2 butterfly pass. Each loop iteration handles two
@// butterflies, one in 16-bit lane 0 and one in lane 1 of the D registers.
@// Only lanes 0 and 1 are ever loaded or stored; whatever the other lanes
@// contain is computed on but never written to memory.
@//
@// Macro arguments:
@//   scaled  - "TRUE" selects the halving butterfly (VHADD/VHSUB); any other
@//             string selects the plain VADD/VSUB butterfly.
@//   inverse - "TRUE" multiplies the second input by the conjugated twiddle
@//             (re = xr*wr + xi*wi, im = xi*wr - xr*wi); any other string
@//             uses the direct product (re = xr*wr - xi*wi,
@//             im = xi*wr + xr*wi).
@//   name    - suffix appended to the loop label so that each expansion of
@//             the macro gets a distinct label.
@//
@// Read on entry : pSrc, pTwiddle, pDst, subFFTSize.
@// Set on exit   : subFFTNum  = 1
@//                 subFFTSize = 2 * entry value
@//                 pSrc       = entry pDst, pDst = entry pSrc (swapped)
@//                 pTwiddle   = entry value (rewound)
@// The pointer rewind matches what the loop consumed only when the entry
@// subFFTSize is even, because every iteration consumes two groups.
@// NOTE(review): presumably callers guarantee subFFTSize >= 2 - confirm.
@// Also overwritten: outPointStep, grpCount/pTmp, dstStep, step, the NEON
@// registers behind dWr..dYi1, qT0 and qT1, and the condition flags.
94        .macro FFTSTAGE scaled, inverse, name
95
96
97        MOV     outPointStep,subFFTSize,LSL #2          @// outPointStep = 4*subFFTSize (byte offset)
98        @// Update grpCount and grpSize right away
99
100        MOV     subFFTNum,#1                            @// subFFTNum = 1 after the last stage
101        LSL     grpCount,subFFTSize,#1                  @// loop counter = 2*subFFTSize
102
103        @// update subFFTSize for the next stage
104        MOV     subFFTSize,grpCount
105
106        SUB      step,outPointStep,#4                   @// step = -4+outPointStep
107        RSB      dstStep,step,#0                        @// dstStep = 4-outPointStep = -step
108        @//RSB      dstStep,outPointStep,#16
109
110
111        @// Loop on 2 grps at a time for the last stage
112
113grpLoop\name:
           @// One (wr,wi) twiddle pair per group; each load advances pTwiddle by 4 bytes.
114        VLD2    {dWr[0],dWi[0]},[pTwiddle]!             @// grp 0
115        VLD2    {dWr[1],dWi[1]},[pTwiddle]!             @// grp 1
116
117        @//VLD2    {dWr,dWi},[pTwiddle],#16
118
           @// Each group is four consecutive 16-bit values xr0,xi0,xr1,xi1;
           @// each load advances pSrc by 8 bytes.
119        VLD4    {dXr0[0],dXi0[0],dXr1[0],dXi1[0]},[pSrc]!   @// grp 0
120        VLD4    {dXr0[1],dXi0[1],dXr1[1],dXi1[1]},[pSrc]!   @// grp 1
121
122
123        @//VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc],#32
           @// The counter holds twice the number of groups left and two groups
           @// are done per iteration, hence the decrement by 4. The flags set
           @// here are the ones tested by the BGT at the bottom of the loop.
124        SUBS    grpCount,grpCount,#4
125
           @// 16x16 -> 32 bit complex multiply of the second input by the twiddle.
126        .ifeqs  "\inverse", "TRUE"
127            VMULL   qT0,dXr1,dWr
128            VMLAL   qT0,dXi1,dWi                       @// real part
129            VMULL   qT1,dXi1,dWr
130            VMLSL   qT1,dXr1,dWi                       @// imag part
131
132        .else
133            VMULL   qT0,dXr1,dWr
134            VMLSL   qT0,dXi1,dWi                       @// real part
135            VMULL   qT1,dXi1,dWr
136            VMLAL   qT1,dXr1,dWi                       @// imag part
137
138        .endif
139
           @// Rounding shift right by 15 and narrow the products back to 16 bits;
           @// dXr1/dXi1 now hold the twiddled second input.
140        VRSHRN  dXr1,qT0,#15
141        VRSHRN  dXi1,qT1,#15
142
143
           @// Butterfly: dY*0 = first input - product, dY*1 = first input + product.
144        .ifeqs "\scaled", "TRUE"
145
146            VHSUB    dYr0,dXr0,dXr1
147            VHSUB    dYi0,dXi0,dXi1
148            VHADD    dYr1,dXr0,dXr1
149            VHADD    dYi1,dXi0,dXi1
150
151        .else
152
153            VSUB    dYr0,dXr0,dXr1
154            VSUB    dYi0,dXi0,dXi1
155            VADD    dYr1,dXr0,dXr1
156            VADD    dYi1,dXi0,dXi1
157
158
159        .endif
160
           @// Differences for both groups go to pDst and pDst+4, sums go
           @// outPointStep bytes further on. The four post-increments add up
           @// to +8, so the next iteration continues right after this pair.
161        VST2    {dYr0[0],dYi0[0]},[pDst]!
162        VST2    {dYr0[1],dYi0[1]},[pDst],step               @// step = -4+outPointStep
163
164        VST2    {dYr1[0],dYi1[0]},[pDst]!
165        VST2    {dYr1[1],dYi1[1]},[pDst],dstStep            @// dstStep = 4-outPointStep = -step
166
167        @//VST2    {dYr0,dYi0},[pDst],outPointStep
168        @//VST2    {dYr1,dYi1},[pDst],dstStep                  @// dstStep =  step = -outPointStep + 16
169
170        BGT     grpLoop\name
171
172
173        @// Reset and Swap pSrc and pDst for the next stage
           @// The loop moved pSrc forward by 2*outPointStep bytes and pDst by
           @// outPointStep bytes in total. pTmp reuses r4, which is free now
           @// that the loop counter is no longer needed.
174        MOV     pTmp,pDst
175        SUB     pDst,pSrc,outPointStep,LSL #1       @// new pDst = start of the buffer just read
176        SUB     pSrc,pTmp,outPointStep              @// new pSrc = start of the buffer just written
177
178        @// Reset pTwiddle for the next stage
179        SUB     pTwiddle,pTwiddle,outPointStep      @// the loop advanced pTwiddle by outPointStep bytes
180
181        .endm
182
183
184
@// Forward, unscaled variant: expands FFTSTAGE with scaled="FALSE" and
@// inverse="FALSE"; FWD is the suffix for the loop label.
@// NOTE(review): M_START/M_END come from armCOMM_s.h (not shown here) and
@// are given only r4, while FFTSTAGE also writes r0-r3, r5-r8 and NEON
@// registers up to Q6. Presumably the callers of this "unsafe" routine
@// preserve whatever they need - confirm.
185        M_START armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
186        FFTSTAGE "FALSE","FALSE",FWD
187        M_END
188
189
190
@// Inverse, unscaled variant: expands FFTSTAGE with scaled="FALSE" and
@// inverse="TRUE" (conjugated twiddle multiply); INV is the suffix for the
@// loop label.
@// NOTE(review): M_START/M_END come from armCOMM_s.h (not shown here) and
@// are given only r4, while FFTSTAGE also writes r0-r3, r5-r8 and NEON
@// registers up to Q6. Presumably the callers of this "unsafe" routine
@// preserve whatever they need - confirm.
191        M_START armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
192        FFTSTAGE "FALSE","TRUE",INV
193        M_END
194
195
196
@// Forward, scaled variant: expands FFTSTAGE with scaled="TRUE" (halving
@// VHADD/VHSUB butterfly) and inverse="FALSE"; FWDSFS is the suffix for the
@// loop label.
@// NOTE(review): M_START/M_END come from armCOMM_s.h (not shown here) and
@// are given only r4, while FFTSTAGE also writes r0-r3, r5-r8 and NEON
@// registers up to Q6. Presumably the callers of this "unsafe" routine
@// preserve whatever they need - confirm.
197        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
198        FFTSTAGE "TRUE","FALSE",FWDSFS
199        M_END
200
201
202
@// Inverse, scaled variant: expands FFTSTAGE with scaled="TRUE" (halving
@// VHADD/VHSUB butterfly) and inverse="TRUE" (conjugated twiddle multiply);
@// INVSFS is the suffix for the loop label.
@// NOTE(review): M_START/M_END come from armCOMM_s.h (not shown here) and
@// are given only r4, while FFTSTAGE also writes r0-r3, r5-r8 and NEON
@// registers up to Q6. Presumably the callers of this "unsafe" routine
@// preserve whatever they need - confirm.
203        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
204        FFTSTAGE "TRUE","TRUE",INVSFS
205        M_END
206
207
208
209
210    .end
211