1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This file was originally licensed as follows. It has been
11@//  relicensed with permission from the copyright holders.
12@//
13
14@//
15@// File Name:  armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.s
16@// OpenMAX DL: v1.0.2
17@// Last Modified Revision:   7493
18@// Last Modified Date:       Mon, 24 Sep 2007
19@//
20@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21@//
22@//
23@//
24@// Description:
25@// Compute the last stage of a radix-2 DIT in-order, out-of-place FFT
26@// for an N-point complex signal.
27@//
28
29
30
31@// Include standard headers
32
33#include "dl/api/arm/armCOMM_s.h"
34#include "dl/api/arm/omxtypes_s.h"
35
36
37@// Import symbols required from other files
38@// (For example tables)
39
40
41
42
43@// Set debugging level
44@//DEBUG_ON    SETL {TRUE}
45
46
47@// Guarding implementation by the processor name
48
49
50@// Input registers (general purpose). FFTSTAGE reads and updates pSrc, pDst,
51@// pTwiddle and subFFTSize; subFFTNum is only written (set to 1).
52#define pSrc		r0
53#define pDst		r2
54#define pTwiddle	r1
55#define subFFTNum	r6
56#define subFFTSize	r7
57@// NOTE: keep comments off the #define lines themselves. A trailing @ would join
58@// the macro body and comment out the rest of any instruction using the alias.
59@// Output registers: none are defined separately in this file.
60
61
62@// Local scratch registers
63@// pTmp shares r4 with grpCount; FFTSTAGE only uses pTmp once its loop on
64@// grpCount has finished.
65#define outPointStep	r3
66#define grpCount	r4
67#define dstStep		r5
68#define pTmp		r4
69@// NEON registers. The .s32/.s64 suffix attaches the element type to the operand.
70@// qT0/qT1 (q5/q6) receive the 64-bit products; they overlap d10-d13.
71@// NOTE(review): d8-d13 are callee-saved under AAPCS - confirm the callers preserve them.
72#define dWr	d0.s32
73#define dWi	d1.s32
74#define dXr0	d2.s32
75#define dXi0	d3.s32
76#define dXr1	d4.s32
77#define dXi1	d5.s32
78#define dYr0	d6.s32
79#define dYi0	d7.s32
80#define dYr1	d8.s32
81#define dYi1	d9.s32
82#define qT0	q5.s64
83#define qT1	q6.s64
84
85        .macro FFTSTAGE scaled, inverse, name
86        @// Last radix-2 stage. scaled=TRUE selects the halving add/sub, inverse=TRUE selects the
87        @// conjugated twiddle product; name is appended to the loop label of each expansion.
88        MOV     outPointStep,subFFTSize,LSL #3
89        @// Update grpCount and subFFTSize right away
90        @// (outPointStep above is the incoming subFFTSize * 8, in bytes)
91        MOV     subFFTNum,#1                            @// subFFTNum = 1 after the last stage
92        LSL     grpCount,subFFTSize,#1                  @// grpCount = 2 * subFFTSize, used as the loop counter
93
94        @// update subFFTSize for the next stage
95        MOV     subFFTSize,grpCount
96        @// dstStep = 16 - outPointStep: applied after the second store of each pass
97        RSB      dstStep,outPointStep,#16
98
99
100        @// Loop on 2 grps at a time for the last stage
101        @// Each pass reads 16 bytes of twiddles and 32 bytes of input (both post-incremented).
102grpLoop\name :
103        VLD2    {dWr,dWi},[pTwiddle :64]!               @// 4 words de-interleaved: even words to dWr, odd words to dWi
104
105        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!      @// 8 words de-interleaved with stride 4
106        SUBS    grpCount,grpCount,#4                   @// grpCount was doubled above, so one pass costs 4; flags feed the bgt below
107        @// Complex product of (dXr1,dXi1) with the twiddle, widened to 64 bits in qT0/qT1
108        .ifeqs  "\inverse", "TRUE"
109            VMULL   qT0,dWr,dXr1
110            VMLAL   qT0,dWi,dXi1                       @// real part
111            VMULL   qT1,dWr,dXi1
112            VMLSL   qT1,dWi,dXr1                       @// imag part
113
114        .else
115
116            VMULL   qT0,dWr,dXr1
117            VMLSL   qT0,dWi,dXi1                       @// real part
118            VMULL   qT1,dWr,dXi1
119            VMLAL   qT1,dWi,dXr1                       @// imag part
120
121        .endif
122        @// Rounding, narrowing shift right by 31 brings the products back to 32 bits
123        VRSHRN  dXr1,qT0,#31
124        VRSHRN  dXi1,qT1,#31
125        @// (presumably the twiddles are Q31 fixed point - TODO confirm against the twiddle table)
126
127        .ifeqs "\scaled", "TRUE"
128            @// Scaled variant: halving subtract/add
129            VHSUB    dYr0,dXr0,dXr1
130            VHSUB    dYi0,dXi0,dXi1
131            VHADD    dYr1,dXr0,dXr1
132            VHADD    dYi1,dXi0,dXi1
133
134        .else
135            @// Unscaled variant: plain subtract/add
136            VSUB    dYr0,dXr0,dXr1
137            VSUB    dYi0,dXi0,dXi1
138            VADD    dYr1,dXr0,dXr1
139            VADD    dYi1,dXi0,dXi1
140
141
142        .endif
143        @// Differences are stored at pDst, sums outPointStep bytes further on
144        VST2    {dYr0,dYi0},[pDst],outPointStep
145        VST2    {dYr1,dYi1},[pDst],dstStep                  @// dstStep = 16 - outPointStep, so pDst ends 16 bytes past where this pass began
146        @// Flags still come from the SUBS above; the NEON instructions in between do not change them
147        bgt     grpLoop\name
148
149        @// Rewind amounts below assume subFFTSize was even on entry (whole passes only) - TODO confirm
150        @// Reset and Swap pSrc and pDst for the next stage
151        MOV     pTmp,pDst
152        SUB     pDst,pSrc,outPointStep,LSL #1       @// new pDst = pSrc rewound by the 2*outPointStep bytes it advanced
153        SUB     pSrc,pTmp,outPointStep              @// new pSrc = old pDst rewound by the outPointStep bytes it advanced
154
155        @// Reset pTwiddle for the next stage
156        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle advanced 16 bytes per pass = outPointStep in total
157
158        .endm
159
160@// Forward, unscaled variant: expands FFTSTAGE with scaled=FALSE, inverse=FALSE.
161@// NOTE(review): only this M_START passes an explicit empty third argument - confirm it is equivalent to omitting it.
162        M_START armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4,""
163        FFTSTAGE "FALSE","FALSE",fwd
164        M_END
165
166@// Inverse, unscaled variant: expands FFTSTAGE with scaled=FALSE, inverse=TRUE.
167@// M_START/M_END are not defined in this file (presumably armCOMM_s.h - verify).
168        M_START armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4
169        FFTSTAGE "FALSE","TRUE",inv
170        M_END
171
172@// Forward, scaled variant: expands FFTSTAGE with scaled=TRUE, inverse=FALSE,
173@// so the butterfly uses the halving VHSUB/VHADD forms.
174        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
175        FFTSTAGE "TRUE","FALSE",fwdsfs
176        M_END
177
178@// Inverse, scaled variant: expands FFTSTAGE with scaled=TRUE, inverse=TRUE
179@// (conjugated twiddle product plus the halving VHSUB/VHADD forms).
180        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
181        FFTSTAGE "TRUE","TRUE",invsfs
182        M_END
183
184	.end
185