1@//
2@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3@//
4@//  Use of this source code is governed by a BSD-style license
5@//  that can be found in the LICENSE file in the root of the source
6@//  tree. An additional intellectual property rights grant can be found
7@//  in the file PATENTS.  All contributing project authors may
8@//  be found in the AUTHORS file in the root of the source tree.
9@//
10@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
11@//  to support float instead of SC32.
12@//
13
14@//
15@// Description:
@// Compute the last stage of a radix-2 DIT, in-order, out-of-place FFT
@// for an N-point complex signal.
18@//
19@//
20
21
22@// Include standard headers
23
24#include "dl/api/arm/armCOMM_s.h"
25#include "dl/api/arm/omxtypes_s.h"
26
27
28@// Import symbols required from other files
29@// (For example tables)
30
31
32
33
34@// Set debugging level
35@//DEBUG_ON    SETL {TRUE}
36
37
38@// Guarding implementation by the processor name
39
40
@// Core registers carrying state into (and back out of) the stage.
@// NOTE: comments must stay on their own lines here; a trailing comment on
@// a #define line would become part of the macro expansion.

#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define subFFTNum       r6
#define subFFTSize      r7


@// Output registers: none beyond the in/out registers listed above.


@// Core scratch registers.
@// grpCount and pTmp deliberately share r4: pTmp is only written after the
@// butterfly loop has finished with grpCount.

#define outPointStep    r3
#define grpCount        r4
#define pTmp            r4
#define dstStep         r5


@// NEON registers. Every alias is a 64-bit D register typed as f32,
@// i.e. two single-precision lanes per register.

@// Twiddle factors, split into real and imaginary parts
#define dWr     d0.f32
#define dWi     d1.f32

@// Deinterleaved inputs: X0 (even points) and X1 (odd points)
#define dXr0    d2.f32
#define dXi0    d3.f32
#define dXr1    d4.f32
#define dXi1    d5.f32

@// Butterfly outputs
#define dYr0    d6.f32
#define dYi0    d7.f32
#define dYr1    d8.f32
#define dYi1    d9.f32

@// Twiddle product temporaries. The q prefix is historical: both names
@// map to D registers (d10 and d12), not Q registers.
#define qT0     d10.f32
#define qT1     d12.f32
75
76        .macro FFTSTAGE scaled, inverse, name
77
78
79        MOV     outPointStep,subFFTSize,LSL #3
80        @// Update grpCount and grpSize rightaway
81
82        MOV     subFFTNum,#1                            @//after the last stage
83        LSL     grpCount,subFFTSize,#1
84
85        @// update subFFTSize for the next stage
86        MOV     subFFTSize,grpCount
87
88        RSB      dstStep,outPointStep,#16
89
90
91        @// Loop on 2 grps at a time for the last stage
92
93radix2lsGrpLoop\name :
94        @ dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
95        @ dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
96        VLD2    {dWr,dWi},[pTwiddle :64]!
97
98        @ dXr0 = [pSrc[0].Re, pSrc[2].Re]
99        @ dXi0 = [pSrc[0].Im, pSrc[2].Im]
100        @ dXr1 = [pSrc[1].Re, pSrc[3].Re]
101        @ dXi1 = [pSrc[1].Im, pSrc[3].Im]
102        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
103        SUBS    grpCount,grpCount,#4                   @// grpCount is multiplied by 2
104
105        .ifeqs  "\inverse", "TRUE"
106            VMUL   qT0,dWr,dXr1
107            VMLA   qT0,dWi,dXi1                       @// real part
108            VMUL   qT1,dWr,dXi1
109            VMLS   qT1,dWi,dXr1                       @// imag part
110
111        .else
112
113            VMUL   qT0,dWr,dXr1
114            VMLS   qT0,dWi,dXi1                       @// real part
115            VMUL   qT1,dWr,dXi1
116            VMLA   qT1,dWi,dXr1                       @// imag part
117
118        .endif
119
120        VSUB    dYr0,dXr0,qT0
121        VSUB    dYi0,dXi0,qT1
122        VADD    dYr1,dXr0,qT0
123        VADD    dYi1,dXi0,qT1
124
125        VST2    {dYr0,dYi0},[pDst],outPointStep
126        VST2    {dYr1,dYi1},[pDst],dstStep                  @// dstStep =  step = -outPointStep + 16
127
128        BGT     radix2lsGrpLoop\name
129
130
131        @// Reset and Swap pSrc and pDst for the next stage
132        MOV     pTmp,pDst
133        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 4*size; pSrc -= 8*size bytes
134        SUB     pSrc,pTmp,outPointStep
135
136        @// Reset pTwiddle for the next stage
137        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 4*size bytes
138
139        .endm
140
141
142
143        M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4,""
144        FFTSTAGE "FALSE","FALSE",fwd
145        M_END
146
147
148
149        M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4
150        FFTSTAGE "FALSE","TRUE",inv
151        M_END
152
153	.end
154