1//
2//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3//
4//  Use of this source code is governed by a BSD-style license
5//  that can be found in the LICENSE file in the root of the source
6//  tree. An additional intellectual property rights grant can be found
7//  in the file PATENTS.  All contributing project authors may
8//  be found in the AUTHORS file in the root of the source tree.
9//
10//  This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
11//  to support float instead of SC32.
12//
13
14//
15// Description:
// Compute the first stage of a radix-2 DIT in-order out-of-place FFT
// for an N-point complex signal.
18//
19//
20
21
22// Include standard headers
23
24#include "dl/api/arm/arm64COMM_s.h"
25#include "dl/api/arm/omxtypes_s.h"
26
27
28// Import symbols required from other files
29// (For example tables)
30
31
32
33
34// Set debugging level
35//DEBUG_ON    SETL {TRUE}
36
37
38
39// Guarding implementation by the processor name
40
41
42
43// Guarding implementation by the processor name
44
45
// Input registers
//
// pSrc, pDst, pSubFFTNum and pSubFFTSize are used as memory addresses by
// the FFTSTAGE macro below. pTwiddle is not referenced by the stage code
// in this file.

#define pSrc            x0
#define pDst            x1
#define pTwiddle        x2
#define pSubFFTNum      x3
#define pSubFFTSize     x4


// Output registers
//
// None are defined.


// Local scratch registers
//
// Some names are aliases for the same physical register:
//   pointStep / outPointStep -> x7
//   grpSize   / setCount     -> x8
//   step      / dstStep      -> x9
// Writing one name of a pair therefore changes the other as well.

#define subFFTNum       x5
#define subFFTSize      x6

#define pointStep       x7
#define outPointStep    x7

#define grpSize         x8
#define setCount        x8

#define step            x9
#define dstStep         x9


// NEON registers
//
// Each alias is a two-lane 32-bit view (8 bytes) of a vector register.
// dX0 and dX1 hold the loaded inputs, dY0 and dY1 the computed outputs.

#define dX0     v0.2s
#define dX1     v1.2s
#define dY0     v2.2s
#define dY1     v3.2s
74
// FFTSTAGE scaled, inverse, name
//
// First radix-2 stage of the out-of-place float FFT. Each pass of the loop
// loads one 8-byte element from pSrc and a second one pointStep bytes
// further on, then stores their sum at pDst and their difference
// outPointStep bytes further on. Both pointers end each pass 8 bytes past
// where they started it.
//
// Macro arguments:
//   scaled, inverse - accepted so all stage macros share one call shape;
//                     this body does not reference them.
//   name            - suffix appended to the loop label so every expansion
//                     defines a distinct symbol.
//
// Register interface (aliases are defined near the top of this file):
//   pSrc        - input pointer, advanced by 8 bytes per iteration
//   pDst        - output pointer, advanced by 8 bytes per iteration
//   pSubFFTNum  - address of a 64-bit count; read, halved and written back
//   pSubFFTSize - address of a 64-bit value; overwritten with 2
//
// Clobbers: subFFTNum, subFFTSize, pointStep, grpSize/setCount, step,
//           dX0, dX1, dY0, dY1 and the condition flags.
//
// NOTE: the counter is tested after the loop body, so one iteration always
// runs. Callers are presumably expected to pass a count of at least 2.

        .macro FFTSTAGE scaled, inverse, name

        // Only the incoming subFFTNum is needed. subFFTSize is set to the
        // constant 2 below, so its previous value is never loaded.
        ldr     subFFTNum, [pSubFFTNum]

        // Values handed to the next stage: half the incoming count and a
        // size of 2. grpSize and setCount are the same register, so the
        // halved count is also the loop counter.
        lsr     grpSize, subFFTNum, #1
        mov     subFFTNum, grpSize
        mov     subFFTSize, #2

        // pointStep = 8 * grpSize bytes, the distance between the two
        // elements combined in one iteration. outPointStep is the same
        // register, so the two outputs are stored the same distance apart.
        lsl     pointStep, grpSize, #3

        // step = 8 - pointStep: rewinds a pointer from the second element
        // back to the first and moves it on by one 8-byte element.
        // A64 has no RSB instruction, so this is built from mov and sub.
        mov     step, #8
        sub     step, step, pointStep

        // Loop on the sets for group zero

grpZeroSetLoop\name\():

        ld1     {dX0}, [pSrc], pointStep
        ld1     {dX1}, [pSrc], step

        // Decrement early; the flags are consumed by the branch at the
        // bottom and nothing in between modifies them.
        subs    setCount, setCount, #1

        fadd    dY0, dX0, dX1
        fsub    dY1, dX0, dX1

        st1     {dY0}, [pDst], outPointStep
        st1     {dY1}, [pDst], dstStep

        bgt     grpZeroSetLoop\name


        // Save subFFTNum and subFFTSize for the next stage
        str     subFFTNum, [pSubFFTNum]
        str     subFFTSize, [pSubFFTSize]

        .endm
123
124
125
// armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace
//
// Forward-transform entry point. The body is one expansion of FFTSTAGE
// with the loop label suffix fwd. M_START and M_END come from the included
// project header and presumably emit the function prologue and epilogue.
        M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace
        FFTSTAGE "FALSE", "FALSE", fwd
        M_END
129
130
131
// armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace
//
// Inverse-transform entry point. The body is one expansion of FFTSTAGE
// with the loop label suffix inv. FFTSTAGE does not reference its inverse
// argument, so the instructions emitted here match the forward entry point
// apart from the label name.
        M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace
        FFTSTAGE "FALSE", "TRUE", inv
        M_END
135
136        .end
137