@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
@//  to support float instead of SC32.
@//

@//
@// Description:
@// First-stage radix-4 FFT butterfly pass over an N point complex float
@// signal, using scalar VFP instructions. Forward and inverse variants
@// are generated from one macro.
@//


@// Standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// M_VARIANTS ARM1136JS

@// No symbols (tables etc.) are imported from other files.

@// Debugging level
@//DEBUG_ON    SETL {TRUE}

@// Implementation is guarded by processor name
@//    IF  ARM1136JS

@// ---------------------------------------------------------------------
@// Registers that carry values into this stage.
@// pTwiddle is declared for symmetry with other stages; this stage never
@// reads it because every twiddle factor of the first group equals 1.
@// ---------------------------------------------------------------------

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7


@// ---------------------------------------------------------------------
@// Registers that carry values out of this stage: none beyond the
@// updated input registers above.
@// ---------------------------------------------------------------------


@// ---------------------------------------------------------------------
@// Scratch core registers. Two pairs of names share one register each.
@// ---------------------------------------------------------------------

#define grpSize         r14
#define outPointStep    r12
#define setStep         r3
#define setCount        r14     /* same register as grpSize */
#define pointStep       r12

@// ---------------------------------------------------------------------
@// VFP registers holding the real and imaginary parts of the four points
@// ---------------------------------------------------------------------
#define x0r     s0
#define x0i     s1
#define x1r     s2
#define x1i     s3
#define x2r     s4
#define x2i     s5
#define x3r     s6
#define x3i     s7
#define t3r     s0      /* fourth output; overlays x0 once x0 is stored */
#define t3i     s1
#define sr      s8
#define si      s9



        @// -------------------------------------------------------------
        @// FFTSTAGE scaled, inverse, name
        @//
        @//   scaled   not referenced by the macro body
        @//   inverse  "TRUE" selects the inverse output ordering, any
        @//            other string selects the forward ordering
        @//   name     suffix that makes the loop label unique for each
        @//            expansion
        @//
        @// On entry: pSrc, pDst, pPingPongBuf and subFFTNum are set up
        @//           by the caller.
        @// On exit:  subFFTSize = 4, subFFTNum = entry subFFTNum / 4,
        @//           pSrc = entry pDst, pDst = pPingPongBuf.
        @// Writes:   r3, r12, r14, s0-s9 and the condition flags.
        @// -------------------------------------------------------------
        .macro FFTSTAGE scaled, inverse, name

        @// No stack arguments are used.

        @// Publish the size and count for the following stage right away
        @// so that the registers holding them can be reused here.
        mov     subFFTSize, #4
        lsr     grpSize, subFFTNum, #2
        mov     subFFTNum, grpSize

        @// One complex float occupies 8 bytes, so the four inputs of a
        @// butterfly lie pointStep = 8 * grpSize bytes apart.
        @// outPointStep shares the register and therefore the value.
        @// setCount shares the register of grpSize, so the loop below
        @// runs grpSize times.
        lsl     pointStep, grpSize, #3

        @// setStep = 8 - 3 * pointStep. Added after three pointStep
        @// advances it leaves a pointer 8 bytes past where the
        @// butterfly started, i.e. on the next complex element.
        add     setStep, pointStep, pointStep, lsl #1
        rsb     setStep, setStep, #8

        @// Group zero is the only group of the first stage and all of
        @// its twiddle factors are 1, so no multiplications are needed.

grpZeroSetLoop\name:

        @// Gather the four inputs of this butterfly
        vldm.f32 pSrc, {x0r, x0i}
        add     pSrc, pSrc, pointStep
        vldm.f32 pSrc, {x1r, x1i}
        add     pSrc, pSrc, pointStep
        vldm.f32 pSrc, {x2r, x2i}
        add     pSrc, pSrc, pointStep
        vldm.f32 pSrc, {x3r, x3i}
        add     pSrc, pSrc, setStep

        @// First half of the 4 point transform.
        @// Each difference is formed as (sum) - 2 * (second operand).

        vadd.f32 sr, x2r, x2r
        vadd.f32 si, x2i, x2i
        vadd.f32 x0r, x0r, x2r          @// x0 <- x0 + x2
        vadd.f32 x0i, x0i, x2i
        vsub.f32 x2r, x0r, sr           @// x2 <- x0 - x2
        vsub.f32 x2i, x0i, si

        vadd.f32 sr, x3r, x3r
        vadd.f32 si, x3i, x3i
        vadd.f32 x1r, x1r, x3r          @// x1 <- x1 + x3
        vadd.f32 x1i, x1i, x3i
        vsub.f32 x3r, x1r, sr           @// x3 <- x1 - x3
        vsub.f32 x3i, x1i, si

        @// Second half of the 4 point transform

        vadd.f32 sr, x1r, x1r
        vadd.f32 si, x1i, x1i
        vadd.f32 x0r, x0r, x1r          @// x0 <- x0 + x1
        vadd.f32 x0i, x0i, x1i
        vsub.f32 x1r, x0r, sr           @// x1 <- x0 - x1
        vsub.f32 x1i, x0i, si

        @// x0 has to reach memory before t3 is formed because t3
        @// lives in the same VFP registers.
        vstm.f32 pDst, {x0r, x0i}
        add     pDst, pDst, outPointStep

        vadd.f32 sr, x3r, x3r
        vadd.f32 si, x3i, x3i
        vadd.f32 x2r, x2r, x3i          @// x2 <- x2 - j * x3
        vsub.f32 x2i, x2i, x3r
        vsub.f32 t3r, x2r, si           @// t3 <- x2 + j * x3
        vadd.f32 t3i, x2i, sr

        @// Outputs two and four trade places between the forward and
        @// the inverse transform; output three is x1 in both.
        .ifeqs  "\inverse", "TRUE"
            vstm.f32 pDst, {t3r, t3i}
        .else
            vstm.f32 pDst, {x2r, x2i}
        .endif
        add     pDst, pDst, outPointStep

        vstm.f32 pDst, {x1r, x1i}
        add     pDst, pDst, outPointStep

        .ifeqs  "\inverse", "TRUE"
            vstm.f32 pDst, {x2r, x2i}
        .else
            vstm.f32 pDst, {t3r, t3i}
        .endif
        add     pDst, pDst, setStep

        @// One butterfly finished; continue while any remain
        subs    setCount, setCount, #1
        bgt     grpZeroSetLoop\name

        @// pDst gained 8 bytes per iteration, pointStep bytes in total.
        @// Stepping back by pointStep yields the start of the data just
        @// written, which becomes the source of the next stage; the
        @// ping-pong buffer becomes its destination.
        sub     pSrc, pDst, pointStep
        mov     pDst, pPingPongBuf

        .endm


        @// Forward transform entry point.
        @// NOTE(review): the r4 argument is interpreted by M_START in
        @// armCOMM_s.h - presumably the register save range; confirm there.
        M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END


        @// Inverse transform entry point (same stage, inverse output order)
        M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


@//    ENDIF                                                    @//ARM1136JS


@// End of the processor-name guard


        .end