@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@// Syntax / target: GNU as, ARM (AArch32) with NEON, passed through the C
@// preprocessor.  Comments are written as "@//" so that the preprocessor
@// strips their text and never macro-expands the register aliases below.
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)

        .extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
        .extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
        .extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@//
@// Register aliases.
@//
@// Several aliases deliberately share one physical register.  The ones
@// that matter for the code below:
@//   r1  : pDst       / argTwiddle
@//   r2  : pFFTSpec   / argDst / diffMinusOne / pOut1
@//   r4  : pTwiddle   / tmpOrder / argScale / x0r
@//   r6  : N          / subFFTNum
@//   r14 : order      / zero      (r14 is also the link register, so every
@//                                BL overwrites it)
@//

@// Input registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3


@// Output registers

#define result          r0

@// Local scratch registers

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total number of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3

#define pOut1           r2
#define size            r7
#define step            r8
#define step1           r9
#define twStep          r10
#define pTwiddleTmp     r11
#define argTwiddle1     r12
#define zero            r14

@// Neon registers

#define dX0             D0.F32
#define dShift          D1.F32
#define dX1             D1.F32
#define dY0             D2.F32
#define dY1             D3.F32
#define dX0r            D0.F32
#define dX0i            D1.F32
#define dX1r            D2.F32
#define dX1i            D3.F32
#define dW0r            D4.F32
#define dW0i            D5.F32
#define dW1r            D6.F32
#define dW1i            D7.F32
#define dT0             D8.F32
#define dT1             D9.F32
#define dT2             D10.F32
#define dT3             D11.F32
#define qT0             d12.F32
#define qT1             d14.F32
#define qT2             d16.F32
#define qT3             d18.F32
#define dY0r            D4.F32
#define dY0i            D5.F32
#define dY1r            D6.F32
#define dY1i            D7.F32
#define dzero           D20.F32

#define dY2             D4.F32
#define dY3             D5.F32
#define dW0             D6.F32
#define dW1             D7.F32
#define dW0Tmp          D10.F32
#define dW1Neg          D11.F32

#define sN              S0.S32
#define fN              S1.F32
@// "one" (S4) must stay the low lane of dScale (D2): the scaling code
@// writes the factor through "one" and reads it back as dScale[0].
#define dScale          D2.F32
#define one             S4.F32

#define qX0             Q2.F32
#define qX1             Q3.F32


@//---------------------------------------------------------------------
@// omxSP_FFTInv_CCSToR_F32_Sfs
@//
@// In:   r0 = pSrc      source buffer
@//       r1 = pDst      destination buffer; the scaling loop accesses the
@//                      final data with a :256 alignment qualifier, so
@//                      the buffer it ends up in must be 32-byte aligned
@//       r2 = pFFTSpec  FFT spec structure; the fields read here are
@//                      N (+0), pTwiddle (+8) and pBuf (+12)
@//       r3 = scale     aliased above but not referenced by this routine
@// Out:  r0 = OMX_Sts_NoErr (every path returns through End)
@//
@// Outline:
@//   1. N <= 1      : copy a single 32-bit element from pSrc to pDst.
@//   2. otherwise   : run the pre-twiddle radix-2 stage, then an N/2-point
@//                    complex inverse FFT built from the external radix
@//                    stage routines, ping-ponging between pDst and pBuf.
@//   3. FFTEnd      : multiply the output by 1 / (N/2).
@//
@// NOTE(review): the register contracts of the external "_unsafe" stage
@// routines (which registers they consume, preserve and update) are not
@// visible in this file; comments about them below repeat the original
@// author's notes and should be checked against those routines.
@//---------------------------------------------------------------------

        @// Allocate stack memory required by the function
        M_ALLOC4        complexFFTSize, 4

        @// Write function header
        M_START     omxSP_FFTInv_CCSToR_F32_Sfs,r11,d15

@// Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Fetch the transform size and the working pointers from the spec
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @// N <= 1 is handled separately: move one 32-bit lane and return
        CMP     N, #1
        BGT     sizeGreaterThanOne
        VLD1    dX0[0], [pSrc]
        VST1    dX0[0], [pDst]

        B       End

sizeGreaterThanOne:

        @// Run the preTwiddle radix-2 stage before the complex IFFT
        BL      armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe

complexIFFT:

        ASR     N, N, #1                        @// N/2-point complex IFFT
        M_STR   N, complexFFTSize               @// keep N/2 for the final scaling
        ADD     pSrc, pOut, N, LSL #3           @// pSrc = pOut + (N/2) * 8 bytes

        CLZ     order, N                        @// order = log2(N/2) = 31 - clz
        RSB     order, order, #31
        MOV     subFFTSize, #1
        @//MOV     subFFTNum,N

        CMP     order, #3
        BGT     orderGreaterthan3               @// order > 3

        CMP     order, #1
        BGE     orderGreaterthan0               @// order > 0

        @// order == 0: a single complex point, just copy it to pDst.
        @// None of the next three instructions touches the flags, so the
        @// LT result of the CMP above is still live and BLT is always taken.
        VLD1    dX0, [pSrc]
        VST1    dX0, [pDst]
        MOV     pSrc, pDst
        BLT     FFTEnd

orderGreaterthan0:
        @// Choose the first-stage destination so that the last stage
        @// writes into pDst: order == 2 (two stages) starts in pOut,
        @// orders 1 and 3 (one or three stages) start in pDst.
        CMP     order, #2
        MOVNE   argDst, pDst
        MOVEQ   argDst, pOut
        @// Pass the first stage destination in RN5
        MOVEQ   pOut, pDst
        @// argTwiddle is r1, i.e. this overwrites pDst; pDst has already
        @// been copied into argDst or pOut above.
        MOV     argTwiddle, pTwiddle

        @// Flags still come from CMP order,#2 (the MOVs leave them alone)
        BGE     orderGreaterthan1
        BLLT    armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe  @// order = 1
        B       FFTEnd

orderGreaterthan1:
        @// order is r14, which the BL below overwrites; keep a copy in r4.
        MOV     tmpOrder, order                 @// tmpOrder = RN 4
        BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder, #2
        @// order == 3 needs one middle stage between the first and last
        BLGT    armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
        BL      armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd


orderGreaterthan3:
specialScaleCase:

        @// Set input args to fft stages.  Bit 1 of order selects the
        @// first-stage destination (pDst when set, pOut when clear).
        TST     order, #2
        MOVNE   argDst, pDst
        MOVEQ   argDst, pOut
        @// Pass the first stage destination in RN5
        MOVEQ   pOut, pDst
        MOV     argTwiddle, pTwiddle

        @// Even order starts with a radix-4 first stage, odd order with
        @// a radix-8 first stage.
        @// NOTE: The following pair of BLs works even though the first BL
        @// corrupts the flags.  This is because the end of the
        @// "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the
        @// Z flag to EQ, so the BLNE is skipped after the BLEQ returns.

        TST     order, #0x00000001
        BLEQ    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum, #4
        BLT     FFTEnd


unscaledRadix4Loop:
        @// Entered with flags from CMP subFFTNum,#4: run middle radix-4
        @// stages until subFFTNum == 4, then hand over to the last stage.
        BEQ     lastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum, #4
        B       unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd

FFTEnd:
        @// Does only the scaling: multiply the result by
        @// 1 / complexFFTSize, where complexFFTSize is the N/2 value
        @// saved above.

        M_LDR   N, complexFFTSize
        VMOV    sN, N
        VCVT    fN, sN                          @// fN = complexFFTSize as a float
        VMOV    one, 1.0
        VDIV    one, one, fN                    @// one = dScale[0] = 1 / complexFFTSize


        @// subFFTSize counts the complex elements still to be scaled.
        @// Original author's note: subFFTSize = N = complexFFTSize here.
        @// On the order == 0 path it is 1 (set by the MOV above), in
        @// which case neither scaling path below runs.
        CMP     subFFTSize, #4
        BLT     scaleFFTData1
scaleFFTData:
        @// Scale 4 complex (8 float) elements at a time
        VLD1    {qX0, qX1}, [pSrc :256]         @// pSrc contains pDst pointer
        SUBS    subFFTSize, subFFTSize, #4
        VMUL    qX0, qX0, dScale[0]
        VMUL    qX1, qX1, dScale[0]
        VST1    {qX0, qX1}, [pSrc :256]!

        BGT     scaleFFTData                    @// flags from the SUBS above
scaleFFTData1:
        @// Tail: scale one more pair of complex elements (4 floats) if
        @// at least two remain; otherwise there is nothing left to do.
        CMP     subFFTSize, #2
        BLT     End
        VLD1    {qX0}, [pSrc]
        VMUL    qX0, qX0, dScale[0]
        VST1    {qX0}, [pSrc]!
End:
        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END



        .end