@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTInv_CToC_SC32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute an inverse FFT for a complex signal and scale the result by
@// 1/(number of points processed).
@//
@// Entry point: omxSP_FFTInv_CToC_FC32_Sfs_vfp
@//
@//   In:   r0 (pSrc)     pointer to the complex input data
@//         r1 (pDst)     pointer to the output buffer
@//         r2 (pFFTSpec) pointer to the FFT spec structure; the fields read
@//                       here are N (offset 0), pTwiddle (offset 8) and
@//                       pBuf (offset 12)
@//   Out:  r0 (result)   OMX_Sts_NoErr
@//
@// Each complex element handled by the final scaling loop is a pair of
@// single-precision floats (real, imaginary), i.e. 8 bytes.
@//
@// NOTE(review): register save/restore is done by the M_START/M_END macros
@// from armCOMM_s.h, which are not visible in this file - presumably
@// "r11" requests that r4-r11 and lr are preserved; confirm against the
@// macro definition.
@//

@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@//        M_VARIANTS      ARM1136JS

@// Import symbols required from other files
@// (For example tables)

        .extern  armSP_FFTInv_CToC_FC32_Sfs_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name

@//    IF  ARM1136JS

@// Input registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2


@// Output registers
#define result          r0

@// Local scratch registers
@//
@// Several names alias the same register (argTwiddle/pDst = r1,
@// argDst/pFFTSpec/diffMinusOne = r2, argScale/pTwiddle = r4,
@// subFFTNum/N = r6).  argScale, diff, count and diffMinusOne are not
@// referenced anywhere in this file.
@//
@// "order" lives in r14 (lr): every BL overwrites it, so it must only be
@// read before the first BL on any path.

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
#define count           r8
#define diffMinusOne    r2
#define round           r3

@// VFP scratch registers

#define x0r             s0
#define x0i             s1
#define fone            s2
#define fscale          s3


        @// Allocate stack memory required by the function

        @// Write function header
        M_START     omxSP_FFTInv_CToC_FC32_Sfs_vfp,r11

@// Structure offsets for FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Read the size from the structure; its log2 is taken below.
        @// N and subFFTNum are both r6, so this load also initialises
        @// subFFTNum = N (hence the commented-out MOV further down).
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        CLZ     order,N                             @// N = 2^order
        RSB     order,order,#31                     @// order = 31 - clz(N)
        MOV     subFFTSize,#1
        @//MOV     subFFTNum,N

        CMP     order,#1
        BGT     orderGreaterthan1                   @// order > 1

        @// Order < 1: copy the single complex element to pDst and let the
        @// common tail scale it (subFFTSize is still 1, so scale = 1/1).
        @// The flags from the CMP above are still live here.
        vldmlt.f32 pSrc, {x0r, x0i}
        vstmlt.f32 pDst, {x0r, x0i}

        MOVLT   pSrc,pDst
        BLT     FFTEnd

        @// Handle order = 1
        MOV     argDst,pDst
        MOV     argTwiddle,pTwiddle

        BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        B       FFTEnd

orderGreaterthan1:

        @// Set input args to the fft stages.
        @// Bit 1 of order set:   the first stage writes to pDst.
        @// Bit 1 of order clear: the first stage writes to the scratch
        @//                       buffer (pBuf) and pDst is handed over in
        @//                       pOut (r5).
        @// NOTE(review): presumably this makes the last stage of the
        @// ping-pong between the two buffers land in pDst - confirm against
        @// the stage routines.
        TST     order, #2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage dest in RN5
        MOV     argTwiddle,pTwiddle


        @// First stage: radix-4 for even order, radix-8 for odd order.
        @// The selection uses explicit branches, so exactly one first-stage
        @// routine runs regardless of the condition flags the callee
        @// leaves behind on return.  "order" (lr) is dead after this point.
        TST     order,#0x00000001
        BNE     firstStageRadix8
        BL      armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        B       unscaledRadix4Loop

firstStageRadix8:
        BL      armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp

        @// Remaining stages: keep running radix-4 passes until subFFTNum
        @// reaches 1.
        @// NOTE(review): the stage routines are expected to update
        @// subFFTNum, subFFTSize and pSrc - they are defined in other
        @// files; confirm there.
unscaledRadix4Loop:
        CMP     subFFTNum,#1
        BEQ     FFTEnd
        BL      armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
        B       unscaledRadix4Loop


FFTEnd:
        @// Scale subFFTSize complex elements in place, starting at pSrc,
        @// by 1/subFFTSize.

        vldm.f32 pSrc, {x0r, x0i}                   @// preload first element

        vmov.f32     fscale, subFFTSize
        vcvt.f32.s32 fscale, fscale                 @// fscale = N as a float
        movw    round, #0
        movt    round, #0x3f80                      @// round = 0x3f800000 = 1.0f
        vmov.f32 fone, round
        vdiv.f32 fscale, fone, fscale               @// fscale = 1/N
scaleFFTData:                                       @// N = subFFTSize
        SUBS    subFFTSize,subFFTSize,#1            @// flags drive the GT ops below
        vmul.f32 x0r, x0r, fscale
        vmul.f32 x0i, x0i, fscale
        vstm.f32 pSrc, {x0r, x0i}
        add     pSrc, #8                            @// next complex element
        vldmgt.f32 pSrc, {x0r, x0i}                 @// only load if one remains

        bgt     scaleFFTData


        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END

@//    ENDIF                                        @//ARM1136JS


        @// Guarding implementation by the processor name



        .end