@
@  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@
@  Use of this source code is governed by a BSD-style license
@  that can be found in the LICENSE file in the root of the source
@  tree. An additional intellectual property rights grant can be found
@  in the file PATENTS.  All contributing project authors may
@  be found in the AUTHORS file in the root of the source tree.
@
@  Some code in this file was originally from file
@  omxSP_FFTInv_CToC_SC16_Sfs_s.S which was licensed as follows.
@  It has been relicensed with permission from the copyright holders.
@

@
@ File Name:  omxSP_FFTInv_CToC_SC16_Sfs_s.s
@ OpenMAX DL: v1.0.2
@ Last Modified Revision:   6729
@ Last Modified Date:       Tue, 17 Jul 2007
@
@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@

@
@ Description:
@ Compute an inverse FFT for a 16-bit real signal, with complex FFT routines.
@

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

        @ Stage routines called from this file (all defined elsewhere).
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
        @ The two pre-twiddle routines are called below as well; declare them
        @ alongside the other callees so the list of dependencies is complete.
        .extern  armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe
        .extern  armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe

@ Register aliases.  Several aliases deliberately share one physical
@ register; in particular order/zero live in r14 (the link register), so
@ the value of order is lost across every BL.

@Input Registers
#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3

@ Output registers
#define result          r0

@Local Scratch Registers
#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define pTwiddle        r4
#define tmpOrder        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@ Total num of radix stages to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3
#define pOut1           r2
#define size            r7
#define step            r8
#define step1           r9
#define twStep          r10
#define pTwiddleTmp     r11
#define argTwiddle1     r12
#define zero            r14

@ Neon registers
#define dX0             D0.S32
#define dShift          D1.S32
#define qShift          Q0.s16
#define dX1             D1.S32
#define dY0             D2.S32
#define dY1             D3.S32
#define dX0r            D0.S32
#define dX0i            D1.S32
#define dX1r            D2.S32
#define dX1i            D3.S32
#define dW0r            D4.S32
#define dW0i            D5.S32
#define dW1r            D6.S32
#define dW1i            D7.S32
#define dT0             D8.S32
#define dT1             D9.S32
#define dT2             D10.S32
#define dT3             D11.S32
#define qT0             Q6.S64
#define qT1             Q7.S64
#define qT0s            Q6.S16
#define qT1s            Q7.S16
#define qT2             Q8.S64
#define qT3             Q9.S64
#define dY0r            D4.S32
#define dY0i            D5.S32
#define dY1r            D6.S32
#define dY1i            D7.S32
#define dzero           D20.S32
#define dY2             D4.S32
#define dY3             D5.S32
#define dW0             D6.S32
#define dW1             D7.S32
#define dW0Tmp          D10.S32
#define dW1Neg          D11.S32

@-----------------------------------------------------------------------
@ omxSP_FFTInv_CCSToR_S16_Sfs
@
@ In:    r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
@ Out:   r0 = OMX_Sts_NoErr
@
@ Flow:  1. run a pre-twiddle radix-2 pass (plain or Sfs, chosen by scale)
@        2. run an N/2 point complex inverse FFT through the radix stage
@           routines declared above
@        3. apply any remaining right shift to the output (FFTEnd)
@
@ The prologue/epilogue and the diffOnStack slot come from the M_ALLOC4 /
@ M_START / M_END macros in armCOMM_s.h (r11 and d15 are the register
@ limits handed to M_START; see that header for their exact meaning).
@-----------------------------------------------------------------------

        @ Allocate stack memory required by the function
        M_ALLOC4        diffOnStack, 4

        @ Write function header
        M_START     omxSP_FFTInv_CCSToR_S16_Sfs,r11,d15

@ Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @ Define stack arguments

        @ Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @ Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @ Call the preTwiddle Radix2 stage before doing the complex IFFT

        @ Exactly one of the two calls below is meant to run.  When the EQ
        @ call is taken, the flags it returns with are tested again by BLGT,
        @ so this relies on the callee returning with GT false.  Original
        @ note: the following conditional BL combination would work since
        @ evenOddButterflyLoop in the first call would set Z flag to zero

        CMP     scale,#0
        BLEQ    armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe
        BLGT    armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe

complexIFFT:

        ASR     N,N,#1                              @ N/2 point complex IFFT
        ADD     pSrc,pOut,N,LSL #2                  @ set pSrc as pOut1

        CLZ     order,N                             @ N = 2^order
        RSB     order,order,#31
        MOV     subFFTSize,#1

        ADD     scale,scale,order                   @ FFTInverse has a final scaling factor by N

        CMP     order,#3
        BGT     orderGreaterthan3                   @ order > 3

        CMP     order,#1
        BGE     orderGreaterthan0                   @ order > 0
        @ order = 0: single element, copy it to pDst and only do the
        @ end-of-function scaling.
        M_STR   scale, diffOnStack,LT               @ order = 0
        LDRLT   x0r,[pSrc]
        STRLT   x0r,[pDst]
        MOVLT   pSrc,pDst
        BLT     FFTEnd

orderGreaterthan0:
        @ set the buffers appropriately for various orders
        CMP     order,#2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @ Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle
        @ Store the scale factor and scale at the end
        @ (SUB without S and the store keep the flags of CMP order,#2 alive
        @ for the two conditional branches that follow.)
        SUB     diff,scale,order
        M_STR   diff, diffOnStack
        BGE     orderGreaterthan1
        BLLT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @ order = 1
        B       FFTEnd


orderGreaterthan1:
        @ order is held in r14 and is overwritten by BL, so keep a copy in
        @ r4 (pTwiddle has already been copied to argTwiddle above).
        MOV     tmpOrder,order                      @ tmpOrder = RN 4
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder,#2
        BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd




orderGreaterthan3:
        @ check scale = 0 or scale = order
        SUB     diff, scale, order                  @ scale > order

        TST     order, #2                           @ Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @ Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        CMP     diff,#0
        M_STR   diff, diffOnStack
        BGE     scaleEqualsOrder

        @check for even or odd order
        @ NOTE: The following combination of BLs would work fine even though the first
        @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd

unscaledRadix4Loop:
        @ Entered with the flags of CMP subFFTNum,#4: EQ selects the last stage.
        BEQ     lastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd

scaleEqualsOrder:
        @check for even or odd order
        @ NOTE: The following combination of BLs would work fine even though the first
        @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd

scaledRadix4Loop:
        @ Entered with the flags of CMP subFFTNum,#4: EQ selects the last stage.
        BEQ     lastStageScaledRadix4
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       scaledRadix4Loop

lastStageScaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe

FFTEnd:                                             @ Does only the scaling

        @ Reload the shift amount saved earlier; nothing to do unless it is
        @ strictly positive.
        M_LDR   diff, diffOnStack
        CMP     diff,#0
        BLE     End

        RSB     diff,diff,#0                        @ to use VRSHL for right shift by a variable
        VDUP    qShift,diff

        @ Use parallel loads for bigger FFT size.
        CMP     subFFTSize, #8
        BLT     scaleLessFFTData

scaleFFTData:
        @ Two Q registers per pass; the counter drops by 8 each time.  The
        @ :256 qualifier on the address requires a 32-byte aligned pointer.
        VLD1    {qT0s, qT1s},[pSrc:256]             @ pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#8
        VSHL    qT0s,qShift
        VSHL    qT1s,qShift
        VST1    {qT0s, qT1s},[pSrc:256]!
        BGT     scaleFFTData
        B       End

scaleLessFFTData:                                   @ N = subFFTSize  ; dataptr = pDst  ; scale = diff
        @ One 32-bit word per pass; the counter drops by 1 each time.
        @ NOTE(review): dX0 and dShift are typed .S32, so each word loaded
        @ here is shifted as a single 32-bit lane and with rounding (VRSHL),
        @ whereas scaleFFTData shifts 16-bit lanes without rounding (VSHL).
        @ Confirm that this difference between the two paths is intended.
        VLD1    {dX0[0]},[pSrc]                     @ pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#1
        VRSHL   dX0,dShift
        VST1    {dX0[0]},[pSrc]!
        BGT     scaleLessFFTData

End:
        @ Set return value
        MOV     result, #OMX_Sts_NoErr

        @ Write function tail
        M_END






        .end