@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  omxSP_FFTInv_CToC_SC32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   6675
@// Last Modified Date:       Fri, 06 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)

        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name



        @// Guarding implementation by the processor name

@// Import symbols required from other files
@// (For example tables)
        .extern armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
        .extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe


@// NOTE: several of the names defined below alias the same core register
@//   r1: pDst / argTwiddle
@//   r2: pFFTSpec / argDst / diffMinusOne
@//   r3: scale / round
@//   r4: argScale / tmpOrder / pTwiddle / x0r
@//   r5: pOut / x0i
@//   r6: subFFTNum / N
@// so the order of the MOVs in the code below is significant.
@// x0r, x0i, diffMinusOne and round are not referenced by the code in this file.

@//Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3


@// Output registers
#define result          r0

@//Local Scratch Registers

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
@// order lives in r14 (LR): every BL overwrites it, so it must be tested or
@// copied before the first stage routine is called.
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3

@// Neon registers

#define dX0             D0.S32
#define dShift          D1.S32


@//
@// omxSP_FFTInv_CToC_SC32_Sfs
@//
@// Inverse FFT of a complex signal (see Description at the top of the file).
@//
@// Inputs (register assignment per the #defines above):
@//   r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
@// Output:
@//   r0 = result, unconditionally set to OMX_Sts_NoErr at End
@//
@// Reads the N, pTwiddle and pBuf fields of the FFTSpec structure.
@//
@// Flow:
@//   order = 31 - CLZ(N);  scale += order
@//   order == 0 : copy one D register from pSrc to pDst, then FFTEnd
@//   order 1..3 : scaled radix-2 stages, then FFTEnd
@//   order > 3  : specialScaleCase or generalScaleCase (see the labels below)
@//   FFTEnd     : if the value saved in diffOnStack is > 0, right-shift the
@//                data by that amount with VRSHL
@//
@// NOTE(review): the stage routines called with BL are external; which
@// registers they consume/update (e.g. pSrc, subFFTNum, subFFTSize, r4) is not
@// visible in this file - confirm against their sources before changing the
@// argument set-up.
@//

        @// Allocate stack memory required by the function
        M_ALLOC4        diffOnStack, 4

        @// Write function header
        @// NOTE(review): r11 / d15 are presumably the highest core / NEON
        @// registers M_START saves - confirm in armCOMM_s.h.
        M_START     omxSP_FFTInv_CToC_SC32_Sfs,r11,d15

@ Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        @// (pFFTSpec is not needed after these loads; r2 is reused as argDst)
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        CLZ     order,N                             @// N = 2^order
        RSB     order,order,#31                     @// order = 31 - CLZ(N)
        MOV     subFFTSize,#1
        @// The MOV below is disabled; N and subFFTNum are both r6.
        @//MOV     subFFTNum,N

        ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N

        CMP     order,#3
        BGT     orderGreaterthan3                   @// order > 3

        CMP     order,#1
        BGE     orderGreaterthan0                   @// order > 0
        @// Fall-through is order < 1. None of the instructions up to the BLT
        @// set the flags (assuming M_STR expands to a plain store), so the LT
        @// condition from the CMP above still holds at the BLT.
        M_STR   scale, diffOnStack,LT               @// order = 0
        VLD1    dX0,[pSrc]                          @// copy the single D register of data
        VST1    dX0,[pDst]
        MOV     pSrc,pDst                           @// FFTEnd scales in place through pSrc
        BLT     FFTEnd

orderGreaterthan0:
        @// set the buffers appropriately for various orders
        CMP     order,#2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle                 @// overwrites r1 (pDst), already copied above
        @// Store the scale factor and scale at the end
        SUB     diff,scale,order                    @// undoes the ADD above: diff = scale as passed in
        M_STR   diff, diffOnStack
        @// BGE/BLLT still test the flags of "CMP order,#2": MOV and SUB (without
        @// S) leave the flags unchanged (assuming M_STR is a plain store).
        BGE     orderGreaterthan1
        BLLT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
        B       FFTEnd

orderGreaterthan1:
        @// order is r14 and is destroyed by the BL below, so keep a copy in r4.
        @// NOTE(review): this relies on the stage routine preserving r4 - confirm.
        MOV     tmpOrder,order                          @// tmpOrder = RN 4
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder,#2
        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe     @// middle stage only when order = 3
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd


orderGreaterthan3:
        @// check scale = 0 or scale = order
        @// diff = scale - order, i.e. the scale value passed by the caller,
        @// because order was added to scale above.
        SUBS    diff, scale, order                 @// scale > order
        MOVGT   scale,order                        @// clamp scale to order
        BGE     specialScaleCase                   @// scale = 0 or scale = order
        CMP     scale,#0
        BEQ     specialScaleCase
        B       generalScaleCase

specialScaleCase:                                  @// scale = 0 or scale = order  and order >= 2

        @// NOTE(review): bit 1 of order presumably gives the parity of the
        @// number of radix-4/8 stages, so that the last stage writes to pDst -
        @// confirm against the stage routines.
        TST     order, #2                           @// Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle                 @// overwrites r1 (pDst), already copied above

        @// diff >= 0 : every stage is a scaled (Sfs) stage and FFTEnd applies
        @//             the remaining shift of diff bits.
        @// diff <  0 : unscaled stages; FFTEnd then skips scaling (diff <= 0).
        CMP      diff,#0
        M_STR    diff, diffOnStack
        BGE      scaleEqualsOrder

        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe

        CMP        subFFTNum,#4
        BLT     FFTEnd


unscaledRadix4Loop:
        @// Entered with the flags of the most recent "CMP subFFTNum,#4".
        BEQ        lastStageUnscaledRadix4
        BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        CMP        subFFTNum,#4
        B        unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        B        FFTEnd


scaleEqualsOrder:
        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
        @// NOTE(review): the routine actually called here is the Sfs variant;
        @// confirm that it also returns with Z set.

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe

        CMP        subFFTNum,#4
        BLT     FFTEnd


scaledRadix4Loop:
        @// Entered with the flags of the most recent "CMP subFFTNum,#4".
        BEQ        lastStageScaledRadix4
        BL        armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
        CMP        subFFTNum,#4
        B        scaledRadix4Loop

lastStageScaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
        B        FFTEnd

generalScaleCase:                                        @// 0 < scale < order and order >= 2
        @// Runs scale scaled radix-2 stages, then unscaled stages (radix-4 when
        @// diff is even, radix-2 when diff is odd). Both endings branch to End,
        @// so the FFTEnd scaling pass is not executed on this path.
        @// Determine the correct destination buffer
        SUB     diff,order,scale                         @// diff = order - scale
        TST     diff,#0x01
        ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
        MOVNE   count,order
        TST     count,#0x01                     @// Is count even or odd ?

        MOVNE   argDst,pDst                     @// Set input args to fft stages
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle             @// overwrites r1 (pDst), already copied above

        M_STR   diff, diffOnStack               @// reloaded after the scaled stages

        MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
        SUBS    argScale,argScale,#1

scaledRadix2Loop:
        @// BLGT tests the flags of the preceding SUBS on argScale.
        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
        SUBS    argScale,argScale,#1            @// save and restore scale (RN4)  in the scaled stages
        BGT     scaledRadix2Loop


        M_LDR   diff, diffOnStack
        @//check for even or odd order
        TST     diff,#0x00000001
        BEQ     generalUnscaledRadix4Loop
        B       unscaledRadix2Loop

generalUnscaledRadix4Loop:
        CMP        subFFTNum,#4
        BEQ        generalLastStageUnscaledRadix4
        BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        B        generalUnscaledRadix4Loop

generalLastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        B        End


unscaledRadix2Loop:
        CMP        subFFTNum,#2
        BEQ        generalLastStageUnscaledRadix2
        BL        armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
        B        unscaledRadix2Loop

generalLastStageUnscaledRadix2:
        BL      armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
        B        End


FFTEnd:                                               @// Does only the scaling

        M_LDR   diff, diffOnStack
        CMP     diff,#0
        BLE     End                                   @// nothing to do for diff <= 0

        RSB     diff,diff,#0                          @// to use VRSHL for right shift by a variable
        VDUP    dShift,diff

        @// One D register is shifted per iteration and written back in place;
        @// the loop runs while the decremented subFFTSize is still > 0.
        @// NOTE(review): apart from the order = 0 path, pSrc and subFFTSize are
        @// presumably left by the stage routines as output pointer and element
        @// count - confirm in the callees.
scaleFFTData:                                         @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
        VLD1    {dX0},[pSrc]                          @// pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#1
        VRSHL   dX0,dShift
        VST1    {dX0},[pSrc]!

        BGT     scaleFFTData


End:
        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END

        .end