@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
@//  to support float instead of SC32.
@//

@// Description:
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point
@// complex signal.  This handles the general stage, not the first or last
@// stage.
@//
@// Two entry points are generated from one macro (FFTSTAGE): a forward and
@// an inverse variant.  They differ only in the sign of the twiddle cross
@// terms of the complex multiply.
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (none are referenced by this file)


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// ---------------------------------------------------------------------
@// Register aliases.
@// Keep comments off the #define lines themselves so that nothing but the
@// register name ends up in the expansion.
@// ---------------------------------------------------------------------

@// Input registers (read and updated by the stage)
@//   pSrc       : source buffer pointer
@//   pDst       : destination buffer pointer
@//   pTwiddle   : twiddle table pointer
@//   subFFTNum  : halved on entry to the stage (becomes the group size)
@//   subFFTSize : doubled on entry to the stage (becomes the group count)

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7


@// Output registers
@// (none besides the updated input registers above)


@// Local scratch registers
@//   pTable and pTmp share r9; pTable is not referenced in this file.

#define outPointStep    r3
#define pointStep       r4
#define grpCount        r5
#define setCount        r8
@//const           RN  9
#define step            r10
#define dstStep         r11
#define pTable          r9
#define pTmp            r9

@// NEON registers (all are 64-bit D registers viewed as 2 x F32)
@//   dW        : one complex twiddle for the current group
@//   dX0/dX1   : point0 real / imaginary parts
@//   dX2/dX3   : point1 real / imaginary parts
@//   dY0..dY3  : butterfly outputs (real/imag of each output point)
@//   dT0/dT1   : real / imaginary part of the twiddled point1

#define dW      D0.F32
#define dX0     D2.F32
#define dX1     D3.F32
#define dX2     D4.F32
#define dX3     D5.F32
#define dY0     D6.F32
#define dY1     D7.F32
#define dY2     D8.F32
#define dY3     D9.F32
#define dT0     D10.F32
#define dT1     D11.F32


@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@//   scaled  : not referenced in the macro body.
@//   inverse : the string "TRUE" selects the inverse butterfly, anything
@//             else selects the forward one.
@//   name    : suffix appended to the loop labels so that the macro can be
@//             expanded more than once in this file.
@//
@// On exit pSrc and pDst have been swapped and rewound, pTwiddle has been
@// rewound, and subFFTNum / subFFTSize hold the values for the next stage.
@// ---------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Derive the group size and group count for this stage in place.

        LSR     subFFTNum,subFFTNum,#1                      @// grpSize
        LSL     grpCount,subFFTSize,#1

        @// pointStep = 4 * grpSize for now; it is doubled below, after it
        @// has been used to form outPointStep.
        MOV     pointStep,subFFTNum,LSL #2

        @// subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// outPointStep = grpCount * (4 * grpSize) bytes.
        @// SMULBB multiplies the signed low halfwords only.
        @// NOTE(review): both operands therefore have to fit in a signed
        @// 16-bit value - confirm against the largest FFT size in use.
        SMULBB  outPointStep,grpCount,pointStep
        LSL     pointStep,pointStep,#1                      @// 8 * grpSize

        @// Post-increments applied after the second load / second store of
        @// each butterfly: undo the big stride, then advance 16 bytes.
        RSB     step,pointStep,#16
        RSB     dstStep,outPointStep,#16

        @// Outer loop: one pass per group, one twiddle per group.

radix2GrpLoop\name :
        MOV     setCount,pointStep,LSR #3                   @// = grpSize
        VLD1    dW,[pTwiddle],pointStep                     @// [wi | wr]

        @// Inner loop: each pass handles two complex points of the group.

radix2SetLoop\name :

        @// De-interleave point0 into dX0 (real) and dX1 (imag), then
        @// point1 (pointStep bytes further on) into dX2 / dX3.
        VLD2    {dX0,dX1},[pSrc],pointStep
        VLD2    {dX2,dX3},[pSrc],step

        @// Flags set here are consumed by the BGT at the bottom of the
        @// inner loop; no instruction in between writes the APSR flags.
        SUBS    setCount,setCount,#2

        @// Twiddle multiply of point1.  With lane 0 = wr and lane 1 = wi:
        @//   forward : T = X1 * W        (re: -wi term, im: +wi term)
        @//   inverse : T = X1 * conj(W)  (re: +wi term, im: -wi term)
        VMUL    dT0,dX2,dW[0]
        .ifeqs  "\inverse", "TRUE"
        VMLA    dT0,dX3,dW[1]                               @// real part
        .else
        VMLS    dT0,dX3,dW[1]                               @// real part
        .endif

        VMUL    dT1,dX3,dW[0]
        .ifeqs  "\inverse", "TRUE"
        VMLS    dT1,dX2,dW[1]                               @// imag part
        .else
        VMLA    dT1,dX2,dW[1]                               @// imag part
        .endif

        @// Butterfly: difference goes to dY0/dY1, sum goes to dY2/dY3.
        VSUB    dY0,dX0,dT0
        VSUB    dY1,dX1,dT1
        VADD    dY2,dX0,dT0
        VADD    dY3,dX1,dT1

        @// Re-interleave and store.  The second store rewinds pDst by
        @// outPointStep and advances it by 16 (dstStep = 16 - outPointStep).
        VST2    {dY0,dY1},[pDst],outPointStep
        VST2    {dY2,dY3},[pDst],dstStep

        BGT     radix2SetLoop\name

        @// Step over the point1 half of the group that was just consumed.
        @// ADD leaves the flags alone, so SUBS/BGT form the loop test.
        ADD     pSrc,pSrc,pointStep
        SUBS    grpCount,grpCount,#2
        BGT     radix2GrpLoop\name

        @// Swap the roles of the two buffers for the next stage and rewind
        @// both pointers:
        @//   new pDst = old pSrc - 2 * outPointStep
        @//   new pSrc = old pDst - outPointStep
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1
        SUB     pSrc,pTmp,outPointStep

        @// Rewind the twiddle pointer by outPointStep bytes.
        SUB     pTwiddle,pTwiddle,outPointStep

        .endm


@// ---------------------------------------------------------------------
@// Entry points.  M_START / M_END are provided by armCOMM_s.h.
@// NOTE(review): the macro body writes r3-r5, r8-r11 and d8-d11 while only
@// r4 is named here - presumably the caller of these "unsafe" routines
@// preserves the rest; confirm against armCOMM_s.h and the callers.
@// ---------------------------------------------------------------------

        M_START armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END


        M_START armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


        .end