1/* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <arm_neon.h> 12 13#include "dl/api/omxtypes.h" 14#include "dl/sp/api/armSP.h" 15#include "dl/sp/api/omxSP.h" 16 17extern void armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( 18 const OMX_FC32* pSrc, 19 OMX_FC32* pDst, 20 OMX_FC32* pTwiddle, 21 long* subFFTNum, 22 long* subFFTSize); 23 24extern void armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace( 25 const OMX_FC32* pSrc, 26 OMX_FC32* pDst, 27 OMX_FC32* pTwiddle, 28 long* subFFTNum, 29 long* subFFTSize); 30 31extern void armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace( 32 const OMX_FC32* pSrc, 33 OMX_FC32* pDst, 34 OMX_FC32* pTwiddle, 35 long* subFFTNum, 36 long* subFFTSize); 37 38extern void armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace( 39 const OMX_FC32* pSrc, 40 OMX_FC32* pDst, 41 OMX_FC32* pTwiddle, 42 long* subFFTNum, 43 long* subFFTSize); 44 45extern void armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace( 46 const OMX_FC32* pSrc, 47 OMX_FC32* pDst, 48 OMX_FC32* pTwiddle, 49 long* subFFTNum, 50 long* subFFTSize); 51 52extern void armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace( 53 const OMX_FC32* pSrc, 54 OMX_FC32* pDst, 55 OMX_FC32* pTwiddle, 56 long* subFFTNum, 57 long* subFFTSize); 58 59extern void armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace( 60 const OMX_FC32* pSrc, 61 OMX_FC32* pDst, 62 OMX_FC32* pTwiddle, 63 long* subFFTNum, 64 long* subFFTSize); 65 66/* 67 * Scale FFT data by 1/|length|. |length| must be a power of two 68 */ 69static inline ScaleFFTData(OMX_FC32* fftData, unsigned length) { 70 float32_t* data = (float32_t*)fftData; 71 float32_t scale = 1.0f / length; 72 73 /* 74 * Do two complex elements at a time because |length| is always 75 * greater than or equal to 2 (order >= 1) 76 */ 77 do { 78 float32x4_t x = vld1q_f32(data); 79 80 length -= 2; 81 x = vmulq_n_f32(x, scale); 82 vst1q_f32(data, x); 83 data += 4; 84 } while (length > 0); 85} 86 87/** 88 * Function: omxSP_FFTInv_CToC_FC32 89 * 90 * Description: 91 * These functions compute an inverse FFT for a complex signal of 92 * length of 2^order, where 0 <= order <= 15. Transform length is 93 * determined by the specification structure, which must be 94 * initialized prior to calling the FFT function using the appropriate 95 * helper, i.e., <FFTInit_C_FC32>. The relationship between the input 96 * and output sequences can be expressed in terms of the IDFT, i.e.: 97 * 98 * x[n] = SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) 99 * n=0,1,2,...N-1 100 * N=2^order. 101 * 102 * Input Arguments: 103 * pSrc - pointer to the complex-valued input signal, of length 2^order ; 104 * must be aligned on a 32-byte boundary. 105 * pFFTSpec - pointer to the preallocated and initialized specification 106 * structure 107 * 108 * Output Arguments: 109 * order 110 * pDst - pointer to the complex-valued output signal, of length 2^order; 111 * must be aligned on a 32-byte boundary. 112 * 113 * Return Value: 114 * 115 * OMX_Sts_NoErr - no error 116 * OMX_Sts_BadArgErr - returned if one or more of the following conditions 117 * is true: 118 * - one or more of the following pointers is NULL: pSrc, pDst, or 119 * pFFTSpec. 120 * - pSrc or pDst is not 32-byte aligned 121 * 122 */ 123 124OMXResult omxSP_FFTInv_CToC_FC32_Sfs(const OMX_FC32* pSrc, 125 OMX_FC32* pDst, 126 const OMXFFTSpec_C_FC32* pFFTSpec) { 127 ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*)pFFTSpec; 128 int order; 129 long subFFTSize; 130 long subFFTNum; 131 OMX_FC32* pTwiddle; 132 OMX_FC32* pOut; 133 134 /* 135 * Check args are not NULL and the source and destination pointers 136 * are properly aligned. 137 */ 138 if (!validateParametersFC32(pSrc, pDst, spec)) 139 return OMX_Sts_BadArgErr; 140 141 order = fastlog2(spec->N); 142 143 subFFTSize = 1; 144 subFFTNum = spec->N; 145 pTwiddle = spec->pTwiddle; 146 pOut = spec->pBuf; 147 148 if (order > 3) { 149 OMX_FC32* argDst; 150 151 /* 152 * Set up argDst and pOut appropriately so that pOut = pDst for 153 * the very last FFT stage. 154 */ 155 if ((order & 2) == 0) { 156 argDst = pOut; 157 pOut = pDst; 158 } else { 159 argDst = pDst; 160 } 161 162 /* 163 * Odd order uses a radix 8 first stage; even order, a radix 4 164 * first stage. 165 */ 166 if (order & 1) { 167 armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace( 168 pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize); 169 } else { 170 armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace( 171 pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize); 172 } 173 174 /* 175 * Now use radix 4 stages to finish rest of the FFT 176 */ 177 if (subFFTNum >= 4) { 178 while (subFFTNum > 4) { 179 OMX_FC32* tmp; 180 181 armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace( 182 argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize); 183 /* 184 * Swap argDst and pOut 185 */ 186 tmp = pOut; 187 pOut = argDst; 188 argDst = tmp; 189 } 190 191 armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace( 192 argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize); 193 } 194 } else if (order == 3) { 195 armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( 196 pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize); 197 armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace( 198 pDst, pOut, pTwiddle, &subFFTNum, &subFFTSize); 199 armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace( 200 pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize); 201 } else if (order == 2) { 202 armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( 203 pSrc, pOut, pTwiddle, &subFFTNum, &subFFTSize); 204 armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace( 205 pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize); 206 } else { 207 /* Order = 1 */ 208 armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( 209 pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize); 210 } 211 212 ScaleFFTData(pDst, spec->N); 213 return OMX_Sts_NoErr; 214} 215