1/* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 */ 11 12#include <stdint.h> 13 14#include "dl/api/omxtypes.h" 15#include "dl/sp/api/mipsSP.h" 16 17OMXResult mips_FFTFwd_RToCCS_F32_real(const OMX_F32* pSrc, 18 OMX_F32* pDst, 19 const MIPSFFTSpec_R_FC32* pFFTSpec) { 20 OMX_U32 num_transforms; 21 OMX_FC32* p_dst = (OMX_FC32*)pDst; 22 OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf; 23 OMX_F32 tmp1, tmp2, tmp3, tmp4; 24 const OMX_F32* w_re_ptr; 25 const OMX_F32* w_im_ptr; 26 27 /* Transform for order = 2. */ 28 /* TODO: hard-code the offsets for p_src. */ 29 if (pFFTSpec->order == 2) { 30 OMX_U16* p_bitrev = pFFTSpec->pBitRev; 31 32 tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]]; 33 tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]]; 34 tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]]; 35 tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]]; 36 37 p_dst[0].Re = tmp1 + tmp2; 38 p_dst[2].Re = tmp1 - tmp2; 39 p_dst[0].Im = 0.0f; 40 p_dst[2].Im = 0.0f; 41 p_dst[1].Re = tmp3; 42 p_dst[1].Im = -tmp4; 43 44 return OMX_Sts_NoErr; 45 } 46 47 /* 48 * Loop performing sub-transforms of size 4, which contain two butterfly 49 * operations. Reading the input signal from split-radix bitreverse offsets. 50 */ 51 num_transforms = (SUBTRANSFORM_CONST >> (16 - pFFTSpec->order)) | 1; 52 for (uint32_t n = 0; n < num_transforms; ++n) { 53 OMX_U32 offset = pFFTSpec->pOffset[n] << 2; 54 OMX_FC32* p_tmp = p_buf + offset; 55 OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset; 56 57 tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]]; 58 tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]]; 59 tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]]; 60 tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]]; 61 62 p_tmp[0].Re = tmp1 + tmp2; 63 p_tmp[2].Re = tmp1 - tmp2; 64 p_tmp[0].Im = 0.0f; 65 p_tmp[2].Im = 0.0f; 66 p_tmp[1].Re = tmp3; 67 p_tmp[3].Re = tmp3; 68 p_tmp[1].Im = -tmp4; 69 p_tmp[3].Im = tmp4; 70 } 71 72 /* 73 * Loop performing sub-transforms of size 8, 74 * which contain four butterfly operations. 75 */ 76 num_transforms >>= 1; 77 if (!num_transforms) { 78 /* 79 * Means the FFT size is equal to 8, so this is the last stage. Place the 80 * output to the destination buffer and avoid unnecessary computations. 81 */ 82 OMX_FC32* p_tmp = p_buf; 83 OMX_U16* p_bitrev = pFFTSpec->pBitRev; 84 OMX_F32 tmp5; 85 86 tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]]; 87 tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]]; 88 tmp3 = tmp1 + tmp2; 89 tmp4 = tmp1 - tmp2; 90 91 tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]]; 92 tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]]; 93 tmp5 = SQRT1_2 * (tmp1 + tmp2); 94 tmp1 = SQRT1_2 * (tmp1 - tmp2); 95 96 p_dst[4].Re = p_tmp[0].Re - tmp3; 97 p_dst[0].Re = p_tmp[0].Re + tmp3; 98 p_dst[0].Im = p_tmp[0].Im; 99 p_dst[4].Im = p_tmp[0].Im; 100 p_dst[2].Re = p_tmp[2].Re; 101 p_dst[2].Im = p_tmp[2].Im - tmp4; 102 p_dst[1].Re = p_tmp[1].Re + tmp5; 103 p_dst[1].Im = p_tmp[1].Im - tmp1; 104 p_dst[3].Re = p_tmp[3].Re - tmp5; 105 p_dst[3].Im = p_tmp[3].Im - tmp1; 106 107 return OMX_Sts_NoErr; 108 } 109 110 num_transforms |= 1; 111 112 for (uint32_t n = 0; n < num_transforms; ++n) { 113 OMX_U32 offset = pFFTSpec->pOffset[n] << 3; 114 OMX_FC32* p_tmp = p_buf + offset; 115 OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset; 116 OMX_F32 tmp5; 117 118 tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]]; 119 tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]]; 120 tmp3 = tmp1 + tmp2; 121 tmp4 = tmp1 - tmp2; 122 123 tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]]; 124 tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]]; 125 tmp5 = SQRT1_2 * (tmp1 + tmp2); 126 tmp1 = SQRT1_2 * (tmp1 - tmp2); 127 128 p_tmp[4].Re = p_tmp[0].Re - tmp3; 129 p_tmp[0].Re = p_tmp[0].Re + tmp3; 130 p_tmp[4].Im = p_tmp[0].Im; 131 p_tmp[6].Re = p_tmp[2].Re; 132 p_tmp[6].Im = p_tmp[2].Im + tmp4; 133 p_tmp[2].Im = p_tmp[2].Im - tmp4; 134 135 p_tmp[5].Re = p_tmp[1].Re - tmp5; 136 p_tmp[1].Re = p_tmp[1].Re + tmp5; 137 p_tmp[5].Im = p_tmp[1].Im + tmp1; 138 p_tmp[1].Im = p_tmp[1].Im - tmp1; 139 p_tmp[7].Re = p_tmp[3].Re + tmp5; 140 p_tmp[3].Re = p_tmp[3].Re - tmp5; 141 p_tmp[7].Im = p_tmp[3].Im + tmp1; 142 p_tmp[3].Im = p_tmp[3].Im - tmp1; 143 } 144 145 /* 146 * Last FFT stage, performing sub-transforms of size 16. Place the output 147 * into the destination buffer and avoid unnecessary computations. 148 */ 149 tmp1 = p_buf[8].Re + p_buf[12].Re; 150 tmp2 = p_buf[8].Re - p_buf[12].Re; 151 tmp3 = p_buf[8].Im + p_buf[12].Im; 152 tmp4 = p_buf[8].Im - p_buf[12].Im; 153 154 p_dst[8].Re = p_buf[0].Re - tmp1; 155 p_dst[0].Re = p_buf[0].Re + tmp1; 156 p_dst[8].Im = p_buf[0].Im - tmp3; 157 p_dst[0].Im = p_buf[0].Im + tmp3; 158 p_dst[4].Re = p_buf[4].Re + tmp4; 159 p_dst[4].Im = p_buf[4].Im - tmp2; 160 161 w_re_ptr = pFFTSpec->pTwiddle + 1; 162 w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - 1; 163 164 /* Loop performing split-radix butterfly operations. */ 165 for (uint32_t n = 1; n < 4; ++n) { 166 OMX_F32 tmp5, tmp6; 167 OMX_F32 w_re = *w_re_ptr; 168 OMX_F32 w_im = *w_im_ptr; 169 170 tmp1 = w_re * p_buf[8 + n].Re + w_im * p_buf[8 + n].Im; 171 tmp2 = w_re * p_buf[8 + n].Im - w_im * p_buf[8 + n].Re; 172 tmp3 = w_re * p_buf[12 + n].Re - w_im * p_buf[12 + n].Im; 173 tmp4 = w_re * p_buf[12 + n].Im + w_im * p_buf[12 + n].Re; 174 175 tmp5 = tmp1 + tmp3; 176 tmp1 = tmp1 - tmp3; 177 tmp6 = tmp2 + tmp4; 178 tmp2 = tmp2 - tmp4; 179 180 p_dst[n].Re = p_buf[n].Re + tmp5; 181 p_dst[n].Im = p_buf[n].Im + tmp6; 182 p_dst[4 + n].Re = p_buf[4 + n].Re + tmp2; 183 p_dst[4 + n].Im = p_buf[4 + n].Im - tmp1; 184 185 ++w_re_ptr; 186 --w_im_ptr; 187 } 188 return OMX_Sts_NoErr; 189} 190