1/* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 */ 11 12#include <stdint.h> 13 14#include "dl/api/omxtypes.h" 15#include "dl/sp/api/mipsSP.h" 16 17OMXResult mips_FFTInv_CCSToR_F32_complex(const OMX_F32* pSrc, 18 OMX_F32* pDst, 19 const MIPSFFTSpec_R_FC32* pFFTSpec) { 20 OMX_U32 num_transforms, step; 21 /* Quarter, half and three-quarters of transform size. */ 22 OMX_U32 n1_4, n1_2, n3_4; 23 const OMX_F32* w_re_ptr; 24 const OMX_F32* w_im_ptr; 25 OMX_U32 fft_size = 1 << pFFTSpec->order; 26 OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf; 27 const OMX_FC32* p_src = (const OMX_FC32*)pSrc; 28 OMX_FC32* p_dst; 29 OMX_U16* p_bitrev = pFFTSpec->pBitRevInv; 30 OMX_F32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, factor; 31 OMX_F32 w_re, w_im; 32 33 w_re_ptr = pFFTSpec->pTwiddle + 1; 34 w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - 1; 35 36 /* 37 * Preliminary loop performing input adaptation due to computing real FFT 38 * through complex FFT of half the size. 39 */ 40 for (uint32_t n = 1; n < fft_size / 8; ++n) { 41 tmp1 = p_src[n].Re; 42 tmp2 = p_src[n].Im; 43 tmp3 = p_src[fft_size / 2 - n].Re; 44 tmp4 = p_src[fft_size / 2 - n].Im; 45 46 tmp5 = tmp1 + tmp3; 47 tmp6 = tmp1 - tmp3; 48 tmp7 = tmp2 + tmp4; 49 tmp8 = tmp2 - tmp4; 50 51 w_re = *w_re_ptr; 52 w_im = *w_im_ptr; 53 54 p_buf[p_bitrev[n]].Re = 0.5f * (tmp5 - w_re * tmp7 - w_im * tmp6); 55 p_buf[p_bitrev[n]].Im = 0.5f * (tmp8 + w_re * tmp6 - w_im * tmp7); 56 p_buf[p_bitrev[fft_size / 2 - n]].Re = 57 0.5f * (tmp5 + w_re * tmp7 + w_im * tmp6); 58 p_buf[p_bitrev[fft_size / 2 - n]].Im = 59 0.5f * (-tmp8 + w_re * tmp6 - w_im * tmp7); 60 61 tmp1 = p_src[n + fft_size / 4].Re; 62 tmp2 = p_src[n + fft_size / 4].Im; 63 tmp3 = p_src[fft_size / 4 - n].Re; 64 tmp4 = p_src[fft_size / 4 - n].Im; 65 66 tmp5 = tmp1 + tmp3; 67 tmp6 = tmp1 - tmp3; 68 tmp7 = tmp2 + tmp4; 69 tmp8 = tmp2 - tmp4; 70 71 p_buf[p_bitrev[n + fft_size / 4]].Re = 72 0.5f * (tmp5 + w_im * tmp7 - w_re * tmp6); 73 p_buf[p_bitrev[n + fft_size / 4]].Im = 74 0.5f * (tmp8 - w_im * tmp6 - w_re * tmp7); 75 p_buf[p_bitrev[fft_size / 4 - n]].Re = 76 0.5f * (tmp5 - w_im * tmp7 + w_re * tmp6); 77 p_buf[p_bitrev[fft_size / 4 - n]].Im = 78 0.5f * (-tmp8 - w_im * tmp6 - w_re * tmp7); 79 80 ++w_re_ptr; 81 --w_im_ptr; 82 } 83 tmp1 = p_src[fft_size / 8].Re; 84 tmp2 = p_src[fft_size / 8].Im; 85 tmp3 = p_src[3 * fft_size / 8].Re; 86 tmp4 = p_src[3 * fft_size / 8].Im; 87 88 tmp5 = tmp1 + tmp3; 89 tmp6 = tmp1 - tmp3; 90 tmp7 = tmp2 + tmp4; 91 tmp8 = tmp2 - tmp4; 92 93 w_re = *w_re_ptr; 94 w_im = *w_im_ptr; 95 96 p_buf[p_bitrev[fft_size / 8]].Re = 0.5f * (tmp5 - w_re * tmp7 - w_im * tmp6); 97 p_buf[p_bitrev[fft_size / 8]].Im = 0.5f * (tmp8 + w_re * tmp6 - w_im * tmp7); 98 p_buf[p_bitrev[3 * fft_size / 8]].Re = 99 0.5f * (tmp5 + w_re * tmp7 + w_im * tmp6); 100 p_buf[p_bitrev[3 * fft_size / 8]].Im = 101 0.5f * (-tmp8 + w_re * tmp6 - w_im * tmp7); 102 103 p_buf[p_bitrev[0]].Re = 0.5f * (p_src[0].Re + p_src[fft_size / 2].Re); 104 p_buf[p_bitrev[0]].Im = 0.5f * (p_src[0].Re - p_src[fft_size / 2].Re); 105 p_buf[p_bitrev[fft_size / 4]].Re = p_src[fft_size / 4].Re; 106 p_buf[p_bitrev[fft_size / 4]].Im = -p_src[fft_size / 4].Im; 107 108 /* 109 * Loop performing sub-transforms of size 4, 110 * which contain 2 butterfly operations. 111 */ 112 num_transforms = (SUBTRANSFORM_CONST >> (17 - pFFTSpec->order)) | 1; 113 for (uint32_t n = 0; n < num_transforms; ++n) { 114 OMX_U32 offset = pFFTSpec->pOffset[n] << 2; 115 OMX_FC32* p_tmp = p_buf + offset; 116 117 tmp1 = p_tmp[0].Re + p_tmp[1].Re; 118 tmp2 = p_tmp[0].Im + p_tmp[1].Im; 119 tmp3 = p_tmp[0].Re - p_tmp[1].Re; 120 tmp4 = p_tmp[0].Im - p_tmp[1].Im; 121 tmp5 = p_tmp[2].Re + p_tmp[3].Re; 122 tmp6 = p_tmp[2].Im + p_tmp[3].Im; 123 tmp8 = p_tmp[2].Im - p_tmp[3].Im; 124 tmp7 = p_tmp[2].Re - p_tmp[3].Re; 125 126 p_tmp[0].Re = tmp1 + tmp5; 127 p_tmp[2].Re = tmp1 - tmp5; 128 p_tmp[0].Im = tmp2 + tmp6; 129 p_tmp[2].Im = tmp2 - tmp6; 130 p_tmp[1].Re = tmp3 - tmp8; 131 p_tmp[3].Re = tmp3 + tmp8; 132 p_tmp[1].Im = tmp4 + tmp7; 133 p_tmp[3].Im = tmp4 - tmp7; 134 } 135 136 num_transforms = (num_transforms >> 1) | 1; 137 /* 138 * Loop performing sub-transforms of size 8, 139 * which contain four butterfly operations. 140 */ 141 for (uint32_t n = 0; n < num_transforms; ++n) { 142 OMX_U32 offset = pFFTSpec->pOffset[n] << 3; 143 OMX_FC32* p_tmp = p_buf + offset; 144 145 tmp1 = p_tmp[4].Re + p_tmp[5].Re; 146 tmp3 = p_tmp[6].Re + p_tmp[7].Re; 147 tmp2 = p_tmp[4].Im + p_tmp[5].Im; 148 tmp4 = p_tmp[6].Im + p_tmp[7].Im; 149 150 tmp5 = tmp1 + tmp3; 151 tmp7 = tmp1 - tmp3; 152 tmp6 = tmp2 + tmp4; 153 tmp8 = tmp2 - tmp4; 154 155 tmp1 = p_tmp[4].Re - p_tmp[5].Re; 156 tmp2 = p_tmp[4].Im - p_tmp[5].Im; 157 tmp3 = p_tmp[6].Re - p_tmp[7].Re; 158 tmp4 = p_tmp[6].Im - p_tmp[7].Im; 159 160 p_tmp[4].Re = p_tmp[0].Re - tmp5; 161 p_tmp[0].Re = p_tmp[0].Re + tmp5; 162 p_tmp[4].Im = p_tmp[0].Im - tmp6; 163 p_tmp[0].Im = p_tmp[0].Im + tmp6; 164 p_tmp[6].Re = p_tmp[2].Re + tmp8; 165 p_tmp[2].Re = p_tmp[2].Re - tmp8; 166 p_tmp[6].Im = p_tmp[2].Im - tmp7; 167 p_tmp[2].Im = p_tmp[2].Im + tmp7; 168 169 tmp5 = SQRT1_2 * (tmp1 - tmp2); 170 tmp7 = SQRT1_2 * (tmp3 + tmp4); 171 tmp6 = SQRT1_2 * (tmp1 + tmp2); 172 tmp8 = SQRT1_2 * (tmp4 - tmp3); 173 174 tmp1 = tmp5 + tmp7; 175 tmp3 = tmp5 - tmp7; 176 tmp2 = tmp6 + tmp8; 177 tmp4 = tmp6 - tmp8; 178 179 p_tmp[5].Re = p_tmp[1].Re - tmp1; 180 p_tmp[1].Re = p_tmp[1].Re + tmp1; 181 p_tmp[5].Im = p_tmp[1].Im - tmp2; 182 p_tmp[1].Im = p_tmp[1].Im + tmp2; 183 p_tmp[7].Re = p_tmp[3].Re + tmp4; 184 p_tmp[3].Re = p_tmp[3].Re - tmp4; 185 p_tmp[7].Im = p_tmp[3].Im - tmp3; 186 p_tmp[3].Im = p_tmp[3].Im + tmp3; 187 } 188 189 step = 1 << (pFFTSpec->order - 4); 190 n1_4 = 4; 191 /* Outer loop that loops over FFT stages. */ 192 for (uint32_t fft_stage = 4; fft_stage <= pFFTSpec->order - 2; ++fft_stage) { 193 n1_2 = 2 * n1_4; 194 n3_4 = 3 * n1_4; 195 num_transforms = (num_transforms >> 1) | 1; 196 for (uint32_t n = 0; n < num_transforms; ++n) { 197 OMX_U32 offset = pFFTSpec->pOffset[n] << fft_stage; 198 OMX_FC32* p_tmp = p_buf + offset; 199 200 tmp1 = p_tmp[n1_2].Re + p_tmp[n3_4].Re; 201 tmp2 = p_tmp[n1_2].Re - p_tmp[n3_4].Re; 202 tmp3 = p_tmp[n1_2].Im + p_tmp[n3_4].Im; 203 tmp4 = p_tmp[n1_2].Im - p_tmp[n3_4].Im; 204 205 p_tmp[n1_2].Re = p_tmp[0].Re - tmp1; 206 p_tmp[0].Re = p_tmp[0].Re + tmp1; 207 p_tmp[n1_2].Im = p_tmp[0].Im - tmp3; 208 p_tmp[0].Im = p_tmp[0].Im + tmp3; 209 p_tmp[n3_4].Re = p_tmp[n1_4].Re + tmp4; 210 p_tmp[n1_4].Re = p_tmp[n1_4].Re - tmp4; 211 p_tmp[n3_4].Im = p_tmp[n1_4].Im - tmp2; 212 p_tmp[n1_4].Im = p_tmp[n1_4].Im + tmp2; 213 214 w_re_ptr = pFFTSpec->pTwiddle + step; 215 w_im_ptr = 216 pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - step; 217 218 /* 219 * Loop performing split-radix butterfly operations for one sub-transform. 220 */ 221 for (uint32_t i = 1; i < n1_4; ++i) { 222 w_re = *w_re_ptr; 223 w_im = *w_im_ptr; 224 225 tmp1 = w_re * p_tmp[n1_2 + i].Re - w_im * p_tmp[n1_2 + i].Im; 226 tmp2 = w_re * p_tmp[n1_2 + i].Im + w_im * p_tmp[n1_2 + i].Re; 227 tmp3 = w_re * p_tmp[n3_4 + i].Re + w_im * p_tmp[n3_4 + i].Im; 228 tmp4 = w_re * p_tmp[n3_4 + i].Im - w_im * p_tmp[n3_4 + i].Re; 229 230 tmp5 = tmp1 + tmp3; 231 tmp1 = tmp1 - tmp3; 232 tmp6 = tmp2 + tmp4; 233 tmp2 = tmp2 - tmp4; 234 235 p_tmp[n1_2 + i].Re = p_tmp[i].Re - tmp5; 236 p_tmp[i].Re = p_tmp[i].Re + tmp5; 237 p_tmp[n1_2 + i].Im = p_tmp[i].Im - tmp6; 238 p_tmp[i].Im = p_tmp[i].Im + tmp6; 239 p_tmp[n3_4 + i].Re = p_tmp[n1_4 + i].Re + tmp2; 240 p_tmp[n1_4 + i].Re = p_tmp[n1_4 + i].Re - tmp2; 241 p_tmp[n3_4 + i].Im = p_tmp[n1_4 + i].Im - tmp1; 242 p_tmp[n1_4 + i].Im = p_tmp[n1_4 + i].Im + tmp1; 243 244 w_re_ptr += step; 245 w_im_ptr -= step; 246 } 247 } 248 step >>= 1; 249 n1_4 <<= 1; 250 } 251 252 /* Last FFT stage, write data to output buffer. */ 253 n1_2 = 2 * n1_4; 254 n3_4 = 3 * n1_4; 255 factor = (OMX_F32)2.0f / fft_size; 256 257 p_dst = (OMX_FC32*)pDst; 258 259 tmp1 = p_buf[n1_2].Re + p_buf[n3_4].Re; 260 tmp2 = p_buf[n1_2].Re - p_buf[n3_4].Re; 261 tmp3 = p_buf[n1_2].Im + p_buf[n3_4].Im; 262 tmp4 = p_buf[n1_2].Im - p_buf[n3_4].Im; 263 264 p_dst[n1_2].Re = factor * (p_buf[0].Re - tmp1); 265 p_dst[0].Re = factor * (p_buf[0].Re + tmp1); 266 p_dst[n1_2].Im = factor * (p_buf[0].Im - tmp3); 267 p_dst[0].Im = factor * (p_buf[0].Im + tmp3); 268 p_dst[n3_4].Re = factor * (p_buf[n1_4].Re + tmp4); 269 p_dst[n1_4].Re = factor * (p_buf[n1_4].Re - tmp4); 270 p_dst[n3_4].Im = factor * (p_buf[n1_4].Im - tmp2); 271 p_dst[n1_4].Im = factor * (p_buf[n1_4].Im + tmp2); 272 273 w_re_ptr = pFFTSpec->pTwiddle + step; 274 w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - step; 275 276 for (uint32_t i = 1; i < n1_4; ++i) { 277 w_re = *w_re_ptr; 278 w_im = *w_im_ptr; 279 280 tmp1 = w_re * p_buf[n1_2 + i].Re - w_im * p_buf[n1_2 + i].Im; 281 tmp2 = w_re * p_buf[n1_2 + i].Im + w_im * p_buf[n1_2 + i].Re; 282 tmp3 = w_re * p_buf[n3_4 + i].Re + w_im * p_buf[n3_4 + i].Im; 283 tmp4 = w_re * p_buf[n3_4 + i].Im - w_im * p_buf[n3_4 + i].Re; 284 285 tmp5 = tmp1 + tmp3; 286 tmp1 = tmp1 - tmp3; 287 tmp6 = tmp2 + tmp4; 288 tmp2 = tmp2 - tmp4; 289 290 p_dst[n1_2 + i].Re = factor * (p_buf[i].Re - tmp5); 291 p_dst[i].Re = factor * (p_buf[i].Re + tmp5); 292 p_dst[n1_2 + i].Im = factor * (p_buf[i].Im - tmp6); 293 p_dst[i].Im = factor * (p_buf[i].Im + tmp6); 294 p_dst[n3_4 + i].Re = factor * (p_buf[n1_4 + i].Re + tmp2); 295 p_dst[n1_4 + i].Re = factor * (p_buf[n1_4 + i].Re - tmp2); 296 p_dst[n3_4 + i].Im = factor * (p_buf[n1_4 + i].Im - tmp1); 297 p_dst[n1_4 + i].Im = factor * (p_buf[n1_4 + i].Im + tmp1); 298 299 w_re_ptr += step; 300 w_im_ptr -= step; 301 } 302 303 return OMX_Sts_NoErr; 304} 305