1/*
2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 *
10 */
11
12#include <stdint.h>
13
14#include "dl/api/omxtypes.h"
15#include "dl/sp/api/mipsSP.h"
16
17OMXResult mips_FFTFwd_RToCCS_F32_real(const OMX_F32* pSrc,
18                                      OMX_F32* pDst,
19                                      const MIPSFFTSpec_R_FC32* pFFTSpec) {
20  OMX_U32 num_transforms;
21  OMX_FC32* p_dst = (OMX_FC32*)pDst;
22  OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf;
23  OMX_F32 tmp1, tmp2, tmp3, tmp4;
24  const OMX_F32* w_re_ptr;
25  const OMX_F32* w_im_ptr;
26
27  /* Transform for order = 2. */
28  /* TODO: hard-code the offsets for p_src. */
29  if (pFFTSpec->order == 2) {
30    OMX_U16* p_bitrev = pFFTSpec->pBitRev;
31
32    tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
33    tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
34    tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
35    tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
36
37    p_dst[0].Re = tmp1 + tmp2;
38    p_dst[2].Re = tmp1 - tmp2;
39    p_dst[0].Im = 0.0f;
40    p_dst[2].Im = 0.0f;
41    p_dst[1].Re = tmp3;
42    p_dst[1].Im = -tmp4;
43
44    return OMX_Sts_NoErr;
45  }
46
47  /*
48   * Loop performing sub-transforms of size 4, which contain two butterfly
49   * operations. Reading the input signal from split-radix bitreverse offsets.
50   */
51  num_transforms = (SUBTRANSFORM_CONST >> (16 - pFFTSpec->order)) | 1;
52  for (uint32_t n = 0; n < num_transforms; ++n) {
53    OMX_U32 offset = pFFTSpec->pOffset[n] << 2;
54    OMX_FC32* p_tmp = p_buf + offset;
55    OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
56
57    tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
58    tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
59    tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
60    tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
61
62    p_tmp[0].Re = tmp1 + tmp2;
63    p_tmp[2].Re = tmp1 - tmp2;
64    p_tmp[0].Im = 0.0f;
65    p_tmp[2].Im = 0.0f;
66    p_tmp[1].Re = tmp3;
67    p_tmp[3].Re = tmp3;
68    p_tmp[1].Im = -tmp4;
69    p_tmp[3].Im = tmp4;
70  }
71
72  /*
73   * Loop performing sub-transforms of size 8,
74   * which contain four butterfly operations.
75   */
76  num_transforms >>= 1;
77  if (!num_transforms) {
78    /*
79     * Means the FFT size is equal to 8, so this is the last stage. Place the
80     * output to the destination buffer and avoid unnecessary computations.
81     */
82    OMX_FC32* p_tmp = p_buf;
83    OMX_U16* p_bitrev = pFFTSpec->pBitRev;
84    OMX_F32 tmp5;
85
86    tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
87    tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
88    tmp3 = tmp1 + tmp2;
89    tmp4 = tmp1 - tmp2;
90
91    tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
92    tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
93    tmp5 = SQRT1_2 * (tmp1 + tmp2);
94    tmp1 = SQRT1_2 * (tmp1 - tmp2);
95
96    p_dst[4].Re = p_tmp[0].Re - tmp3;
97    p_dst[0].Re = p_tmp[0].Re + tmp3;
98    p_dst[0].Im = p_tmp[0].Im;
99    p_dst[4].Im = p_tmp[0].Im;
100    p_dst[2].Re = p_tmp[2].Re;
101    p_dst[2].Im = p_tmp[2].Im - tmp4;
102    p_dst[1].Re = p_tmp[1].Re + tmp5;
103    p_dst[1].Im = p_tmp[1].Im - tmp1;
104    p_dst[3].Re = p_tmp[3].Re - tmp5;
105    p_dst[3].Im = p_tmp[3].Im - tmp1;
106
107    return OMX_Sts_NoErr;
108  }
109
110  num_transforms |= 1;
111
112  for (uint32_t n = 0; n < num_transforms; ++n) {
113    OMX_U32 offset = pFFTSpec->pOffset[n] << 3;
114    OMX_FC32* p_tmp = p_buf + offset;
115    OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
116    OMX_F32 tmp5;
117
118    tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
119    tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
120    tmp3 = tmp1 + tmp2;
121    tmp4 = tmp1 - tmp2;
122
123    tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
124    tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
125    tmp5 = SQRT1_2 * (tmp1 + tmp2);
126    tmp1 = SQRT1_2 * (tmp1 - tmp2);
127
128    p_tmp[4].Re = p_tmp[0].Re - tmp3;
129    p_tmp[0].Re = p_tmp[0].Re + tmp3;
130    p_tmp[4].Im = p_tmp[0].Im;
131    p_tmp[6].Re = p_tmp[2].Re;
132    p_tmp[6].Im = p_tmp[2].Im + tmp4;
133    p_tmp[2].Im = p_tmp[2].Im - tmp4;
134
135    p_tmp[5].Re = p_tmp[1].Re - tmp5;
136    p_tmp[1].Re = p_tmp[1].Re + tmp5;
137    p_tmp[5].Im = p_tmp[1].Im + tmp1;
138    p_tmp[1].Im = p_tmp[1].Im - tmp1;
139    p_tmp[7].Re = p_tmp[3].Re + tmp5;
140    p_tmp[3].Re = p_tmp[3].Re - tmp5;
141    p_tmp[7].Im = p_tmp[3].Im + tmp1;
142    p_tmp[3].Im = p_tmp[3].Im - tmp1;
143  }
144
145  /*
146   * Last FFT stage,  performing sub-transforms of size 16. Place the output
147   * into the destination buffer and avoid unnecessary computations.
148   */
149  tmp1 = p_buf[8].Re + p_buf[12].Re;
150  tmp2 = p_buf[8].Re - p_buf[12].Re;
151  tmp3 = p_buf[8].Im + p_buf[12].Im;
152  tmp4 = p_buf[8].Im - p_buf[12].Im;
153
154  p_dst[8].Re = p_buf[0].Re - tmp1;
155  p_dst[0].Re = p_buf[0].Re + tmp1;
156  p_dst[8].Im = p_buf[0].Im - tmp3;
157  p_dst[0].Im = p_buf[0].Im + tmp3;
158  p_dst[4].Re = p_buf[4].Re + tmp4;
159  p_dst[4].Im = p_buf[4].Im - tmp2;
160
161  w_re_ptr = pFFTSpec->pTwiddle + 1;
162  w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - 1;
163
164  /* Loop performing split-radix butterfly operations. */
165  for (uint32_t n = 1; n < 4; ++n) {
166    OMX_F32 tmp5, tmp6;
167    OMX_F32 w_re = *w_re_ptr;
168    OMX_F32 w_im = *w_im_ptr;
169
170    tmp1 = w_re * p_buf[8 + n].Re + w_im * p_buf[8 + n].Im;
171    tmp2 = w_re * p_buf[8 + n].Im - w_im * p_buf[8 + n].Re;
172    tmp3 = w_re * p_buf[12 + n].Re - w_im * p_buf[12 + n].Im;
173    tmp4 = w_re * p_buf[12 + n].Im + w_im * p_buf[12 + n].Re;
174
175    tmp5 = tmp1 + tmp3;
176    tmp1 = tmp1 - tmp3;
177    tmp6 = tmp2 + tmp4;
178    tmp2 = tmp2 - tmp4;
179
180    p_dst[n].Re = p_buf[n].Re + tmp5;
181    p_dst[n].Im = p_buf[n].Im + tmp6;
182    p_dst[4 + n].Re = p_buf[4 + n].Re + tmp2;
183    p_dst[4 + n].Im = p_buf[4 + n].Im - tmp1;
184
185    ++w_re_ptr;
186    --w_im_ptr;
187  }
188  return OMX_Sts_NoErr;
189}
190