1/*
2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <arm_neon.h>
12
13#include "dl/api/omxtypes.h"
14#include "dl/sp/api/armSP.h"
15#include "dl/sp/api/omxSP.h"
16
17extern void armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
18    const OMX_FC32* pSrc,
19    OMX_FC32* pDst,
20    OMX_FC32* pTwiddle,
21    long* subFFTNum,
22    long* subFFTSize);
23
24extern void armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
25    const OMX_FC32* pSrc,
26    OMX_FC32* pDst,
27    OMX_FC32* pTwiddle,
28    long* subFFTNum,
29    long* subFFTSize);
30
31extern void armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
32    const OMX_FC32* pSrc,
33    OMX_FC32* pDst,
34    OMX_FC32* pTwiddle,
35    long* subFFTNum,
36    long* subFFTSize);
37
38extern void armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
39    const OMX_FC32* pSrc,
40    OMX_FC32* pDst,
41    OMX_FC32* pTwiddle,
42    long* subFFTNum,
43    long* subFFTSize);
44
45extern void armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
46    const OMX_FC32* pSrc,
47    OMX_FC32* pDst,
48    OMX_FC32* pTwiddle,
49    long* subFFTNum,
50    long* subFFTSize);
51
52extern void armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
53    const OMX_FC32* pSrc,
54    OMX_FC32* pDst,
55    OMX_FC32* pTwiddle,
56    long* subFFTNum,
57    long* subFFTSize);
58
59extern void armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
60    const OMX_FC32* pSrc,
61    OMX_FC32* pDst,
62    OMX_FC32* pTwiddle,
63    long* subFFTNum,
64    long* subFFTSize);
65
66/*
67 * Scale FFT data by 1/|length|. |length| must be a power of two
68 */
69static inline ScaleFFTData(OMX_FC32* fftData, unsigned length) {
70  float32_t* data = (float32_t*)fftData;
71  float32_t scale = 1.0f / length;
72
73  /*
74   * Do two complex elements at a time because |length| is always
75   * greater than or equal to 2 (order >= 1)
76   */
77  do {
78    float32x4_t x = vld1q_f32(data);
79
80    length -= 2;
81    x = vmulq_n_f32(x, scale);
82    vst1q_f32(data, x);
83    data += 4;
84  } while (length > 0);
85}
86
87/**
 * Function:  omxSP_FFTInv_CToC_FC32_Sfs
89 *
90 * Description:
 * This function computes an inverse FFT for a complex signal of
 * length 2^order, where 1 <= order <= 15. Transform length is
93 * determined by the specification structure, which must be
94 * initialized prior to calling the FFT function using the appropriate
95 * helper, i.e., <FFTInit_C_FC32>. The relationship between the input
96 * and output sequences can be expressed in terms of the IDFT, i.e.:
97 *
98 *     x[n] = SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
99 *     n=0,1,2,...N-1
100 *     N=2^order.
101 *
102 * Input Arguments:
103 *   pSrc - pointer to the complex-valued input signal, of length 2^order ;
104 *          must be aligned on a 32-byte boundary.
105 *   pFFTSpec - pointer to the preallocated and initialized specification
106 *            structure
107 *
 * Output Arguments:
 *   pDst - pointer to the complex-valued output signal, of length 2^order;
 *          must be aligned on a 32-byte boundary.
112 *
113 * Return Value:
114 *
115 *    OMX_Sts_NoErr - no error
116 *    OMX_Sts_BadArgErr - returned if one or more of the following conditions
117 *              is true:
118 *    -   one or more of the following pointers is NULL: pSrc, pDst, or
119 *              pFFTSpec.
120 *    -   pSrc or pDst is not 32-byte aligned
121 *
122 */
123
124OMXResult omxSP_FFTInv_CToC_FC32_Sfs(const OMX_FC32* pSrc,
125                                     OMX_FC32* pDst,
126                                     const OMXFFTSpec_C_FC32* pFFTSpec) {
127  ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*)pFFTSpec;
128  int order;
129  long subFFTSize;
130  long subFFTNum;
131  OMX_FC32* pTwiddle;
132  OMX_FC32* pOut;
133
134  /*
135   * Check args are not NULL and the source and destination pointers
136   * are properly aligned.
137   */
138  if (!validateParametersFC32(pSrc, pDst, spec))
139    return OMX_Sts_BadArgErr;
140
141  order = fastlog2(spec->N);
142
143  subFFTSize = 1;
144  subFFTNum = spec->N;
145  pTwiddle = spec->pTwiddle;
146  pOut = spec->pBuf;
147
148  if (order > 3) {
149    OMX_FC32* argDst;
150
151    /*
152     * Set up argDst and pOut appropriately so that pOut = pDst for
153     * the very last FFT stage.
154     */
155    if ((order & 2) == 0) {
156      argDst = pOut;
157      pOut = pDst;
158    } else {
159      argDst = pDst;
160    }
161
162    /*
163     * Odd order uses a radix 8 first stage; even order, a radix 4
164     * first stage.
165     */
166    if (order & 1) {
167      armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
168          pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
169    } else {
170      armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
171          pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
172    }
173
174    /*
175     * Now use radix 4 stages to finish rest of the FFT
176     */
177    if (subFFTNum >= 4) {
178      while (subFFTNum > 4) {
179        OMX_FC32* tmp;
180
181        armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
182            argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
183        /*
184         * Swap argDst and pOut
185         */
186        tmp = pOut;
187        pOut = argDst;
188        argDst = tmp;
189      }
190
191      armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
192          argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
193    }
194  } else if (order == 3) {
195    armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
196        pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize);
197    armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
198        pDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
199    armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
200        pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize);
201  } else if (order == 2) {
202    armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
203        pSrc, pOut, pTwiddle, &subFFTNum, &subFFTSize);
204    armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
205        pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize);
206  } else {
207    /* Order = 1 */
208    armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
209        pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize);
210  }
211
212  ScaleFFTData(pDst, spec->N);
213  return OMX_Sts_NoErr;
214}
215