/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */
11da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
12da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com#include "dl/api/omxtypes.h"
13da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com#include "dl/sp/src/x86/x86SP_SSE_Math.h"
14da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
15da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.comvoid x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse(
16da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in,
17da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out,
18da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *twiddle,
19da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_INT n) {
20da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT n_by_2 = n >> 1;
21da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT n_by_4 = n >> 2;
22da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT n_mul_2 = n << 1;
23da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT i;
24da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
25da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_F32 *out0 = out;
26da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
27da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  for (i = 0; i < n_by_2; i += 8) {
28da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *tw1 = twiddle + i;
29da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *tw2 = tw1 + i;
30da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *tw3 = tw2 + i;
31da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in0 = in + (i << 1);
32da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in1 = in0 + 4;
33da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in2 = in1 + 4;
34da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in3 = in2 + 4;
35da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out1 = out0 + n_by_4;
36da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out2 = out1 + n_by_4;
37da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out3 = out2 + n_by_4;
38da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
39da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_tw1;
40da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_tw2;
41da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_tw3;
42da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t0;
43da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t1;
44da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t2;
45da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t3;
46da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t4;
47da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t5;
48da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t6;
49da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC v_t7;
50da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
51da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    v_tw1.real = _mm_set_ps(tw1[6], tw1[4], tw1[2], tw1[0]);
52da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    v_tw1.imag = _mm_set_ps(
53da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw1[6 + n_mul_2],
54da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw1[4 + n_mul_2],
55da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw1[2 + n_mul_2],
56da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw1[n_mul_2]);
57da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    v_tw2.real = _mm_set_ps(tw2[12], tw2[8], tw2[4], tw2[0]);
58da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    v_tw2.imag = _mm_set_ps(
59da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw2[12 + n_mul_2],
60da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw2[8 + n_mul_2],
61da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw2[4 + n_mul_2],
62da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw2[n_mul_2]);
63da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    v_tw3.real = _mm_set_ps(tw3[18], tw3[12], tw3[6], tw3[0]);
64da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    v_tw3.imag = _mm_set_ps(
65da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw3[18 + n_mul_2],
66da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw3[12 + n_mul_2],
67da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw3[6 + n_mul_2],
68da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com        tw3[n_mul_2]);
69da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
70da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    VC_LOAD_MATRIX_TRANSPOSE(&v_t0, &v_t1, &v_t2, &v_t3, in0, in1, in2, in3, n);
71da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
72da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    RADIX4_INV_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
73da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com                         &v_tw1, &v_tw2, &v_tw3,
74da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com                         &v_t0, &v_t1, &v_t2, &v_t3);
75da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
76da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3,
77da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com                               &v_t4, &v_t5, &v_t6, &v_t7, n);
78da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
79da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out0 += 4;
80da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  }
81da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com}
82