/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */

#include "dl/api/omxtypes.h"
#include "dl/sp/src/x86/x86SP_SSE_Math.h"

15effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse(
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const OMX_F32 *in,
172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    OMX_F32 *out,
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const OMX_F32 *twiddle,
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    OMX_INT n) {
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  OMX_F32 *out0 =out;
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  OMX_INT i;
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (i = 0; i < n; i += 8) {
24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  VC v_tw;
25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  VC v_t0;
262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  VC v_t1;
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  VC v_temp;
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Load twiddle
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const OMX_F32 *tw = twiddle + i;
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    v_tw.real = _mm_set_ps(tw[6], tw[4], tw[2], tw[0]);
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const OMX_F32 * twi = tw + (n << 1);
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    v_tw.imag = _mm_set_ps(twi[6], twi[4], twi[2], twi[0]);
342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Load real part
362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const OMX_F32 *t = in + i;
372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    VC_LOAD_SHUFFLE(&(v_t0.real), &(v_t1.real), t);
382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Load imag part
402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    t = t + n;
41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    VC_LOAD_SHUFFLE(&(v_t0.imag), &(v_t1.imag), t);
42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    OMX_F32 *out1 = out0 + (n >> 1);
442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    VC_CONJ_MUL(&v_temp, &v_tw, &v_t1);
452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    VC_SUB_STORE_SPLIT(out1, &v_t0, &v_temp, n);
47a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    VC_ADD_STORE_SPLIT(out0, &v_t0, &v_temp, n);
491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    out0 += 4;
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)