1da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com/* 2da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * 4da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * Use of this source code is governed by a BSD-style license 5da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * that can be found in the LICENSE file in the root of the source 6da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * tree. An additional intellectual property rights grant can be found 7da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * in the file PATENTS. All contributing project authors may 8da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * be found in the AUTHORS file in the root of the source tree. 9da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com * 10da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com */ 11da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 12da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com#include "dl/api/omxtypes.h" 13da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com#include "dl/sp/src/x86/x86SP_SSE_Math.h" 14da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 15da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.comvoid x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse( 16da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *in, 17da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_F32 *out, 18da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *twiddle, 19da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_INT n) { 20da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_INT n_by_2 = n >> 1; 21da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_INT n_by_4 = n >> 2; 22da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_INT n_mul_2 = n << 1; 23da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_INT i; 24da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 25da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_F32 *out0 = out; 26da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 27da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com for (i = 0; i < n_by_2; i += 8) { 28da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *tw1 = twiddle + i; 29da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *tw2 = tw1 + i; 30da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *tw3 = tw2 + i; 31da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *in0 = in + (i << 1); 32da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *in1 = in0 + 4; 33da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *in2 = in1 + 4; 34da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com const OMX_F32 *in3 = in2 + 4; 35da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_F32 *out1 = out0 + n_by_4; 36da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_F32 *out2 = out1 + n_by_4; 37da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com OMX_F32 *out3 = out2 + n_by_4; 38da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 39da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_tw1; 40da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_tw2; 41da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_tw3; 42da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t0; 43da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t1; 44da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t2; 45da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t3; 46da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t4; 47da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t5; 48da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t6; 49da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC v_t7; 50da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 51da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com v_tw1.real = _mm_set_ps(tw1[6], tw1[4], tw1[2], tw1[0]); 52da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com v_tw1.imag = _mm_set_ps( 53da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw1[6 + n_mul_2], 54da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw1[4 + n_mul_2], 55da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw1[2 + n_mul_2], 56da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw1[n_mul_2]); 57da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com v_tw2.real = _mm_set_ps(tw2[12], tw2[8], tw2[4], tw2[0]); 58da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com v_tw2.imag = _mm_set_ps( 59da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw2[12 + n_mul_2], 60da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw2[8 + n_mul_2], 61da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw2[4 + n_mul_2], 62da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw2[n_mul_2]); 63da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com v_tw3.real = _mm_set_ps(tw3[18], tw3[12], tw3[6], tw3[0]); 64da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com v_tw3.imag = _mm_set_ps( 65da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw3[18 + n_mul_2], 66da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw3[12 + n_mul_2], 67da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw3[6 + n_mul_2], 68da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com tw3[n_mul_2]); 69da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 70da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com VC_LOAD_MATRIX_TRANSPOSE(&v_t0, &v_t1, &v_t2, &v_t3, in0, in1, in2, in3, n); 71da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 72da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com RADIX4_INV_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7, 73da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com &v_tw1, &v_tw2, &v_tw3, 74da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com &v_t0, &v_t1, &v_t2, &v_t3); 75da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 76da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3, 77da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com &v_t4, &v_t5, &v_t6, &v_t7, n); 78da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com 79da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com out0 += 4; 80da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com } 81da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com} 82