/* Copyright (C) 2007-2008 Jean-Marc Valin
 * Copyright (C) 2008      Thorvald Natvig
 */
/**
   @file resample_sse.h
   @brief Resampler functions (SSE version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* End of the license header. */

#include <xmmintrin.h>

#define OVERRIDE_INNER_PRODUCT_SINGLE
/**
 * Single-precision dot product of two float arrays, computed with SSE.
 *
 * Whole groups of 8 elements are multiplied and accumulated four lanes at a
 * time using unaligned loads; the four partial sums are then added together.
 * Any remaining 1..7 elements are accumulated with scalar arithmetic, so no
 * element at index >= len is ever read.
 *
 * @param a   First input array (at least len floats; no alignment required).
 * @param b   Second input array (at least len floats; no alignment required).
 * @param len Number of elements to multiply-accumulate.
 * @return    Sum over i in [0, len) of a[i] * b[i]; 0 when len is 0.
 */
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
   /* Number of elements covered by complete 8-wide iterations. */
   const unsigned int vec_len = len & ~7u;
   unsigned int i;
   float ret;
   __m128 sum = _mm_setzero_ps();

   for (i = 0; i < vec_len; i += 8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i), _mm_loadu_ps(b + i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i + 4), _mm_loadu_ps(b + i + 4)));
   }

   /* Horizontal add: fold the upper pair onto the lower pair, then lane 1 onto lane 0. */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail for lengths that are not a multiple of 8. */
   for (i = vec_len; i < len; i++)
      ret += a[i] * b[i];

   return ret;
}
5598913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project 5698913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE 5798913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Projectstatic inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { 5898913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project int i; 5998913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project float ret; 6098913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project __m128 sum = _mm_setzero_ps(); 6198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project __m128 f = _mm_loadu_ps(frac); 6298913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project for(i=0;i<len;i+=2) 6398913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project { 6498913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample))); 6598913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample))); 6698913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project } 6798913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project sum = _mm_mul_ps(f, sum); 6898913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 6998913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 7098913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project _mm_store_ss(&ret, sum); 7198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project return ret; 7298913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project} 7398913fed6520d8849fb2e246be943e04474aefaThe Android 
#ifdef _USE_SSE2
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE

/**
 * Dot product of two float arrays with double-precision accumulation (SSE2).
 *
 * Products are formed in single precision, four at a time, then widened to
 * double before being accumulated in two double lanes. Whole groups of 8
 * elements go through the vector path; any remaining 1..7 elements are
 * accumulated with scalar code (float product, double accumulation), so no
 * element at index >= len is read.
 *
 * @param a   First input array (at least len floats; no alignment required).
 * @param b   Second input array (at least len floats; no alignment required).
 * @param len Number of elements to multiply-accumulate.
 * @return    Sum over i in [0, len) of a[i] * b[i]; 0 when len is 0.
 */
static inline double inner_product_double(const float *a, const float *b, unsigned int len)
{
   /* Number of elements covered by complete 8-wide iterations. */
   const unsigned int vec_len = len & ~7u;
   unsigned int i;
   double ret;
   __m128d sum = _mm_setzero_pd();
   __m128 t;

   for (i = 0; i < vec_len; i += 8)
   {
      t = _mm_mul_ps(_mm_loadu_ps(a + i), _mm_loadu_ps(b + i));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));                     /* lanes 0,1 */
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));   /* lanes 2,3 */

      t = _mm_mul_ps(_mm_loadu_ps(a + i + 4), _mm_loadu_ps(b + i + 4));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }

   /* Add the high double onto the low one; _mm_unpackhi_pd avoids the
      compiler-specific casts between __m128 and __m128d. */
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);

   /* Scalar tail for lengths that are not a multiple of 8. */
   for (i = vec_len; i < len; i++)
      ret += (double)(a[i] * b[i]);

   return ret;
}

#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
/**
 * Four-lane weighted product used for interpolation, accumulated in double
 * precision (SSE2).
 *
 * For every i in [0, len), a[i] is broadcast and multiplied (in single
 * precision) by the four consecutive floats starting at b[i * oversample].
 * Lanes 0,1 are accumulated as doubles in one register and lanes 2,3 in
 * another. The totals are then multiplied by frac[0..3] (widened to double)
 * and added together.
 *
 * Taps are processed two per iteration; an odd final tap is handled on its
 * own so that a[len] and the b row after the last one are never read.
 *
 * @param a          Input samples (at least len floats).
 * @param b          Table read at offsets i * oversample .. i * oversample + 3.
 * @param len        Number of taps.
 * @param oversample Stride, in floats, between consecutive rows of b.
 * @param frac       Four per-lane weights (read only; no alignment required).
 * @return           sum_k frac[k] * sum_i a[i] * b[i * oversample + k], k = 0..3.
 */
static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac)
{
   unsigned int i;
   double ret;
   __m128d sum;
   __m128d sum1 = _mm_setzero_pd();   /* running totals for lanes 0,1 */
   __m128d sum2 = _mm_setzero_pd();   /* running totals for lanes 2,3 */
   const __m128 f = _mm_loadu_ps(frac);
   const __m128d f1 = _mm_cvtps_pd(f);
   const __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f, f));
   __m128 t;

   for (i = 0; i + 1 < len; i += 2)
   {
      t = _mm_mul_ps(_mm_load1_ps(a + i), _mm_loadu_ps(b + i * oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_load1_ps(a + i + 1), _mm_loadu_ps(b + (i + 1) * oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }

   /* Odd tap count: i == len - 1 here, one tap is still pending. */
   if (len & 1u)
   {
      t = _mm_mul_ps(_mm_load1_ps(a + i), _mm_loadu_ps(b + i * oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }

   /* Apply the per-lane weights, then reduce to a single double. */
   sum1 = _mm_mul_pd(f1, sum1);
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
}

#endif