/* Copyright (C) 2007-2008 Jean-Marc Valin
 * Copyright (C) 2008 Thorvald Natvig
 */
/**
   @file resample_sse.h
   @brief Resampler functions (SSE version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
3698913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
3798913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project#include <xmmintrin.h>
3898913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#define OVERRIDE_INNER_PRODUCT_SINGLE
/**
 * Single-precision dot product of two float arrays using SSE.
 *
 * Full groups of eight elements are multiplied and accumulated in four
 * parallel float lanes with unaligned loads; any remaining 1..7 elements
 * are added with scalar arithmetic so that no element past a[len-1] or
 * b[len-1] is ever read.
 *
 * @param a   First input array, at least len floats (no alignment required).
 * @param b   Second input array, at least len floats (no alignment required).
 * @param len Number of elements to process; may be zero.
 * @return Sum of a[i]*b[i] for i in [0, len).
 */
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   /* Largest multiple of 8 not exceeding len: the part safe for 2x4-wide loads. */
   const unsigned int vec_len = len & ~7u;
   float ret;
   __m128 sum = _mm_setzero_ps();

   for (i = 0; i < vec_len; i += 8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
   }

   /* Horizontal reduction: fold the upper pair onto the lower pair, then
      add lane 1 (broadcast by the 0x55 shuffle) into lane 0. */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail for lengths that are not a multiple of 8. */
   for (; i < len; i++)
      ret += a[i] * b[i];

   return ret;
}
5598913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
/**
 * Interpolated single-precision filter product using SSE.
 *
 * For every tap i, four consecutive coefficients starting at
 * b[i*oversample] are scaled by a[i] and accumulated lane-wise. The four
 * lane totals are then weighted by frac[0..3] and summed into one value.
 * Taps are processed two per iteration; an odd trailing tap is handled
 * separately so that a[len] and the coefficient row after the last one are
 * never read.
 *
 * @param a          Input samples, at least len floats.
 * @param b          Coefficient table; floats b[i*oversample .. i*oversample+3]
 *                   are read for each i in [0, len).
 * @param len        Number of taps; may be zero.
 * @param oversample Stride, in floats, between the coefficient rows of
 *                   consecutive taps.
 * @param frac       Four interpolation weights (read with an unaligned load,
 *                   not modified).
 * @return Sum over j in [0,4) of frac[j] * sum_i a[i]*b[i*oversample + j].
 */
static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   unsigned int i;
   /* Largest even count not exceeding len: the part covered by the pair loop. */
   const unsigned int paired_len = len & ~1u;
   float ret;
   __m128 sum = _mm_setzero_ps();
   const __m128 f = _mm_loadu_ps(frac);

   for (i = 0; i < paired_len; i += 2)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
   }
   /* Odd trailing tap. */
   if (i < len)
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));

   /* Apply the interpolation weights, then reduce the four lanes to one. */
   sum = _mm_mul_ps(f, sum);
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);
   return ret;
}
7398913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
7498913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project#ifdef _USE_SSE2
7598913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE

/**
 * Dot product of two float arrays with double-precision accumulation (SSE2).
 *
 * Each product is formed in single precision, widened to double and added
 * to a two-lane double accumulator. Full groups of eight elements use
 * unaligned vector loads; any remaining 1..7 elements are added with scalar
 * arithmetic so that no element past a[len-1] or b[len-1] is ever read.
 *
 * @param a   First input array, at least len floats (no alignment required).
 * @param b   Second input array, at least len floats (no alignment required).
 * @param len Number of elements to process; may be zero.
 * @return Sum of a[i]*b[i] for i in [0, len), accumulated as double.
 */
static inline double inner_product_double(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   /* Largest multiple of 8 not exceeding len: the part safe for 2x4-wide loads. */
   const unsigned int vec_len = len & ~7u;
   double ret;
   __m128d sum = _mm_setzero_pd();
   __m128 t;

   for (i = 0; i < vec_len; i += 8)
   {
      /* Lower two products, then the upper two (moved down by movehl). */
      t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }

   /* Add the high lane into the low lane. _mm_unpackhi_pd avoids the
      compiler-specific casts between __m128 and __m128d. */
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);

   /* Scalar tail; the product is kept in float first, as in the vector path. */
   for (; i < len; i++)
   {
      const float prod = a[i] * b[i];
      ret += prod;
   }

   return ret;
}
9898913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
/**
 * Interpolated filter product with double-precision accumulation (SSE2).
 *
 * For every tap i, four consecutive coefficients starting at
 * b[i*oversample] are scaled by a[i] in single precision, widened to double
 * and accumulated in two two-lane accumulators (lanes 0-1 and lanes 2-3).
 * The lane totals are then weighted by frac[0..3] and summed. Taps are
 * processed two per iteration; an odd trailing tap is handled separately so
 * that a[len] and the coefficient row after the last one are never read.
 *
 * @param a          Input samples, at least len floats.
 * @param b          Coefficient table; floats b[i*oversample .. i*oversample+3]
 *                   are read for each i in [0, len).
 * @param len        Number of taps; may be zero.
 * @param oversample Stride, in floats, between the coefficient rows of
 *                   consecutive taps.
 * @param frac       Four interpolation weights (read with an unaligned load,
 *                   not modified).
 * @return Sum over j in [0,4) of frac[j] * sum_i a[i]*b[i*oversample + j].
 */
static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   unsigned int i;
   /* Largest even count not exceeding len: the part covered by the pair loop. */
   const unsigned int paired_len = len & ~1u;
   double ret;
   __m128d sum;
   __m128d sum1 = _mm_setzero_pd();   /* lanes 0 and 1 */
   __m128d sum2 = _mm_setzero_pd();   /* lanes 2 and 3 */
   const __m128 f = _mm_loadu_ps(frac);
   const __m128d f1 = _mm_cvtps_pd(f);
   const __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f, f));
   __m128 t;

   for (i = 0; i < paired_len; i += 2)
   {
      t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }
   /* Odd trailing tap. */
   if (i < len)
   {
      t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }

   /* Apply the interpolation weights and merge the two accumulators. */
   sum1 = _mm_mul_pd(f1, sum1);
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);

   /* Add the high lane into the low lane. _mm_unpackhi_pd avoids the
      compiler-specific casts between __m128 and __m128d. */
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
}
12798913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
12898913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project#endif
129