/* Copyright (C) 2002 Jean-Marc Valin */
/**
   @file filters_sse.h
   @brief Various analysis/synthesis filters (SSE version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
3498913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#include <xmmintrin.h>
3698913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
/**
 * Order-10 pole-zero filter using SSE, one output sample per iteration.
 *
 * For every sample the code evaluates
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] + num[j]*x[i] - den[j]*y[i]      (j = 0..8)
 *    mem[9] =            num[9]*x[i] - den[9]*y[i]
 *
 * @param x     Input samples; N floats are read
 * @param _num  Numerator coefficients; 10 floats are read
 * @param _den  Denominator coefficients; 10 floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Not used by this routine (the order is fixed at 10)
 * @param _mem  Filter state; 10 floats are read on entry and written on exit
 */
void filter_mem16_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 num[3], den[3], mem[3];
   const __m128 zero = _mm_setzero_ps();
   int i;

   (void)ord;

   /* Elements 0..7 are fetched with two unaligned vector loads per array */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }
   /* Elements 8 and 9 sit in the low half of a third vector, upper half zero */
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[8], _num[9], 0, 0);
   den[2] = _mm_setr_ps(_den[8], _den[9], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;

      /* Output sample: x[i] + mem[0]; x[i] is read before y[i] is stored */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      /* Broadcast the output to all four lanes for the feedback products */
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* State 0..3: pull state 4 into lane 0, then rotate down one lane (0x39) */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* State 4..7: same move, with state 8 coming in from the tail vector */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));

      /* State 8..9: state 9 drops to lane 0 and an explicit zero follows it.
         The padding lanes are never recycled into the state, because they
         turn into NaN (0*Inf) once a non-finite sample has been seen. */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));
      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
      mem[2] = _mm_sub_ps(mem[2], _mm_mul_ps(yy, den[2]));
   }

   /* Write the ten state values back; only lanes 0 and 1 of the tail are live */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}
8998913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
/**
 * Order-8 pole-zero filter using SSE, one output sample per iteration.
 *
 * For every sample the code evaluates
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] + num[j]*x[i] - den[j]*y[i]      (j = 0..6)
 *    mem[7] =            num[7]*x[i] - den[7]*y[i]
 *
 * @param x     Input samples; N floats are read
 * @param _num  Numerator coefficients; 8 floats are read
 * @param _den  Denominator coefficients; 8 floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Not used by this routine (the order is fixed at 8)
 * @param _mem  Filter state; 8 floats are read on entry and written on exit
 */
void filter_mem16_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 num[2], den[2], mem[2];
   const __m128 zero = _mm_setzero_ps();
   int i;

   (void)ord;

   /* Two unaligned vector loads per array cover all eight elements */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;

      /* Output sample: x[i] + mem[0]; x[i] is read before y[i] is stored */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      /* Broadcast the output to all four lanes for the feedback products */
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* State 0..3: pull state 4 into lane 0, then rotate down one lane (0x39) */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* State 4..7: lane 0 was consumed above; overwrite it with an explicit
         zero (x - x would yield NaN for a non-finite lane) and rotate it to
         the top so that zero is what shifts into state 7. */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));
   }

   /* Write the eight state values back */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}
13198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
13298913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#define OVERRIDE_FILTER_MEM16
/**
 * Pole-zero filter entry point.
 *
 * Orders 10 and 8 are handed to the SSE kernels.  Any other positive order
 * is processed by a scalar loop that applies the same recursion:
 *    y[i]       = x[i] + mem[0]
 *    mem[j]     = mem[j+1] + num[j]*x[i] - den[j]*y[i]   (j = 0..ord-2)
 *    mem[ord-1] =            num[ord-1]*x[i] - den[ord-1]*y[i]
 * For ord <= 0 nothing is done and y is left untouched.
 *
 * @param x     Input samples; N floats are read
 * @param _num  Numerator coefficients; ord floats are read
 * @param _den  Denominator coefficients; ord floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Filter order
 * @param _mem  Filter state; ord floats, updated in place
 * @param stack Not used by this implementation
 */
void filter_mem16(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if(ord==10)
      filter_mem16_10(x, _num, _den, y, N, ord, _mem);
   else if (ord==8)
      filter_mem16_8(x, _num, _den, y, N, ord, _mem);
   else if (ord>0)
   {
      /* Scalar path so that other orders still produce output */
      int i, j;
      for (i=0;i<N;i++)
      {
         const float xi = x[i];
         const float yi = xi + _mem[0];
         for (j=0;j<ord-1;j++)
            _mem[j] = _mem[j+1] + _num[j]*xi - _den[j]*yi;
         _mem[ord-1] = _num[ord-1]*xi - _den[ord-1]*yi;
         y[i] = yi;
      }
   }
}
14198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
14298913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
14398913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
/**
 * Order-10 all-pole (IIR) filter using SSE, one output sample per iteration.
 *
 * For every sample the code evaluates
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] - den[j]*y[i]      (j = 0..8)
 *    mem[9] =          - den[9]*y[i]
 *
 * @param x     Input samples; N floats are read
 * @param _den  Denominator coefficients; 10 floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Not used by this routine (the order is fixed at 10)
 * @param _mem  Filter state; 10 floats are read on entry and written on exit
 */
void iir_mem16_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 den[3], mem[3];
   const __m128 zero = _mm_setzero_ps();
   int i;

   (void)ord;

   /* Elements 0..7 are fetched with two unaligned vector loads per array */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }
   /* Elements 8 and 9 sit in the low half of a third vector, upper half zero */
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   den[2] = _mm_setr_ps(_den[8], _den[9], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;

      /* Output sample: x[i] + mem[0]; x[i] is read before y[i] is stored */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      /* Broadcast the output to all four lanes for the feedback products */
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* State 0..3: pull state 4 into lane 0, then rotate down one lane (0x39) */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* State 4..7: same move, with state 8 coming in from the tail vector */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));

      /* State 8..9: state 9 drops to lane 0 and an explicit zero follows it,
         so the padding lanes are never recycled into the live state. */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));
      mem[2] = _mm_sub_ps(mem[2], _mm_mul_ps(yy, den[2]));
   }

   /* Write the ten state values back; only lanes 0 and 1 of the tail are live */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}
19198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
19298913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
/**
 * Order-8 all-pole (IIR) filter using SSE, one output sample per iteration.
 *
 * For every sample the code evaluates
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] - den[j]*y[i]      (j = 0..6)
 *    mem[7] =          - den[7]*y[i]
 *
 * @param x     Input samples; N floats are read
 * @param _den  Denominator coefficients; 8 floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Not used by this routine (the order is fixed at 8)
 * @param _mem  Filter state; 8 floats are read on entry and written on exit
 */
void iir_mem16_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 den[2], mem[2];
   const __m128 zero = _mm_setzero_ps();
   int i;

   (void)ord;

   /* Two unaligned vector loads per array cover all eight elements */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;

      /* Output sample: x[i] + mem[0]; x[i] is read before y[i] is stored */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      /* Broadcast the output to all four lanes for the feedback products */
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* State 0..3: pull state 4 into lane 0, then rotate down one lane (0x39) */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* State 4..7: overwrite the consumed lane 0 with an explicit zero
         (x - x would yield NaN for a non-finite lane) and rotate it to the
         top so that zero is what shifts into state 7. */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));
   }

   /* Write the eight state values back */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}
23198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#define OVERRIDE_IIR_MEM16
/**
 * All-pole (IIR) filter entry point.
 *
 * Orders 10 and 8 are handed to the SSE kernels.  Any other positive order
 * is processed by a scalar loop that applies the same recursion:
 *    y[i]       = x[i] + mem[0]
 *    mem[j]     = mem[j+1] - den[j]*y[i]   (j = 0..ord-2)
 *    mem[ord-1] =          - den[ord-1]*y[i]
 * For ord <= 0 nothing is done and y is left untouched.
 *
 * @param x     Input samples; N floats are read
 * @param _den  Denominator coefficients; ord floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Filter order
 * @param _mem  Filter state; ord floats, updated in place
 * @param stack Not used by this implementation
 */
void iir_mem16(const float *x, const float *_den, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if(ord==10)
      iir_mem16_10(x, _den, y, N, ord, _mem);
   else if (ord==8)
      iir_mem16_8(x, _den, y, N, ord, _mem);
   else if (ord>0)
   {
      /* Scalar path so that other orders still produce output */
      int i, j;
      for (i=0;i<N;i++)
      {
         const float yi = x[i] + _mem[0];
         for (j=0;j<ord-1;j++)
            _mem[j] = _mem[j+1] - _den[j]*yi;
         _mem[ord-1] = -(_den[ord-1]*yi);
         y[i] = yi;
      }
   }
}
24098913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
24198913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
/**
 * Order-10 all-zero (FIR) filter using SSE, one output sample per iteration.
 *
 * For every sample the code evaluates
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] + num[j]*x[i]      (j = 0..8)
 *    mem[9] =            num[9]*x[i]
 *
 * @param x     Input samples; N floats are read
 * @param _num  Numerator coefficients; 10 floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Not used by this routine (the order is fixed at 10)
 * @param _mem  Filter state; 10 floats are read on entry and written on exit
 */
void fir_mem16_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   __m128 num[3], mem[3];
   const __m128 zero = _mm_setzero_ps();
   int i;

   (void)ord;

   /* Elements 0..7 are fetched with two unaligned vector loads per array */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
   }
   /* Elements 8 and 9 sit in the low half of a third vector, upper half zero */
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[8], _num[9], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;

      /* Output sample: x[i] + mem[0]; x[i] is read before y[i] is stored.
         No feedback term exists, so the output is not broadcast. */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);

      /* State 0..3: pull state 4 into lane 0, then rotate down one lane (0x39) */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* State 4..7: same move, with state 8 coming in from the tail vector */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));

      /* State 8..9: state 9 drops to lane 0 and an explicit zero follows it.
         The padding lanes become NaN (0*Inf) after a non-finite sample, so
         they must not be recycled into the state; with a true zero the
         filter output is clean again once that sample has left the delay
         line. */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));
      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
   }

   /* Write the ten state values back; only lanes 0 and 1 of the tail are live */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}
28998913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
/**
 * Order-8 all-zero (FIR) filter using SSE, one output sample per iteration.
 *
 * For every sample the code evaluates
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] + num[j]*x[i]      (j = 0..6)
 *    mem[7] =            num[7]*x[i]
 *
 * @param x     Input samples; N floats are read
 * @param _num  Numerator coefficients; 8 floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Not used by this routine (the order is fixed at 8)
 * @param _mem  Filter state; 8 floats are read on entry and written on exit
 */
void fir_mem16_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   __m128 num[2], mem[2];
   const __m128 zero = _mm_setzero_ps();
   int i;

   (void)ord;

   /* Two unaligned vector loads per array cover all eight elements */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;

      /* Output sample: x[i] + mem[0]; x[i] is read before y[i] is stored.
         No feedback term exists, so the output is not broadcast. */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);

      /* State 0..3: pull state 4 into lane 0, then rotate down one lane (0x39) */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* State 4..7: overwrite the consumed lane 0 with an explicit zero and
         rotate it to the top.  Computing the zero as x - x gives NaN when
         the lane holds Inf/NaN, which would then stay in the state forever;
         with a true zero the output is clean again once a non-finite sample
         has left the delay line. */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
   }

   /* Write the eight state values back */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}
32898913fed6520d8849fb2e246be943e04474aefaThe Android Open Source Project
#define OVERRIDE_FIR_MEM16
/**
 * All-zero (FIR) filter entry point.
 *
 * Orders 10 and 8 are handed to the SSE kernels.  Any other positive order
 * is processed by a scalar loop that applies the same recursion:
 *    y[i]       = x[i] + mem[0]
 *    mem[j]     = mem[j+1] + num[j]*x[i]   (j = 0..ord-2)
 *    mem[ord-1] =            num[ord-1]*x[i]
 * For ord <= 0 nothing is done and y is left untouched.
 *
 * @param x     Input samples; N floats are read
 * @param _num  Numerator coefficients; ord floats are read
 * @param y     Output samples; N floats are written
 * @param N     Number of samples to process
 * @param ord   Filter order
 * @param _mem  Filter state; ord floats, updated in place
 * @param stack Not used by this implementation
 */
void fir_mem16(const float *x, const float *_num, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if(ord==10)
      fir_mem16_10(x, _num, y, N, ord, _mem);
   else if (ord==8)
      fir_mem16_8(x, _num, y, N, ord, _mem);
   else if (ord>0)
   {
      /* Scalar path so that other orders still produce output */
      int i, j;
      for (i=0;i<N;i++)
      {
         const float xi = x[i];
         const float yi = xi + _mem[0];
         for (j=0;j<ord-1;j++)
            _mem[j] = _mem[j+1] + _num[j]*xi;
         _mem[ord-1] = _num[ord-1]*xi;
         y[i] = yi;
      }
   }
}
337