16adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha/* Copyright (C) 2007-2008 Jean-Marc Valin 26adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha * Copyright (C) 2008 Thorvald Natvig 36adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha * Copyright (C) 2011 Jyri Sarha, Texas Instruments 46adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha */ 56adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha/** 66adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha @file resample_neon.h 76adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha @brief Resampler functions (NEON version) 86adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha*/ 96adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha/* 106adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha Redistribution and use in source and binary forms, with or without 116adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha modification, are permitted provided that the following conditions 126adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha are met: 136adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha 146adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha - Redistributions of source code must retain the above copyright 156adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha notice, this list of conditions and the following disclaimer. 166adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha 176adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha - Redistributions in binary form must reproduce the above copyright 186adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha notice, this list of conditions and the following disclaimer in the 196adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha documentation and/or other materials provided with the distribution. 206adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha 216adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha - Neither the name of the Xiph.org Foundation nor the names of its 226adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha contributors may be used to endorse or promote products derived from 236adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha this software without specific prior written permission. 246adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha 256adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 266adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 276adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 286adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 296adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 306adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 316adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 326adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 336adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 346adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 356adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 366adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha*/ 376adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha 386adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha#include <arm_neon.h> 396adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha 406adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha#ifdef FIXED_POINT 41ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarhastatic inline int32_t saturate_32bit_to_16bit(int32_t a) { 42ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha int32_t ret; 43ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha asm volatile ("vmov.s32 d24[0], %[a]\n" 44ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "vqmovn.s32 d24, q12\n" 45ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "vmov.s16 %[ret], d24[0]\n" 46ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : [ret] "=&r" (ret) 47ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : [a] "r" (a) 48ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : "q12", "d24", "d25" ); 49ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha return ret; 50ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha} 51ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha#undef WORD2INT 52ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha#define WORD2INT(x) (saturate_32bit_to_16bit(x)) 5335318dd943257760780f28b95b6ca99a79886c3dJyri Sarha 546adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha#define OVERRIDE_INNER_PRODUCT_SINGLE 556adacb80950e35de9df0a1d7a060aba795712494Jyri Sarhastatic inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) 566adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha{ 576adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha int32_t ret; 5835318dd943257760780f28b95b6ca99a79886c3dJyri Sarha uint32_t remainder = len % 16; 5935318dd943257760780f28b95b6ca99a79886c3dJyri Sarha len = len - remainder; 6035318dd943257760780f28b95b6ca99a79886c3dJyri Sarha 6135318dd943257760780f28b95b6ca99a79886c3dJyri Sarha asm volatile (" cmp %[len], #0\n" 6235318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " bne 1f\n" 6335318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d16}, [%[a]]!\n" 6435318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d20}, [%[b]]!\n" 6535318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " subs %[remainder], %[remainder], #4\n" 6635318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmull.s16 q0, d16, d20\n" 6735318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " beq 5f\n" 6835318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " b 4f\n" 6935318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "1:" 7035318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d16, d17, d18, d19}, [%[a]]!\n" 7135318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d20, d21, d22, d23}, [%[b]]!\n" 7235318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " subs %[len], %[len], #16\n" 7335318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmull.s16 q0, d16, d20\n" 7435318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d17, d21\n" 7535318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d18, d22\n" 7635318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d19, d23\n" 7735318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " beq 3f\n" 7835318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "2:" 7935318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d16, d17, d18, d19}, [%[a]]!\n" 8035318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d20, d21, d22, d23}, [%[b]]!\n" 8135318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " subs %[len], %[len], #16\n" 8235318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d16, d20\n" 8335318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d17, d21\n" 8435318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d18, d22\n" 8535318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d19, d23\n" 8635318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " bne 2b\n" 8735318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "3:" 8835318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " cmp %[remainder], #0\n" 8935318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " beq 5f\n" 9035318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "4:" 9135318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d16}, [%[a]]!\n" 9235318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vld1.16 {d20}, [%[b]]!\n" 9335318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " subs %[remainder], %[remainder], #4\n" 9435318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmlal.s16 q0, d16, d20\n" 9535318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " bne 4b\n" 9635318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "5:" 9735318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vaddl.s32 q0, d0, d1\n" 9835318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vadd.s64 d0, d0, d1\n" 9935318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vqmovn.s64 d0, q0\n" 10035318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vqrshrn.s32 d0, q0, #15\n" 10135318dd943257760780f28b95b6ca99a79886c3dJyri Sarha " vmov.s16 %[ret], d0[0]\n" 10235318dd943257760780f28b95b6ca99a79886c3dJyri Sarha : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b), 10335318dd943257760780f28b95b6ca99a79886c3dJyri Sarha [len] "+r" (len), [remainder] "+r" (remainder) 10435318dd943257760780f28b95b6ca99a79886c3dJyri Sarha : 10535318dd943257760780f28b95b6ca99a79886c3dJyri Sarha : "cc", "q0", 10635318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "d16", "d17", "d18", "d19", 10735318dd943257760780f28b95b6ca99a79886c3dJyri Sarha "d20", "d21", "d22", "d23"); 1086adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha return ret; 1096adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha} 11035318dd943257760780f28b95b6ca99a79886c3dJyri Sarha 111ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha#elif defined(FLOATING_POINT) 112ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha 113ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarhastatic inline int32_t saturate_float_to_16bit(float a) { 114ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha int32_t ret; 115ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha asm ("vmov.f32 d24[0], %[a]\n" 116ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "vcvt.s32.f32 d24, d24, #15\n" 117ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "vqrshrn.s32 d24, q12, #15\n" 118ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "vmov.s16 %[ret], d24[0]\n" 119ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : [ret] "=&r" (ret) 120ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : [a] "r" (a) 121ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : "q12", "d24", "d25" ); 122ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha return ret; 123ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha} 124ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha#undef WORD2INT 125ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha#define WORD2INT(x) (saturate_float_to_16bit(x)) 126ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha 127ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha#define OVERRIDE_INNER_PRODUCT_SINGLE 128ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarhastatic inline float inner_product_single(const float *a, const float *b, unsigned int len) 129ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha{ 130ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha float ret; 131ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha uint32_t remainder = len % 16; 132ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha len = len - remainder; 133ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha 134ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha asm volatile (" cmp %[len], #0\n" 135ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " bne 1f\n" 136ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q4}, [%[a]]!\n" 137ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q8}, [%[b]]!\n" 138ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " subs %[remainder], %[remainder], #4\n" 139ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmul.f32 q0, q4, q8\n" 140ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " beq 5f\n" 141ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " b 4f\n" 142ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "1:" 143ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q4, q5}, [%[a]]!\n" 144ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q8, q9}, [%[b]]!\n" 145ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q6, q7}, [%[a]]!\n" 146ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q10, q11}, [%[b]]!\n" 147ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " subs %[len], %[len], #16\n" 148ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmul.f32 q0, q4, q8\n" 149ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmul.f32 q1, q5, q9\n" 150ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmul.f32 q2, q6, q10\n" 151ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmul.f32 q3, q7, q11\n" 152ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " beq 3f\n" 153ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "2:" 154ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q4, q5}, [%[a]]!\n" 155ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q8, q9}, [%[b]]!\n" 156ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q6, q7}, [%[a]]!\n" 157ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q10, q11}, [%[b]]!\n" 158ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " subs %[len], %[len], #16\n" 159ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmla.f32 q0, q4, q8\n" 160ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmla.f32 q1, q5, q9\n" 161ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmla.f32 q2, q6, q10\n" 162ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmla.f32 q3, q7, q11\n" 163ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " bne 2b\n" 164ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "3:" 165ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vadd.f32 q4, q0, q1\n" 166ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vadd.f32 q5, q2, q3\n" 167ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vadd.f32 q0, q4, q5\n" 168ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " cmp %[remainder], #0\n" 169ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " beq 5f\n" 170ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "4:" 171ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q6}, [%[a]]!\n" 172ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vld1.32 {q10}, [%[b]]!\n" 173ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " subs %[remainder], %[remainder], #4\n" 174ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmla.f32 q0, q6, q10\n" 175ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " bne 4b\n" 176ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha "5:" 177ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vadd.f32 d0, d0, d1\n" 178ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vpadd.f32 d0, d0, d0\n" 179ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha " vmov.f32 %[ret], d0[0]\n" 180ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b), 181ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha [len] "+l" (len), [remainder] "+l" (remainder) 182ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : 183ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha : "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", 184ebe6230a7f7c69f5a4389f2b09b7b19ef9e94f32Jyri Sarha "q9", "q10", "q11"); 185ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha return ret; 186ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha} 187ecb2da017f5b5101d9414b398aced34de623b9ebJyri Sarha 1886adacb80950e35de9df0a1d7a060aba795712494Jyri Sarha#endif 189