/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_AUDIO_RESAMPLER_FIR_OPS_H
#define ANDROID_AUDIO_RESAMPLER_FIR_OPS_H

// Fixed-width integer types used by every helper below.
#include <stdint.h>

#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

#if defined(__SSSE3__)  // Should be supported in x86 ABI for both 32 & 64-bit.
#define USE_SSE (true)
#include <tmmintrin.h>
#else
#define USE_SSE (false)
#endif

// System headers are included above, before the namespace is opened, so their
// declarations never end up inside namespace android.
namespace android {

// Compile-time type equality trait.
//
// is_same<T, U>::value is false for two different types; the partial
// specialization below makes it true when both arguments name the same type.
template<typename T, typename U>
struct is_same
{
    static const bool value = false;
};

template<typename T>
struct is_same<T, T>  // partial specialization: chosen when both types match
{
    static const bool value = true;
};

// Multiplies a 32-bit value by one signed 16-bit half of a packed word.
//
//   left  nonzero selects the low 16 bits of vRL, zero selects the high 16 bits
//   in    signed 32-bit multiplicand
//   vRL   two signed 16-bit values packed into one 32-bit word
//
// The portable path returns (in * v) >> 16, evaluated in 64 bits.
// NOTE(review): the ARM path uses SMULTB/SMULTT, which multiply only the top
// halfword of "in" by the selected halfword, so the two paths can differ in
// the low-order bits of the result.
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // No '%' (commutative) constraint modifier on the inputs: SMULTB takes the
    // top halfword of its first source and the bottom halfword of its second,
    // so the compiler must never be allowed to exchange the two operands.
    if (left) {
        asm( "smultb %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    // Pick the requested halfword and reinterpret it as a signed 16-bit value.
    const int16_t v = static_cast<int16_t>(left ? vRL : (vRL >> 16));
    const int64_t wide = static_cast<int64_t>(in) * v;
    return static_cast<int32_t>(wide >> 16);
#endif
}

// Multiply-accumulate of two signed 16-bit values: returns a + in * v.
//
//   in, v  signed 16-bit factors
//   a      32-bit accumulator
//
// The accumulate wraps modulo 2^32 on overflow in both paths.
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // SMLABB multiplies the two bottom halfwords, so the operands commute.
    asm( "smlabb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    // A 16x16 product always fits in 32 bits; only the accumulate can
    // overflow.  Add in unsigned arithmetic so an overflow wraps (as SMLABB
    // does) instead of being signed-overflow undefined behavior.
    const int32_t product = static_cast<int32_t>(v) * in;
    return static_cast<int32_t>(
            static_cast<uint32_t>(a) + static_cast<uint32_t>(product));
#endif
}

// Multiply-accumulate of a signed 16-bit value by a signed 32-bit value:
// returns a + ((v * in) >> 16).
//
//   in  signed 16-bit factor
//   v   signed 32-bit factor
//   a   32-bit accumulator
//
// The accumulate wraps modulo 2^32 on overflow in both paths.
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // No '%' (commutative) constraint modifier: SMLAWB uses all 32 bits of its
    // first source but only the bottom halfword of its second, so "v" and
    // "in" are not interchangeable.
    asm( "smlawb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    // The 32x16 product shifted right by 16 always fits in 32 bits; only the
    // accumulate can overflow.  Add in unsigned arithmetic so an overflow
    // wraps instead of being signed-overflow undefined behavior.
    const int32_t scaled =
            static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 16);
    return static_cast<int32_t>(
            static_cast<uint32_t>(a) + static_cast<uint32_t>(scaled));
#endif
}

// Multiply-accumulate of two signed 32-bit values, keeping the high word of
// the product: returns a + ((v * in) >> 32).
//
//   in, v  signed 32-bit factors
//   a      32-bit accumulator
//
// The accumulate wraps modulo 2^32 on overflow in both paths.
static inline
int32_t mulAdd(int32_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // SMMLA is a full 32x32 multiply, so the operands commute.
    asm( "smmla %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    // The high word of a 32x32 product always fits in 32 bits; only the
    // accumulate can overflow.  Add in unsigned arithmetic so an overflow
    // wraps instead of being signed-overflow undefined behavior.
    const int32_t high =
            static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 32);
    return static_cast<int32_t>(
            static_cast<uint32_t>(a) + static_cast<uint32_t>(high));
#endif
}

// Multiply-accumulate of one signed 16-bit half of a packed word by a signed
// 16-bit value: returns a + v * s, where s is the selected half of inRL.
//
//   left  nonzero selects the low 16 bits of inRL, zero selects the high 16 bits
//   inRL  two signed 16-bit values packed into one 32-bit word
//   v     signed 16-bit factor
//   a     32-bit accumulator
//
// The accumulate wraps modulo 2^32 on overflow.
static inline
int32_t mulAddRL(int left, uint32_t inRL, int16_t v, int32_t a)
{
#if 0 // USE_INLINE_ASSEMBLY Seems to fail with Clang b/34110890
    int32_t out;
    // No '%' (commutative) constraint modifier: SMLABT takes the bottom
    // halfword of its first source and the top halfword of its second, so the
    // operands must not be exchanged.
    if (left) {
        asm( "smlabb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    } else {
        asm( "smlabt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    }
    return out;
#else
    // Pick the requested halfword and reinterpret it as a signed 16-bit value.
    const int16_t s = static_cast<int16_t>(left ? inRL : (inRL >> 16));
    // A 16x16 product always fits in 32 bits; only the accumulate can
    // overflow.  Add in unsigned arithmetic so an overflow wraps instead of
    // being signed-overflow undefined behavior.
    const int32_t product = static_cast<int32_t>(v) * s;
    return static_cast<int32_t>(
            static_cast<uint32_t>(a) + static_cast<uint32_t>(product));
#endif
}

// Multiply-accumulate of one signed 16-bit half of a packed word by a signed
// 32-bit value: returns a + ((v * s) >> 16), where s is the selected half of
// inRL.
//
//   left  nonzero selects the low 16 bits of inRL, zero selects the high 16 bits
//   inRL  two signed 16-bit values packed into one 32-bit word
//   v     signed 32-bit factor
//   a     32-bit accumulator
//
// The accumulate wraps modulo 2^32 on overflow.
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
#if 0 // USE_INLINE_ASSEMBLY Seems to fail with Clang b/34110890
    int32_t out;
    // No '%' (commutative) constraint modifier: SMLAWB/SMLAWT use all 32 bits
    // of the first source but only one halfword of the second, so "v" and
    // "inRL" are not interchangeable.
    if (left) {
        asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    } else {
        asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    }
    return out;
#else
    // Pick the requested halfword and reinterpret it as a signed 16-bit value.
    const int16_t s = static_cast<int16_t>(left ? inRL : (inRL >> 16));
    // The 32x16 product shifted right by 16 always fits in 32 bits; only the
    // accumulate can overflow.  Add in unsigned arithmetic so an overflow
    // wraps instead of being signed-overflow undefined behavior.
    const int32_t scaled =
            static_cast<int32_t>((static_cast<int64_t>(v) * s) >> 16);
    return static_cast<int32_t>(
            static_cast<uint32_t>(a) + static_cast<uint32_t>(scaled));
#endif
}

} // namespace android

#endif /*ANDROID_AUDIO_RESAMPLER_FIR_OPS_H*/