1/* 2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#ifndef VPX_DSP_ARM_SUM_NEON_H_ 12#define VPX_DSP_ARM_SUM_NEON_H_ 13 14#include <arm_neon.h> 15 16#include "./vpx_config.h" 17#include "vpx/vpx_integer.h" 18 19static INLINE int32x2_t horizontal_add_int16x8(const int16x8_t a) { 20 const int32x4_t b = vpaddlq_s16(a); 21 const int64x2_t c = vpaddlq_s32(b); 22 return vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)), 23 vreinterpret_s32_s64(vget_high_s64(c))); 24} 25 26static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) { 27 const uint32x4_t b = vpaddlq_u16(a); 28 const uint64x2_t c = vpaddlq_u32(b); 29 return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), 30 vreinterpret_u32_u64(vget_high_u64(c))); 31} 32 33static INLINE uint32x2_t horizontal_add_long_uint16x8(const uint16x8_t a, 34 const uint16x8_t b) { 35 const uint32x4_t c = vpaddlq_u16(a); 36 const uint32x4_t d = vpadalq_u16(c, b); 37 const uint64x2_t e = vpaddlq_u32(d); 38 return vadd_u32(vreinterpret_u32_u64(vget_low_u64(e)), 39 vreinterpret_u32_u64(vget_high_u64(e))); 40} 41 42static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) { 43 const uint64x2_t b = vpaddlq_u32(a); 44 return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), 45 vreinterpret_u32_u64(vget_high_u64(b))); 46} 47#endif // VPX_DSP_ARM_SUM_NEON_H_ 48