1/*
2 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef VPX_DSP_ARM_SUM_NEON_H_
12#define VPX_DSP_ARM_SUM_NEON_H_
13
14#include <arm_neon.h>
15
16#include "./vpx_config.h"
17#include "vpx/vpx_integer.h"
18
// Reduces the eight signed 16-bit lanes of |a| to a single total.
//
// The lanes are pairwise-added with widening twice (16 -> 32 -> 64 bits),
// leaving two 64-bit partial sums. Each partial sum is then viewed as a pair
// of 32-bit lanes and the two views are added lane-wise, so the lane that
// overlays the low halves holds the low 32 bits of the total (lane 0 with a
// little-endian lane layout; TODO confirm for big-endian targets). The other
// lane only holds the sum of the upper halves and is not the total.
static INLINE int32x2_t horizontal_add_int16x8(const int16x8_t a) {
  const int32x4_t sum_s32 = vpaddlq_s16(a);
  const int64x2_t sum_s64 = vpaddlq_s32(sum_s32);
  const int32x2_t lo_half = vreinterpret_s32_s64(vget_low_s64(sum_s64));
  const int32x2_t hi_half = vreinterpret_s32_s64(vget_high_s64(sum_s64));
  return vadd_s32(lo_half, hi_half);
}
25
// Reduces the eight unsigned 16-bit lanes of |a| to a single total.
//
// Two widening pairwise adds (16 -> 32 -> 64 bits) leave two 64-bit partial
// sums. Each is reinterpreted as a pair of 32-bit lanes and the pairs are
// added lane-wise, so the lane overlaying the low halves holds the low 32 bits
// of the total (lane 0 with a little-endian lane layout; TODO confirm for
// big-endian targets). The other lane is the sum of the upper halves.
static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) {
  const uint32x4_t sum_u32 = vpaddlq_u16(a);
  const uint64x2_t sum_u64 = vpaddlq_u32(sum_u32);
  const uint32x2_t lo_half = vreinterpret_u32_u64(vget_low_u64(sum_u64));
  const uint32x2_t hi_half = vreinterpret_u32_u64(vget_high_u64(sum_u64));
  return vadd_u32(lo_half, hi_half);
}
32
// Reduces the sixteen unsigned 16-bit lanes of |a| and |b| together to a
// single total.
//
// |a| is widened to 32-bit pairwise sums first and the pairwise sums of |b|
// are then accumulated onto those, so the two inputs are only ever combined at
// 32-bit width rather than at 16 bits. The four 32-bit sums are widened once
// more to two 64-bit partial sums, which are viewed as 32-bit lane pairs and
// added lane-wise. The lane overlaying the low halves holds the low 32 bits of
// the total (lane 0 with a little-endian lane layout; TODO confirm for
// big-endian targets).
static INLINE uint32x2_t horizontal_add_long_uint16x8(const uint16x8_t a,
                                                      const uint16x8_t b) {
  const uint32x4_t sum_a = vpaddlq_u16(a);
  const uint32x4_t sum_ab = vpadalq_u16(sum_a, b);
  const uint64x2_t sum_u64 = vpaddlq_u32(sum_ab);
  const uint32x2_t lo_half = vreinterpret_u32_u64(vget_low_u64(sum_u64));
  const uint32x2_t hi_half = vreinterpret_u32_u64(vget_high_u64(sum_u64));
  return vadd_u32(lo_half, hi_half);
}
41
// Reduces the four unsigned 32-bit lanes of |a| to a single total.
//
// One widening pairwise add produces two 64-bit partial sums. Each is viewed
// as a pair of 32-bit lanes and the pairs are added lane-wise, so the lane
// overlaying the low halves holds the total truncated to 32 bits (lane 0 with
// a little-endian lane layout; TODO confirm for big-endian targets). The other
// lane is the sum of the upper halves of the partial sums.
static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
  const uint64x2_t sum_u64 = vpaddlq_u32(a);
  const uint32x2_t lo_half = vreinterpret_u32_u64(vget_low_u64(sum_u64));
  const uint32x2_t hi_half = vreinterpret_u32_u64(vget_high_u64(sum_u64));
  return vadd_u32(lo_half, hi_half);
}
47#endif  // VPX_DSP_ARM_SUM_NEON_H_
48