/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org
#include <arm_neon.h>
#include <string.h>
12810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org
/*
 * Sum of absolute differences over an 8-wide, 8-row block.
 *
 * src_ptr/ref_ptr point at the first byte of the two blocks; the matching
 * stride is added to each pointer after every row. Exactly 8 bytes are
 * read from each pointer per row.
 *
 * Returns the sum of |src - ref| over all 64 byte pairs.
 */
unsigned int vp8_sad8x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint16x8_t abs_sum = vdupq_n_u16(0);
    uint32x4_t pair_sum;
    uint32x2_t half_sum;
    int row;

    /* Each 16-bit lane accumulates one column: at most 8 * 255. */
    for (row = 0; row < 8; row++) {
        const uint8x8_t src_row = vld1_u8(src_ptr);
        const uint8x8_t ref_row = vld1_u8(ref_ptr);

        abs_sum = vabal_u8(abs_sum, src_row, ref_row);
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u32 -> scalar. */
    pair_sum = vpaddlq_u16(abs_sum);
    half_sum = vadd_u32(vget_low_u32(pair_sum), vget_high_u32(pair_sum));
    half_sum = vpadd_u32(half_sum, half_sum);

    return vget_lane_u32(half_sum, 0);
}
46810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org
/*
 * Sum of absolute differences over an 8-wide, 16-row block.
 *
 * src_ptr/ref_ptr point at the first byte of the two blocks; the matching
 * stride is added to each pointer after every row. Exactly 8 bytes are
 * read from each pointer per row.
 *
 * Returns the sum of |src - ref| over all 128 byte pairs.
 */
unsigned int vp8_sad8x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint16x8_t abs_sum = vdupq_n_u16(0);
    uint32x4_t pair_sum;
    uint32x2_t half_sum;
    int rows_left = 16;

    /* Each 16-bit lane accumulates one column: at most 16 * 255. */
    while (rows_left-- > 0) {
        const uint8x8_t src_row = vld1_u8(src_ptr);
        const uint8x8_t ref_row = vld1_u8(ref_ptr);

        abs_sum = vabal_u8(abs_sum, src_row, ref_row);
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u32 -> scalar. */
    pair_sum = vpaddlq_u16(abs_sum);
    half_sum = vadd_u32(vget_low_u32(pair_sum), vget_high_u32(pair_sum));
    half_sum = vpadd_u32(half_sum, half_sum);

    return vget_lane_u32(half_sum, 0);
}
80810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org
/*
 * Sum of absolute differences over a 4-wide, 4-row block.
 *
 * src_ptr/ref_ptr point at the first byte of the two blocks; rows are
 * src_stride/ref_stride bytes apart.
 *
 * Only the 4 bytes that belong to each row are read. Loading a full
 * 8-byte vector per row would touch 4 bytes past the block, which is out
 * of bounds when the block sits at the end of a buffer. Two rows are
 * packed into one 8-lane vector instead, so every lane carries real data.
 *
 * Returns the sum of |src - ref| over all 16 byte pairs.
 */
unsigned int vp8_sad4x4_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint16x8_t abs_sum = vdupq_n_u16(0);
    uint32x4_t pair_sum;
    uint32x2_t half_sum;
    int pair;

    for (pair = 0; pair < 2; pair++) {
        uint8_t src_rows[8];
        uint8_t ref_rows[8];

        /* memcpy keeps the 4-byte reads free of alignment assumptions. */
        memcpy(src_rows, src_ptr, 4);
        memcpy(src_rows + 4, src_ptr + src_stride, 4);
        memcpy(ref_rows, ref_ptr, 4);
        memcpy(ref_rows + 4, ref_ptr + ref_stride, 4);

        abs_sum = vabal_u8(abs_sum, vld1_u8(src_rows), vld1_u8(ref_rows));
        src_ptr += 2 * src_stride;
        ref_ptr += 2 * ref_stride;
    }

    /* Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u32 -> scalar. */
    pair_sum = vpaddlq_u16(abs_sum);
    half_sum = vadd_u32(vget_low_u32(pair_sum), vget_high_u32(pair_sum));
    half_sum = vpadd_u32(half_sum, half_sum);

    return vget_lane_u32(half_sum, 0);
}
111810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org
/*
 * Sum of absolute differences over a 16-wide, 16-row block.
 *
 * src_ptr/ref_ptr point at the first byte of the two blocks; the matching
 * stride is added to each pointer after every row. Exactly 16 bytes are
 * read from each pointer per row.
 *
 * Returns the sum of |src - ref| over all 256 byte pairs.
 */
unsigned int vp8_sad16x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Separate accumulators for columns 0-7 and columns 8-15. */
    uint16x8_t sum_lo = vdupq_n_u16(0);
    uint16x8_t sum_hi = vdupq_n_u16(0);
    uint32x4_t pair_sum;
    uint32x2_t half_sum;
    int row;

    for (row = 0; row < 16; row++) {
        const uint8x16_t src_row = vld1q_u8(src_ptr);
        const uint8x16_t ref_row = vld1q_u8(ref_ptr);

        sum_lo = vabal_u8(sum_lo, vget_low_u8(src_row), vget_low_u8(ref_row));
        sum_hi = vabal_u8(sum_hi, vget_high_u8(src_row),
                          vget_high_u8(ref_row));
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /*
     * Each lane holds at most 16 * 255, so merging the two halves still
     * fits in 16 bits before widening.
     */
    pair_sum = vpaddlq_u16(vaddq_u16(sum_lo, sum_hi));
    half_sum = vadd_u32(vget_low_u32(pair_sum), vget_high_u32(pair_sum));
    half_sum = vpadd_u32(half_sum, half_sum);

    return vget_lane_u32(half_sum, 0);
}
148810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org
/*
 * Sum of absolute differences over a 16-wide, 8-row block.
 *
 * src_ptr/ref_ptr point at the first byte of the two blocks; the matching
 * stride is added to each pointer after every row. Exactly 16 bytes are
 * read from each pointer per row.
 *
 * Returns the sum of |src - ref| over all 128 byte pairs.
 */
unsigned int vp8_sad16x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Separate accumulators for columns 0-7 and columns 8-15. */
    uint16x8_t sum_lo = vdupq_n_u16(0);
    uint16x8_t sum_hi = vdupq_n_u16(0);
    uint32x4_t pair_sum;
    uint32x2_t half_sum;
    int rows_left = 8;

    while (rows_left-- > 0) {
        const uint8x16_t src_row = vld1q_u8(src_ptr);
        const uint8x16_t ref_row = vld1q_u8(ref_ptr);

        sum_lo = vabal_u8(sum_lo, vget_low_u8(src_row), vget_low_u8(ref_row));
        sum_hi = vabal_u8(sum_hi, vget_high_u8(src_row),
                          vget_high_u8(ref_row));
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /*
     * Each lane holds at most 8 * 255, so merging the two halves still
     * fits in 16 bits before widening.
     */
    pair_sum = vpaddlq_u16(vaddq_u16(sum_lo, sum_hi));
    half_sum = vadd_u32(vget_low_u32(pair_sum), vget_high_u32(pair_sum));
    half_sum = vpadd_u32(half_sum, half_sum);

    return vget_lane_u32(half_sum, 0);
}
185