1810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org/* 2810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * 4810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * Use of this source code is governed by a BSD-style license 5810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * that can be found in the LICENSE file in the root of the source 6810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * tree. An additional intellectual property rights grant can be found 7810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * in the file PATENTS. All contributing project authors may 8810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org * be found in the AUTHORS file in the root of the source tree. 9810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org */ 10810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 11810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org#include <arm_neon.h> 12810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 13810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.orgunsigned int vp8_sad8x8_neon( 14810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *src_ptr, 15810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int src_stride, 16810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *ref_ptr, 17810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int ref_stride) { 18810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint8x8_t d0, d8; 19810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint16x8_t q12; 20810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x4_t q1; 21810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint64x2_t q3; 22810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x2_t d5; 23810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int i; 24810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 25810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d0 = vld1_u8(src_ptr); 26810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 27810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d8 = vld1_u8(ref_ptr); 28810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 29810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabdl_u8(d0, d8); 30810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 31810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org for (i = 0; i < 7; i++) { 32810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d0 = vld1_u8(src_ptr); 33810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 34810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d8 = vld1_u8(ref_ptr); 35810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 36810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabal_u8(q12, d0, d8); 37810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org } 38810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 39810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q1 = vpaddlq_u16(q12); 40810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q3 = vpaddlq_u32(q1); 41810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), 42810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org vreinterpret_u32_u64(vget_high_u64(q3))); 43810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 44810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org return vget_lane_u32(d5, 0); 45810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org} 46810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 47810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.orgunsigned int vp8_sad8x16_neon( 48810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *src_ptr, 49810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int src_stride, 50810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *ref_ptr, 51810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int ref_stride) { 52810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint8x8_t d0, d8; 53810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint16x8_t q12; 54810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x4_t q1; 55810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint64x2_t q3; 56810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x2_t d5; 57810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int i; 58810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 59810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d0 = vld1_u8(src_ptr); 60810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 61810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d8 = vld1_u8(ref_ptr); 62810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 63810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabdl_u8(d0, d8); 64810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 65810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org for (i = 0; i < 15; i++) { 66810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d0 = vld1_u8(src_ptr); 67810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 68810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d8 = vld1_u8(ref_ptr); 69810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 70810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabal_u8(q12, d0, d8); 71810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org } 72810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 73810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q1 = vpaddlq_u16(q12); 74810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q3 = vpaddlq_u32(q1); 75810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), 76810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org vreinterpret_u32_u64(vget_high_u64(q3))); 77810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 78810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org return vget_lane_u32(d5, 0); 79810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org} 80810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 81810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.orgunsigned int vp8_sad4x4_neon( 82810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *src_ptr, 83810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int src_stride, 84810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *ref_ptr, 85810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int ref_stride) { 86810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint8x8_t d0, d8; 87810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint16x8_t q12; 88810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x2_t d1; 89810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint64x1_t d3; 90810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int i; 91810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 92810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d0 = vld1_u8(src_ptr); 93810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 94810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d8 = vld1_u8(ref_ptr); 95810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 96810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabdl_u8(d0, d8); 97810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 98810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org for (i = 0; i < 3; i++) { 99810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d0 = vld1_u8(src_ptr); 100810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 101810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d8 = vld1_u8(ref_ptr); 102810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 103810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabal_u8(q12, d0, d8); 104810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org } 105810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 106810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d1 = vpaddl_u16(vget_low_u16(q12)); 107810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d3 = vpaddl_u32(d1); 108810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 109810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org return vget_lane_u32(vreinterpret_u32_u64(d3), 0); 110810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org} 111810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 112810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.orgunsigned int vp8_sad16x16_neon( 113810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *src_ptr, 114810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int src_stride, 115810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *ref_ptr, 116810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int ref_stride) { 117810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint8x16_t q0, q4; 118810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint16x8_t q12, q13; 119810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x4_t q1; 120810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint64x2_t q3; 121810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x2_t d5; 122810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int i; 123810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 124810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q0 = vld1q_u8(src_ptr); 125810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 126810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q4 = vld1q_u8(ref_ptr); 127810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 128810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); 129810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); 130810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 131810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org for (i = 0; i < 15; i++) { 132810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q0 = vld1q_u8(src_ptr); 133810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 134810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q4 = vld1q_u8(ref_ptr); 135810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 136810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); 137810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); 138810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org } 139810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 140810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vaddq_u16(q12, q13); 141810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q1 = vpaddlq_u16(q12); 142810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q3 = vpaddlq_u32(q1); 143810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), 144810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org vreinterpret_u32_u64(vget_high_u64(q3))); 145810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 146810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org return vget_lane_u32(d5, 0); 147810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org} 148810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 149810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.orgunsigned int vp8_sad16x8_neon( 150810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *src_ptr, 151810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int src_stride, 152810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org unsigned char *ref_ptr, 153810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int ref_stride) { 154810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint8x16_t q0, q4; 155810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint16x8_t q12, q13; 156810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x4_t q1; 157810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint64x2_t q3; 158810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org uint32x2_t d5; 159810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org int i; 160810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 161810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q0 = vld1q_u8(src_ptr); 162810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 163810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q4 = vld1q_u8(ref_ptr); 164810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 165810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4)); 166810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4)); 167810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 168810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org for (i = 0; i < 7; i++) { 169810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q0 = vld1q_u8(src_ptr); 170810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org src_ptr += src_stride; 171810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q4 = vld1q_u8(ref_ptr); 172810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org ref_ptr += ref_stride; 173810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4)); 174810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4)); 175810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org } 176810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 177810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q12 = vaddq_u16(q12, q13); 178810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q1 = vpaddlq_u16(q12); 179810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org q3 = vpaddlq_u32(q1); 180810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)), 181810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org vreinterpret_u32_u64(vget_high_u64(q3))); 182810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org 183810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org return vget_lane_u32(d5, 0); 184810cf1767dc8df4783e02ba8a712072f50ddc99efgalligan@chromium.org} 185