1d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org/* 2d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * 4d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * Use of this source code is governed by a BSD-style license 5d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * that can be found in the LICENSE file in the root of the source 6d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * tree. An additional intellectual property rights grant can be found 7d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * in the file PATENTS. All contributing project authors may 8d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org * be found in the AUTHORS file in the root of the source tree. 9d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org */ 10d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 11d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#include <arm_neon.h> 12d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#include "vpx_ports/mem.h" 13d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#include "vpx/vpx_integer.h" 14d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org /* bilinear_taps_coeff: eight {tap0, tap1} weight pairs for the two-tap bilinear filter, one row per sub-pixel offset value; the function that follows selects a row with bilinear_taps_coeff[xoffset] or bilinear_taps_coeff[yoffset] and broadcasts each tap with vdup_n_u8. Every pair sums to 128, which matches the rounding narrowing shift by 7 (vqrshrn_n_u16(..., 7)) applied to the weighted sums. Row 0 is {128, 0}, i.e. all weight on the first sample. NOTE(review): offsets are presumably 1/8-pel steps in the range 0..7 -- no range check is visible in this file section, confirm against callers. NOTE(review): every original line of this listing is prefixed with a fused line-number / revision-hash / author tag; that residue is not C and has to be stripped before the file can compile. */ 15d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgstatic const uint16_t bilinear_taps_coeff[8][2] = { 16d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org {128, 0}, 17d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org {112, 16}, 18d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 96, 32}, 19d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 80, 48}, 20d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 64, 64}, 21d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 48, 
80}, 22d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 32, 96}, 23d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 16, 112} 24d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org}; 25d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 26d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int vp8_sub_pixel_variance16x16_neon_func( 27d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const unsigned char *src_ptr, 28d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int src_pixels_per_line, 29d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int xoffset, 30d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int yoffset, 31d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const unsigned char *dst_ptr, 32d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int dst_pixels_per_line, 33d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned int *sse) { 34d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int i; 35d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528); 36d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned char *tmpp; 37d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned char *tmpp2; 38d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; 39d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; 40d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint8x8_t d19u8, d20u8, d21u8; 41d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; 
42d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint32x2_t d0u32, d10u32; 43d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int64x1_t d0s64, d1s64, d2s64, d3s64; 44d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8; 45d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint8x16_t q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; 46d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; 47d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; 48d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int32x4_t q8s32, q9s32, q10s32; 49d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int64x2_t q0s64, q1s64, q5s64; 50d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 51d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 = tmp + 272; 52d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp = tmp; 53d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org if (xoffset == 0) { // secondpass_bfilter16x16_only 54d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][0]); 55d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][1]); 56d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 57d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u8 = vld1q_u8(src_ptr); 58d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 59d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 4; i > 0; i--) { 60d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u8 = vld1q_u8(src_ptr); 
61d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 62d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u8 = vld1q_u8(src_ptr); 63d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 64d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u8 = vld1q_u8(src_ptr); 65d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 66d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q15u8 = vld1q_u8(src_ptr); 67d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 68d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 69d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __builtin_prefetch(src_ptr); 70d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __builtin_prefetch(src_ptr + src_pixels_per_line); 71d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __builtin_prefetch(src_ptr + src_pixels_per_line * 2); 72d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 73d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); 74d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); 75d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); 76d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); 77d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); 78d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); 79d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); 
80d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); 81d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 82d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); 83d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); 84d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); 85d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); 86d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); 87d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); 88d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); 89d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); 90d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 91d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vqrshrn_n_u16(q1u16, 7); 92d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vqrshrn_n_u16(q2u16, 7); 93d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4u8 = vqrshrn_n_u16(q3u16, 7); 94d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vqrshrn_n_u16(q4u16, 7); 95d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vqrshrn_n_u16(q5u16, 7); 96d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d7u8 = vqrshrn_n_u16(q6u16, 7); 97d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vqrshrn_n_u16(q7u16, 7); 98d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = 
vqrshrn_n_u16(q8u16, 7); 99d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 100d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u8 = vcombine_u8(d2u8, d3u8); 101d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u8 = vcombine_u8(d4u8, d5u8); 102d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u8 = vcombine_u8(d6u8, d7u8); 103d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u8 = vcombine_u8(d8u8, d9u8); 104d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 105d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u8 = q15u8; 106d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 107d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q1u8); 108d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 109d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q2u8); 110d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 111d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q3u8); 112d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 113d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q4u8); 114d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 115d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 116d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } else if (yoffset == 0) { // firstpass_bfilter16x16_only 117d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][0]); 118d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][1]); 119d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
120d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 4; i > 0 ; i--) { 121d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vld1_u8(src_ptr); 122d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vld1_u8(src_ptr + 8); 123d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4u8 = vld1_u8(src_ptr + 16); 124d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 125d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vld1_u8(src_ptr); 126d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vld1_u8(src_ptr + 8); 127d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d7u8 = vld1_u8(src_ptr + 16); 128d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 129d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vld1_u8(src_ptr); 130d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vld1_u8(src_ptr + 8); 131d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d10u8 = vld1_u8(src_ptr + 16); 132d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 133d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vld1_u8(src_ptr); 134d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vld1_u8(src_ptr + 8); 135d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d13u8 = vld1_u8(src_ptr + 16); 136d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 137d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 138d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __builtin_prefetch(src_ptr); 139d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __builtin_prefetch(src_ptr + src_pixels_per_line); 
140d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __builtin_prefetch(src_ptr + src_pixels_per_line * 2); 141d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 142d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmull_u8(d2u8, d0u8); 143d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmull_u8(d3u8, d0u8); 144d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u16 = vmull_u8(d5u8, d0u8); 145d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u16 = vmull_u8(d6u8, d0u8); 146d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vmull_u8(d8u8, d0u8); 147d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vmull_u8(d9u8, d0u8); 148d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vmull_u8(d11u8, d0u8); 149d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vmull_u8(d12u8, d0u8); 150d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 151d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vext_u8(d2u8, d3u8, 1); 152d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vext_u8(d5u8, d6u8, 1); 153d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vext_u8(d8u8, d9u8, 1); 154d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vext_u8(d11u8, d12u8, 1); 155d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 156d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmlal_u8(q7u16, d2u8, d1u8); 157d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u16 = vmlal_u8(q9u16, d5u8, d1u8); 158d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vmlal_u8(q11u16, d8u8, d1u8); 159d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vmlal_u8(q13u16, d11u8, d1u8); 
160d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 161d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vext_u8(d3u8, d4u8, 1); 162d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vext_u8(d6u8, d7u8, 1); 163d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vext_u8(d9u8, d10u8, 1); 164d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vext_u8(d12u8, d13u8, 1); 165d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 166d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmlal_u8(q8u16, d3u8, d1u8); 167d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u16 = vmlal_u8(q10u16, d6u8, d1u8); 168d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vmlal_u8(q12u16, d9u8, d1u8); 169d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vmlal_u8(q14u16, d12u8, d1u8); 170d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 171d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d14u8 = vqrshrn_n_u16(q7u16, 7); 172d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d15u8 = vqrshrn_n_u16(q8u16, 7); 173d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d16u8 = vqrshrn_n_u16(q9u16, 7); 174d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d17u8 = vqrshrn_n_u16(q10u16, 7); 175d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d18u8 = vqrshrn_n_u16(q11u16, 7); 176d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d19u8 = vqrshrn_n_u16(q12u16, 7); 177d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d20u8 = vqrshrn_n_u16(q13u16, 7); 178d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d21u8 = vqrshrn_n_u16(q14u16, 7); 179d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
180d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u8 = vcombine_u8(d14u8, d15u8); 181d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u8 = vcombine_u8(d16u8, d17u8); 182d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u8 = vcombine_u8(d18u8, d19u8); 183d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u8 = vcombine_u8(d20u8, d21u8); 184d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 185d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q7u8); 186d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 187d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q8u8); 188d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 189d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q9u8); 190d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 191d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q10u8); 192d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 193d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 194d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } else { 195d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][0]); 196d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1u8 = vdup_n_u8(bilinear_taps_coeff[xoffset][1]); 197d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 198d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vld1_u8(src_ptr); 199d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vld1_u8(src_ptr + 8); 200d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4u8 = vld1_u8(src_ptr + 16); 
201d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 202d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vld1_u8(src_ptr); 203d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vld1_u8(src_ptr + 8); 204d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d7u8 = vld1_u8(src_ptr + 16); 205d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 206d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vld1_u8(src_ptr); 207d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vld1_u8(src_ptr + 8); 208d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d10u8 = vld1_u8(src_ptr + 16); 209d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 210d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vld1_u8(src_ptr); 211d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vld1_u8(src_ptr + 8); 212d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d13u8 = vld1_u8(src_ptr + 16); 213d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 214d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 215d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // First Pass: output_height lines x output_width columns (17x16) 216d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 3; i > 0; i--) { 217d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmull_u8(d2u8, d0u8); 218d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmull_u8(d3u8, d0u8); 219d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u16 = vmull_u8(d5u8, d0u8); 220d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u16 = vmull_u8(d6u8, d0u8); 
221d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vmull_u8(d8u8, d0u8); 222d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vmull_u8(d9u8, d0u8); 223d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vmull_u8(d11u8, d0u8); 224d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vmull_u8(d12u8, d0u8); 225d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 226d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vext_u8(d2u8, d3u8, 1); 227d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vext_u8(d5u8, d6u8, 1); 228d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vext_u8(d8u8, d9u8, 1); 229d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vext_u8(d11u8, d12u8, 1); 230d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 231d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmlal_u8(q7u16, d2u8, d1u8); 232d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u16 = vmlal_u8(q9u16, d5u8, d1u8); 233d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vmlal_u8(q11u16, d8u8, d1u8); 234d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vmlal_u8(q13u16, d11u8, d1u8); 235d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 236d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vext_u8(d3u8, d4u8, 1); 237d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vext_u8(d6u8, d7u8, 1); 238d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vext_u8(d9u8, d10u8, 1); 239d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vext_u8(d12u8, d13u8, 1); 240d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 241d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = 
vmlal_u8(q8u16, d3u8, d1u8); 242d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u16 = vmlal_u8(q10u16, d6u8, d1u8); 243d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vmlal_u8(q12u16, d9u8, d1u8); 244d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vmlal_u8(q14u16, d12u8, d1u8); 245d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 246d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d14u8 = vqrshrn_n_u16(q7u16, 7); 247d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d15u8 = vqrshrn_n_u16(q8u16, 7); 248d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d16u8 = vqrshrn_n_u16(q9u16, 7); 249d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d17u8 = vqrshrn_n_u16(q10u16, 7); 250d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d18u8 = vqrshrn_n_u16(q11u16, 7); 251d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d19u8 = vqrshrn_n_u16(q12u16, 7); 252d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d20u8 = vqrshrn_n_u16(q13u16, 7); 253d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d21u8 = vqrshrn_n_u16(q14u16, 7); 254d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 255d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vld1_u8(src_ptr); 256d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vld1_u8(src_ptr + 8); 257d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4u8 = vld1_u8(src_ptr + 16); 258d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 259d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vld1_u8(src_ptr); 260d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vld1_u8(src_ptr + 8); 261d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d7u8 = vld1_u8(src_ptr + 16); 
262d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 263d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vld1_u8(src_ptr); 264d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vld1_u8(src_ptr + 8); 265d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d10u8 = vld1_u8(src_ptr + 16); 266d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 267d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vld1_u8(src_ptr); 268d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vld1_u8(src_ptr + 8); 269d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d13u8 = vld1_u8(src_ptr + 16); 270d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 271d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 272d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u8 = vcombine_u8(d14u8, d15u8); 273d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u8 = vcombine_u8(d16u8, d17u8); 274d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u8 = vcombine_u8(d18u8, d19u8); 275d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u8 = vcombine_u8(d20u8, d21u8); 276d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 277d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q7u8); 278d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 279d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q8u8); 280d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 281d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q9u8); 282d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 
283d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q10u8); 284d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 285d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 286d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 287d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // First-pass filtering for rest 5 lines 288d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d14u8 = vld1_u8(src_ptr); 289d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d15u8 = vld1_u8(src_ptr + 8); 290d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d16u8 = vld1_u8(src_ptr + 16); 291d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += src_pixels_per_line; 292d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 293d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u16 = vmull_u8(d2u8, d0u8); 294d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u16 = vmull_u8(d3u8, d0u8); 295d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vmull_u8(d5u8, d0u8); 296d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vmull_u8(d6u8, d0u8); 297d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vmull_u8(d8u8, d0u8); 298d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vmull_u8(d9u8, d0u8); 299d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 300d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vext_u8(d2u8, d3u8, 1); 301d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vext_u8(d5u8, d6u8, 1); 302d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vext_u8(d8u8, d9u8, 1); 303d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
304d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u16 = vmlal_u8(q9u16, d2u8, d1u8); 305d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vmlal_u8(q11u16, d5u8, d1u8); 306d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vmlal_u8(q13u16, d8u8, d1u8); 307d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 308d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vext_u8(d3u8, d4u8, 1); 309d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vext_u8(d6u8, d7u8, 1); 310d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vext_u8(d9u8, d10u8, 1); 311d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 312d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10u16 = vmlal_u8(q10u16, d3u8, d1u8); 313d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vmlal_u8(q12u16, d6u8, d1u8); 314d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vmlal_u8(q14u16, d9u8, d1u8); 315d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 316d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vmull_u8(d11u8, d0u8); 317d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vmull_u8(d12u8, d0u8); 318d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vmull_u8(d14u8, d0u8); 319d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u16 = vmull_u8(d15u8, d0u8); 320d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 321d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vext_u8(d11u8, d12u8, 1); 322d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d14u8 = vext_u8(d14u8, d15u8, 1); 323d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 324d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vmlal_u8(q1u16, d11u8, 
d1u8); 325d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vmlal_u8(q3u16, d14u8, d1u8); 326d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 327d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vext_u8(d12u8, d13u8, 1); 328d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d15u8 = vext_u8(d15u8, d16u8, 1); 329d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 330d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vmlal_u8(q2u16, d12u8, d1u8); 331d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u16 = vmlal_u8(q4u16, d15u8, d1u8); 332d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 333d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d10u8 = vqrshrn_n_u16(q9u16, 7); 334d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d11u8 = vqrshrn_n_u16(q10u16, 7); 335d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vqrshrn_n_u16(q11u16, 7); 336d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d13u8 = vqrshrn_n_u16(q12u16, 7); 337d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d14u8 = vqrshrn_n_u16(q13u16, 7); 338d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d15u8 = vqrshrn_n_u16(q14u16, 7); 339d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d16u8 = vqrshrn_n_u16(q1u16, 7); 340d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d17u8 = vqrshrn_n_u16(q2u16, 7); 341d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d18u8 = vqrshrn_n_u16(q3u16, 7); 342d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d19u8 = vqrshrn_n_u16(q4u16, 7); 343d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 344d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5u8 = vcombine_u8(d10u8, d11u8); 
345d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u8 = vcombine_u8(d12u8, d13u8); 346d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u8 = vcombine_u8(d14u8, d15u8); 347d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u8 = vcombine_u8(d16u8, d17u8); 348d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9u8 = vcombine_u8(d18u8, d19u8); 349d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 350d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q5u8); 351d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 352d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q6u8); 353d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 354d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q7u8); 355d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 356d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q8u8); 357d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 358d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp, q9u8); 359d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 360d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // secondpass_filter 361d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][0]); 362d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1u8 = vdup_n_u8(bilinear_taps_coeff[yoffset][1]); 363d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 364d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp = tmp; 365d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 = tmpp + 272; 
366d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u8 = vld1q_u8(tmpp); 367d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 368d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 4; i > 0; i--) { 369d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u8 = vld1q_u8(tmpp); 370d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 371d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u8 = vld1q_u8(tmpp); 372d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 373d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u8 = vld1q_u8(tmpp); 374d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 375d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q15u8 = vld1q_u8(tmpp); 376d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 377d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 378d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); 379d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); 380d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); 381d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); 382d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); 383d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); 384d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); 385d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); 
386d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 387d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); 388d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); 389d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); 390d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); 391d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); 392d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); 393d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); 394d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); 395d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 396d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vqrshrn_n_u16(q1u16, 7); 397d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vqrshrn_n_u16(q2u16, 7); 398d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4u8 = vqrshrn_n_u16(q3u16, 7); 399d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vqrshrn_n_u16(q4u16, 7); 400d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6u8 = vqrshrn_n_u16(q5u16, 7); 401d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d7u8 = vqrshrn_n_u16(q6u16, 7); 402d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vqrshrn_n_u16(q7u16, 7); 403d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vqrshrn_n_u16(q8u16, 7); 404d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
405d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u8 = vcombine_u8(d2u8, d3u8); 406d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u8 = vcombine_u8(d4u8, d5u8); 407d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u8 = vcombine_u8(d6u8, d7u8); 408d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u8 = vcombine_u8(d8u8, d9u8); 409d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 410d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u8 = q15u8; 411d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 412d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q1u8); 413d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 414d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q2u8); 415d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 416d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q3u8); 417d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 418d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vst1q_u8((uint8_t *)tmpp2, q4u8); 419d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp2 += 16; 420d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 421d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 422d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 423d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sub_pixel_variance16x16_neon 424d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vdupq_n_s32(0); 425d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vdupq_n_s32(0); 426d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vdupq_n_s32(0); 
427d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 428d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp = tmp + 272; 429d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < 8; i++) { // sub_pixel_variance16x16_neon_loop 430d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0u8 = vld1q_u8(tmpp); 431d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 432d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u8 = vld1q_u8(tmpp); 433d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org tmpp += 16; 434d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u8 = vld1q_u8(dst_ptr); 435d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org dst_ptr += dst_pixels_per_line; 436d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u8 = vld1q_u8(dst_ptr); 437d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org dst_ptr += dst_pixels_per_line; 438d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 439d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0u8 = vget_low_u8(q0u8); 440d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1u8 = vget_high_u8(q0u8); 441d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2u8 = vget_low_u8(q1u8); 442d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3u8 = vget_high_u8(q1u8); 443d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 444d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vsubl_u8(d0u8, vget_low_u8(q2u8)); 445d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vsubl_u8(d1u8, vget_high_u8(q2u8)); 446d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vsubl_u8(d2u8, vget_low_u8(q3u8)); 447d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vsubl_u8(d3u8, vget_high_u8(q3u8)); 
448d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 449d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); 450d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); 451d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); 452d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d22s16, d22s16); 453d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d23s16, d23s16); 454d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 455d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); 456d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); 457d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); 458d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d24s16, d24s16); 459d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d25s16, d25s16); 460d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 461d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); 462d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); 463d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); 464d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d26s16, d26s16); 465d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d27s16, d27s16); 
466d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 467d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); 468d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); 469d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); 470d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d28s16, d28s16); 471d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d29s16, d29s16); 472d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 473d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 474d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vaddq_s32(q10s32, q9s32); 475d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0s64 = vpaddlq_s32(q8s32); 476d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1s64 = vpaddlq_s32(q10s32); 477d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 478d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0s64 = vget_low_s64(q0s64); 479d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1s64 = vget_high_s64(q0s64); 480d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2s64 = vget_low_s64(q1s64); 481d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3s64 = vget_high_s64(q1s64); 482d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0s64 = vadd_s64(d0s64, d1s64); 483d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1s64 = vadd_s64(d2s64, d3s64); 484d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 485d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), 
/* (tail of the preceding function, which begins above; tokens unchanged) */
                    vreinterpret_s32_s64(d0s64));
  vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);

  d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
  d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);

  return vget_lane_u32(d0u32, 0);
}

/*
 * Variance of a 16x16 block against a reference block after horizontal
 * half-pixel interpolation of the source.
 *
 * Each source pixel is replaced by the rounded average of itself and its
 * right-hand neighbour (vrhaddq_u8), so 17 source bytes are read per row.
 * The filtered row is then differenced against 16 reference bytes.
 *
 *   src_ptr        top-left source pixel; 17 bytes are read from each of the
 *                  16 rows
 *   source_stride  byte distance between consecutive source rows
 *   ref_ptr        top-left reference pixel; 16 bytes are read from each row
 *   recon_stride   byte distance between consecutive reference rows
 *   sse            receives the sum of squared differences over the block
 *
 * Returns sse - ((sum * sum) >> 8), where sum is the sum of the signed
 * differences; the shift by 8 divides by the 256 pixels of the block.
 */
unsigned int vp8_variance_halfpixvar16x16_h_neon(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse) {
  int row;
  int32x4_t sum_acc = vdupq_n_s32(0);     // running sum of differences
  int32x4_t sse_acc_lo = vdupq_n_s32(0);  // squares of lanes 0-3 of each half
  int32x4_t sse_acc_hi = vdupq_n_s32(0);  // squares of lanes 4-7 of each half
  int64x2_t sum_pairs, sse_pairs, sum_sq;
  int64x1_t sum_total, sse_total;
  uint32x2_t mean_term, variance;

  for (row = 0; row < 16; row++) {
    // Load the row twice, one byte apart, so that only the 17 bytes the
    // filter needs are touched (no 32-byte load followed by vextq_u8).
    const uint8x16_t left = vld1q_u8(src_ptr);
    const uint8x16_t right = vld1q_u8(src_ptr + 1);
    const uint8x16_t ref = vld1q_u8(ref_ptr);
    const uint8x16_t filtered = vrhaddq_u8(left, right);

    // The widening subtract wraps modulo 2^16; reinterpreting as signed
    // yields the true difference, which always lies in [-255, 255].
    const int16x8_t diff_lo = vreinterpretq_s16_u16(
        vsubl_u8(vget_low_u8(filtered), vget_low_u8(ref)));
    const int16x8_t diff_hi = vreinterpretq_s16_u16(
        vsubl_u8(vget_high_u8(filtered), vget_high_u8(ref)));

    sum_acc = vpadalq_s16(sum_acc, diff_lo);
    sse_acc_lo = vmlal_s16(sse_acc_lo, vget_low_s16(diff_lo),
                           vget_low_s16(diff_lo));
    sse_acc_hi = vmlal_s16(sse_acc_hi, vget_high_s16(diff_lo),
                           vget_high_s16(diff_lo));

    sum_acc = vpadalq_s16(sum_acc, diff_hi);
    sse_acc_lo = vmlal_s16(sse_acc_lo, vget_low_s16(diff_hi),
                           vget_low_s16(diff_hi));
    sse_acc_hi = vmlal_s16(sse_acc_hi, vget_high_s16(diff_hi),
                           vget_high_s16(diff_hi));

    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  // Horizontal reduction of both accumulators to a single 64-bit value each.
  sse_acc_lo = vaddq_s32(sse_acc_lo, sse_acc_hi);
  sum_pairs = vpaddlq_s32(sum_acc);
  sse_pairs = vpaddlq_s32(sse_acc_lo);
  sum_total = vadd_s64(vget_low_s64(sum_pairs), vget_high_s64(sum_pairs));
  sse_total = vadd_s64(vget_low_s64(sse_pairs), vget_high_s64(sse_pairs));

  // |sum| <= 255 * 256, so sum * sum fits in the low 32 bits of lane 0.
  sum_sq = vmull_s32(vreinterpret_s32_s64(sum_total),
                     vreinterpret_s32_s64(sum_total));
  *sse = vget_lane_u32(vreinterpret_u32_s64(sse_total), 0);

  mean_term = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(sum_sq)), 8);
  variance = vsub_u32(vreinterpret_u32_s64(sse_total), mean_term);

  return vget_lane_u32(variance, 0);
}

/* (head of the following function, which continues below; tokens unchanged) */
unsigned int vp8_variance_halfpixvar16x16_v_neon(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse) {
  int i;
  uint8x8_t d0u8, d1u8, d4u8, d5u8, d8u8, d9u8, d12u8, d13u8;
  int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
640d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; 641d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint32x2_t d0u32, d10u32; 642d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int64x1_t d0s64, d1s64, d2s64, d3s64; 643d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q15u8; 644d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org uint16x8_t q0u16, q1u16, q2u16, q3u16, q11u16, q12u16, q13u16, q14u16; 645d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int32x4_t q8s32, q9s32, q10s32; 646d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int64x2_t q0s64, q1s64, q5s64; 647d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 648d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vdupq_n_s32(0); 649d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vdupq_n_s32(0); 650d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vdupq_n_s32(0); 651d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 652d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0u8 = vld1q_u8(src_ptr); 653d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += source_stride; 654d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < 4; i++) { // vp8_filt_fpo16x16s_4_0_loop_neon 655d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u8 = vld1q_u8(src_ptr); 656d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += source_stride; 657d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u8 = vld1q_u8(src_ptr); 658d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += source_stride; 
659d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u8 = vld1q_u8(src_ptr); 660d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += source_stride; 661d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q15u8 = vld1q_u8(src_ptr); 662d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src_ptr += source_stride; 663d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 664d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u8 = vld1q_u8(ref_ptr); 665d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org ref_ptr += recon_stride; 666d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u8 = vld1q_u8(ref_ptr); 667d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org ref_ptr += recon_stride; 668d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5u8 = vld1q_u8(ref_ptr); 669d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org ref_ptr += recon_stride; 670d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q7u8 = vld1q_u8(ref_ptr); 671d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org ref_ptr += recon_stride; 672d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 673d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0u8 = vrhaddq_u8(q0u8, q2u8); 674d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u8 = vrhaddq_u8(q2u8, q4u8); 675d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q4u8 = vrhaddq_u8(q4u8, q6u8); 676d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q6u8 = vrhaddq_u8(q6u8, q15u8); 677d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 678d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0u8 = vget_low_u8(q0u8); 679d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1u8 = vget_high_u8(q0u8); 
680d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4u8 = vget_low_u8(q2u8); 681d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5u8 = vget_high_u8(q2u8); 682d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d8u8 = vget_low_u8(q4u8); 683d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d9u8 = vget_high_u8(q4u8); 684d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d12u8 = vget_low_u8(q6u8); 685d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d13u8 = vget_high_u8(q6u8); 686d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 687d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q11u16 = vsubl_u8(d0u8, vget_low_u8(q1u8)); 688d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q12u16 = vsubl_u8(d1u8, vget_high_u8(q1u8)); 689d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q13u16 = vsubl_u8(d4u8, vget_low_u8(q3u8)); 690d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q14u16 = vsubl_u8(d5u8, vget_high_u8(q3u8)); 691d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0u16 = vsubl_u8(d8u8, vget_low_u8(q5u8)); 692d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1u16 = vsubl_u8(d9u8, vget_high_u8(q5u8)); 693d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q2u16 = vsubl_u8(d12u8, vget_low_u8(q7u8)); 694d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q3u16 = vsubl_u8(d13u8, vget_high_u8(q7u8)); 695d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 696d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); 697d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); 698d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); 
699d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d22s16, d22s16); 700d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d23s16, d23s16); 701d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); 702d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); 703d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); 704d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d24s16, d24s16); 705d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d25s16, d25s16); 706d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); 707d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); 708d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); 709d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d26s16, d26s16); 710d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d27s16, d27s16); 711d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); 712d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); 713d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); 714d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d28s16, d28s16); 715d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, 
d29s16, d29s16); 716d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0s16 = vreinterpret_s16_u16(vget_low_u16(q0u16)); 717d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1s16 = vreinterpret_s16_u16(vget_high_u16(q0u16)); 718d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q0u16)); 719d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d0s16, d0s16); 720d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d1s16, d1s16); 721d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2s16 = vreinterpret_s16_u16(vget_low_u16(q1u16)); 722d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3s16 = vreinterpret_s16_u16(vget_high_u16(q1u16)); 723d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q1u16)); 724d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d2s16, d2s16); 725d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d3s16, d3s16); 726d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d4s16 = vreinterpret_s16_u16(vget_low_u16(q2u16)); 727d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d5s16 = vreinterpret_s16_u16(vget_high_u16(q2u16)); 728d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q2u16)); 729d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d4s16, d4s16); 730d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d5s16, d5s16); 731d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d6s16 = vreinterpret_s16_u16(vget_low_u16(q3u16)); 732d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d7s16 = 
vreinterpret_s16_u16(vget_high_u16(q3u16)); 733d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q3u16)); 734d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q9s32 = vmlal_s16(q9s32, d6s16, d6s16); 735d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vmlal_s16(q10s32, d7s16, d7s16); 736d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 737d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0u8 = q15u8; 738d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 739d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 740d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q10s32 = vaddq_s32(q10s32, q9s32); 741d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q0s64 = vpaddlq_s32(q8s32); 742d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q1s64 = vpaddlq_s32(q10s32); 743d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 744d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0s64 = vget_low_s64(q0s64); 745d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1s64 = vget_high_s64(q0s64); 746d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d2s64 = vget_low_s64(q1s64); 747d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d3s64 = vget_high_s64(q1s64); 748d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d0s64 = vadd_s64(d0s64, d1s64); 749d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org d1s64 = vadd_s64(d2s64, d3s64); 750d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 751d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), 752d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vreinterpret_s32_s64(d0s64)); 753d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);

  d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
  d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);

  return vget_lane_u32(d0u32, 0);
}

/*
 * Variance between a 16x16 reference block and a 16x16 prediction taken at
 * the half-pixel position in both directions.
 *
 * Each predicted pixel is built from a 2x2 neighbourhood of the source with
 * two stages of rounding halving adds: first horizontally within a row
 * (x, x + 1), then vertically between consecutive rows.
 *
 * src_ptr        top-left of the source area; 17 rows of 17 bytes are read.
 * source_stride  distance in bytes between source rows.
 * ref_ptr        top-left of the reference block; 16 rows of 16 bytes are read.
 * recon_stride   distance in bytes between reference rows.
 * sse            receives the sum of squared differences.
 *
 * Returns sse - ((sum * sum) >> 8), where sum is the sum of the 256 signed
 * differences.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    int row;
    int32_t sum;
    uint32_t sse_total;
    int64x2_t sum_s64, sse_s64;
    int32x4_t sum_s32 = vdupq_n_s32(0);
    int32x4_t sse_lo_s32 = vdupq_n_s32(0);
    int32x4_t sse_hi_s32 = vdupq_n_s32(0);
    /* Horizontally averaged row above the one currently being predicted.
     * Loading at src_ptr + 1 yields source bytes 1..16, so only 17 bytes of
     * each source row are touched. */
    uint8x16_t above = vrhaddq_u8(vld1q_u8(src_ptr), vld1q_u8(src_ptr + 1));

    for (row = 0; row < 16; ++row) {
        uint8x16_t below, pred, ref;
        int16x8_t diff_lo, diff_hi;

        src_ptr += source_stride;
        below = vrhaddq_u8(vld1q_u8(src_ptr), vld1q_u8(src_ptr + 1));
        pred = vrhaddq_u8(above, below);

        ref = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;

        /* The widening subtract wraps modulo 2^16; reinterpreting the result
         * as signed gives the true difference in [-255, 255]. */
        diff_lo = vreinterpretq_s16_u16(
            vsubl_u8(vget_low_u8(pred), vget_low_u8(ref)));
        diff_hi = vreinterpretq_s16_u16(
            vsubl_u8(vget_high_u8(pred), vget_high_u8(ref)));

        sum_s32 = vpadalq_s16(sum_s32, diff_lo);
        sum_s32 = vpadalq_s16(sum_s32, diff_hi);

        sse_lo_s32 = vmlal_s16(sse_lo_s32,
                               vget_low_s16(diff_lo), vget_low_s16(diff_lo));
        sse_hi_s32 = vmlal_s16(sse_hi_s32,
                               vget_high_s16(diff_lo), vget_high_s16(diff_lo));
        sse_lo_s32 = vmlal_s16(sse_lo_s32,
                               vget_low_s16(diff_hi), vget_low_s16(diff_hi));
        sse_hi_s32 = vmlal_s16(sse_hi_s32,
                               vget_high_s16(diff_hi), vget_high_s16(diff_hi));

        /* The current row becomes the upper row of the next prediction. */
        above = below;
    }

    sum_s64 = vpaddlq_s32(sum_s32);
    sse_s64 = vpaddlq_s32(vaddq_s32(sse_lo_s32, sse_hi_s32));

    sum = (int32_t)(vgetq_lane_s64(sum_s64, 0) + vgetq_lane_s64(sum_s64, 1));
    sse_total =
        (uint32_t)(vgetq_lane_s64(sse_s64, 0) + vgetq_lane_s64(sse_s64, 1));

    *sse = sse_total;
    /* |sum| <= 255 * 256, so sum * sum fits in 32 unsigned bits. */
    return sse_total - ((uint32_t)((int64_t)sum * sum) >> 8);
}

/* Geometry of the 8x8 sub-pixel variance path. */
enum { kWidth8 = 8 };
enum { kHeight8 = 8 };
/* One extra row so the vertical filter has a row below the last output row. */
enum { kHeight8PlusOne = 9 };
enum { kPixelStepOne = 1 };
enum { kAlign16 = 16 };

/* Right shift applied after the two-tap filter; each pair of bilinear taps
 * sums to 128 == 1 << FILTER_BITS. */
#define FILTER_BITS 7

/* Adds the eight 16-bit lanes of v_16x8 with widening intermediate sums and
 * returns the low 32 bits of the total. */
static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
    const int32x4_t a = vpaddlq_s16(v_16x8);
    const int64x2_t b = vpaddlq_s32(a);
    const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
                                 vreinterpret_s32_s64(vget_high_s64(b)));
    return vget_lane_s32(c, 0);
}

/* Adds the four 32-bit lanes of v_32x4 and returns the low 32 bits of the
 * total. */
static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
    const int64x2_t b = vpaddlq_s32(v_32x4);
    const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
                                 vreinterpret_s32_s64(vget_high_s64(b)));
    return vget_lane_s32(c, 0);
}

/*
 * Accumulates the sum and the sum of squares of (a - b) over a w x h area,
 * processing 8 pixels per step.
 *
 * a, a_stride  first block and the distance in bytes between its rows.
 * b, b_stride  second block and the distance in bytes between its rows.
 * w, h         area size; columns are consumed 8 at a time, so w is expected
 *              to be a multiple of 8.
 * sse          receives the sum of squared differences.
 * sum          receives the sum of differences.
 *
 * The running sum is kept in 16-bit lanes: each lane receives h * (w / 8)
 * differences of magnitude at most 255, so it cannot overflow as long as
 * that count does not exceed 128.
 */
static void variance_neon_w8(const uint8_t *a, int a_stride,
                             const uint8_t *b, int b_stride,
                             int w, int h, unsigned int *sse, int *sum) {
    int i, j;
    int16x8_t v_sum = vdupq_n_s16(0);
    int32x4_t v_sse_lo = vdupq_n_s32(0);
    int32x4_t v_sse_hi = vdupq_n_s32(0);

    for (i = 0; i < h; ++i) {
        for (j = 0; j < w; j += 8) {
            const uint8x8_t v_a = vld1_u8(&a[j]);
            const uint8x8_t v_b = vld1_u8(&b[j]);
            /* Widening subtract wraps modulo 2^16; the signed view is the
             * true difference. */
            const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
            const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
            v_sum = vaddq_s16(v_sum, sv_diff);
            v_sse_lo = vmlal_s16(v_sse_lo,
                                 vget_low_s16(sv_diff),
                                 vget_low_s16(sv_diff));
            v_sse_hi = vmlal_s16(v_sse_hi,
                                 vget_high_s16(sv_diff),
                                 vget_high_s16(sv_diff));
        }
        a += a_stride;
        b += b_stride;
    }

    *sum = horizontal_add_s16x8(v_sum);
    *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
}

/* Variance of an 8x8 block: stores the sum of squared differences in *sse
 * and returns *sse - sum^2 / 64. */
static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
                                     const uint8_t *b, int b_stride,
                                     unsigned int *sse) {
    int sum;
    variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum);
    return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8));
}

/*
 * Two-tap filter over rows of 8 pixels.
 *
 * For each of output_height rows, writes 8 bytes
 *   out[x] = (src[x] * f0 + src[x + pixel_step] * f1 + 64) >> FILTER_BITS
 * to output_ptr, then advances src_ptr by src_pixels_per_line and
 * output_ptr by output_width.
 *
 * pixel_step selects the direction: 1 filters against the pixel to the
 * right, the source stride filters against the pixel in the row below.
 * vpx_filter points at the two taps {f0, f1}; each is narrowed to 8 bits.
 */
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
                                      uint8_t *output_ptr,
                                      unsigned int src_pixels_per_line,
                                      int pixel_step,
                                      unsigned int output_height,
                                      unsigned int output_width,
                                      const uint16_t *vpx_filter) {
    const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]);
    const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]);
    unsigned int i;
    for (i = 0; i < output_height; ++i) {
        const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
        const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);
        const uint16x8_t a = vmull_u8(src_0, f0);
        const uint16x8_t b = vmlal_u8(a, src_1, f1);
        const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);
        vst1_u8(&output_ptr[0], out);
        /* Next row... */
        src_ptr += src_pixels_per_line;
        output_ptr += output_width;
    }
}

/*
 * Variance between an 8x8 block of dst and an 8x8 bilinear sub-pixel
 * prediction built from src.
 *
 * src, src_stride  source pixels and the distance in bytes between rows.
 * xoffset          horizontal index into bilinear_taps_coeff.
 * yoffset          vertical index into bilinear_taps_coeff.
 * dst, dst_stride  block to compare against and its row distance.
 * sse              receives the sum of squared differences.
 *
 * Returns sse - sum^2 / 64.
 */
unsigned int vp8_sub_pixel_variance8x8_neon(
        const unsigned char *src,
        int src_stride,
        int xoffset,
        int yoffset,
        const unsigned char *dst,
        int dst_stride,
        unsigned int *sse) {
    DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8 * kWidth8);
    DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8);
    if (xoffset == 0) {
        /* Vertical filter only: the second tap is the pixel one source row
         * below, so the step is the source stride. */
        var_filter_block2d_bil_w8(src, temp2, src_stride, src_stride, kHeight8,
                                  kWidth8, bilinear_taps_coeff[yoffset]);
    } else if (yoffset == 0) {
        /* Horizontal filter only: temp2 holds exactly kHeight8 rows, so
         * produce no more than that. */
        var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne,
                                  kHeight8, kWidth8,
                                  bilinear_taps_coeff[xoffset]);
    } else {
        /* Horizontal pass produces one extra row for the vertical pass,
         * which then reads the packed intermediate with a step of one
         * row (kWidth8 bytes). */
        var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne,
                                  kHeight8PlusOne, kWidth8,
                                  bilinear_taps_coeff[xoffset]);
        var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8,
                                  kWidth8, bilinear_taps_coeff[yoffset]);
    }
    return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse);
}
