1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vp9_rtcd.h" 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx/vpx_integer.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_common.h" 17233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_filter.h" 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/encoder/vp9_variance.h" 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan 21233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid variance(const uint8_t *src_ptr, 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan int h, 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum) { 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan int i, j; 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan int diff; 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sum = 0; 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = 0; 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < h; i++) { 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (j = 0; j < w; j++) { 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan diff = src_ptr[j] - ref_ptr[j]; 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sum += diff; 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse += diff * diff; 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr += source_stride; 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr += recon_stride; 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan/**************************************************************************** 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan * ROUTINE : filter_block2d_bil_first_pass 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan * INPUTS : uint8_t *src_ptr : Pointer to source block. 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t src_pixels_per_line : Stride of input block. 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t pixel_step : Offset between filter input 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan * samples (see notes). 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t output_height : Input block height. 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t output_width : Input block width. 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan * int32_t *vp9_filter : Array of 2 bi-linear filter 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan * taps. 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan * RETURNS : void 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan * either horizontal or vertical direction to produce the 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan * filtered output block. Used to implement first-pass 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan * of 2-D separable filter. 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Two filter taps should sum to VP9_FILTER_WEIGHT. 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan * pixel_step defines whether the filter is applied 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan * horizontally (pixel_step=1) or vertically (pixel_step= 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan * stride). 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan * It defines the offset required to move from one input 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan * to the next. 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan ****************************************************************************/ 78233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t *output_ptr, 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int src_pixels_per_line, 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan int pixel_step, 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_height, 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_width, 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *vp9_filter) { 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int i, j; 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < output_height; i++) { 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (j = 0; j < output_width; j++) { 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan (int)src_ptr[pixel_step] * vp9_filter[1], 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan FILTER_BITS); 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr++; 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan // Next row... 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr += src_pixels_per_line - output_width; 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan output_ptr += output_width; 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan/**************************************************************************** 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan * ROUTINE : filter_block2d_bil_second_pass 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan * INPUTS : int32_t *src_ptr : Pointer to source block. 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t src_pixels_per_line : Stride of input block. 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t pixel_step : Offset between filter input 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan * samples (see notes). 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t output_height : Input block height. 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan * uint32_t output_width : Input block width. 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan * int32_t *vp9_filter : Array of 2 bi-linear filter 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan * taps. 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan * RETURNS : void 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan * either horizontal or vertical direction to produce the 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan * filtered output block. Used to implement second-pass 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan * of 2-D separable filter. 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan * SPECIAL NOTES : Requires 32-bit input as produced by 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan * filter_block2d_bil_first_pass. 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Two filter taps should sum to VP9_FILTER_WEIGHT. 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan * pixel_step defines whether the filter is applied 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan * horizontally (pixel_step=1) or vertically (pixel_step= 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan * stride). 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan * It defines the offset required to move from one input 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan * to the next. 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan ****************************************************************************/ 134233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *output_ptr, 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int src_pixels_per_line, 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int pixel_step, 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_height, 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_width, 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *vp9_filter) { 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int i, j; 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < output_height; i++) { 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (j = 0; j < output_width; j++) { 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan (int)src_ptr[pixel_step] * vp9_filter[1], 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan FILTER_BITS); 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr++; 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr += src_pixels_per_line - output_width; 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan output_ptr += output_width; 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 154233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 155233d2500723e5594f3e7c70896ffeeef32b9c950ywan 156233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int i, sum = 0; 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < 256; i++) { 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan sum += (src_ptr[i] * src_ptr[i]); 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sum; 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 165233d2500723e5594f3e7c70896ffeeef32b9c950ywan 166233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance64x32_c(const uint8_t *src_ptr, 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg); 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 11)); 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan 179233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 185233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[68 * 64]; 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 33, 64, hfilter); 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan 200233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 208233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 209233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[68 * 64]; 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 211233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 33, 64, hfilter); 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan 223233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x64_c(const uint8_t *src_ptr, 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 225233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 226233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 227233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg); 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 233233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 11)); 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan 236233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[68 * 64]; 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 65, 32, hfilter); 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan 257233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[68 * 64]; 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 271233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 272233d2500723e5594f3e7c70896ffeeef32b9c950ywan 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 65, 32, hfilter); 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 276233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); 277233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 278233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 279233d2500723e5594f3e7c70896ffeeef32b9c950ywan 280233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x16_c(const uint8_t *src_ptr, 281233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 282233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 283233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 284233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 285233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 9)); 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan 293233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[36 * 32]; 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 17, 32, hfilter); 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan 314233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[36 * 32]; 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 17, 32, hfilter); 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan 337233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x32_c(const uint8_t *src_ptr, 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 9)); 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan 350233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[36 * 32]; 359233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 33, 16, hfilter); 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan 371233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 377233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 378233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[36 * 32]; 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer 382233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 33, 16, hfilter); 389233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan 394233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance64x64_c(const uint8_t *src_ptr, 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 396233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 398233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 399233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 400233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 401233d2500723e5594f3e7c70896ffeeef32b9c950ywan 402233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg); 403233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 404233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 12)); 405233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 406233d2500723e5594f3e7c70896ffeeef32b9c950ywan 407233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x32_c(const uint8_t *src_ptr, 408233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 409233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 410233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 411233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 412233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 413233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 414233d2500723e5594f3e7c70896ffeeef32b9c950ywan 415233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg); 416233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 417233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 10)); 418233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 419233d2500723e5594f3e7c70896ffeeef32b9c950ywan 420233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x16_c(const uint8_t *src_ptr, 421233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 422233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 423233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 424233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 425233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 426233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 427233d2500723e5594f3e7c70896ffeeef32b9c950ywan 428233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 429233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 430233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 8)); 431233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 432233d2500723e5594f3e7c70896ffeeef32b9c950ywan 433233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x16_c(const uint8_t *src_ptr, 434233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 435233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 436233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 437233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 438233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 439233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 440233d2500723e5594f3e7c70896ffeeef32b9c950ywan 441233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 442233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 443233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 444233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 445233d2500723e5594f3e7c70896ffeeef32b9c950ywan 446233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x8_c(const uint8_t *src_ptr, 447233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 448233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 449233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 450233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 451233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 452233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 453233d2500723e5594f3e7c70896ffeeef32b9c950ywan 454233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 455233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 456233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 457233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 458233d2500723e5594f3e7c70896ffeeef32b9c950ywan 459233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, 460233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, int ref_stride, 461233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, int *sum) { 462233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); 463233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 464233d2500723e5594f3e7c70896ffeeef32b9c950ywan 465233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x8_c(const uint8_t *src_ptr, 466233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 467233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 468233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 469233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 470233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 471233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 472233d2500723e5594f3e7c70896ffeeef32b9c950ywan 473233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 474233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 475233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 6)); 476233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 477233d2500723e5594f3e7c70896ffeeef32b9c950ywan 478233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x4_c(const uint8_t *src_ptr, 479233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 480233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 481233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 482233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 483233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 484233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 485233d2500723e5594f3e7c70896ffeeef32b9c950ywan 486233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); 487233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 488233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 5)); 489233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 490233d2500723e5594f3e7c70896ffeeef32b9c950ywan 491233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance4x8_c(const uint8_t *src_ptr, 492233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 493233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 494233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 495233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 496233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 497233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 498233d2500723e5594f3e7c70896ffeeef32b9c950ywan 499233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); 500233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 501233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 5)); 502233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 503233d2500723e5594f3e7c70896ffeeef32b9c950ywan 504233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance4x4_c(const uint8_t *src_ptr, 505233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 506233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 507233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 508233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 509233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 510233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 511233d2500723e5594f3e7c70896ffeeef32b9c950ywan 512233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 513233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 514233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 4)); 515233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 516233d2500723e5594f3e7c70896ffeeef32b9c950ywan 517233d2500723e5594f3e7c70896ffeeef32b9c950ywan 518233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse16x16_c(const uint8_t *src_ptr, 519233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 520233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 521233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 522233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 523233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 524233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 525233d2500723e5594f3e7c70896ffeeef32b9c950ywan 526233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 527233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 528233d2500723e5594f3e7c70896ffeeef32b9c950ywan return var; 529233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 530233d2500723e5594f3e7c70896ffeeef32b9c950ywan 531233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse16x8_c(const uint8_t *src_ptr, 532233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 533233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 534233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 535233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 536233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 537233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 538233d2500723e5594f3e7c70896ffeeef32b9c950ywan 539233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 540233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 541233d2500723e5594f3e7c70896ffeeef32b9c950ywan return var; 542233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 543233d2500723e5594f3e7c70896ffeeef32b9c950ywan 544233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse8x16_c(const uint8_t *src_ptr, 545233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 546233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 547233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 548233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 549233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 550233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 551233d2500723e5594f3e7c70896ffeeef32b9c950ywan 552233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 553233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 554233d2500723e5594f3e7c70896ffeeef32b9c950ywan return var; 555233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 556233d2500723e5594f3e7c70896ffeeef32b9c950ywan 557233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse8x8_c(const uint8_t *src_ptr, 558233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 559233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 560233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 561233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 562233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 563233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 564233d2500723e5594f3e7c70896ffeeef32b9c950ywan 565233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 566233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 567233d2500723e5594f3e7c70896ffeeef32b9c950ywan return var; 568233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 569233d2500723e5594f3e7c70896ffeeef32b9c950ywan 570233d2500723e5594f3e7c70896ffeeef32b9c950ywan 571233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, 572233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 573233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 574233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 575233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 576233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 577233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 578233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 579233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 580233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 581233d2500723e5594f3e7c70896ffeeef32b9c950ywan 582233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 583233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 584233d2500723e5594f3e7c70896ffeeef32b9c950ywan 585233d2500723e5594f3e7c70896ffeeef32b9c950ywan // First filter 1d Horizontal 586233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 587233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 5, 4, hfilter); 588233d2500723e5594f3e7c70896ffeeef32b9c950ywan 589233d2500723e5594f3e7c70896ffeeef32b9c950ywan // Now filter Verticaly 590233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 591233d2500723e5594f3e7c70896ffeeef32b9c950ywan 592233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 593233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 594233d2500723e5594f3e7c70896ffeeef32b9c950ywan 595233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, 596233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 597233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 598233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 599233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 600233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 601233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 602233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 603233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 604233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 605233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer 606233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 607233d2500723e5594f3e7c70896ffeeef32b9c950ywan 608233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 609233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 610233d2500723e5594f3e7c70896ffeeef32b9c950ywan 611233d2500723e5594f3e7c70896ffeeef32b9c950ywan // First filter 1d Horizontal 612233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 613233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 5, 4, hfilter); 614233d2500723e5594f3e7c70896ffeeef32b9c950ywan 615233d2500723e5594f3e7c70896ffeeef32b9c950ywan // Now filter Verticaly 616233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 617233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); 618233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 619233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 620233d2500723e5594f3e7c70896ffeeef32b9c950ywan 621233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, 622233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 623233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 624233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 625233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 626233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 627233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 628233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 629233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 630233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 631233d2500723e5594f3e7c70896ffeeef32b9c950ywan 632233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 633233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 634233d2500723e5594f3e7c70896ffeeef32b9c950ywan 635233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 636233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 9, 8, hfilter); 637233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 638233d2500723e5594f3e7c70896ffeeef32b9c950ywan 639233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 640233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 641233d2500723e5594f3e7c70896ffeeef32b9c950ywan 642233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, 643233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 644233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 645233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 646233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 647233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 648233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 649233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 650233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 651233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 652233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer 653233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 654233d2500723e5594f3e7c70896ffeeef32b9c950ywan 655233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 656233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 657233d2500723e5594f3e7c70896ffeeef32b9c950ywan 658233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 659233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 9, 8, hfilter); 660233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 661233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); 662233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 663233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 664233d2500723e5594f3e7c70896ffeeef32b9c950ywan 665233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, 666233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 667233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 668233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 669233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 670233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 671233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 672233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering 673233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 674233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 675233d2500723e5594f3e7c70896ffeeef32b9c950ywan 676233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 677233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 678233d2500723e5594f3e7c70896ffeeef32b9c950ywan 679233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 680233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 17, 16, hfilter); 681233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 682233d2500723e5594f3e7c70896ffeeef32b9c950ywan 683233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 684233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 685233d2500723e5594f3e7c70896ffeeef32b9c950ywan 686233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 687233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 688233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 689233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 690233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 691233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 692233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 693233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 694233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[17 * 16]; 695233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 696233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer 697233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 698233d2500723e5594f3e7c70896ffeeef32b9c950ywan 699233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 700233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 701233d2500723e5594f3e7c70896ffeeef32b9c950ywan 702233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 703233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 17, 16, hfilter); 704233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 705233d2500723e5594f3e7c70896ffeeef32b9c950ywan 706233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); 707233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 708233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 709233d2500723e5594f3e7c70896ffeeef32b9c950ywan 710233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, 711233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 712233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 713233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 714233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 715233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 716233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 717233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 718233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[68 * 64]; 719233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 720233d2500723e5594f3e7c70896ffeeef32b9c950ywan 721233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 722233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 723233d2500723e5594f3e7c70896ffeeef32b9c950ywan 724233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 725233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 65, 64, hfilter); 726233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 727233d2500723e5594f3e7c70896ffeeef32b9c950ywan 728233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 729233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 730233d2500723e5594f3e7c70896ffeeef32b9c950ywan 731233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, 732233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 733233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 734233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 735233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 736233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 737233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 738233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 739233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 740233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[68 * 64]; 741233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 742233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 743233d2500723e5594f3e7c70896ffeeef32b9c950ywan 744233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 745233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 746233d2500723e5594f3e7c70896ffeeef32b9c950ywan 747233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 748233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 65, 64, hfilter); 749233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 750233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); 751233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 752233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 753233d2500723e5594f3e7c70896ffeeef32b9c950ywan 754233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, 755233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 756233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 757233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 758233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 759233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 760233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 761233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 762233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[36 * 32]; 763233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 764233d2500723e5594f3e7c70896ffeeef32b9c950ywan 765233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 766233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 767233d2500723e5594f3e7c70896ffeeef32b9c950ywan 768233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 769233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 33, 32, hfilter); 770233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 771233d2500723e5594f3e7c70896ffeeef32b9c950ywan 772233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 773233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 774233d2500723e5594f3e7c70896ffeeef32b9c950ywan 775233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, 776233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 777233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 778233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 779233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 780233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 781233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 782233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 783233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 784233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[36 * 32]; 785233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer 786233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 787233d2500723e5594f3e7c70896ffeeef32b9c950ywan 788233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 789233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 790233d2500723e5594f3e7c70896ffeeef32b9c950ywan 791233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 792233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 33, 32, hfilter); 793233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 794233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); 795233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 796233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 797233d2500723e5594f3e7c70896ffeeef32b9c950ywan 798233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, 799233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 800233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 801233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 802233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 803233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, 804233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 805233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 806233d2500723e5594f3e7c70896ffeeef32b9c950ywan 807233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, 808233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 809233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 810233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 811233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 812233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, 813233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 814233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 815233d2500723e5594f3e7c70896ffeeef32b9c950ywan 816233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, 817233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 818233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 819233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 820233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 821233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, 822233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 823233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 824233d2500723e5594f3e7c70896ffeeef32b9c950ywan 825233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, 826233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 827233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 828233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 829233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 830233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, 831233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 832233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 833233d2500723e5594f3e7c70896ffeeef32b9c950ywan 834233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, 835233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 836233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 837233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 838233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 839233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, 840233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 841233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 842233d2500723e5594f3e7c70896ffeeef32b9c950ywan 843233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, 844233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 845233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 846233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 847233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 848233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, 849233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 850233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 851233d2500723e5594f3e7c70896ffeeef32b9c950ywan 852233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, 853233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 854233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 855233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 856233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 857233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, 858233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 859233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 860233d2500723e5594f3e7c70896ffeeef32b9c950ywan 861233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, 862233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 863233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 864233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 865233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 866233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, 867233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 868233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 869233d2500723e5594f3e7c70896ffeeef32b9c950ywan 870233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, 871233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 872233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 873233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 874233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 875233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, 876233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 877233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 878233d2500723e5594f3e7c70896ffeeef32b9c950ywan 879233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, 880233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 881233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 882233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 883233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 884233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 885233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 886233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, 887233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, dst_ptr, 888233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_pixels_per_line, sse); 889233d2500723e5594f3e7c70896ffeeef32b9c950ywan return *sse; 890233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 891233d2500723e5594f3e7c70896ffeeef32b9c950ywan 892233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, 893233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 894233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 895233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 896233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 897233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 898233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 899233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, 900233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, dst_ptr, 901233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_pixels_per_line, sse); 902233d2500723e5594f3e7c70896ffeeef32b9c950ywan return *sse; 903233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 904233d2500723e5594f3e7c70896ffeeef32b9c950ywan 905233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, 906233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 907233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 908233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 909233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 910233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 911233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 912233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, 913233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, dst_ptr, 914233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_pixels_per_line, sse); 915233d2500723e5594f3e7c70896ffeeef32b9c950ywan return *sse; 916233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 917233d2500723e5594f3e7c70896ffeeef32b9c950ywan 918233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, 919233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 920233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 921233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 922233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 923233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 924233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 925233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 926233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 927233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 928233d2500723e5594f3e7c70896ffeeef32b9c950ywan 929233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 930233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 931233d2500723e5594f3e7c70896ffeeef32b9c950ywan 932233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 933233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 9, 16, hfilter); 934233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 935233d2500723e5594f3e7c70896ffeeef32b9c950ywan 936233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 937233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 938233d2500723e5594f3e7c70896ffeeef32b9c950ywan 939233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, 940233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 941233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 942233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 943233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 944233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 945233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 946233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 947233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 948233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 949233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer 950233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 951233d2500723e5594f3e7c70896ffeeef32b9c950ywan 952233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 953233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 954233d2500723e5594f3e7c70896ffeeef32b9c950ywan 955233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 956233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 9, 16, hfilter); 957233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 958233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); 959233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 960233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 961233d2500723e5594f3e7c70896ffeeef32b9c950ywan 962233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, 963233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 964233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 965233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 966233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 967233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 968233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 969233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 970233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 971233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 972233d2500723e5594f3e7c70896ffeeef32b9c950ywan 973233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 974233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 975233d2500723e5594f3e7c70896ffeeef32b9c950ywan 976233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 977233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 17, 8, hfilter); 978233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 979233d2500723e5594f3e7c70896ffeeef32b9c950ywan 980233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 981233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 982233d2500723e5594f3e7c70896ffeeef32b9c950ywan 983233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 984233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 985233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 986233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 987233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 988233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 989233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 990233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 991233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 992233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 993233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer 994233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 995233d2500723e5594f3e7c70896ffeeef32b9c950ywan 996233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 997233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 998233d2500723e5594f3e7c70896ffeeef32b9c950ywan 999233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1000233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 17, 8, hfilter); 1001233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 1002233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); 1003233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1004233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 1005233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1006233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, 1007233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 1008233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 1009233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 1010233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 1011233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 1012233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 1013233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1014233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 1015233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 1016233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1017233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1018233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1019233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1020233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1021233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 5, 8, hfilter); 1022233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1023233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1024233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1025233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 1026233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1027233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, 1028233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 1029233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 1030233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 1031233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 1032233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 1033233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 1034233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 1035233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1036233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 1037233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer 1038233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 1039233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1040233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1041233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1042233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1043233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1044233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 5, 8, hfilter); 1045233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1046233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); 1047233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1048233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 1049233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1050233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, 1051233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 1052233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 1053233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 1054233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 1055233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 1056233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 1057233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1058233d2500723e5594f3e7c70896ffeeef32b9c950ywan // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be 1059233d2500723e5594f3e7c70896ffeeef32b9c950ywan // of this big? same issue appears in all other block size settings. 1060233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 1061233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 1062233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1063233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1064233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1065233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1066233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1067233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 9, 4, hfilter); 1068233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1069233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1070233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 1071233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 1072233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1073233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, 1074233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 1075233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 1076233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 1077233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst_ptr, 1078233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 1079233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse, 1080233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *second_pred) { 1081233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1082233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t temp2[20 * 16]; 1083233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer 1084233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *hfilter, *vfilter; 1085233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1086233d2500723e5594f3e7c70896ffeeef32b9c950ywan hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1087233d2500723e5594f3e7c70896ffeeef32b9c950ywan vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1088233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1089233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1090233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1, 9, 4, hfilter); 1091233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1092233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); 1093233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 1094233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 1095233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1096233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1097233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, 1098233d2500723e5594f3e7c70896ffeeef32b9c950ywan int height, const uint8_t *ref, int ref_stride) { 1099233d2500723e5594f3e7c70896ffeeef32b9c950ywan int i, j; 1100233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1101233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < height; i++) { 1102233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (j = 0; j < width; j++) { 1103233d2500723e5594f3e7c70896ffeeef32b9c950ywan int tmp; 1104233d2500723e5594f3e7c70896ffeeef32b9c950ywan tmp = pred[j] + ref[j]; 1105233d2500723e5594f3e7c70896ffeeef32b9c950ywan comp_pred[j] = (tmp + 1) >> 1; 1106233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 1107233d2500723e5594f3e7c70896ffeeef32b9c950ywan comp_pred += width; 1108233d2500723e5594f3e7c70896ffeeef32b9c950ywan pred += width; 1109233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref += ref_stride; 1110233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 1111233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 1112