1736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov/*
2736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *
4736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *  Use of this source code is governed by a BSD-style license
5736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *  that can be found in the LICENSE file in the root of the source
6736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *  tree. An additional intellectual property rights grant can be found
7736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *  in the file PATENTS.  All contributing project authors may
8736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov *  be found in the AUTHORS file in the root of the source tree.
9736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov */
10736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
11736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov#include "./vp9_rtcd.h"
12736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
13736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov#include "vpx_ports/mem.h"
14736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov#include "vpx/vpx_integer.h"
15736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
16736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov#include "vp9/common/vp9_common.h"
17736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov#include "vp9/common/vp9_filter.h"
18736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
19736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov#include "vp9/encoder/vp9_variance.h"
20736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Accumulates, over a w x h block, the sum of pixel differences
// (src - ref) into *sum and the sum of squared differences into *sse.
// Rows of src_ptr are source_stride bytes apart and rows of ref_ptr are
// recon_stride bytes apart. With no pixels to visit (w <= 0 or h <= 0)
// both outputs are zero.
void variance(const uint8_t *src_ptr,
              int source_stride,
              const uint8_t *ref_ptr,
              int recon_stride,
              int w,
              int h,
              unsigned int *sse,
              int *sum) {
  int row, col;
  int total = 0;
  unsigned int squares = 0;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int delta = src_ptr[col] - ref_ptr[col];

      total += delta;
      squares += delta * delta;
    }

    // Step both blocks down to their next row.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = total;
  *sse = squares;
}
46736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : const uint8_t *src_ptr     : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  int pixel_step             : Offset between filter input
 *                                               samples (see notes).
 *                  unsigned int output_height : Number of rows to produce.
 *                  unsigned int output_width  : Number of columns to produce.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement first-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces uint16_t output to retain precision for next pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
78736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovstatic void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              uint16_t *output_ptr,
80736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              unsigned int src_pixels_per_line,
81736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int pixel_step,
82736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              unsigned int output_height,
83736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              unsigned int output_width,
84736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              const int16_t *vp9_filter) {
85736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  unsigned int i, j;
86736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
8712a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov  for (i = 0; i < output_height; i++) {
88736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    for (j = 0; j < output_width; j++) {
89736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                          (int)src_ptr[pixel_step] * vp9_filter[1],
91f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                          FILTER_BITS);
92f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
93f804420d6e37748b75478406e989c69303756980Svetoslav Ganov      src_ptr++;
94736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    }
95736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
96736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    // Next row...
97736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    src_ptr    += src_pixels_per_line - output_width;
98736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    output_ptr += output_width;
99736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  }
100736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
101736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : const uint16_t *src_ptr    : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  unsigned int pixel_step    : Offset between filter input
 *                                               samples (see notes).
 *                  unsigned int output_height : Number of rows to produce.
 *                  unsigned int output_width  : Number of columns to produce.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint8_t *output_ptr        : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement second-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Requires uint16_t input as produced by
 *                  var_filter_block2d_bil_first_pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
134f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganovstatic void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               uint8_t *output_ptr,
136736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int src_pixels_per_line,
137736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int pixel_step,
138736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int output_height,
139736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int output_width,
140736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const int16_t *vp9_filter) {
141736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  unsigned int  i, j;
142736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
143736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  for (i = 0; i < output_height; i++) {
144736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    for (j = 0; j < output_width; j++) {
145736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                          (int)src_ptr[pixel_step] * vp9_filter[1],
147736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                          FILTER_BITS);
148f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov      src_ptr++;
149736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    }
150736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
151736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    src_ptr += src_pixels_per_line - output_width;
152736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov    output_ptr += output_width;
153736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  }
154736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
155736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Returns the sum of squares of the 256 int16_t values starting at src_ptr.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  const int16_t *cur;
  unsigned int total = 0;

  for (cur = src_ptr; cur != end; ++cur) {
    total += (*cur * *cur);
  }

  return total;
}
165736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 64x32 block. Stores the sum of squared differences in *sse
// and returns it reduced by (sum * sum) >> 11, where 2^11 == 64 * 32 is the
// number of pixels in the block.
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_sum;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_sum, &diff_sum);
  *sse = sq_sum;

  // Square in 64 bits so the product of the signed sum cannot overflow int.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 11;
  return (unsigned int)(sq_sum - mean_term);
}
178736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance of a 64x32 block. The source is run through the 2-tap
// bilinear taps looked up for xoffset (horizontal pass) and yoffset
// (vertical pass); the filtered block is then compared against dst_ptr
// with vp9_variance64x32, which also fills in *sse.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[65 * 64];    // Output of the horizontal pass.
  uint8_t filtered[68 * 64];  // Output of the vertical pass, stride 64.
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass yields 33 rows of 64: one more than the block height,
  // because the vertical pass reads the row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 64, 64, 32, 64,
                                     vfilter);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
199736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
200736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  src_pixels_per_line,
202736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  xoffset,
203736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  yoffset,
204736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *dst_ptr,
205f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int dst_pixels_per_line,
206406970b06c8472cbd44ecc278d643a12589c6b38Svetoslav Ganov                                               unsigned int *sse,
207f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov                                               const uint8_t *second_pred) {
208736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
20991feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  uint8_t temp2[68 * 64];
210736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
211f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  const int16_t *hfilter, *vfilter;
212f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
213f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
216f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                    1, 33, 64, hfilter);
218f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
222736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 32x64 block. Stores the sum of squared differences in *sse
// and returns it reduced by (sum * sum) >> 11, where 2^11 == 32 * 64 is the
// number of pixels in the block.
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_sum;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_sum, &diff_sum);
  *sse = sq_sum;

  // Square in 64 bits so the product of the signed sum cannot overflow int.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 11;
  return (unsigned int)(sq_sum - mean_term);
}
235f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov
// Sub-pixel variance of a 32x64 block. The source is run through the 2-tap
// bilinear taps looked up for xoffset (horizontal pass) and yoffset
// (vertical pass); the filtered block is then compared against dst_ptr
// with vp9_variance32x64, which also fills in *sse.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[65 * 64];    // Output of the horizontal pass.
  uint8_t filtered[68 * 64];  // Output of the vertical pass, stride 32.
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass yields 65 rows of 32: one more than the block height,
  // because the vertical pass reads the row below each of its 64 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 64, 32,
                                     vfilter);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
256736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
257736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  src_pixels_per_line,
259736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  xoffset,
260736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  yoffset,
261736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *dst_ptr,
262736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int dst_pixels_per_line,
263736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int *sse,
264736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *second_pred) {
265736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[68 * 64];
267736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
268736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
269736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
270736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov
27391feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
27491feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov                                    1, 65, 32, hfilter);
27591feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
27691feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
27791feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
27891feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov}
27991feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov
// Variance of a 32x16 block. Stores the sum of squared differences in *sse
// and returns it reduced by (sum * sum) >> 9, where 2^9 == 32 * 16 is the
// number of pixels in the block.
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_sum;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_sum, &diff_sum);
  *sse = sq_sum;

  // Square in 64 bits so the product of the signed sum cannot overflow int.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 9;
  return (unsigned int)(sq_sum - mean_term);
}
292736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance of a 32x16 block. The source is run through the 2-tap
// bilinear taps looked up for xoffset (horizontal pass) and yoffset
// (vertical pass); the filtered block is then compared against dst_ptr
// with vp9_variance32x16, which also fills in *sse.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[33 * 32];    // Output of the horizontal pass.
  uint8_t filtered[36 * 32];  // Output of the vertical pass, stride 32.
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass yields 17 rows of 32: one more than the block height,
  // because the vertical pass reads the row below each of its 16 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 16, 32,
                                     vfilter);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
313736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
314736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  src_pixels_per_line,
31600f7b3f76515d1c6fbe5cf9fee9d3760787c03cdSvetoslav Ganov                                               int  xoffset,
317736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  yoffset,
318736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *dst_ptr,
319736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int dst_pixels_per_line,
320736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int *sse,
32100f7b3f76515d1c6fbe5cf9fee9d3760787c03cdSvetoslav Ganov                                               const uint8_t *second_pred) {
32291feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
32391feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  uint8_t temp2[36 * 32];
32491feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
325736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
326736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
327736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
330736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 17, 32, hfilter);
332736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
33300f7b3f76515d1c6fbe5cf9fee9d3760787c03cdSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
336736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 16x32 block (C reference implementation).
//
// Calls variance() with dimensions 16 and 32, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 9 bits (a division by 512 = 16 * 32).
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  int64_t scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in 64 bits before it is shifted.
  scaled_sum_sq = ((int64_t)block_sum * block_sum) >> 9;
  return (unsigned int)(block_sse - scaled_sum_sq);
}
349736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance for a 16x32 block (C reference implementation).
//
// The source block is filtered with the taps selected by xoffset and yoffset
// via BILINEAR_FILTERS_2TAP, then compared against dst_ptr by
// vp9_variance16x32(). That call receives the caller's sse pointer and its
// return value is returned unchanged.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[33 * 32];  // output of the first filter pass
  uint8_t filtered[36 * 32];    // output of the second filter pass
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass.
  // NOTE(review): 33 looks like block height + 1 (extra row for the vertical
  // pass) -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 16, x_taps);

  // Vertical pass.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 32, 16,
                                     y_taps);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
370f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov
371736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  src_pixels_per_line,
37312a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov                                               int  xoffset,
37412a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov                                               int  yoffset,
375f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov                                               const uint8_t *dst_ptr,
376f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov                                               int dst_pixels_per_line,
377f5a07905a3e025f95472a3f8d9935263e49ad6d3Svetoslav Ganov                                               unsigned int *sse,
378736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *second_pred) {
379736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[36 * 32];
381736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
382736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
383736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
384736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
387736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
38812a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov                                    1, 33, 16, hfilter);
38912a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
393736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 64x64 block (C reference implementation).
//
// Calls variance() with dimensions 64 and 64, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 12 bits (a division by 4096 = 64 * 64).
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  int64_t scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in 64 bits before it is shifted.
  scaled_sum_sq = ((int64_t)block_sum * block_sum) >> 12;
  return (unsigned int)(block_sse - scaled_sum_sq);
}
406736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 32x32 block (C reference implementation).
//
// Calls variance() with dimensions 32 and 32, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 10 bits (a division by 1024 = 32 * 32).
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  int64_t scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in 64 bits before it is shifted.
  scaled_sum_sq = ((int64_t)block_sum * block_sum) >> 10;
  return (unsigned int)(block_sse - scaled_sum_sq);
}
41912a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov
// Thin wrapper around variance() for dimensions 16 and 16: the caller's sse
// and sum pointers are handed straight through, with no post-processing.
void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sse, int *sum) {
  variance(src_ptr, source_stride,
           ref_ptr, ref_stride,
           16, 16,
           sse, sum);
}
425736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 16x16 block (C reference implementation).
//
// Calls variance() with dimensions 16 and 16, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 8 bits (a division by 256 = 16 * 16).
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 8;
  return block_sse - scaled_sum_sq;
}
438736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of an 8x16 block (C reference implementation).
//
// Calls variance() with dimensions 8 and 16, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 7 bits (a division by 128 = 8 * 16).
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 7;
  return block_sse - scaled_sum_sq;
}
451736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 16x8 block (C reference implementation).
//
// Calls variance() with dimensions 16 and 8, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 7 bits (a division by 128 = 16 * 8).
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 7;
  return block_sse - scaled_sum_sq;
}
46491feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov
// Thin wrapper around variance() for dimensions 8 and 8: the caller's sse
// and sum pointers are handed straight through, with no post-processing.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  variance(src_ptr, source_stride,
           ref_ptr, ref_stride,
           8, 8,
           sse, sum);
}
47091feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov
// Variance of an 8x8 block (C reference implementation).
//
// Calls variance() with dimensions 8 and 8, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 6 bits (a division by 64 = 8 * 8).
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 6;
  return block_sse - scaled_sum_sq;
}
483736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of an 8x4 block (C reference implementation).
//
// Calls variance() with dimensions 8 and 4, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 5 bits (a division by 32 = 8 * 4).
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 5;
  return block_sse - scaled_sum_sq;
}
496736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 4x8 block (C reference implementation).
//
// Calls variance() with dimensions 4 and 8, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 5 bits (a division by 32 = 4 * 8).
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 5;
  return block_sse - scaled_sum_sq;
}
509736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Variance of a 4x4 block (C reference implementation).
//
// Calls variance() with dimensions 4 and 4, stores its first output in
// *sse, and returns that value minus the square of its second output
// shifted right by 4 bits (a division by 16 = 4 * 4).
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int block_sse;
  int block_sum;
  unsigned int scaled_sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &block_sse, &block_sum);
  *sse = block_sse;

  // The square is formed in unsigned arithmetic before it is shifted.
  scaled_sum_sq = ((unsigned int)block_sum * block_sum) >> 4;
  return block_sse - scaled_sum_sq;
}
522736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
523736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Squared-error measure for a 16x16 block (C reference implementation).
//
// Calls variance() with dimensions 16 and 16; its first output is both
// stored in *sse and returned. Its second output is discarded.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int source_stride,
                            const uint8_t *ref_ptr,
                            int recon_stride,
                            unsigned int *sse) {
  unsigned int block_sse;
  int ignored_sum;  // variance() needs somewhere to write; value is unused

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &block_sse, &ignored_sum);

  *sse = block_sse;
  return block_sse;
}
536736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Squared-error measure for a 16x8 block (C reference implementation).
//
// Calls variance() with dimensions 16 and 8; its first output is both
// stored in *sse and returned. Its second output is discarded.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int recon_stride,
                           unsigned int *sse) {
  unsigned int block_sse;
  int ignored_sum;  // variance() needs somewhere to write; value is unused

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &block_sse, &ignored_sum);

  *sse = block_sse;
  return block_sse;
}
549736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Squared-error measure for an 8x16 block (C reference implementation).
//
// Calls variance() with dimensions 8 and 16; its first output is both
// stored in *sse and returned. Its second output is discarded.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int recon_stride,
                           unsigned int *sse) {
  unsigned int block_sse;
  int ignored_sum;  // variance() needs somewhere to write; value is unused

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &block_sse, &ignored_sum);

  *sse = block_sse;
  return block_sse;
}
562f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
// Squared-error measure for an 8x8 block (C reference implementation).
//
// Calls variance() with dimensions 8 and 8; its first output is both
// stored in *sse and returned. Its second output is discarded.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
                          int source_stride,
                          const uint8_t *ref_ptr,
                          int recon_stride,
                          unsigned int *sse) {
  unsigned int block_sse;
  int ignored_sum;  // variance() needs somewhere to write; value is unused

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &block_sse, &ignored_sum);

  *sse = block_sse;
  return block_sse;
}
575f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
576f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
// Sub-pixel variance for a 4x4 block (C reference implementation).
//
// The source block is filtered horizontally and then vertically with the
// taps selected by xoffset and yoffset via BILINEAR_FILTERS_2TAP, then
// compared against dst_ptr by vp9_variance4x4(). That call receives the
// caller's sse pointer and its return value is returned unchanged.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t horiz_out[5 * 4];  // output of the horizontal pass
  uint8_t filtered[20 * 16];  // output of the vertical pass
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D filter first.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 5, 4, x_taps);

  // Then the vertical filter.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 4, 4, 4, 4, y_taps);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
600736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
601736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
602736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  src_pixels_per_line,
603736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  xoffset,
604f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                             int  yoffset,
605f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                             const uint8_t *dst_ptr,
606f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                             int dst_pixels_per_line,
607736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             unsigned int *sse,
608736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             const uint8_t *second_pred) {
609736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[20 * 16];
610736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
611736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
612736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
613736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
614736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
615736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
616736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
617736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  // First filter 1d Horizontal
618736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
619736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 5, 4, hfilter);
620736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
621f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  // Now filter Verticaly
622736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
623736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
624736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
625736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
626736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance for an 8x8 block (C reference implementation).
//
// The source block is filtered with the taps selected by xoffset and yoffset
// via BILINEAR_FILTERS_2TAP, then compared against dst_ptr by
// vp9_variance8x8(). That call receives the caller's sse pointer and its
// return value is returned unchanged.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t horiz_out[9 * 8];  // output of the first filter pass
  uint8_t filtered[20 * 16];  // output of the second filter pass
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass.
  // NOTE(review): 9 looks like block height + 1 (extra row for the vertical
  // pass) -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 9, 8, x_taps);

  // Vertical pass.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 8, 8, 8, 8, y_taps);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
64791feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov
64891feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganovunsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
64991feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov                                             int  src_pixels_per_line,
650736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  xoffset,
651736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  yoffset,
652f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                             const uint8_t *dst_ptr,
653736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int dst_pixels_per_line,
654736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             unsigned int *sse,
655736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             const uint8_t *second_pred) {
656736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
657736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[20 * 16];
658736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
659736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
660736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
661736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
662bd206d129fdd1777b9f9646a834d7fc342a8941eSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
663bd206d129fdd1777b9f9646a834d7fc342a8941eSvetoslav Ganov
664f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
665f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                    1, 9, 8, hfilter);
666f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
667736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
668736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
669736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
67091feae3c5994bd4768cea3507c62c65746adcfa6Svetoslav Ganov
// Sub-pixel variance of a 16x16 block.
//
// src_ptr goes through the two bilinear passes (first pass with the filter
// selected by xoffset, second pass with the one selected by yoffset) and the
// filtered block is compared with dst_ptr by vp9_variance16x16().  sse is
// forwarded to vp9_variance16x16(), whose result is returned unchanged.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[17 * 16];  // output of the first filtering pass
  uint8_t filtered[20 * 16];     // output of the second filtering pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 17, 16, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 16, 16,
                                     vert_taps);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
691f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
692f804420d6e37748b75478406e989c69303756980Svetoslav Ganovunsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
693f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int  src_pixels_per_line,
694f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int  xoffset,
695f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int  yoffset,
696f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               const uint8_t *dst_ptr,
697f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int dst_pixels_per_line,
698f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               unsigned int *sse,
699f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               const uint8_t *second_pred) {
700f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  uint16_t fdata3[17 * 16];
701f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  uint8_t temp2[20 * 16];
702f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
703f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  const int16_t *hfilter, *vfilter;
704f804420d6e37748b75478406e989c69303756980Svetoslav Ganov
705f804420d6e37748b75478406e989c69303756980Svetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
706736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
707736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
708736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
709736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 17, 16, hfilter);
710736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
711736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
712736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
713736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
714736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
715736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance of a 64x64 block.
//
// src_ptr goes through the two bilinear passes (first pass with the filter
// selected by xoffset, second pass with the one selected by yoffset) and the
// filtered block is compared with dst_ptr by vp9_variance64x64().  sse is
// forwarded to vp9_variance64x64(), whose result is returned unchanged.
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[65 * 64];  // output of the first filtering pass
  uint8_t filtered[68 * 64];     // output of the second filtering pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 65, 64, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 64, 64, 64, 64,
                                     vert_taps);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
736736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
737736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
738736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  src_pixels_per_line,
739736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  xoffset,
740736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  yoffset,
741736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *dst_ptr,
742736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int dst_pixels_per_line,
743736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               unsigned int *sse,
744736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               const uint8_t *second_pred) {
745736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
746736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[68 * 64];
747736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
748736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
749736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
750736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
751736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
752736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
753736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
754736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 65, 64, hfilter);
755736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
756736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
757736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
758736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
759736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance of a 32x32 block.
//
// src_ptr goes through the two bilinear passes (first pass with the filter
// selected by xoffset, second pass with the one selected by yoffset) and the
// filtered block is compared with dst_ptr by vp9_variance32x32().  sse is
// forwarded to vp9_variance32x32(), whose result is returned unchanged.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[33 * 32];  // output of the first filtering pass
  uint8_t filtered[36 * 32];     // output of the second filtering pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 33, 32, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 32, 32, 32, 32,
                                     vert_taps);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
780736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
781736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
782736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  src_pixels_per_line,
783736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                               int  xoffset,
784f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int  yoffset,
785f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               const uint8_t *dst_ptr,
786f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               int dst_pixels_per_line,
787f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               unsigned int *sse,
788f804420d6e37748b75478406e989c69303756980Svetoslav Ganov                                               const uint8_t *second_pred) {
789736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
790736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[36 * 32];
791736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
792736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
793736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
794736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
795736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
796736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
797736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
798736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 33, 32, hfilter);
79912a024ca681d877fe16b7e087356f7aff175a218Svetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
800736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
801736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
802736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
803736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// 16x16 variance at a fixed sub-pixel position: x offset 8, y offset 0.
// Thin wrapper over vp9_sub_pixel_variance16x16_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
812736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// 32x32 variance at a fixed sub-pixel position: x offset 8, y offset 0.
// Thin wrapper over vp9_sub_pixel_variance32x32_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
8213e4e4af45216aee4d4b009fe842c0324610918ebSvetoslav Ganov
// 64x64 variance at a fixed sub-pixel position: x offset 8, y offset 0.
// Thin wrapper over vp9_sub_pixel_variance64x64_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
83051cccf0845b36539d42503495f0689d487712b3aSvetoslav Ganov
// 16x16 variance at a fixed sub-pixel position: x offset 0, y offset 8.
// Thin wrapper over vp9_sub_pixel_variance16x16_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
83951cccf0845b36539d42503495f0689d487712b3aSvetoslav Ganov
// 32x32 variance at a fixed sub-pixel position: x offset 0, y offset 8.
// Thin wrapper over vp9_sub_pixel_variance32x32_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
84851cccf0845b36539d42503495f0689d487712b3aSvetoslav Ganov
// 64x64 variance at a fixed sub-pixel position: x offset 0, y offset 8.
// Thin wrapper over vp9_sub_pixel_variance64x64_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
857736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// 16x16 variance at a fixed sub-pixel position: x offset 8, y offset 8.
// Thin wrapper over vp9_sub_pixel_variance16x16_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
866736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// 32x32 variance at a fixed sub-pixel position: x offset 8, y offset 8.
// Thin wrapper over vp9_sub_pixel_variance32x32_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
875736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// 64x64 variance at a fixed sub-pixel position: x offset 8, y offset 8.
// Thin wrapper over vp9_sub_pixel_variance64x64_c(); every other argument is
// forwarded untouched and the callee's result is returned as is.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
884736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel squared-error figure for a 16x16 block.
//
// Delegates to vp9_sub_pixel_variance16x16_c() with the same arguments,
// ignores that call's return value, and returns whatever *sse holds
// afterwards.  sse is dereferenced unconditionally, so it must not be NULL.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset, int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the value left in *sse is of interest here.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
897736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel squared-error figure for a 32x32 block.
//
// Delegates to vp9_sub_pixel_variance32x32_c() with the same arguments,
// ignores that call's return value, and returns whatever *sse holds
// afterwards.  sse is dereferenced unconditionally, so it must not be NULL.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset, int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the value left in *sse is of interest here.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
910736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel squared-error figure for a 64x64 block.
//
// Delegates to vp9_sub_pixel_variance64x64_c() with the same arguments,
// ignores that call's return value, and returns whatever *sse holds
// afterwards.  sse is dereferenced unconditionally, so it must not be NULL.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset, int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the value left in *sse is of interest here.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
923736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
// Sub-pixel variance of a 16x8 block.
//
// src_ptr goes through the two bilinear passes (first pass with the filter
// selected by xoffset, second pass with the one selected by yoffset) and the
// filtered block is compared with dst_ptr by vp9_variance16x8().  sse is
// forwarded to vp9_variance16x8(), whose result is returned unchanged.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset, int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[16 * 9];  // output of the first filtering pass
  uint8_t filtered[20 * 16];    // output of the second filtering pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 16, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 8, 16,
                                     vert_taps);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
94400f7b3f76515d1c6fbe5cf9fee9d3760787c03cdSvetoslav Ganov
945736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
946736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int  src_pixels_per_line,
947736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int  xoffset,
948736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int  yoffset,
949736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              const uint8_t *dst_ptr,
950736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int dst_pixels_per_line,
951736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              unsigned int *sse,
952736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              const uint8_t *second_pred) {
95300f7b3f76515d1c6fbe5cf9fee9d3760787c03cdSvetoslav Ganov  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
954736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[20 * 16];
955736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
956736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
957736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
958736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
959736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
960736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
961736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
962736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 9, 16, hfilter);
963736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
964736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
965736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
966736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
967736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
/*
 * 8x16 sub-pixel variance, plain-C version.
 *
 * src_ptr is filtered in two passes (kernel from xoffset first, kernel from
 * yoffset second) and the filtered block, stored with a pitch of 8, is
 * compared with dst_ptr by vp9_variance8x16().
 *
 * sse is forwarded untouched to vp9_variance8x16(), whose result is returned.
 */
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  // NOTE(review): the first pass below is invoked with 17 and 8, presumably
  // rows x columns (136 entries); that fits in the 144 reserved here, but
  // the "9 * 16" spelling does not describe this block shape -- confirm.
  uint16_t first_pass_out[9 * 16];
  uint8_t filtered[20 * 16];  // written by the yoffset pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 17, 8, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered, 8, 8, 16, 8,
                                     y_kernel);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
988736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
989736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
990736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int  src_pixels_per_line,
991736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int  xoffset,
992736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int  yoffset,
993736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              const uint8_t *dst_ptr,
994736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              int dst_pixels_per_line,
995736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              unsigned int *sse,
996736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                              const uint8_t *second_pred) {
997736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
998736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[20 * 16];
999736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
1000736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
1001736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1002736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1003736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1004736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1005736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1006736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 17, 8, hfilter);
1007736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1008736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1009736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1010736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
1011736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
/*
 * 8x4 sub-pixel variance, plain-C version.
 *
 * src_ptr is filtered in two passes (kernel from xoffset first, kernel from
 * yoffset second) and the filtered block, stored with a pitch of 8, is
 * compared with dst_ptr by vp9_variance8x4().
 *
 * sse is forwarded untouched to vp9_variance8x4(), whose result is returned.
 */
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[8 * 5];  // written by the xoffset pass
  uint8_t filtered[20 * 16];       // written by the yoffset pass

  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 5, 8, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered, 8, 8, 4, 8,
                                     y_kernel);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
1032736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1033736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1034736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  src_pixels_per_line,
1035736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  xoffset,
1036736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  yoffset,
1037736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             const uint8_t *dst_ptr,
1038736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int dst_pixels_per_line,
1039736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             unsigned int *sse,
1040736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             const uint8_t *second_pred) {
1041736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1042736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[20 * 16];
1043736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
1044736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
1045736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1046736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1047736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1048736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1049736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1050736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 5, 8, hfilter);
1051736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1052736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1053736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1054736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
1055736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
/*
 * 4x8 sub-pixel variance, plain-C version.
 *
 * src_ptr is filtered in two passes (kernel from xoffset first, kernel from
 * yoffset second) and the filtered block, stored with a pitch of 4, is
 * compared with dst_ptr by vp9_variance4x8().
 *
 * sse is forwarded untouched to vp9_variance4x8(), whose result is returned.
 */
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  // NOTE(review): the first pass below is invoked with 9 and 4, presumably
  // rows x columns (36 entries), which fits in the 40 reserved here.
  uint16_t first_pass_out[5 * 8];
  // FIXME(jingning,rbultje): this buffer probably does not need to be this
  // big. The same oversizing appears in the functions for every other block
  // size.
  uint8_t filtered[20 * 16];

  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 9, 4, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered, 4, 4, 8, 4,
                                     y_kernel);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
1078736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1079736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganovunsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1080736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  src_pixels_per_line,
1081736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  xoffset,
1082736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int  yoffset,
1083736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             const uint8_t *dst_ptr,
1084736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             int dst_pixels_per_line,
1085736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             unsigned int *sse,
1086736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                             const uint8_t *second_pred) {
1087736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1088736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  uint8_t temp2[20 * 16];
1089736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
1090736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  const int16_t *hfilter, *vfilter;
1091736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1092736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1093736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1094736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1095736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1096736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov                                    1, 9, 4, hfilter);
1097736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1098736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1099736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1100736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov}
1101736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
1102736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov
/*
 * Build a compound prediction as the rounded mean of two predictions.
 *
 * comp_pred and pred are both walked as tightly packed blocks (each row
 * starts `width` bytes after the previous one); ref is walked with a row
 * pitch of ref_stride. Each output sample is (pred + ref + 1) >> 1, so an
 * exact half rounds up. Nothing is written when height or width is <= 0.
 */
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int rows_left;

  for (rows_left = height; rows_left > 0; --rows_left) {
    int col;

    for (col = 0; col < width; ++col) {
      // The operands promote to int, so the sum cannot wrap at 8 bits.
      const int rounded_sum = pred[col] + ref[col] + 1;
      comp_pred[col] = (uint8_t)(rounded_sum >> 1);
    }

    // Step all three cursors to the start of their next row.
    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
1118736c2756bf3c14ae9fef7255c119057f7a2be1edSvetoslav Ganov