/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
#include "./vp9_rtcd.h"

#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"

#include "vp9/encoder/vp9_variance.h"
20233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Accumulates the signed sum of differences and the sum of squared
 * differences between a w x h region of the source and reference images.
 *
 *   src_ptr        top-left sample of the source region
 *   source_stride  distance, in samples, between consecutive source rows
 *   ref_ptr        top-left sample of the reference region
 *   recon_stride   distance, in samples, between consecutive reference rows
 *   w, h           region width and height in samples
 *   sse            out: sum over the region of (src - ref)^2
 *   sum            out: sum over the region of (src - ref)
 *
 * Both outputs are always written; they are 0 when w or h is not positive.
 */
void variance(const uint8_t *src_ptr,
              int  source_stride,
              const uint8_t *ref_ptr,
              int  recon_stride,
              int  w,
              int  h,
              unsigned int *sse,
              int *sum) {
  unsigned int sq_total = 0;
  int diff_total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      // uint8_t operands promote to int, so the difference may be negative.
      const int diff = src_ptr[col] - ref_ptr[col];
      diff_total += diff;
      sq_total += (unsigned int)(diff * diff);
    }

    // Each image advances by its own row pitch.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
46233d2500723e5594f3e7c70896ffeeef32b9c950ywan
47233d2500723e5594f3e7c70896ffeeef32b9c950ywan/****************************************************************************
48233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
49233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  ROUTINE       : filter_block2d_bil_first_pass
50233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
51233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  INPUTS        : uint8_t  *src_ptr          : Pointer to source block.
52233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t src_pixels_per_line : Stride of input block.
53233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t pixel_step        : Offset between filter input
54233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                                               samples (see notes).
55233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t output_height     : Input block height.
56233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t output_width      : Input block width.
57233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
58233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                                               taps.
59233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
60233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  OUTPUTS       : int32_t *output_ptr        : Pointer to filtered block.
61233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
62233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  RETURNS       : void
63233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
64233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
65233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  either horizontal or vertical direction to produce the
66233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  filtered output block. Used to implement first-pass
67233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  of 2-D separable filter.
68233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
69233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
70233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
71233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  pixel_step defines whether the filter is applied
72233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  horizontally (pixel_step=1) or vertically (pixel_step=
73233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  stride).
74233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  It defines the offset required to move from one input
75233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  to the next.
76233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
77233d2500723e5594f3e7c70896ffeeef32b9c950ywan ****************************************************************************/
78233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              uint16_t *output_ptr,
80233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              unsigned int src_pixels_per_line,
81233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int pixel_step,
82233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              unsigned int output_height,
83233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              unsigned int output_width,
84233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              const int16_t *vp9_filter) {
85233d2500723e5594f3e7c70896ffeeef32b9c950ywan  unsigned int i, j;
86233d2500723e5594f3e7c70896ffeeef32b9c950ywan
87233d2500723e5594f3e7c70896ffeeef32b9c950ywan  for (i = 0; i < output_height; i++) {
88233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (j = 0; j < output_width; j++) {
89233d2500723e5594f3e7c70896ffeeef32b9c950ywan      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          (int)src_ptr[pixel_step] * vp9_filter[1],
91233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          FILTER_BITS);
92233d2500723e5594f3e7c70896ffeeef32b9c950ywan
93233d2500723e5594f3e7c70896ffeeef32b9c950ywan      src_ptr++;
94233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
95233d2500723e5594f3e7c70896ffeeef32b9c950ywan
96233d2500723e5594f3e7c70896ffeeef32b9c950ywan    // Next row...
97233d2500723e5594f3e7c70896ffeeef32b9c950ywan    src_ptr    += src_pixels_per_line - output_width;
98233d2500723e5594f3e7c70896ffeeef32b9c950ywan    output_ptr += output_width;
99233d2500723e5594f3e7c70896ffeeef32b9c950ywan  }
100233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
101233d2500723e5594f3e7c70896ffeeef32b9c950ywan
102233d2500723e5594f3e7c70896ffeeef32b9c950ywan/****************************************************************************
103233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
104233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  ROUTINE       : filter_block2d_bil_second_pass
105233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
106233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  INPUTS        : int32_t  *src_ptr          : Pointer to source block.
107233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t src_pixels_per_line : Stride of input block.
108233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t pixel_step        : Offset between filter input
109233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                                               samples (see notes).
110233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t output_height     : Input block height.
111233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  uint32_t output_width      : Input block width.
112233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
113233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                                               taps.
114233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
115233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
116233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
117233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  RETURNS       : void
118233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
119233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
120233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  either horizontal or vertical direction to produce the
121233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  filtered output block. Used to implement second-pass
122233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  of 2-D separable filter.
123233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
124233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  SPECIAL NOTES : Requires 32-bit input as produced by
125233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  filter_block2d_bil_first_pass.
126233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
127233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  pixel_step defines whether the filter is applied
128233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  horizontally (pixel_step=1) or vertically (pixel_step=
129233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  stride).
130233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  It defines the offset required to move from one input
131233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  to the next.
132233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
133233d2500723e5594f3e7c70896ffeeef32b9c950ywan ****************************************************************************/
134233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               uint8_t *output_ptr,
136233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int src_pixels_per_line,
137233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int pixel_step,
138233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int output_height,
139233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int output_width,
140233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const int16_t *vp9_filter) {
141233d2500723e5594f3e7c70896ffeeef32b9c950ywan  unsigned int  i, j;
142233d2500723e5594f3e7c70896ffeeef32b9c950ywan
143233d2500723e5594f3e7c70896ffeeef32b9c950ywan  for (i = 0; i < output_height; i++) {
144233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (j = 0; j < output_width; j++) {
145233d2500723e5594f3e7c70896ffeeef32b9c950ywan      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          (int)src_ptr[pixel_step] * vp9_filter[1],
147233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          FILTER_BITS);
148233d2500723e5594f3e7c70896ffeeef32b9c950ywan      src_ptr++;
149233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
150233d2500723e5594f3e7c70896ffeeef32b9c950ywan
151233d2500723e5594f3e7c70896ffeeef32b9c950ywan    src_ptr += src_pixels_per_line - output_width;
152233d2500723e5594f3e7c70896ffeeef32b9c950ywan    output_ptr += output_width;
153233d2500723e5594f3e7c70896ffeeef32b9c950ywan  }
154233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
155233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Returns the sum of squares of the 256 int16_t values starting at src_ptr
 * (presumably one 16x16 macroblock of residuals -- confirm against callers).
 *
 * Each square is computed in int and accumulated into an unsigned int, so
 * the total wraps modulo 2^32 if it exceeds UINT_MAX on a 32-bit unsigned.
 */
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int total = 0;
  unsigned int idx;

  for (idx = 0; idx < 256; ++idx) {
    const int sample = src_ptr[idx];
    total += sample * sample;
  }

  return total;
}
165233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 64x32 block of src_ptr against ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - sum^2 / N, where sum is the sum of differences and N is the number
 * of samples in the block.
 */
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 64 * 32 == 1 << 11, so the division by N is a shift. The square is taken
  // in 64 bits so it cannot overflow int.
  return (unsigned int)(sq_err - (((int64_t)diff_sum * diff_sum) >> 11));
}
178233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a bilinearly interpolated 64x32 source block against dst_ptr.
 *
 * xoffset / yoffset select the horizontal and vertical 2-tap kernels through
 * BILINEAR_FILTERS_2TAP. The filtered block is then passed to
 * vp9_variance64x32, which fills *sse and supplies the return value.
 */
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Horizontal-pass output (33 rows of 64 used)
  uint8_t temp2[68 * 64];    // Vertical-pass output (32 rows of 64 used)
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass (step 1) over 33 rows: the vertical pass reads the row
  // below each of its 32 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  // Vertical pass: the partner sample is one 64-wide row further down.
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);

  return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}
199233d2500723e5594f3e7c70896ffeeef32b9c950ywan
200233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
202233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
203233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
204233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
205233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
206233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
207233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
208233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
209233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[68 * 64];
210233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
211233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
212233d2500723e5594f3e7c70896ffeeef32b9c950ywan
213233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215233d2500723e5594f3e7c70896ffeeef32b9c950ywan
216233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 33, 64, hfilter);
218233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
222233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 32x64 block of src_ptr against ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - sum^2 / N, where sum is the sum of differences and N is the number
 * of samples in the block.
 */
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 64 == 1 << 11, so the division by N is a shift. The square is taken
  // in 64 bits so it cannot overflow int.
  return (unsigned int)(sq_err - (((int64_t)diff_sum * diff_sum) >> 11));
}
235233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a bilinearly interpolated 32x64 source block against dst_ptr.
 *
 * xoffset / yoffset select the horizontal and vertical 2-tap kernels through
 * BILINEAR_FILTERS_2TAP. The filtered block is then passed to
 * vp9_variance32x64, which fills *sse and supplies the return value.
 */
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Horizontal-pass output (65 rows of 32 used)
  uint8_t temp2[68 * 64];    // Vertical-pass output (64 rows of 32 used)
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass (step 1) over 65 rows: the vertical pass reads the row
  // below each of its 64 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  // Vertical pass: the partner sample is one 32-wide row further down.
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);

  return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
256233d2500723e5594f3e7c70896ffeeef32b9c950ywan
257233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
259233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
260233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
261233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
262233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
263233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
264233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
265233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[68 * 64];
267233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
268233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
269233d2500723e5594f3e7c70896ffeeef32b9c950ywan
270233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272233d2500723e5594f3e7c70896ffeeef32b9c950ywan
273233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
274233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 65, 32, hfilter);
275233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
279233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 32x16 block of src_ptr against ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - sum^2 / N, where sum is the sum of differences and N is the number
 * of samples in the block.
 */
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 16 == 1 << 9, so the division by N is a shift. The square is taken
  // in 64 bits so it cannot overflow int.
  return (unsigned int)(sq_err - (((int64_t)diff_sum * diff_sum) >> 9));
}
292233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a bilinearly interpolated 32x16 source block against dst_ptr.
 *
 * xoffset / yoffset select the horizontal and vertical 2-tap kernels through
 * BILINEAR_FILTERS_2TAP. The filtered block is then passed to
 * vp9_variance32x16, which fills *sse and supplies the return value.
 */
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Horizontal-pass output (17 rows of 32 used)
  uint8_t temp2[36 * 32];    // Vertical-pass output (16 rows of 32 used)
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass (step 1) over 17 rows: the vertical pass reads the row
  // below each of its 16 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  // Vertical pass: the partner sample is one 32-wide row further down.
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);

  return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
313233d2500723e5594f3e7c70896ffeeef32b9c950ywan
314233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
316233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
317233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
318233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
319233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
320233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
321233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
322233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
323233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[36 * 32];
324233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
325233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
326233d2500723e5594f3e7c70896ffeeef32b9c950ywan
327233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329233d2500723e5594f3e7c70896ffeeef32b9c950ywan
330233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 17, 32, hfilter);
332233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
336233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a 16x32 block of src_ptr against ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - sum^2 / N, where sum is the sum of differences and N is the number
 * of samples in the block.
 */
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 16 * 32 == 1 << 9, so the division by N is a shift. The square is taken
  // in 64 bits so it cannot overflow int.
  return (unsigned int)(sq_err - (((int64_t)diff_sum * diff_sum) >> 9));
}
349233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/*
 * Variance of a bilinearly interpolated 16x32 source block against dst_ptr.
 *
 * xoffset / yoffset select the horizontal and vertical 2-tap kernels through
 * BILINEAR_FILTERS_2TAP. The filtered block is then passed to
 * vp9_variance16x32, which fills *sse and supplies the return value.
 */
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Horizontal-pass output (33 rows of 16 used)
  uint8_t temp2[36 * 32];    // Vertical-pass output (32 rows of 16 used)
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass (step 1) over 33 rows: the vertical pass reads the row
  // below each of its 32 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  // Vertical pass: the partner sample is one 16-wide row further down.
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);

  return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
370233d2500723e5594f3e7c70896ffeeef32b9c950ywan
371233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
373233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
374233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
375233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
376233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
377233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
378233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
379233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[36 * 32];
381233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
382233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
383233d2500723e5594f3e7c70896ffeeef32b9c950ywan
384233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386233d2500723e5594f3e7c70896ffeeef32b9c950ywan
387233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
388233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 33, 16, hfilter);
389233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
393233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for a 64x64 block.
//
// Calls variance() with w = 64, h = 64, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 12), where sum is the helper's
// second output and the shift divides by 4096 (64 * 64).
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_err, &sum);
  *sse = sq_err;

  // The square is formed in 64 bits; presumably a 32-bit product could
  // overflow for a block this large.
  mean_term = ((int64_t)sum * sum) >> 12;
  return (unsigned int)(sq_err - mean_term);
}
406233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for a 32x32 block.
//
// Calls variance() with w = 32, h = 32, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 10), where sum is the helper's
// second output and the shift divides by 1024 (32 * 32).
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_err, &sum);
  *sse = sq_err;

  // The square is formed in 64 bits before the shift.
  mean_term = ((int64_t)sum * sum) >> 10;
  return (unsigned int)(sq_err - mean_term);
}
419233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for a 16x16 block.
//
// Calls variance() with w = 16, h = 16, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 8), where sum is the helper's
// second output and the shift divides by 256 (16 * 16).
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 8);
}
432233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for an 8x16 block.
//
// Calls variance() with w = 8, h = 16, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 7), where sum is the helper's
// second output and the shift divides by 128 (8 * 16).
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 7);
}
445233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for a 16x8 block.
//
// Calls variance() with w = 16, h = 8, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 7), where sum is the helper's
// second output and the shift divides by 128 (16 * 8).
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 7);
}
458233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to variance() for an 8x8 region, letting that helper write its
// two outputs directly through the caller's |sse| and |sum| pointers.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  const int block_w = 8;
  const int block_h = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
464233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for an 8x8 block.
//
// Calls variance() with w = 8, h = 8, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 6), where sum is the helper's
// second output and the shift divides by 64 (8 * 8).
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 6);
}
477233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for an 8x4 block.
//
// Calls variance() with w = 8, h = 4, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 5), where sum is the helper's
// second output and the shift divides by 32 (8 * 4).
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 5);
}
490233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for a 4x8 block.
//
// Calls variance() with w = 4, h = 8, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 5), where sum is the helper's
// second output and the shift divides by 32 (4 * 8).
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 5);
}
503233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Variance metric for a 4x4 block.
//
// Calls variance() with w = 4, h = 4, copies that helper's SSE output to
// |*sse|, and returns SSE - ((sum * sum) >> 4), where sum is the helper's
// second output and the shift divides by 16 (4 * 4).
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_err, &sum);
  *sse = sq_err;

  // Square in unsigned arithmetic: the product wraps instead of overflowing
  // a signed int.
  usum = (unsigned int)sum;
  return sq_err - ((usum * usum) >> 4);
}
516233d2500723e5594f3e7c70896ffeeef32b9c950ywan
517233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Returns the SSE output of variance() for a 16x16 block and stores the same
// value in |*sse|. The value is returned as-is: no division by the pixel
// count and no sum-based correction is applied here.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride,
                            const uint8_t *ref_ptr, int recon_stride,
                            unsigned int *sse) {
  unsigned int sq_err;
  int ignored_sum;  // variance() writes its second output here; never read

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &ignored_sum);

  *sse = sq_err;
  return sq_err;
}
530233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Returns the SSE output of variance() for a 16x8 block and stores the same
// value in |*sse|. The value is returned as-is: no division by the pixel
// count and no sum-based correction is applied here.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int ignored_sum;  // variance() writes its second output here; never read

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &ignored_sum);

  *sse = sq_err;
  return sq_err;
}
543233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Returns the SSE output of variance() for an 8x16 block and stores the same
// value in |*sse|. The value is returned as-is: no division by the pixel
// count and no sum-based correction is applied here.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int ignored_sum;  // variance() writes its second output here; never read

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &ignored_sum);

  *sse = sq_err;
  return sq_err;
}
556233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Returns the SSE output of variance() for an 8x8 block and stores the same
// value in |*sse|. The value is returned as-is: no division by the pixel
// count and no sum-based correction is applied here.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride,
                          const uint8_t *ref_ptr, int recon_stride,
                          unsigned int *sse) {
  unsigned int sq_err;
  int ignored_sum;  // variance() writes its second output here; never read

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &ignored_sum);

  *sse = sq_err;
  return sq_err;
}
569233d2500723e5594f3e7c70896ffeeef32b9c950ywan
570233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance for a 4x4 block.
//
// Filters |src_ptr| horizontally (taps selected by |xoffset|), then
// vertically (taps selected by |yoffset|), and returns vp9_variance4x4() of
// the filtered block against |dst_ptr|, handing |sse| through to that call.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[5 * 4];  // scratch output of the horizontal pass
  uint8_t filtered[20 * 16];   // scratch output of the vertical pass

  // Horizontal 1-D filter. It is given 5 and 4 -- presumably rows and
  // columns, i.e. one row more than the block height; confirm in the helper.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 5, 4, horiz_taps);

  // Vertical filter over the first-pass output.
  var_filter_block2d_bil_second_pass(first_pass, filtered, 4, 4, 4, 4,
                                     vert_taps);

  // The filtered block is read with a stride of 4.
  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
594233d2500723e5594f3e7c70896ffeeef32b9c950ywan
595233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
596233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  src_pixels_per_line,
597233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  xoffset,
598233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  yoffset,
599233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *dst_ptr,
600233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int dst_pixels_per_line,
601233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             unsigned int *sse,
602233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *second_pred) {
603233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
604233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
605233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
606233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
607233d2500723e5594f3e7c70896ffeeef32b9c950ywan
608233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
609233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
610233d2500723e5594f3e7c70896ffeeef32b9c950ywan
611233d2500723e5594f3e7c70896ffeeef32b9c950ywan  // First filter 1d Horizontal
612233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
613233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 5, 4, hfilter);
614233d2500723e5594f3e7c70896ffeeef32b9c950ywan
615233d2500723e5594f3e7c70896ffeeef32b9c950ywan  // Now filter Verticaly
616233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
617233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
618233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
619233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
620233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance for an 8x8 block.
//
// Runs the two bilinear filter passes over |src_ptr| (filter taps selected by
// |xoffset| for the first pass and by |yoffset| for the second), then returns
// vp9_variance8x8() of the filtered block against |dst_ptr|, handing |sse|
// through to that call.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[9 * 8];  // scratch output of the first filter pass
  uint8_t filtered[20 * 16];   // scratch output of the second filter pass

  // The first pass is given 9 and 8 -- presumably rows and columns, i.e.
  // one row more than the block height; confirm against the helper.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 8, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 8, 8,
                                     vert_taps);

  // The filtered block is read with a stride of 8.
  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
641233d2500723e5594f3e7c70896ffeeef32b9c950ywan
642233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
643233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  src_pixels_per_line,
644233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  xoffset,
645233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  yoffset,
646233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *dst_ptr,
647233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int dst_pixels_per_line,
648233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             unsigned int *sse,
649233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *second_pred) {
650233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
651233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
652233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
653233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
654233d2500723e5594f3e7c70896ffeeef32b9c950ywan
655233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
656233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
657233d2500723e5594f3e7c70896ffeeef32b9c950ywan
658233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
659233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 9, 8, hfilter);
660233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
661233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
662233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
663233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
664233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance for a 16x16 block.
//
// Runs the two bilinear filter passes over |src_ptr| (filter taps selected by
// |xoffset| for the first pass and by |yoffset| for the second), then returns
// vp9_variance16x16() of the filtered block against |dst_ptr|, handing |sse|
// through to that call.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[17 * 16];  // scratch output of the first filter pass
  uint8_t filtered[20 * 16];     // scratch output of the second filter pass

  // The first pass is given 17 and 16 -- presumably rows and columns, i.e.
  // one row more than the block height; confirm against the helper.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 17, 16, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 16, 16,
                                     vert_taps);

  // The filtered block is read with a stride of 16.
  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
685233d2500723e5594f3e7c70896ffeeef32b9c950ywan
686233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
687233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
688233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
689233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
690233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
691233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
692233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
693233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
694233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[17 * 16];
695233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
696233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
697233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
698233d2500723e5594f3e7c70896ffeeef32b9c950ywan
699233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
700233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
701233d2500723e5594f3e7c70896ffeeef32b9c950ywan
702233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
703233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 17, 16, hfilter);
704233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
705233d2500723e5594f3e7c70896ffeeef32b9c950ywan
706233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
707233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
708233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
709233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance for a 64x64 block.
//
// Runs the two bilinear filter passes over |src_ptr| (filter taps selected by
// |xoffset| for the first pass and by |yoffset| for the second), then returns
// vp9_variance64x64() of the filtered block against |dst_ptr|, handing |sse|
// through to that call.
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[65 * 64];  // scratch output of the first filter pass
  uint8_t filtered[68 * 64];     // scratch output of the second filter pass

  // The first pass is given 65 and 64 -- presumably rows and columns, i.e.
  // one row more than the block height; confirm against the helper.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 65, 64, horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 64, 64, 64, 64,
                                     vert_taps);

  // The filtered block is read with a stride of 64.
  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
730233d2500723e5594f3e7c70896ffeeef32b9c950ywan
731233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
732233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
733233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
734233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
735233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
736233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
737233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
738233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
739233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
740233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[68 * 64];
741233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
742233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
743233d2500723e5594f3e7c70896ffeeef32b9c950ywan
744233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
745233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
746233d2500723e5594f3e7c70896ffeeef32b9c950ywan
747233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
748233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 65, 64, hfilter);
749233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
750233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
751233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
752233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
753233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance of a 32x32 block. The source is run through the two
// bilinear filter passes (kernel chosen by xoffset first, then the kernel
// chosen by yoffset) and the filtered block is handed to vp9_variance32x32
// together with dst_ptr. The return value and *sse come from that call.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t first_pass[33 * 32];  // Output of the first filter pass
  uint8_t filtered[36 * 32];     // Output of the second filter pass
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 33, 32, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 32, 32, 32, 32,
                                     y_kernel);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
774233d2500723e5594f3e7c70896ffeeef32b9c950ywan
775233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
776233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  src_pixels_per_line,
777233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  xoffset,
778233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int  yoffset,
779233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *dst_ptr,
780233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               int dst_pixels_per_line,
781233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               unsigned int *sse,
782233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                               const uint8_t *second_pred) {
783233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
784233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[36 * 32];
785233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
786233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
787233d2500723e5594f3e7c70896ffeeef32b9c950ywan
788233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
789233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
790233d2500723e5594f3e7c70896ffeeef32b9c950ywan
791233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
792233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 33, 32, hfilter);
793233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
794233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
795233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
796233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
797233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance16x16_c with a fixed xoffset of 8 and a
// yoffset of 0 (the horizontal half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
806233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance32x32_c with a fixed xoffset of 8 and a
// yoffset of 0 (the horizontal half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
815233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance64x64_c with a fixed xoffset of 8 and a
// yoffset of 0 (the horizontal half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
824233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance16x16_c with a fixed xoffset of 0 and a
// yoffset of 8 (the vertical half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
833233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance32x32_c with a fixed xoffset of 0 and a
// yoffset of 8 (the vertical half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
842233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance64x64_c with a fixed xoffset of 0 and a
// yoffset of 8 (the vertical half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
851233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance16x16_c with both xoffset and yoffset
// fixed at 8 (the diagonal half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
860233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance32x32_c with both xoffset and yoffset
// fixed at 8 (the diagonal half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
869233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Forwards to vp9_sub_pixel_variance64x64_c with both xoffset and yoffset
// fixed at 8 (the diagonal half-pixel case, going by the function name).
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
878233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Calls vp9_sub_pixel_variance16x16_c with the same arguments, ignores its
// return value, and returns the value left in *sse instead.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only *sse is of interest here; the callee's own result is discarded.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
891233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Calls vp9_sub_pixel_variance32x32_c with the same arguments, ignores its
// return value, and returns the value left in *sse instead.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only *sse is of interest here; the callee's own result is discarded.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
904233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Calls vp9_sub_pixel_variance64x64_c with the same arguments, ignores its
// return value, and returns the value left in *sse instead.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only *sse is of interest here; the callee's own result is discarded.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
917233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance of a 16x8 block. The source is run through the two
// bilinear filter passes (kernel chosen by xoffset first, then the kernel
// chosen by yoffset) and the filtered block is handed to vp9_variance16x8
// together with dst_ptr. The return value and *sse come from that call.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t first_pass[16 * 9];  // Output of the first filter pass
  uint8_t filtered[20 * 16];    // Output of the second filter pass
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 16, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 8, 16,
                                     y_kernel);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
938233d2500723e5594f3e7c70896ffeeef32b9c950ywan
939233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
940233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int  src_pixels_per_line,
941233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int  xoffset,
942233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int  yoffset,
943233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              const uint8_t *dst_ptr,
944233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int dst_pixels_per_line,
945233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              unsigned int *sse,
946233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              const uint8_t *second_pred) {
947233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
948233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
949233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
950233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
951233d2500723e5594f3e7c70896ffeeef32b9c950ywan
952233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
953233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
954233d2500723e5594f3e7c70896ffeeef32b9c950ywan
955233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
956233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 9, 16, hfilter);
957233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
958233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
959233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
960233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
961233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance of an 8x16 block. The source is run through the two
// bilinear filter passes (kernel chosen by xoffset first, then the kernel
// chosen by yoffset) and the filtered block is handed to vp9_variance8x16
// together with dst_ptr. The return value and *sse come from that call.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t first_pass[9 * 16];  // Output of the first filter pass
  uint8_t filtered[20 * 16];    // Output of the second filter pass
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 17, 8, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 16, 8,
                                     y_kernel);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
982233d2500723e5594f3e7c70896ffeeef32b9c950ywan
983233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
984233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int  src_pixels_per_line,
985233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int  xoffset,
986233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int  yoffset,
987233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              const uint8_t *dst_ptr,
988233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              int dst_pixels_per_line,
989233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              unsigned int *sse,
990233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                              const uint8_t *second_pred) {
991233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
992233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
993233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
994233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
995233d2500723e5594f3e7c70896ffeeef32b9c950ywan
996233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
997233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
998233d2500723e5594f3e7c70896ffeeef32b9c950ywan
999233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1000233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 17, 8, hfilter);
1001233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1002233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1003233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1004233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
1005233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance of an 8x4 block. The source is run through the two
// bilinear filter passes (kernel chosen by xoffset first, then the kernel
// chosen by yoffset) and the filtered block is handed to vp9_variance8x4
// together with dst_ptr. The return value and *sse come from that call.
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t first_pass[8 * 5];  // Output of the first filter pass
  uint8_t filtered[20 * 16];   // Output of the second filter pass
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 5, 8, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 4, 8,
                                     y_kernel);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
1026233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1027233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1028233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  src_pixels_per_line,
1029233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  xoffset,
1030233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  yoffset,
1031233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *dst_ptr,
1032233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int dst_pixels_per_line,
1033233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             unsigned int *sse,
1034233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *second_pred) {
1035233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1036233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
1037233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
1038233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
1039233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1040233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1041233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1042233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1043233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1044233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 5, 8, hfilter);
1045233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1046233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1047233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1048233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
1049233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Sub-pixel variance of a 4x8 block. The source is run through the two
// bilinear filter passes (kernel chosen by xoffset first, then the kernel
// chosen by yoffset) and the filtered block is handed to vp9_variance4x8
// together with dst_ptr. The return value and *sse come from that call.
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t first_pass[5 * 8];  // Output of the first filter pass
  // FIXME(jingning,rbultje): this buffer is probably larger than it needs to
  // be; the same applies to the other block sizes.
  uint8_t filtered[20 * 16];   // Output of the second filter pass
  const int16_t *const x_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_kernel = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 4, x_kernel);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 4, 4, 8, 4,
                                     y_kernel);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
1072233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1073233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1074233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  src_pixels_per_line,
1075233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  xoffset,
1076233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int  yoffset,
1077233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *dst_ptr,
1078233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             int dst_pixels_per_line,
1079233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             unsigned int *sse,
1080233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             const uint8_t *second_pred) {
1081233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1082233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t temp2[20 * 16];
1083233d2500723e5594f3e7c70896ffeeef32b9c950ywan  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
1084233d2500723e5594f3e7c70896ffeeef32b9c950ywan  const int16_t *hfilter, *vfilter;
1085233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1086233d2500723e5594f3e7c70896ffeeef32b9c950ywan  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1087233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1088233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1089233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1090233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                    1, 9, 4, hfilter);
1091233d2500723e5594f3e7c70896ffeeef32b9c950ywan  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1092233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1093233d2500723e5594f3e7c70896ffeeef32b9c950ywan  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1094233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
1095233d2500723e5594f3e7c70896ffeeef32b9c950ywan
1096233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Builds a compound prediction by averaging two predictors sample by sample,
// rounding halves up: comp_pred = (pred + ref + 1) >> 1.
//
//   comp_pred   output block, written contiguously (row stride == width)
//   pred        first predictor, read contiguously (row stride == width)
//   width       samples per row
//   height      number of rows
//   ref         second predictor
//   ref_stride  distance between consecutive rows of ref
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    // Only ref has its own stride; the other two buffers are packed.
    uint8_t *const out_row = comp_pred + row * width;
    const uint8_t *const pred_row = pred + row * width;
    const uint8_t *const ref_row = ref + row * ref_stride;
    int col;

    for (col = 0; col < width; ++col) {
      // Sum in int so that 255 + 255 + 1 cannot wrap before the shift.
      const int sum = pred_row[col] + ref_row[col];
      out_row[col] = (uint8_t)((sum + 1) >> 1);
    }
  }
}
1112