1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "./vp9_rtcd.h"
12
13#include "vpx_ports/mem.h"
14#include "vpx/vpx_integer.h"
15
16#include "vp9/common/vp9_common.h"
17#include "vp9/common/vp9_filter.h"
18
19#include "vp9/encoder/vp9_variance.h"
20
// Accumulates the per-pixel differences between a w x h source block and a
// reference block.
//   *sum receives the sum of (src - ref) over the block.
//   *sse receives the sum of (src - ref)^2 over the block.
// Both outputs are reset to zero before accumulating, so a block with
// w <= 0 or h <= 0 yields zeros.
void variance(const uint8_t *src_ptr,
              int  source_stride,
              const uint8_t *ref_ptr,
              int  recon_stride,
              int  w,
              int  h,
              unsigned int *sse,
              int *sum) {
  const uint8_t *src_row = src_ptr;
  const uint8_t *ref_row = ref_ptr;
  int rows_left;

  *sum = 0;
  *sse = 0;

  for (rows_left = h; rows_left > 0; --rows_left) {
    int col;

    for (col = 0; col < w; ++col) {
      const int delta = src_row[col] - ref_row[col];

      *sum += delta;
      *sse += delta * delta;
    }

    // Step each block to its next row using its own stride.
    src_row += source_stride;
    ref_row += recon_stride;
  }
}
46
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : const uint8_t *src_ptr     : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  int pixel_step             : Offset between filter input
 *                                               samples (see notes).
 *                  unsigned int output_height : Output block height.
 *                  unsigned int output_width  : Output block width.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement first-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces uint16_t output, which is the input type of the
 *                  second pass. The output is written densely, i.e. with a
 *                  stride equal to output_width.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
78static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79                                              uint16_t *output_ptr,
80                                              unsigned int src_pixels_per_line,
81                                              int pixel_step,
82                                              unsigned int output_height,
83                                              unsigned int output_width,
84                                              const int16_t *vp9_filter) {
85  unsigned int i, j;
86
87  for (i = 0; i < output_height; i++) {
88    for (j = 0; j < output_width; j++) {
89      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90                          (int)src_ptr[pixel_step] * vp9_filter[1],
91                          FILTER_BITS);
92
93      src_ptr++;
94    }
95
96    // Next row...
97    src_ptr    += src_pixels_per_line - output_width;
98    output_ptr += output_width;
99  }
100}
101
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : const uint16_t *src_ptr    : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  unsigned int pixel_step    : Offset between filter input
 *                                               samples (see notes).
 *                  unsigned int output_height : Output block height.
 *                  unsigned int output_width  : Output block width.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint8_t *output_ptr        : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement second-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Requires 16-bit input as produced by
 *                  var_filter_block2d_bil_first_pass. The output is written
 *                  densely, i.e. with a stride equal to output_width.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
134static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135                                               uint8_t *output_ptr,
136                                               unsigned int src_pixels_per_line,
137                                               unsigned int pixel_step,
138                                               unsigned int output_height,
139                                               unsigned int output_width,
140                                               const int16_t *vp9_filter) {
141  unsigned int  i, j;
142
143  for (i = 0; i < output_height; i++) {
144    for (j = 0; j < output_width; j++) {
145      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146                          (int)src_ptr[pixel_step] * vp9_filter[1],
147                          FILTER_BITS);
148      src_ptr++;
149    }
150
151    src_ptr += src_pixels_per_line - output_width;
152    output_ptr += output_width;
153  }
154}
155
// Returns the sum of the squares of the 256 int16_t values starting at
// src_ptr.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr < end) {
    total += (*src_ptr) * (*src_ptr);
    ++src_ptr;
  }

  return total;
}
165
// Variance of a 64x32 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 2048.
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 64 * 32 = 2048 pixels, so the division is a shift by 11. The square is
  // formed in 64 bits.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 11;
  return (unsigned int)(sq_err - mean_term);
}
178
// Sub-pixel variance of a 64x32 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance64x32.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[65 * 64];  // Horizontally filtered rows
  uint8_t filtered[68 * 64];    // Fully filtered block

  // 33 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 64, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 64, 64, 32, 64,
                                     vert_taps);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
199
200unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201                                               int  src_pixels_per_line,
202                                               int  xoffset,
203                                               int  yoffset,
204                                               const uint8_t *dst_ptr,
205                                               int dst_pixels_per_line,
206                                               unsigned int *sse,
207                                               const uint8_t *second_pred) {
208  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
209  uint8_t temp2[68 * 64];
210  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
211  const int16_t *hfilter, *vfilter;
212
213  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215
216  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217                                    1, 33, 64, hfilter);
218  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221}
222
// Variance of a 32x64 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 2048.
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 64 = 2048 pixels, so the division is a shift by 11. The square is
  // formed in 64 bits.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 11;
  return (unsigned int)(sq_err - mean_term);
}
235
// Sub-pixel variance of a 32x64 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance32x64.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[65 * 64];  // Horizontally filtered rows
  uint8_t filtered[68 * 64];    // Fully filtered block

  // 65 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 65, 32, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 64, 32,
                                     vert_taps);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
256
257unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258                                               int  src_pixels_per_line,
259                                               int  xoffset,
260                                               int  yoffset,
261                                               const uint8_t *dst_ptr,
262                                               int dst_pixels_per_line,
263                                               unsigned int *sse,
264                                               const uint8_t *second_pred) {
265  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266  uint8_t temp2[68 * 64];
267  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
268  const int16_t *hfilter, *vfilter;
269
270  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272
273  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
274                                    1, 65, 32, hfilter);
275  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278}
279
// Variance of a 32x16 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 512.
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 16 = 512 pixels, so the division is a shift by 9. The square is
  // formed in 64 bits.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 9;
  return (unsigned int)(sq_err - mean_term);
}
292
// Sub-pixel variance of a 32x16 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance32x16.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[33 * 32];  // Horizontally filtered rows
  uint8_t filtered[36 * 32];    // Fully filtered block

  // 17 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 17, 32, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 16, 32,
                                     vert_taps);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
313
314unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315                                               int  src_pixels_per_line,
316                                               int  xoffset,
317                                               int  yoffset,
318                                               const uint8_t *dst_ptr,
319                                               int dst_pixels_per_line,
320                                               unsigned int *sse,
321                                               const uint8_t *second_pred) {
322  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
323  uint8_t temp2[36 * 32];
324  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
325  const int16_t *hfilter, *vfilter;
326
327  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329
330  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331                                    1, 17, 32, hfilter);
332  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335}
336
// Variance of a 16x32 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 512.
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 16 * 32 = 512 pixels, so the division is a shift by 9. The square is
  // formed in 64 bits.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 9;
  return (unsigned int)(sq_err - mean_term);
}
349
// Sub-pixel variance of a 16x32 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance16x32.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[33 * 32];  // Horizontally filtered rows
  uint8_t filtered[36 * 32];    // Fully filtered block

  // 33 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 16, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 32, 16,
                                     vert_taps);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
370
371unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372                                               int  src_pixels_per_line,
373                                               int  xoffset,
374                                               int  yoffset,
375                                               const uint8_t *dst_ptr,
376                                               int dst_pixels_per_line,
377                                               unsigned int *sse,
378                                               const uint8_t *second_pred) {
379  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380  uint8_t temp2[36 * 32];
381  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
382  const int16_t *hfilter, *vfilter;
383
384  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386
387  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
388                                    1, 33, 16, hfilter);
389  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392}
393
// Variance of a 64x64 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 4096.
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 64 * 64 = 4096 pixels, so the division is a shift by 12. The square is
  // formed in 64 bits.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 12;
  return (unsigned int)(sq_err - mean_term);
}
406
// Variance of a 32x32 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 1024.
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 32 = 1024 pixels, so the division is a shift by 10. The square is
  // formed in 64 bits.
  mean_term = ((int64_t)diff_sum * diff_sum) >> 10;
  return (unsigned int)(sq_err - mean_term);
}
419
// Computes, for a 16x16 block, the sum of squared differences (*sse) and the
// sum of differences (*sum) between the source and reference blocks.
void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr,
                             int source_stride,
                             const uint8_t *ref_ptr,
                             int ref_stride,
                             unsigned int *sse,
                             int *sum) {
  const int block_dim = 16;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_dim, block_dim, sse, sum);
}
425
// Variance of a 16x16 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 256.
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 16 * 16 = 256 pixels, so
  // the division is a shift by 8.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 8);
}
438
// Variance of an 8x16 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 128.
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 8 * 16 = 128 pixels, so
  // the division is a shift by 7.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 7);
}
451
// Variance of a 16x8 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 128.
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 16 * 8 = 128 pixels, so
  // the division is a shift by 7.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 7);
}
464
// Computes, for an 8x8 block, the sum of squared differences (*sse) and the
// sum of differences (*sum) between the source and reference blocks.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int ref_stride,
                           unsigned int *sse,
                           int *sum) {
  const int block_dim = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_dim, block_dim, sse, sum);
}
470
// Variance of an 8x8 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 64.
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 8 * 8 = 64 pixels, so the
  // division is a shift by 6.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 6);
}
483
// Variance of an 8x4 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 32.
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 8 * 4 = 32 pixels, so the
  // division is a shift by 5.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 5);
}
496
// Variance of a 4x8 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 32.
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 4 * 8 = 32 pixels, so the
  // division is a shift by 5.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 5);
}
509
// Variance of a 4x4 block. Stores the sum of squared differences in *sse
// and returns it minus (sum of differences)^2 / 16.
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  unsigned int usum;
  int diff_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square is formed in unsigned arithmetic; 4 * 4 = 16 pixels, so the
  // division is a shift by 4.
  usum = (unsigned int)diff_sum;
  return sq_err - ((usum * usum) >> 4);
}
522
523
// Sum of squared differences over a 16x16 block. The value is stored in *sse
// and also returned; the sum of differences from variance() is discarded.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int  source_stride,
                            const uint8_t *ref_ptr,
                            int  recon_stride,
                            unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &unused_sum);
  *sse = sq_err;

  return sq_err;
}
536
// Sum of squared differences over a 16x8 block. The value is stored in *sse
// and also returned; the sum of differences from variance() is discarded.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
                           int  source_stride,
                           const uint8_t *ref_ptr,
                           int  recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &unused_sum);
  *sse = sq_err;

  return sq_err;
}
549
// Sum of squared differences over an 8x16 block. The value is stored in *sse
// and also returned; the sum of differences from variance() is discarded.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
                           int  source_stride,
                           const uint8_t *ref_ptr,
                           int  recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &unused_sum);
  *sse = sq_err;

  return sq_err;
}
562
// Sum of squared differences over an 8x8 block. The value is stored in *sse
// and also returned; the sum of differences from variance() is discarded.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
                          int  source_stride,
                          const uint8_t *ref_ptr,
                          int  recon_stride,
                          unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &unused_sum);
  *sse = sq_err;

  return sq_err;
}
575
576
// Sub-pixel variance of a 4x4 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance4x4.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[5 * 4];  // Horizontally filtered rows
  uint8_t filtered[20 * 16];  // Fully filtered block

  // Horizontal pass: 5 rows are produced, one more than the block height,
  // because the vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 5, 4, horiz_taps);

  // Vertical pass over the intermediate rows.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 4, 4, 4, 4,
                                     vert_taps);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
600
601unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
602                                             int  src_pixels_per_line,
603                                             int  xoffset,
604                                             int  yoffset,
605                                             const uint8_t *dst_ptr,
606                                             int dst_pixels_per_line,
607                                             unsigned int *sse,
608                                             const uint8_t *second_pred) {
609  uint8_t temp2[20 * 16];
610  const int16_t *hfilter, *vfilter;
611  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
612  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
613
614  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
615  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
616
617  // First filter 1d Horizontal
618  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
619                                    1, 5, 4, hfilter);
620
621  // Now filter Verticaly
622  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
623  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
624  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
625}
626
// Sub-pixel variance of an 8x8 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance8x8.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[9 * 8];  // Horizontally filtered rows
  uint8_t filtered[20 * 16];  // Fully filtered block

  // 9 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 9, 8, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 8, 8, 8, 8,
                                     vert_taps);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
647
648unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
649                                             int  src_pixels_per_line,
650                                             int  xoffset,
651                                             int  yoffset,
652                                             const uint8_t *dst_ptr,
653                                             int dst_pixels_per_line,
654                                             unsigned int *sse,
655                                             const uint8_t *second_pred) {
656  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
657  uint8_t temp2[20 * 16];
658  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
659  const int16_t *hfilter, *vfilter;
660
661  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
662  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
663
664  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
665                                    1, 9, 8, hfilter);
666  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
667  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
668  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
669}
670
// Sub-pixel variance of a 16x16 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance16x16.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[17 * 16];  // Horizontally filtered rows
  uint8_t filtered[20 * 16];    // Fully filtered block

  // 17 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 17, 16, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 16, 16,
                                     vert_taps);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
691
692unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
693                                               int  src_pixels_per_line,
694                                               int  xoffset,
695                                               int  yoffset,
696                                               const uint8_t *dst_ptr,
697                                               int dst_pixels_per_line,
698                                               unsigned int *sse,
699                                               const uint8_t *second_pred) {
700  uint16_t fdata3[17 * 16];
701  uint8_t temp2[20 * 16];
702  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
703  const int16_t *hfilter, *vfilter;
704
705  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
706  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
707
708  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
709                                    1, 17, 16, hfilter);
710  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
711
712  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
713  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
714}
715
// Sub-pixel variance of a 64x64 block. The source is filtered horizontally
// and then vertically with the 2-tap bilinear filters selected by xoffset and
// yoffset, and the filtered block is compared against dst_ptr with
// vp9_variance64x64.
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[65 * 64];  // Horizontally filtered rows
  uint8_t filtered[68 * 64];    // Fully filtered block

  // 65 rows are produced, one more than the block height, because the
  // vertical pass also reads the row below each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 65, 64, horiz_taps);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 64, 64, 64, 64,
                                     vert_taps);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
736
737unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
738                                               int  src_pixels_per_line,
739                                               int  xoffset,
740                                               int  yoffset,
741                                               const uint8_t *dst_ptr,
742                                               int dst_pixels_per_line,
743                                               unsigned int *sse,
744                                               const uint8_t *second_pred) {
745  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
746  uint8_t temp2[68 * 64];
747  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
748  const int16_t *hfilter, *vfilter;
749
750  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
751  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
752
753  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
754                                    1, 65, 64, hfilter);
755  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
756  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
757  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
758}
759
// 32x32 sub-pixel variance.
// Applies the two-pass 2-tap bilinear filter to the source block (first pass
// taps from xoffset, second pass taps from yoffset) and passes the filtered
// block, dst_ptr and sse on to vp9_variance32x32(), whose result is returned.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1[33 * 32];  // first pass output: 33 rows of 32 samples
  uint8_t pass2[36 * 32];   // second pass output: 32 rows of 32 samples used

  // The first pass is asked for one row more than the block height.
  var_filter_block2d_bil_first_pass(src_ptr, pass1, src_pixels_per_line,
                                    1, 33, 32, x_taps);
  var_filter_block2d_bil_second_pass(pass1, pass2, 32, 32, 32, 32, y_taps);

  return vp9_variance32x32(pass2, 32, dst_ptr, dst_pixels_per_line, sse);
}
780
781unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
782                                               int  src_pixels_per_line,
783                                               int  xoffset,
784                                               int  yoffset,
785                                               const uint8_t *dst_ptr,
786                                               int dst_pixels_per_line,
787                                               unsigned int *sse,
788                                               const uint8_t *second_pred) {
789  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
790  uint8_t temp2[36 * 32];
791  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
792  const int16_t *hfilter, *vfilter;
793
794  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
795  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
796
797  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
798                                    1, 33, 32, hfilter);
799  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
800  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
801  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
802}
803
// 16x16 variance with a horizontal-only sub-pixel offset: forwards to
// vp9_sub_pixel_variance16x16_c() with xoffset = 8 and yoffset = 0.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
812
// 32x32 variance with a horizontal-only sub-pixel offset: forwards to
// vp9_sub_pixel_variance32x32_c() with xoffset = 8 and yoffset = 0.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
821
// 64x64 variance with a horizontal-only sub-pixel offset: forwards to
// vp9_sub_pixel_variance64x64_c() with xoffset = 8 and yoffset = 0.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
830
// 16x16 variance with a vertical-only sub-pixel offset: forwards to
// vp9_sub_pixel_variance16x16_c() with xoffset = 0 and yoffset = 8.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
839
// 32x32 variance with a vertical-only sub-pixel offset: forwards to
// vp9_sub_pixel_variance32x32_c() with xoffset = 0 and yoffset = 8.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
848
// 64x64 variance with a vertical-only sub-pixel offset: forwards to
// vp9_sub_pixel_variance64x64_c() with xoffset = 0 and yoffset = 8.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
857
// 16x16 variance with the same sub-pixel offset in both directions: forwards
// to vp9_sub_pixel_variance16x16_c() with xoffset = 8 and yoffset = 8.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
866
// 32x32 variance with the same sub-pixel offset in both directions: forwards
// to vp9_sub_pixel_variance32x32_c() with xoffset = 8 and yoffset = 8.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
875
// 64x64 variance with the same sub-pixel offset in both directions: forwards
// to vp9_sub_pixel_variance64x64_c() with xoffset = 8 and yoffset = 8.
// NOTE(review): 8 is presumably the half-sample entry of the bilinear filter
// table -- confirm against the table definition.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
884
// 16x16 sub-pixel error measure: runs vp9_sub_pixel_variance16x16_c() with
// the same arguments, discards its return value, and returns the value that
// call left in *sse.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the side effect on *sse is wanted here.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);

  return *sse;
}
897
// 32x32 sub-pixel error measure: runs vp9_sub_pixel_variance32x32_c() with
// the same arguments, discards its return value, and returns the value that
// call left in *sse.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the side effect on *sse is wanted here.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);

  return *sse;
}
910
// 64x64 sub-pixel error measure: runs vp9_sub_pixel_variance64x64_c() with
// the same arguments, discards its return value, and returns the value that
// call left in *sse.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Only the side effect on *sse is wanted here.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);

  return *sse;
}
923
// 16x8 (16 wide, 8 high) sub-pixel variance.
// Applies the two-pass 2-tap bilinear filter to the source block (first pass
// taps from xoffset, second pass taps from yoffset) and passes the filtered
// block, dst_ptr and sse on to vp9_variance16x8(), whose result is returned.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1[16 * 9];  // first pass output: 9 rows of 16 samples
  uint8_t pass2[20 * 16];  // second pass output: 8 rows of 16 samples used

  // The first pass is asked for one row more than the block height.
  var_filter_block2d_bil_first_pass(src_ptr, pass1, src_pixels_per_line,
                                    1, 9, 16, x_taps);
  var_filter_block2d_bil_second_pass(pass1, pass2, 16, 16, 8, 16, y_taps);

  return vp9_variance16x8(pass2, 16, dst_ptr, dst_pixels_per_line, sse);
}
944
945unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
946                                              int  src_pixels_per_line,
947                                              int  xoffset,
948                                              int  yoffset,
949                                              const uint8_t *dst_ptr,
950                                              int dst_pixels_per_line,
951                                              unsigned int *sse,
952                                              const uint8_t *second_pred) {
953  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
954  uint8_t temp2[20 * 16];
955  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
956  const int16_t *hfilter, *vfilter;
957
958  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
959  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
960
961  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
962                                    1, 9, 16, hfilter);
963  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
964  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
965  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
966}
967
// 8x16 (8 wide, 16 high) sub-pixel variance.
// Applies the two-pass 2-tap bilinear filter to the source block (first pass
// taps from xoffset, second pass taps from yoffset) and passes the filtered
// block, dst_ptr and sse on to vp9_variance8x16(), whose result is returned.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1[9 * 16];  // first pass is asked for 17 rows of 8 samples
  uint8_t pass2[20 * 16];  // second pass output: 16 rows of 8 samples used

  // The first pass is asked for one row more than the block height.
  var_filter_block2d_bil_first_pass(src_ptr, pass1, src_pixels_per_line,
                                    1, 17, 8, x_taps);
  var_filter_block2d_bil_second_pass(pass1, pass2, 8, 8, 16, 8, y_taps);

  return vp9_variance8x16(pass2, 8, dst_ptr, dst_pixels_per_line, sse);
}
988
989unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
990                                              int  src_pixels_per_line,
991                                              int  xoffset,
992                                              int  yoffset,
993                                              const uint8_t *dst_ptr,
994                                              int dst_pixels_per_line,
995                                              unsigned int *sse,
996                                              const uint8_t *second_pred) {
997  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
998  uint8_t temp2[20 * 16];
999  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
1000  const int16_t *hfilter, *vfilter;
1001
1002  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1003  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1004
1005  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1006                                    1, 17, 8, hfilter);
1007  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1008  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1009  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1010}
1011
// 8x4 (8 wide, 4 high) sub-pixel variance.
// Applies the two-pass 2-tap bilinear filter to the source block (first pass
// taps from xoffset, second pass taps from yoffset) and passes the filtered
// block, dst_ptr and sse on to vp9_variance8x4(), whose result is returned.
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1[8 * 5];   // first pass output: 5 rows of 8 samples
  uint8_t pass2[20 * 16];  // second pass output: 4 rows of 8 samples used

  // The first pass is asked for one row more than the block height.
  var_filter_block2d_bil_first_pass(src_ptr, pass1, src_pixels_per_line,
                                    1, 5, 8, x_taps);
  var_filter_block2d_bil_second_pass(pass1, pass2, 8, 8, 4, 8, y_taps);

  return vp9_variance8x4(pass2, 8, dst_ptr, dst_pixels_per_line, sse);
}
1032
1033unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1034                                             int  src_pixels_per_line,
1035                                             int  xoffset,
1036                                             int  yoffset,
1037                                             const uint8_t *dst_ptr,
1038                                             int dst_pixels_per_line,
1039                                             unsigned int *sse,
1040                                             const uint8_t *second_pred) {
1041  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1042  uint8_t temp2[20 * 16];
1043  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
1044  const int16_t *hfilter, *vfilter;
1045
1046  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1047  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1048
1049  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1050                                    1, 5, 8, hfilter);
1051  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1052  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1053  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1054}
1055
// 4x8 (4 wide, 8 high) sub-pixel variance.
// Applies the two-pass 2-tap bilinear filter to the source block (first pass
// taps from xoffset, second pass taps from yoffset) and passes the filtered
// block, dst_ptr and sse on to vp9_variance4x8(), whose result is returned.
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1[5 * 8];  // first pass is asked for 9 rows of 4 samples
  // FIXME(jingning,rbultje): this second-pass buffer probably doesn't need to
  // be this big? The same issue appears in all other block size settings.
  uint8_t pass2[20 * 16];

  // The first pass is asked for one row more than the block height.
  var_filter_block2d_bil_first_pass(src_ptr, pass1, src_pixels_per_line,
                                    1, 9, 4, x_taps);
  var_filter_block2d_bil_second_pass(pass1, pass2, 4, 4, 8, 4, y_taps);

  return vp9_variance4x8(pass2, 4, dst_ptr, dst_pixels_per_line, sse);
}
1078
1079unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1080                                             int  src_pixels_per_line,
1081                                             int  xoffset,
1082                                             int  yoffset,
1083                                             const uint8_t *dst_ptr,
1084                                             int dst_pixels_per_line,
1085                                             unsigned int *sse,
1086                                             const uint8_t *second_pred) {
1087  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1088  uint8_t temp2[20 * 16];
1089  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
1090  const int16_t *hfilter, *vfilter;
1091
1092  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1093  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1094
1095  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1096                                    1, 9, 4, hfilter);
1097  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1098  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1099  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1100}
1101
1102
// Builds a compound prediction by averaging two predictors sample by sample,
// rounding halves up: comp_pred = (pred + ref + 1) >> 1.
//
//   comp_pred  : output block, written densely (row pitch == width).
//   pred       : first predictor, read densely (row pitch == width).
//   width      : samples per row.
//   height     : number of rows.
//   ref        : second predictor.
//   ref_stride : row pitch of ref.
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    // comp_pred and pred share the packed pitch; ref has its own stride.
    uint8_t *const out_row = comp_pred + row * width;
    const uint8_t *const pred_row = pred + row * width;
    const uint8_t *const ref_row = ref + row * ref_stride;
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col];
      out_row[col] = (uint8_t)((pair_sum + 1) >> 1);
    }
  }
}
1118