1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "./vp9_rtcd.h"
12
13#include "vpx_ports/mem.h"
14#include "vpx/vpx_integer.h"
15
16#include "vp9/common/vp9_common.h"
17#include "vp9/common/vp9_filter.h"
18
19#include "vp9/encoder/vp9_variance.h"
20
// Accumulates, over a w x h block, the sum of per-pixel differences
// (src - ref) and the sum of their squares.
//
//   src_ptr / source_stride : first block and the distance between its rows.
//   ref_ptr / recon_stride  : second block and the distance between its rows.
//   w, h                    : block width and height in pixels.
//   sse                     : receives the sum of squared differences.
//   sum                     : receives the (signed) sum of differences.
//
// Both outputs are set to 0 when w or h is not positive.
void variance(const uint8_t *src_ptr, int source_stride,
              const uint8_t *ref_ptr, int recon_stride,
              int w, int h,
              unsigned int *sse, int *sum) {
  unsigned int sq_total = 0;
  int diff_total = 0;
  int row;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col) {
      const int delta = src_ptr[col] - ref_ptr[col];

      diff_total += delta;
      sq_total += delta * delta;
    }

    // Step both blocks down one row.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
46
47/****************************************************************************
48 *
49 *  ROUTINE       : filter_block2d_bil_first_pass
50 *
51 *  INPUTS        : uint8_t  *src_ptr          : Pointer to source block.
52 *                  uint32_t src_pixels_per_line : Stride of input block.
53 *                  uint32_t pixel_step        : Offset between filter input
54 *                                               samples (see notes).
55 *                  uint32_t output_height     : Input block height.
56 *                  uint32_t output_width      : Input block width.
57 *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
58 *                                               taps.
59 *
60 *  OUTPUTS       : int32_t *output_ptr        : Pointer to filtered block.
61 *
62 *  RETURNS       : void
63 *
64 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
65 *                  either horizontal or vertical direction to produce the
66 *                  filtered output block. Used to implement first-pass
67 *                  of 2-D separable filter.
68 *
69 *  SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
70 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
71 *                  pixel_step defines whether the filter is applied
72 *                  horizontally (pixel_step=1) or vertically (pixel_step=
73 *                  stride).
74 *                  It defines the offset required to move from one input
75 *                  to the next.
76 *
77 ****************************************************************************/
78static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79                                              uint16_t *output_ptr,
80                                              unsigned int src_pixels_per_line,
81                                              int pixel_step,
82                                              unsigned int output_height,
83                                              unsigned int output_width,
84                                              const int16_t *vp9_filter) {
85  unsigned int i, j;
86
87  for (i = 0; i < output_height; i++) {
88    for (j = 0; j < output_width; j++) {
89      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90                          (int)src_ptr[pixel_step] * vp9_filter[1],
91                          FILTER_BITS);
92
93      src_ptr++;
94    }
95
96    // Next row...
97    src_ptr    += src_pixels_per_line - output_width;
98    output_ptr += output_width;
99  }
100}
101
102/****************************************************************************
103 *
104 *  ROUTINE       : filter_block2d_bil_second_pass
105 *
106 *  INPUTS        : int32_t  *src_ptr          : Pointer to source block.
107 *                  uint32_t src_pixels_per_line : Stride of input block.
108 *                  uint32_t pixel_step        : Offset between filter input
109 *                                               samples (see notes).
110 *                  uint32_t output_height     : Input block height.
111 *                  uint32_t output_width      : Input block width.
112 *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
113 *                                               taps.
114 *
115 *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
116 *
117 *  RETURNS       : void
118 *
119 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
120 *                  either horizontal or vertical direction to produce the
121 *                  filtered output block. Used to implement second-pass
122 *                  of 2-D separable filter.
123 *
124 *  SPECIAL NOTES : Requires 32-bit input as produced by
125 *                  filter_block2d_bil_first_pass.
126 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
127 *                  pixel_step defines whether the filter is applied
128 *                  horizontally (pixel_step=1) or vertically (pixel_step=
129 *                  stride).
130 *                  It defines the offset required to move from one input
131 *                  to the next.
132 *
133 ****************************************************************************/
134static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135                                               uint8_t *output_ptr,
136                                               unsigned int src_pixels_per_line,
137                                               unsigned int pixel_step,
138                                               unsigned int output_height,
139                                               unsigned int output_width,
140                                               const int16_t *vp9_filter) {
141  unsigned int  i, j;
142
143  for (i = 0; i < output_height; i++) {
144    for (j = 0; j < output_width; j++) {
145      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146                          (int)src_ptr[pixel_step] * vp9_filter[1],
147                          FILTER_BITS);
148      src_ptr++;
149    }
150
151    src_ptr += src_pixels_per_line - output_width;
152    output_ptr += output_width;
153  }
154}
155
// Returns the sum of the squares of the 256 int16_t values starting at
// src_ptr. The total is accumulated in an unsigned int.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr != end) {
    const int coeff = *src_ptr++;

    total += coeff * coeff;
  }

  return total;
}
165
// Variance of a 64x32 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 11, where 2^11 == 64 * 32 pixels.
// The squared sum is formed in 64 bits.
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  int64_t sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sum_sq, &sum);
  sq_mean = ((int64_t)sum * sum) >> 11;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
178
// Sub-pixel variance of a 64x32 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance64x32 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[65 * 64];  // first-pass output, 33 rows of 64 used
  uint8_t filtered[68 * 64];    // second-pass output, 32 rows of 64 used

  // 33 rows are produced because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 64, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 64, 64, 32, 64,
                                     vert_taps);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
199
200unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201                                               int  src_pixels_per_line,
202                                               int  xoffset,
203                                               int  yoffset,
204                                               const uint8_t *dst_ptr,
205                                               int dst_pixels_per_line,
206                                               unsigned int *sse,
207                                               const uint8_t *second_pred) {
208  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
209  uint8_t temp2[68 * 64];
210  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
211  const int16_t *hfilter, *vfilter;
212
213  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215
216  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217                                    1, 33, 64, hfilter);
218  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221}
222
// Variance of a 32x64 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 11, where 2^11 == 32 * 64 pixels.
// The squared sum is formed in 64 bits.
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  int64_t sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sum_sq, &sum);
  sq_mean = ((int64_t)sum * sum) >> 11;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
235
// Sub-pixel variance of a 32x64 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance32x64 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[65 * 64];  // first-pass output, 65 rows of 32 used
  uint8_t filtered[68 * 64];    // second-pass output, 64 rows of 32 used

  // 65 rows are produced because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 65, 32, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 64, 32,
                                     vert_taps);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
256
257unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258                                               int  src_pixels_per_line,
259                                               int  xoffset,
260                                               int  yoffset,
261                                               const uint8_t *dst_ptr,
262                                               int dst_pixels_per_line,
263                                               unsigned int *sse,
264                                               const uint8_t *second_pred) {
265  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266  uint8_t temp2[68 * 64];
267  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
268  const int16_t *hfilter, *vfilter;
269
270  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272
273  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
274                                    1, 65, 32, hfilter);
275  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278}
279
// Variance of a 32x16 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 9, where 2^9 == 32 * 16 pixels.
// The squared sum is formed in 64 bits.
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  int64_t sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sum_sq, &sum);
  sq_mean = ((int64_t)sum * sum) >> 9;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
292
// Sub-pixel variance of a 32x16 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance32x16 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[33 * 32];  // first-pass output, 17 rows of 32 used
  uint8_t filtered[36 * 32];    // second-pass output, 16 rows of 32 used

  // 17 rows are produced because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 17, 32, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 16, 32,
                                     vert_taps);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
313
314unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315                                               int  src_pixels_per_line,
316                                               int  xoffset,
317                                               int  yoffset,
318                                               const uint8_t *dst_ptr,
319                                               int dst_pixels_per_line,
320                                               unsigned int *sse,
321                                               const uint8_t *second_pred) {
322  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
323  uint8_t temp2[36 * 32];
324  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
325  const int16_t *hfilter, *vfilter;
326
327  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329
330  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331                                    1, 17, 32, hfilter);
332  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335}
336
// Variance of a 16x32 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 9, where 2^9 == 16 * 32 pixels.
// The squared sum is formed in 64 bits.
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  int64_t sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sum_sq, &sum);
  sq_mean = ((int64_t)sum * sum) >> 9;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
349
// Sub-pixel variance of a 16x32 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance16x32 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[33 * 32];  // first-pass output, 33 rows of 16 used
  uint8_t filtered[36 * 32];    // second-pass output, 32 rows of 16 used

  // 33 rows are produced because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 16, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 32, 16,
                                     vert_taps);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
370
371unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372                                               int  src_pixels_per_line,
373                                               int  xoffset,
374                                               int  yoffset,
375                                               const uint8_t *dst_ptr,
376                                               int dst_pixels_per_line,
377                                               unsigned int *sse,
378                                               const uint8_t *second_pred) {
379  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380  uint8_t temp2[36 * 32];
381  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
382  const int16_t *hfilter, *vfilter;
383
384  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386
387  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
388                                    1, 33, 16, hfilter);
389  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392}
393
// Variance of a 64x64 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 12, where 2^12 == 64 * 64 pixels.
// The squared sum is formed in 64 bits.
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  int64_t sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sum_sq, &sum);
  sq_mean = ((int64_t)sum * sum) >> 12;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
406
// Variance of a 32x32 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 10, where 2^10 == 32 * 32 pixels.
// The squared sum is formed in 64 bits.
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  int64_t sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sum_sq, &sum);
  sq_mean = ((int64_t)sum * sum) >> 10;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
419
// Variance of a 16x16 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 8, where 2^8 == 16 * 16 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 8;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
432
// Variance of an 8x16 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 7, where 2^7 == 8 * 16 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 7;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
445
// Variance of a 16x8 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 7, where 2^7 == 16 * 8 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 7;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
458
// Thin wrapper around variance() for a fixed 8x8 block: *sse receives the
// sum of squared differences and *sum the sum of differences between the
// src_ptr and ref_ptr blocks.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  const int block_w = 8;
  const int block_h = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
464
// Variance of an 8x8 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 6, where 2^6 == 8 * 8 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 6;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
477
// Variance of an 8x4 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 5, where 2^5 == 8 * 4 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 5;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
490
// Variance of a 4x8 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 5, where 2^5 == 4 * 8 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 5;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
503
// Variance of a 4x4 block. Stores the sum of squared differences in *sse
// and returns it minus (sum * sum) >> 4, where 2^4 == 4 * 4 pixels.
// The squared sum is formed in unsigned int arithmetic.
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sum_sq;  // sum of squared differences
  int sum;              // sum of differences
  unsigned int sq_mean;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sum_sq, &sum);
  sq_mean = ((unsigned int)sum * sum) >> 4;
  *sse = sum_sq;
  return sum_sq - sq_mean;
}
516
517
// Sum of squared differences over a 16x16 block, as computed by variance().
// The value is both stored in *sse and returned; the sum of differences
// that variance() also produces is discarded.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride,
                            const uint8_t *ref_ptr, int recon_stride,
                            unsigned int *sse) {
  unsigned int sum_sq;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sum_sq, &unused_sum);
  *sse = sum_sq;
  return sum_sq;
}
530
// Sum of squared differences over a 16x8 block, as computed by variance().
// The value is both stored in *sse and returned; the sum of differences
// that variance() also produces is discarded.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sum_sq;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sum_sq, &unused_sum);
  *sse = sum_sq;
  return sum_sq;
}
543
// Sum of squared differences over an 8x16 block, as computed by variance().
// The value is both stored in *sse and returned; the sum of differences
// that variance() also produces is discarded.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sum_sq;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sum_sq, &unused_sum);
  *sse = sum_sq;
  return sum_sq;
}
556
// Sum of squared differences over an 8x8 block, as computed by variance().
// The value is both stored in *sse and returned; the sum of differences
// that variance() also produces is discarded.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride,
                          const uint8_t *ref_ptr, int recon_stride,
                          unsigned int *sse) {
  unsigned int sum_sq;
  int unused_sum;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sum_sq, &unused_sum);
  *sse = sum_sq;
  return sum_sq;
}
569
570
// Sub-pixel variance of a 4x4 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance4x4 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[5 * 4];  // first-pass output, 5 rows of 4
  uint8_t filtered[20 * 16];  // second-pass output, 4 rows of 4 used

  // Horizontal pass: 5 rows, because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 5, 4, horiz_taps);

  // Vertical pass: pixel_step equals the row width.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 4, 4, 4, 4,
                                     vert_taps);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
594
595unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
596                                             int  src_pixels_per_line,
597                                             int  xoffset,
598                                             int  yoffset,
599                                             const uint8_t *dst_ptr,
600                                             int dst_pixels_per_line,
601                                             unsigned int *sse,
602                                             const uint8_t *second_pred) {
603  uint8_t temp2[20 * 16];
604  const int16_t *hfilter, *vfilter;
605  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
606  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
607
608  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
609  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
610
611  // First filter 1d Horizontal
612  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
613                                    1, 5, 4, hfilter);
614
615  // Now filter Verticaly
616  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
617  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
618  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
619}
620
// Sub-pixel variance of an 8x8 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance8x8 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[9 * 8];  // first-pass output, 9 rows of 8
  uint8_t filtered[20 * 16];  // second-pass output, 8 rows of 8 used

  // 9 rows are produced because the vertical pass reads one row below each
  // output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 9, 8, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 8, 8, 8, 8,
                                     vert_taps);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
641
642unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
643                                             int  src_pixels_per_line,
644                                             int  xoffset,
645                                             int  yoffset,
646                                             const uint8_t *dst_ptr,
647                                             int dst_pixels_per_line,
648                                             unsigned int *sse,
649                                             const uint8_t *second_pred) {
650  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
651  uint8_t temp2[20 * 16];
652  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
653  const int16_t *hfilter, *vfilter;
654
655  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
656  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
657
658  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
659                                    1, 9, 8, hfilter);
660  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
661  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
662  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
663}
664
// Sub-pixel variance of a 16x16 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance16x16 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[17 * 16];  // first-pass output, 17 rows of 16
  uint8_t filtered[20 * 16];    // second-pass output, 16 rows of 16 used

  // 17 rows are produced because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 17, 16, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 16, 16,
                                     vert_taps);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
685
686unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
687                                               int  src_pixels_per_line,
688                                               int  xoffset,
689                                               int  yoffset,
690                                               const uint8_t *dst_ptr,
691                                               int dst_pixels_per_line,
692                                               unsigned int *sse,
693                                               const uint8_t *second_pred) {
694  uint16_t fdata3[17 * 16];
695  uint8_t temp2[20 * 16];
696  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
697  const int16_t *hfilter, *vfilter;
698
699  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
700  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
701
702  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
703                                    1, 17, 16, hfilter);
704  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
705
706  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
707  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
708}
709
// Sub-pixel variance of a 64x64 block. The source is filtered with 2-tap
// kernels obtained from BILINEAR_FILTERS_2TAP(xoffset) (applied along rows)
// and BILINEAR_FILTERS_2TAP(yoffset) (applied down columns); the filtered
// block is then passed to vp9_variance64x64 together with dst_ptr and sse,
// and that call's result is returned.
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t horiz_out[65 * 64];  // first-pass output, 65 rows of 64
  uint8_t filtered[68 * 64];    // second-pass output, 64 rows of 64 used

  // 65 rows are produced because the vertical pass reads one row below
  // each output row.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 65, 64, horiz_taps);
  // pixel_step equals the row width, so vertically adjacent samples are
  // combined.
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 64, 64, 64, 64,
                                     vert_taps);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
730
731unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
732                                               int  src_pixels_per_line,
733                                               int  xoffset,
734                                               int  yoffset,
735                                               const uint8_t *dst_ptr,
736                                               int dst_pixels_per_line,
737                                               unsigned int *sse,
738                                               const uint8_t *second_pred) {
739  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
740  uint8_t temp2[68 * 64];
741  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
742  const int16_t *hfilter, *vfilter;
743
744  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
745  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
746
747  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
748                                    1, 65, 64, hfilter);
749  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
750  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
751  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
752}
753
// C version of the 32x32 sub-pixel variance.
//
// src_ptr is run through var_filter_block2d_bil_first_pass() with the 2-tap
// filter selected by xoffset, then through
// var_filter_block2d_bil_second_pass() with the filter selected by yoffset.
// The filtered block (stride 32) and dst_ptr are handed to
// vp9_variance32x32(); its result is returned as-is and sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1_out[33 * 32];  // intermediate output of the first pass
  uint8_t filtered[36 * 32];    // output of the second pass

  // Trailing arguments 1, 33, 32 are presumably pixel step, output height
  // and output width -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                    1, 33, 32, x_taps);
  var_filter_block2d_bil_second_pass(pass1_out, filtered,
                                     32, 32, 32, 32, y_taps);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
774
775unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
776                                               int  src_pixels_per_line,
777                                               int  xoffset,
778                                               int  yoffset,
779                                               const uint8_t *dst_ptr,
780                                               int dst_pixels_per_line,
781                                               unsigned int *sse,
782                                               const uint8_t *second_pred) {
783  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
784  uint8_t temp2[36 * 32];
785  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
786  const int16_t *hfilter, *vfilter;
787
788  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
789  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
790
791  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
792                                    1, 33, 32, hfilter);
793  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
794  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
795  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
796}
797
// Thin wrapper: 16x16 sub-pixel variance with the offsets fixed to
// xoffset = 8, yoffset = 0. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
806
// Thin wrapper: 32x32 sub-pixel variance with the offsets fixed to
// xoffset = 8, yoffset = 0. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
815
// Thin wrapper: 64x64 sub-pixel variance with the offsets fixed to
// xoffset = 8, yoffset = 0. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
824
// Thin wrapper: 16x16 sub-pixel variance with the offsets fixed to
// xoffset = 0, yoffset = 8. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
833
// Thin wrapper: 32x32 sub-pixel variance with the offsets fixed to
// xoffset = 0, yoffset = 8. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
842
// Thin wrapper: 64x64 sub-pixel variance with the offsets fixed to
// xoffset = 0, yoffset = 8. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int  source_stride,
                                              const uint8_t *ref_ptr,
                                              int  recon_stride,
                                              unsigned int *sse) {
  const int x_off = 0;
  const int y_off = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
851
// Thin wrapper: 16x16 sub-pixel variance with the offsets fixed to
// xoffset = 8, yoffset = 8. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
860
// Thin wrapper: 32x32 sub-pixel variance with the offsets fixed to
// xoffset = 8, yoffset = 8. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
869
// Thin wrapper: 64x64 sub-pixel variance with the offsets fixed to
// xoffset = 8, yoffset = 8. Returns the wrapped call's result unchanged.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int  source_stride,
                                               const uint8_t *ref_ptr,
                                               int  recon_stride,
                                               unsigned int *sse) {
  const int x_off = 8;
  const int y_off = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, x_off, y_off,
                                       ref_ptr, recon_stride, sse);
}
878
// Runs the 16x16 sub-pixel variance routine purely for its *sse output:
// the routine's own return value is discarded and the value it stored
// through sse is returned instead.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);
  return *sse;
}
891
// Runs the 32x32 sub-pixel variance routine purely for its *sse output:
// the routine's own return value is discarded and the value it stored
// through sse is returned instead.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);
  return *sse;
}
904
// Runs the 64x64 sub-pixel variance routine purely for its *sse output:
// the routine's own return value is discarded and the value it stored
// through sse is returned instead.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int  src_pixels_per_line,
                                      int  xoffset,
                                      int  yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);
  return *sse;
}
917
// C version of the 16x8 sub-pixel variance.
//
// src_ptr is run through var_filter_block2d_bil_first_pass() with the 2-tap
// filter selected by xoffset, then through
// var_filter_block2d_bil_second_pass() with the filter selected by yoffset.
// The filtered block (stride 16) and dst_ptr are handed to
// vp9_variance16x8(); its result is returned as-is and sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int  src_pixels_per_line,
                                          int  xoffset,
                                          int  yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1_out[16 * 9];  // intermediate output of the first pass
  uint8_t filtered[20 * 16];   // output of the second pass

  // Trailing arguments 1, 9, 16 are presumably pixel step, output height
  // and output width -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                    1, 9, 16, x_taps);
  var_filter_block2d_bil_second_pass(pass1_out, filtered,
                                     16, 16, 8, 16, y_taps);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
938
939unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
940                                              int  src_pixels_per_line,
941                                              int  xoffset,
942                                              int  yoffset,
943                                              const uint8_t *dst_ptr,
944                                              int dst_pixels_per_line,
945                                              unsigned int *sse,
946                                              const uint8_t *second_pred) {
947  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
948  uint8_t temp2[20 * 16];
949  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
950  const int16_t *hfilter, *vfilter;
951
952  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
953  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
954
955  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
956                                    1, 9, 16, hfilter);
957  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
958  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
959  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
960}
961
// C version of the 8x16 sub-pixel variance.
//
// src_ptr is run through var_filter_block2d_bil_first_pass() with the 2-tap
// filter selected by xoffset, then through
// var_filter_block2d_bil_second_pass() with the filter selected by yoffset.
// The filtered block (stride 8) and dst_ptr are handed to
// vp9_variance8x16(); its result is returned as-is and sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int  src_pixels_per_line,
                                          int  xoffset,
                                          int  yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  // NOTE(review): declared as 9 * 16 (144 entries) while the first pass is
  // given 17 and 8; if those are height and width it fills 136 entries, so
  // the buffer is large enough but the dimensions look transposed.
  uint16_t pass1_out[9 * 16];
  uint8_t filtered[20 * 16];  // output of the second pass

  // Trailing arguments 1, 17, 8 are presumably pixel step, output height
  // and output width -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                    1, 17, 8, x_taps);
  var_filter_block2d_bil_second_pass(pass1_out, filtered,
                                     8, 8, 16, 8, y_taps);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
982
983unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
984                                              int  src_pixels_per_line,
985                                              int  xoffset,
986                                              int  yoffset,
987                                              const uint8_t *dst_ptr,
988                                              int dst_pixels_per_line,
989                                              unsigned int *sse,
990                                              const uint8_t *second_pred) {
991  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
992  uint8_t temp2[20 * 16];
993  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
994  const int16_t *hfilter, *vfilter;
995
996  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
997  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
998
999  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1000                                    1, 17, 8, hfilter);
1001  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1002  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1003  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1004}
1005
// C version of the 8x4 sub-pixel variance.
//
// src_ptr is run through var_filter_block2d_bil_first_pass() with the 2-tap
// filter selected by xoffset, then through
// var_filter_block2d_bil_second_pass() with the filter selected by yoffset.
// The filtered block (stride 8) and dst_ptr are handed to
// vp9_variance8x4(); its result is returned as-is and sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t pass1_out[8 * 5];  // intermediate output of the first pass
  uint8_t filtered[20 * 16];  // output of the second pass

  // Trailing arguments 1, 5, 8 are presumably pixel step, output height
  // and output width -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                    1, 5, 8, x_taps);
  var_filter_block2d_bil_second_pass(pass1_out, filtered,
                                     8, 8, 4, 8, y_taps);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
1026
1027unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1028                                             int  src_pixels_per_line,
1029                                             int  xoffset,
1030                                             int  yoffset,
1031                                             const uint8_t *dst_ptr,
1032                                             int dst_pixels_per_line,
1033                                             unsigned int *sse,
1034                                             const uint8_t *second_pred) {
1035  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1036  uint8_t temp2[20 * 16];
1037  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
1038  const int16_t *hfilter, *vfilter;
1039
1040  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1041  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1042
1043  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1044                                    1, 5, 8, hfilter);
1045  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1046  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1047  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1048}
1049
// C version of the 4x8 sub-pixel variance.
//
// src_ptr is run through var_filter_block2d_bil_first_pass() with the 2-tap
// filter selected by xoffset, then through
// var_filter_block2d_bil_second_pass() with the filter selected by yoffset.
// The filtered block (stride 4) and dst_ptr are handed to
// vp9_variance4x8(); its result is returned as-is and sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);
  // NOTE(review): declared as 5 * 8 (40 entries) while the first pass is
  // given 9 and 4; if those are height and width it fills 36 entries, so
  // the buffer is large enough but the dimensions look transposed.
  uint16_t pass1_out[5 * 8];
  // FIXME(jingning,rbultje): this second-pass buffer probably does not need
  // to be this big; the same applies to the other block sizes.
  uint8_t filtered[20 * 16];

  // Trailing arguments 1, 9, 4 are presumably pixel step, output height
  // and output width -- confirm against the helper's definition.
  var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                    1, 9, 4, x_taps);
  var_filter_block2d_bil_second_pass(pass1_out, filtered,
                                     4, 4, 8, 4, y_taps);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
1072
1073unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1074                                             int  src_pixels_per_line,
1075                                             int  xoffset,
1076                                             int  yoffset,
1077                                             const uint8_t *dst_ptr,
1078                                             int dst_pixels_per_line,
1079                                             unsigned int *sse,
1080                                             const uint8_t *second_pred) {
1081  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1082  uint8_t temp2[20 * 16];
1083  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
1084  const int16_t *hfilter, *vfilter;
1085
1086  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1087  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1088
1089  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1090                                    1, 9, 4, hfilter);
1091  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1092  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1093  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1094}
1095
1096
// Writes the per-pixel average of two predictors into comp_pred.
//
// For each of `height` rows and `width` columns the output is
// (pred + ref + 1) >> 1, i.e. the mean with halves rounded up.
// comp_pred and pred are walked with a row pitch of `width`; ref is walked
// with a row pitch of `ref_stride`.
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row = 0;

  while (row < height) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = (uint8_t)((pair_sum + 1) >> 1);
    }

    // Step every pointer to the start of its next row.
    ref += ref_stride;
    pred += width;
    comp_pred += width;
    ++row;
  }
}
1112