1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "./vp9_rtcd.h"
12
13#include "vpx_ports/mem.h"
14#include "vpx/vpx_integer.h"
15
16#include "vp9/common/vp9_common.h"
17#include "vp9/common/vp9_filter.h"
18
19#include "vp9/encoder/vp9_variance.h"
20
// Computes difference statistics between two w x h blocks of 8-bit pixels.
//
// a, b               - top-left pixel of each block.
// a_stride, b_stride - offset in pixels from one row to the next.
// sse                - receives the sum of (a - b)^2 over the block.
// sum                - receives the sum of (a - b) over the block.
//
// Both outputs are written even when w or h is zero (they are set to 0).
void variance(const uint8_t *a, int  a_stride,
              const uint8_t *b, int  b_stride,
              int  w, int  h, unsigned int *sse, int *sum) {
  unsigned int squares = 0;
  int total = 0;
  int row, col;

  for (row = 0; row < h; ++row, a += a_stride, b += b_stride) {
    for (col = 0; col < w; ++col) {
      const int delta = a[col] - b[col];
      total += delta;
      squares += delta * delta;
    }
  }

  *sum = total;
  *sse = squares;
}
40
41// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
42// or vertical direction to produce the filtered output block. Used to implement
43// first-pass of 2-D separable filter.
44//
45// Produces int32_t output to retain precision for next pass. Two filter taps
46// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
47// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
48// defines the offset required to move from one input to the next.
49static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
50                                              uint16_t *output_ptr,
51                                              unsigned int src_pixels_per_line,
52                                              int pixel_step,
53                                              unsigned int output_height,
54                                              unsigned int output_width,
55                                              const int16_t *vp9_filter) {
56  unsigned int i, j;
57
58  for (i = 0; i < output_height; i++) {
59    for (j = 0; j < output_width; j++) {
60      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
61                          (int)src_ptr[pixel_step] * vp9_filter[1],
62                          FILTER_BITS);
63
64      src_ptr++;
65    }
66
67    // Next row...
68    src_ptr    += src_pixels_per_line - output_width;
69    output_ptr += output_width;
70  }
71}
72
73// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
74// or vertical direction to produce the filtered output block. Used to implement
75// second-pass of 2-D separable filter.
76//
77// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
78// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
79// filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
80// stride). It defines the offset required to move from one input to the next.
81static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
82                                               uint8_t *output_ptr,
83                                               unsigned int src_pixels_per_line,
84                                               unsigned int pixel_step,
85                                               unsigned int output_height,
86                                               unsigned int output_width,
87                                               const int16_t *vp9_filter) {
88  unsigned int  i, j;
89
90  for (i = 0; i < output_height; i++) {
91    for (j = 0; j < output_width; j++) {
92      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
93                          (int)src_ptr[pixel_step] * vp9_filter[1],
94                          FILTER_BITS);
95      src_ptr++;
96    }
97
98    src_ptr += src_pixels_per_line - output_width;
99    output_ptr += output_width;
100  }
101}
102
// Returns the sum of squares of the 256 int16_t values starting at src_ptr.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr < end) {
    const int value = *src_ptr++;
    total += value * value;
  }

  return total;
}
112
// Defines vp9_variance<W>x<H>_c(). The generated function calls variance() on
// the two W x H blocks, leaves the sum of squared differences in *sse and
// returns *sse - sum^2 / (W * H), where sum is the sum of differences.
#define VAR(W, H) \
unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                       const uint8_t *b, int b_stride, \
                                       unsigned int *sse) { \
  int diff_sum; \
  int64_t sum_squared; \
  variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
  sum_squared = (int64_t)diff_sum * diff_sum; \
  return *sse - (sum_squared / (W * H)); \
}
121
// Defines vp9_sub_pixel_variance<W>x<H>_c(). The generated function filters
// src with the 2-tap bilinear kernels selected by xoffset (first pass,
// pixel_step 1, H + 1 rows) and yoffset (second pass, pixel_step W, H rows),
// then returns vp9_variance<W>x<H>_c() of the filtered block against dst.
#define SUBPIX_VAR(W, H) \
unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse) { \
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset); \
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset); \
  uint16_t horiz_out[(H + 1) * W]; \
  uint8_t filtered[H * W]; \
\
  var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, 1, H + 1, W, \
                                    horiz_taps); \
  var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H, W, \
                                     vert_taps); \
\
  return vp9_variance##W##x##H##_c(filtered, W, dst, dst_stride, sse); \
}
138
// Defines vp9_sub_pixel_avg_variance<W>x<H>_c(). The generated function
// filters src with the 2-tap bilinear kernels selected by xoffset (first pass)
// and yoffset (second pass), combines the filtered block with second_pred via
// vp9_comp_avg_pred(), and returns vp9_variance<W>x<H>_c() of that combined
// block against dst.
#define SUBPIX_AVG_VAR(W, H) \
unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse, \
  const uint8_t *second_pred) { \
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset); \
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset); \
  uint16_t horiz_out[(H + 1) * W]; \
  uint8_t filtered[H * W]; \
  DECLARE_ALIGNED_ARRAY(16, uint8_t, averaged, H * W); \
\
  var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, 1, H + 1, W, \
                                    horiz_taps); \
  var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H, W, \
                                     vert_taps); \
\
  vp9_comp_avg_pred(averaged, second_pred, W, H, filtered, W); \
\
  return vp9_variance##W##x##H##_c(averaged, W, dst, dst_stride, sse); \
}
159
// Runs variance() on a 16x16 block: *sse receives the sum of squared
// differences between src_ptr and ref_ptr, *sum the sum of differences.
void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
                       const uint8_t *ref_ptr, int ref_stride,
                       unsigned int *sse, int *sum) {
  const int block_size = 16;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_size, block_size, sse, sum);
}
165
// Runs variance() on an 8x8 block: *sse receives the sum of squared
// differences between src_ptr and ref_ptr, *sum the sum of differences.
void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
                     const uint8_t *ref_ptr, int ref_stride,
                     unsigned int *sse, int *sum) {
  const int block_size = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_size, block_size, sse, sum);
}
171
// Returns the sum of squared differences between two 16x16 blocks; the same
// value is stored in *sse. The sum of differences that variance() also
// reports is discarded.
unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride,
                            unsigned int *sse) {
  int ignored_sum;

  variance(src, src_stride, ref, ref_stride, 16, 16, sse, &ignored_sum);
  return *sse;
}
179
// Returns the sum of squared differences between two blocks 16 wide and 8
// high; the same value is stored in *sse. The sum of differences that
// variance() also reports is discarded.
unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           unsigned int *sse) {
  int ignored_sum;

  variance(src, src_stride, ref, ref_stride, 16, 8, sse, &ignored_sum);
  return *sse;
}
187
// Returns the sum of squared differences between two blocks 8 wide and 16
// high; the same value is stored in *sse. The sum of differences that
// variance() also reports is discarded.
unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           unsigned int *sse) {
  int ignored_sum;

  variance(src, src_stride, ref, ref_stride, 8, 16, sse, &ignored_sum);
  return *sse;
}
195
// Returns the sum of squared differences between two 8x8 blocks; the same
// value is stored in *sse. The sum of differences that variance() also
// reports is discarded.
unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
                          const uint8_t *ref, int ref_stride,
                          unsigned int *sse) {
  int ignored_sum;

  variance(src, src_stride, ref, ref_stride, 8, 8, sse, &ignored_sum);
  return *sse;
}
203
204VAR(4, 4)
205SUBPIX_VAR(4, 4)
206SUBPIX_AVG_VAR(4, 4)
207
208VAR(4, 8)
209SUBPIX_VAR(4, 8)
210SUBPIX_AVG_VAR(4, 8)
211
212VAR(8, 4)
213SUBPIX_VAR(8, 4)
214SUBPIX_AVG_VAR(8, 4)
215
216VAR(8, 8)
217SUBPIX_VAR(8, 8)
218SUBPIX_AVG_VAR(8, 8)
219
220VAR(8, 16)
221SUBPIX_VAR(8, 16)
222SUBPIX_AVG_VAR(8, 16)
223
224VAR(16, 8)
225SUBPIX_VAR(16, 8)
226SUBPIX_AVG_VAR(16, 8)
227
228VAR(16, 16)
229SUBPIX_VAR(16, 16)
230SUBPIX_AVG_VAR(16, 16)
231
232VAR(16, 32)
233SUBPIX_VAR(16, 32)
234SUBPIX_AVG_VAR(16, 32)
235
236VAR(32, 16)
237SUBPIX_VAR(32, 16)
238SUBPIX_AVG_VAR(32, 16)
239
240VAR(32, 32)
241SUBPIX_VAR(32, 32)
242SUBPIX_AVG_VAR(32, 32)
243
244VAR(32, 64)
245SUBPIX_VAR(32, 64)
246SUBPIX_AVG_VAR(32, 64)
247
248VAR(64, 32)
249SUBPIX_VAR(64, 32)
250SUBPIX_AVG_VAR(64, 32)
251
252VAR(64, 64)
253SUBPIX_VAR(64, 64)
254SUBPIX_AVG_VAR(64, 64)
255
// Writes to comp_pred the per-pixel combination of pred and ref:
// ROUND_POWER_OF_TWO(pred + ref, 1) for every pixel of a width x height block.
//
// comp_pred and pred are both stepped by `width` per row (packed rows); ref is
// stepped by ref_stride.
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
270
271#if CONFIG_VP9_HIGHBITDEPTH
// Computes 64-bit difference statistics between two w x h blocks of 16-bit
// samples.
//
// a8 and b8 are converted with CONVERT_TO_SHORTPTR to obtain the uint16_t
// sample pointers, which are stepped by a_stride / b_stride per row.
// *sse receives the sum of (a - b)^2. *sum receives the sum of (a - b)
// accumulated in unsigned 64-bit arithmetic, so a negative total is stored
// modulo 2^64.
void high_variance64(const uint8_t *a8, int  a_stride,
                     const uint8_t *b8, int  b_stride,
                     int w, int h, uint64_t *sse,
                     uint64_t *sum) {
  uint16_t *row_a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *row_b = CONVERT_TO_SHORTPTR(b8);
  uint64_t diff_total = 0;
  uint64_t square_total = 0;
  int row, col;

  for (row = 0; row < h; ++row, row_a += a_stride, row_b += b_stride) {
    for (col = 0; col < w; ++col) {
      const int delta = row_a[col] - row_b[col];
      diff_total += delta;
      square_total += delta * delta;
    }
  }

  *sum = diff_total;
  *sse = square_total;
}
293
// Runs high_variance64() on the two w x h blocks and stores its 64-bit
// results, converted without scaling, into the narrower outputs: *sse
// (unsigned int) and *sum (int).
void high_variance(const uint8_t *a8, int  a_stride,
                   const uint8_t *b8, int  b_stride,
                   int w, int h, unsigned int *sse,
                   int *sum) {
  uint64_t wide_sse = 0;
  uint64_t wide_sum = 0;

  high_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);

  *sse = wide_sse;
  *sum = wide_sum;
}
304
// Runs high_variance64() on the two w x h blocks and stores scaled results:
// *sum is the 64-bit sum passed through ROUND_POWER_OF_TWO(..., 2) and *sse
// is the 64-bit sum of squares passed through ROUND_POWER_OF_TWO(..., 4).
// NOTE(review): the shifts presumably bring 10-bit statistics down to the
// 8-bit scale - confirm against callers.
void high_10_variance(const uint8_t *a8, int  a_stride,
                      const uint8_t *b8, int  b_stride,
                      int w, int h, unsigned int *sse,
                      int *sum) {
  uint64_t wide_sse = 0;
  uint64_t wide_sum = 0;

  high_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);

  *sum = ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = ROUND_POWER_OF_TWO(wide_sse, 4);
}
315
// Runs high_variance64() on the two w x h blocks and stores scaled results:
// *sum is the 64-bit sum passed through ROUND_POWER_OF_TWO(..., 4) and *sse
// is the 64-bit sum of squares passed through ROUND_POWER_OF_TWO(..., 8).
// NOTE(review): the shifts presumably bring 12-bit statistics down to the
// 8-bit scale - confirm against callers.
void high_12_variance(const uint8_t *a8, int  a_stride,
                      const uint8_t *b8, int  b_stride,
                      int w, int h, unsigned int *sse,
                      int *sum) {
  uint64_t wide_sse = 0;
  uint64_t wide_sum = 0;

  high_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);

  *sum = ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = ROUND_POWER_OF_TWO(wide_sse, 8);
}
326
327static void high_var_filter_block2d_bil_first_pass(
328    const uint8_t *src_ptr8,
329    uint16_t *output_ptr,
330    unsigned int src_pixels_per_line,
331    int pixel_step,
332    unsigned int output_height,
333    unsigned int output_width,
334    const int16_t *vp9_filter) {
335  unsigned int i, j;
336  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
337  for (i = 0; i < output_height; i++) {
338    for (j = 0; j < output_width; j++) {
339      output_ptr[j] =
340          ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
341                             (int)src_ptr[pixel_step] * vp9_filter[1],
342                             FILTER_BITS);
343
344      src_ptr++;
345    }
346
347    // Next row...
348    src_ptr += src_pixels_per_line - output_width;
349    output_ptr += output_width;
350  }
351}
352
353static void high_var_filter_block2d_bil_second_pass(
354    const uint16_t *src_ptr,
355    uint16_t *output_ptr,
356    unsigned int src_pixels_per_line,
357    unsigned int pixel_step,
358    unsigned int output_height,
359    unsigned int output_width,
360    const int16_t *vp9_filter) {
361  unsigned int  i, j;
362
363  for (i = 0; i < output_height; i++) {
364    for (j = 0; j < output_width; j++) {
365      output_ptr[j] =
366          ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
367                             (int)src_ptr[pixel_step] * vp9_filter[1],
368                             FILTER_BITS);
369      src_ptr++;
370    }
371
372    src_ptr += src_pixels_per_line - output_width;
373    output_ptr += output_width;
374  }
375}
376
// Shared body of the functions generated by HIGH_VAR. NAME is the function
// name prefix and STATS the helper that fills *sse and the sum of differences
// for the two W x H blocks. The generated function returns
// *sse - sum^2 / (W * H).
#define HIGH_VAR_IMPL(NAME, STATS, W, H) \
unsigned int NAME##variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                         const uint8_t *b, int b_stride, \
                                         unsigned int *sse) { \
  int diff_sum; \
  int64_t sum_squared; \
  STATS(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
  sum_squared = (int64_t)diff_sum * diff_sum; \
  return *sse - (sum_squared / (W * H)); \
}

// Defines vp9_high_variance<W>x<H>_c, vp9_high_10_variance<W>x<H>_c and
// vp9_high_12_variance<W>x<H>_c, built on high_variance(), high_10_variance()
// and high_12_variance() respectively.
#define HIGH_VAR(W, H) \
HIGH_VAR_IMPL(vp9_high_, high_variance, W, H) \
\
HIGH_VAR_IMPL(vp9_high_10_, high_10_variance, W, H) \
\
HIGH_VAR_IMPL(vp9_high_12_, high_12_variance, W, H)
401
// Shared body of the functions generated by HIGH_SUBPIX_VAR. NAME is the
// function name prefix; it selects both the generated function's name and the
// NAME##variance<W>x<H>_c function applied to the filtered block. The source
// is filtered with the 2-tap bilinear kernels selected by xoffset (first
// pass, pixel_step 1, H + 1 rows) and yoffset (second pass, pixel_step W).
#define HIGH_SUBPIX_VAR_IMPL(NAME, W, H) \
unsigned int NAME##sub_pixel_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse) { \
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset); \
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset); \
  uint16_t horiz_out[(H + 1) * W]; \
  uint16_t filtered[H * W]; \
\
  high_var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, 1, \
                                         H + 1, W, horiz_taps); \
  high_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H, W, \
                                          vert_taps); \
\
  return NAME##variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered), W, dst, \
                                     dst_stride, sse); \
}

// Defines vp9_high_sub_pixel_variance<W>x<H>_c,
// vp9_high_10_sub_pixel_variance<W>x<H>_c and
// vp9_high_12_sub_pixel_variance<W>x<H>_c.
#define HIGH_SUBPIX_VAR(W, H) \
HIGH_SUBPIX_VAR_IMPL(vp9_high_, W, H) \
\
HIGH_SUBPIX_VAR_IMPL(vp9_high_10_, W, H) \
\
HIGH_SUBPIX_VAR_IMPL(vp9_high_12_, W, H)
453
// Shared body of the functions generated by HIGH_SUBPIX_AVG_VAR. NAME is the
// function name prefix; it selects both the generated function's name and the
// NAME##variance<W>x<H>_c function applied at the end. The source is filtered
// with the 2-tap bilinear kernels selected by xoffset (first pass) and yoffset
// (second pass), combined with second_pred via vp9_high_comp_avg_pred(), and
// the combined block is compared against dst.
#define HIGH_SUBPIX_AVG_VAR_IMPL(NAME, W, H) \
unsigned int NAME##sub_pixel_avg_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  unsigned int *sse, \
  const uint8_t *second_pred) { \
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset); \
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset); \
  uint16_t horiz_out[(H + 1) * W]; \
  uint16_t filtered[H * W]; \
  DECLARE_ALIGNED_ARRAY(16, uint16_t, averaged, H * W); \
\
  high_var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, 1, \
                                         H + 1, W, horiz_taps); \
  high_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, H, W, \
                                          vert_taps); \
\
  vp9_high_comp_avg_pred(averaged, second_pred, W, H, \
                         CONVERT_TO_BYTEPTR(filtered), W); \
\
  return NAME##variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged), W, dst, \
                                     dst_stride, sse); \
}

// Defines vp9_high_sub_pixel_avg_variance<W>x<H>_c,
// vp9_high_10_sub_pixel_avg_variance<W>x<H>_c and
// vp9_high_12_sub_pixel_avg_variance<W>x<H>_c.
#define HIGH_SUBPIX_AVG_VAR(W, H) \
HIGH_SUBPIX_AVG_VAR_IMPL(vp9_high_, W, H) \
\
HIGH_SUBPIX_AVG_VAR_IMPL(vp9_high_10_, W, H) \
\
HIGH_SUBPIX_AVG_VAR_IMPL(vp9_high_12_, W, H)
520
// Shared body of the functions generated by HIGH_GET_VAR. NAME is the
// function name prefix and STATS the helper invoked on the two S x S blocks
// to fill *sse and *sum.
#define HIGH_GET_VAR_IMPL(NAME, STATS, S) \
void NAME##get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                               const uint8_t *ref, int ref_stride, \
                               unsigned int *sse, int *sum) { \
  STATS(src, src_stride, ref, ref_stride, S, S, sse, sum); \
}

// Defines vp9_high_get<S>x<S>var_c, vp9_high_10_get<S>x<S>var_c and
// vp9_high_12_get<S>x<S>var_c, built on high_variance(), high_10_variance()
// and high_12_variance() respectively.
#define HIGH_GET_VAR(S) \
HIGH_GET_VAR_IMPL(vp9_high_, high_variance, S) \
\
HIGH_GET_VAR_IMPL(vp9_high_10_, high_10_variance, S) \
\
HIGH_GET_VAR_IMPL(vp9_high_12_, high_12_variance, S)
539
// Shared body of the functions generated by HIGH_MSE. NAME is the function
// name prefix and STATS the helper invoked on the two W x H blocks. The
// generated function returns the value STATS leaves in *sse; the sum of
// differences it also reports is discarded.
#define HIGH_MSE_IMPL(NAME, STATS, W, H) \
unsigned int NAME##mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                    const uint8_t *ref, int ref_stride, \
                                    unsigned int *sse) { \
  int ignored_sum; \
  STATS(src, src_stride, ref, ref_stride, W, H, sse, &ignored_sum); \
  return *sse; \
}

// Defines vp9_high_mse<W>x<H>_c, vp9_high_10_mse<W>x<H>_c and
// vp9_high_12_mse<W>x<H>_c, built on high_variance(), high_10_variance() and
// high_12_variance() respectively.
#define HIGH_MSE(W, H) \
HIGH_MSE_IMPL(vp9_high_, high_variance, W, H) \
\
HIGH_MSE_IMPL(vp9_high_10_, high_10_variance, W, H) \
\
HIGH_MSE_IMPL(vp9_high_12_, high_12_variance, W, H)
564
565HIGH_GET_VAR(8)
566HIGH_GET_VAR(16)
567
568HIGH_MSE(16, 16)
569HIGH_MSE(16, 8)
570HIGH_MSE(8, 16)
571HIGH_MSE(8, 8)
572
573HIGH_VAR(4, 4)
574HIGH_SUBPIX_VAR(4, 4)
575HIGH_SUBPIX_AVG_VAR(4, 4)
576
577HIGH_VAR(4, 8)
578HIGH_SUBPIX_VAR(4, 8)
579HIGH_SUBPIX_AVG_VAR(4, 8)
580
581HIGH_VAR(8, 4)
582HIGH_SUBPIX_VAR(8, 4)
583HIGH_SUBPIX_AVG_VAR(8, 4)
584
585HIGH_VAR(8, 8)
586HIGH_SUBPIX_VAR(8, 8)
587HIGH_SUBPIX_AVG_VAR(8, 8)
588
589HIGH_VAR(8, 16)
590HIGH_SUBPIX_VAR(8, 16)
591HIGH_SUBPIX_AVG_VAR(8, 16)
592
593HIGH_VAR(16, 8)
594HIGH_SUBPIX_VAR(16, 8)
595HIGH_SUBPIX_AVG_VAR(16, 8)
596
597HIGH_VAR(16, 16)
598HIGH_SUBPIX_VAR(16, 16)
599HIGH_SUBPIX_AVG_VAR(16, 16)
600
601HIGH_VAR(16, 32)
602HIGH_SUBPIX_VAR(16, 32)
603HIGH_SUBPIX_AVG_VAR(16, 32)
604
605HIGH_VAR(32, 16)
606HIGH_SUBPIX_VAR(32, 16)
607HIGH_SUBPIX_AVG_VAR(32, 16)
608
609HIGH_VAR(32, 32)
610HIGH_SUBPIX_VAR(32, 32)
611HIGH_SUBPIX_AVG_VAR(32, 32)
612
613HIGH_VAR(32, 64)
614HIGH_SUBPIX_VAR(32, 64)
615HIGH_SUBPIX_AVG_VAR(32, 64)
616
617HIGH_VAR(64, 32)
618HIGH_SUBPIX_VAR(64, 32)
619HIGH_SUBPIX_AVG_VAR(64, 32)
620
621HIGH_VAR(64, 64)
622HIGH_SUBPIX_VAR(64, 64)
623HIGH_SUBPIX_AVG_VAR(64, 64)
624
// Writes to comp_pred the per-sample combination of two 16-bit blocks:
// ROUND_POWER_OF_TWO(pred + ref, 1) for every sample of a width x height
// block.
//
// pred8 and ref8 are converted with CONVERT_TO_SHORTPTR to obtain the
// uint16_t sample pointers. comp_pred and pred are both stepped by `width`
// per row (packed rows); ref is stepped by ref_stride.
void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                            int width, int height, const uint8_t *ref8,
                            int ref_stride) {
  uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    ref_row += ref_stride;
    pred_row += width;
    comp_pred += width;
  }
}
641#endif  // CONFIG_VP9_HIGHBITDEPTH
642