vpx_convolve.c revision 0a39d0a697ff3603e8c100300fda363658e10b23
1/*
2 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <string.h>
13
14#include "./vpx_config.h"
15#include "./vpx_dsp_rtcd.h"
16#include "vpx/vpx_integer.h"
17#include "vpx_dsp/vpx_convolve.h"
18#include "vpx_dsp/vpx_dsp_common.h"
19#include "vpx_dsp/vpx_filter.h"
20#include "vpx_ports/mem.h"
21
22static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
23                           uint8_t *dst, ptrdiff_t dst_stride,
24                           const InterpKernel *x_filters, int x0_q4,
25                           int x_step_q4, int w, int h) {
26  int x, y;
27  src -= SUBPEL_TAPS / 2 - 1;
28
29  for (y = 0; y < h; ++y) {
30    int x_q4 = x0_q4;
31    for (x = 0; x < w; ++x) {
32      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
33      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
34      int k, sum = 0;
35      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
36      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
37      x_q4 += x_step_q4;
38    }
39    src += src_stride;
40    dst += dst_stride;
41  }
42}
43
44static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
45                               uint8_t *dst, ptrdiff_t dst_stride,
46                               const InterpKernel *x_filters, int x0_q4,
47                               int x_step_q4, int w, int h) {
48  int x, y;
49  src -= SUBPEL_TAPS / 2 - 1;
50
51  for (y = 0; y < h; ++y) {
52    int x_q4 = x0_q4;
53    for (x = 0; x < w; ++x) {
54      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
55      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
56      int k, sum = 0;
57      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
58      dst[x] = ROUND_POWER_OF_TWO(
59          dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
60      x_q4 += x_step_q4;
61    }
62    src += src_stride;
63    dst += dst_stride;
64  }
65}
66
67static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
68                          uint8_t *dst, ptrdiff_t dst_stride,
69                          const InterpKernel *y_filters, int y0_q4,
70                          int y_step_q4, int w, int h) {
71  int x, y;
72  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
73
74  for (x = 0; x < w; ++x) {
75    int y_q4 = y0_q4;
76    for (y = 0; y < h; ++y) {
77      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
78      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
79      int k, sum = 0;
80      for (k = 0; k < SUBPEL_TAPS; ++k)
81        sum += src_y[k * src_stride] * y_filter[k];
82      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
83      y_q4 += y_step_q4;
84    }
85    ++src;
86    ++dst;
87  }
88}
89
90static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
91                              uint8_t *dst, ptrdiff_t dst_stride,
92                              const InterpKernel *y_filters, int y0_q4,
93                              int y_step_q4, int w, int h) {
94  int x, y;
95  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
96
97  for (x = 0; x < w; ++x) {
98    int y_q4 = y0_q4;
99    for (y = 0; y < h; ++y) {
100      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
101      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
102      int k, sum = 0;
103      for (k = 0; k < SUBPEL_TAPS; ++k)
104        sum += src_y[k * src_stride] * y_filter[k];
105      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
106          dst[y * dst_stride] +
107              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
108          1);
109      y_q4 += y_step_q4;
110    }
111    ++src;
112    ++dst;
113  }
114}
115
116static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
117                     ptrdiff_t dst_stride, const InterpKernel *const x_filters,
118                     int x0_q4, int x_step_q4,
119                     const InterpKernel *const y_filters, int y0_q4,
120                     int y_step_q4, int w, int h) {
121  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
122  // 2d filtering proceeds in 2 steps:
123  //   (1) Interpolate horizontally into an intermediate buffer, temp.
124  //   (2) Interpolate temp vertically to derive the sub-pixel result.
125  // Deriving the maximum number of rows in the temp buffer (135):
126  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
127  // --Largest block size is 64x64 pixels.
128  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
129  //   original frame (in 1/16th pixel units).
130  // --Must round-up because block may be located at sub-pixel position.
131  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
132  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
133  uint8_t temp[64 * 135];
134  const int intermediate_height =
135      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
136
137  assert(w <= 64);
138  assert(h <= 64);
139  assert(y_step_q4 <= 32);
140  assert(x_step_q4 <= 32);
141
142  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
143                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
144  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
145                y_filters, y0_q4, y_step_q4, w, h);
146}
147
148void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
149                           uint8_t *dst, ptrdiff_t dst_stride,
150                           const int16_t *filter_x, int x_step_q4,
151                           const int16_t *filter_y, int y_step_q4, int w,
152                           int h) {
153  const InterpKernel *const filters_x = get_filter_base(filter_x);
154  const int x0_q4 = get_filter_offset(filter_x, filters_x);
155
156  (void)filter_y;
157  (void)y_step_q4;
158
159  convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
160                 w, h);
161}
162
163void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
164                               uint8_t *dst, ptrdiff_t dst_stride,
165                               const int16_t *filter_x, int x_step_q4,
166                               const int16_t *filter_y, int y_step_q4, int w,
167                               int h) {
168  const InterpKernel *const filters_x = get_filter_base(filter_x);
169  const int x0_q4 = get_filter_offset(filter_x, filters_x);
170
171  (void)filter_y;
172  (void)y_step_q4;
173
174  convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
175                     x_step_q4, w, h);
176}
177
178void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
179                          uint8_t *dst, ptrdiff_t dst_stride,
180                          const int16_t *filter_x, int x_step_q4,
181                          const int16_t *filter_y, int y_step_q4, int w,
182                          int h) {
183  const InterpKernel *const filters_y = get_filter_base(filter_y);
184  const int y0_q4 = get_filter_offset(filter_y, filters_y);
185
186  (void)filter_x;
187  (void)x_step_q4;
188
189  convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
190                w, h);
191}
192
193void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
194                              uint8_t *dst, ptrdiff_t dst_stride,
195                              const int16_t *filter_x, int x_step_q4,
196                              const int16_t *filter_y, int y_step_q4, int w,
197                              int h) {
198  const InterpKernel *const filters_y = get_filter_base(filter_y);
199  const int y0_q4 = get_filter_offset(filter_y, filters_y);
200
201  (void)filter_x;
202  (void)x_step_q4;
203
204  convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
205                    y_step_q4, w, h);
206}
207
208void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
209                     ptrdiff_t dst_stride, const int16_t *filter_x,
210                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
211                     int w, int h) {
212  const InterpKernel *const filters_x = get_filter_base(filter_x);
213  const int x0_q4 = get_filter_offset(filter_x, filters_x);
214  const InterpKernel *const filters_y = get_filter_base(filter_y);
215  const int y0_q4 = get_filter_offset(filter_y, filters_y);
216
217  convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
218           filters_y, y0_q4, y_step_q4, w, h);
219}
220
221void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
222                         ptrdiff_t dst_stride, const int16_t *filter_x,
223                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
224                         int w, int h) {
225  // Fixed size intermediate buffer places limits on parameters.
226  DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
227  assert(w <= 64);
228  assert(h <= 64);
229
230  vpx_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
231                  y_step_q4, w, h);
232  vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
233}
234
// Copies a w x h block of 8-bit pixels from src to dst, one row at a time.
//
// src_stride / dst_stride are the distances, in pixels, between the starts
// of consecutive rows. The filter arguments are present only so the
// signature lines up with the filtering functions in this file; they are
// ignored. Rows are copied with memcpy(), so a source row must not overlap
// the destination row it is copied to.
//
// A non-positive w or h copies nothing.
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int filter_x_stride, const int16_t *filter_y,
                         int filter_y_stride, int w, int h) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  // memcpy() takes a size_t length: a negative w would silently convert to
  // an enormous byte count, so reject it before the copy loop.
  if (w <= 0) return;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, (size_t)w);
    src += src_stride;
    dst += dst_stride;
  }
}
252
// Blends a w x h block of src into dst: every destination pixel becomes
// ROUND_POWER_OF_TWO(dst + src, 1) of the co-located pair.
//
// src_stride / dst_stride are the distances, in pixels, between the starts
// of consecutive rows. The filter arguments are not used.
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int filter_x_stride, const int16_t *filter_y,
                        int filter_y_stride, int w, int h) {
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col) {
      const int pair_sum = dst[col] + src[col];
      dst[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    src += src_stride;
    dst += dst_stride;
  }
}
270
// Scaled horizontal filtering entry point. The C version has no dedicated
// scaling path: it forwards every argument unchanged to
// vpx_convolve8_horiz_c().
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  vpx_convolve8_horiz_c(src, src_stride,
                        dst, dst_stride,
                        filter_x, x_step_q4,
                        filter_y, y_step_q4,
                        w, h);
}
278
// Scaled vertical filtering entry point. The C version has no dedicated
// scaling path: it forwards every argument unchanged to
// vpx_convolve8_vert_c().
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const int16_t *filter_x,
                       int x_step_q4, const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  vpx_convolve8_vert_c(src, src_stride,
                       dst, dst_stride,
                       filter_x, x_step_q4,
                       filter_y, y_step_q4,
                       w, h);
}
286
// Scaled 2-D filtering entry point. The C version has no dedicated scaling
// path: it forwards every argument unchanged to vpx_convolve8_c().
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  vpx_convolve8_c(src, src_stride,
                  dst, dst_stride,
                  filter_x, x_step_q4,
                  filter_y, y_step_q4,
                  w, h);
}
294
// Scaled horizontal filtering with blending into dst. The C version has no
// dedicated scaling path: it forwards every argument unchanged to
// vpx_convolve8_avg_horiz_c().
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h) {
  vpx_convolve8_avg_horiz_c(src, src_stride,
                            dst, dst_stride,
                            filter_x, x_step_q4,
                            filter_y, y_step_q4,
                            w, h);
}
303
// Scaled vertical filtering with blending into dst. The C version has no
// dedicated scaling path: it forwards every argument unchanged to
// vpx_convolve8_avg_vert_c().
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  vpx_convolve8_avg_vert_c(src, src_stride,
                           dst, dst_stride,
                           filter_x, x_step_q4,
                           filter_y, y_step_q4,
                           w, h);
}
312
// Scaled 2-D filtering with blending into dst. The C version has no
// dedicated scaling path: it forwards every argument unchanged to
// vpx_convolve8_avg_c().
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  vpx_convolve8_avg_c(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
                      w, h);
}
320
321#if CONFIG_VP9_HIGHBITDEPTH
322static void highbd_convolve_horiz(const uint16_t *src, ptrdiff_t src_stride,
323                                  uint16_t *dst, ptrdiff_t dst_stride,
324                                  const InterpKernel *x_filters, int x0_q4,
325                                  int x_step_q4, int w, int h, int bd) {
326  int x, y;
327  src -= SUBPEL_TAPS / 2 - 1;
328
329  for (y = 0; y < h; ++y) {
330    int x_q4 = x0_q4;
331    for (x = 0; x < w; ++x) {
332      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
333      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
334      int k, sum = 0;
335      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
336      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
337      x_q4 += x_step_q4;
338    }
339    src += src_stride;
340    dst += dst_stride;
341  }
342}
343
344static void highbd_convolve_avg_horiz(const uint16_t *src, ptrdiff_t src_stride,
345                                      uint16_t *dst, ptrdiff_t dst_stride,
346                                      const InterpKernel *x_filters, int x0_q4,
347                                      int x_step_q4, int w, int h, int bd) {
348  int x, y;
349  src -= SUBPEL_TAPS / 2 - 1;
350
351  for (y = 0; y < h; ++y) {
352    int x_q4 = x0_q4;
353    for (x = 0; x < w; ++x) {
354      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
355      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
356      int k, sum = 0;
357      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
358      dst[x] = ROUND_POWER_OF_TWO(
359          dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
360          1);
361      x_q4 += x_step_q4;
362    }
363    src += src_stride;
364    dst += dst_stride;
365  }
366}
367
368static void highbd_convolve_vert(const uint16_t *src, ptrdiff_t src_stride,
369                                 uint16_t *dst, ptrdiff_t dst_stride,
370                                 const InterpKernel *y_filters, int y0_q4,
371                                 int y_step_q4, int w, int h, int bd) {
372  int x, y;
373  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
374
375  for (x = 0; x < w; ++x) {
376    int y_q4 = y0_q4;
377    for (y = 0; y < h; ++y) {
378      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
379      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
380      int k, sum = 0;
381      for (k = 0; k < SUBPEL_TAPS; ++k)
382        sum += src_y[k * src_stride] * y_filter[k];
383      dst[y * dst_stride] =
384          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
385      y_q4 += y_step_q4;
386    }
387    ++src;
388    ++dst;
389  }
390}
391
392static void highbd_convolve_avg_vert(const uint16_t *src, ptrdiff_t src_stride,
393                                     uint16_t *dst, ptrdiff_t dst_stride,
394                                     const InterpKernel *y_filters, int y0_q4,
395                                     int y_step_q4, int w, int h, int bd) {
396  int x, y;
397  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
398
399  for (x = 0; x < w; ++x) {
400    int y_q4 = y0_q4;
401    for (y = 0; y < h; ++y) {
402      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
403      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
404      int k, sum = 0;
405      for (k = 0; k < SUBPEL_TAPS; ++k)
406        sum += src_y[k * src_stride] * y_filter[k];
407      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
408          dst[y * dst_stride] +
409              clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
410          1);
411      y_q4 += y_step_q4;
412    }
413    ++src;
414    ++dst;
415  }
416}
417
418static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
419                            uint16_t *dst, ptrdiff_t dst_stride,
420                            const InterpKernel *const x_filters, int x0_q4,
421                            int x_step_q4, const InterpKernel *const y_filters,
422                            int y0_q4, int y_step_q4, int w, int h, int bd) {
423  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
424  // 2d filtering proceeds in 2 steps:
425  //   (1) Interpolate horizontally into an intermediate buffer, temp.
426  //   (2) Interpolate temp vertically to derive the sub-pixel result.
427  // Deriving the maximum number of rows in the temp buffer (135):
428  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
429  // --Largest block size is 64x64 pixels.
430  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
431  //   original frame (in 1/16th pixel units).
432  // --Must round-up because block may be located at sub-pixel position.
433  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
434  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
435  uint16_t temp[64 * 135];
436  const int intermediate_height =
437      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
438
439  assert(w <= 64);
440  assert(h <= 64);
441  assert(y_step_q4 <= 32);
442  assert(x_step_q4 <= 32);
443
444  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
445                        temp, 64, x_filters, x0_q4, x_step_q4, w,
446                        intermediate_height, bd);
447  highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
448                       y_filters, y0_q4, y_step_q4, w, h, bd);
449}
450
451void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
452                                  uint16_t *dst, ptrdiff_t dst_stride,
453                                  const int16_t *filter_x, int x_step_q4,
454                                  const int16_t *filter_y, int y_step_q4, int w,
455                                  int h, int bd) {
456  const InterpKernel *const filters_x = get_filter_base(filter_x);
457  const int x0_q4 = get_filter_offset(filter_x, filters_x);
458
459  (void)filter_y;
460  (void)y_step_q4;
461
462  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
463                        x_step_q4, w, h, bd);
464}
465
466void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
467                                      uint16_t *dst, ptrdiff_t dst_stride,
468                                      const int16_t *filter_x, int x_step_q4,
469                                      const int16_t *filter_y, int y_step_q4,
470                                      int w, int h, int bd) {
471  const InterpKernel *const filters_x = get_filter_base(filter_x);
472  const int x0_q4 = get_filter_offset(filter_x, filters_x);
473
474  (void)filter_y;
475  (void)y_step_q4;
476
477  highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
478                            x_step_q4, w, h, bd);
479}
480
481void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
482                                 uint16_t *dst, ptrdiff_t dst_stride,
483                                 const int16_t *filter_x, int x_step_q4,
484                                 const int16_t *filter_y, int y_step_q4, int w,
485                                 int h, int bd) {
486  const InterpKernel *const filters_y = get_filter_base(filter_y);
487  const int y0_q4 = get_filter_offset(filter_y, filters_y);
488
489  (void)filter_x;
490  (void)x_step_q4;
491
492  highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
493                       y_step_q4, w, h, bd);
494}
495
496void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
497                                     uint16_t *dst, ptrdiff_t dst_stride,
498                                     const int16_t *filter_x, int x_step_q4,
499                                     const int16_t *filter_y, int y_step_q4,
500                                     int w, int h, int bd) {
501  const InterpKernel *const filters_y = get_filter_base(filter_y);
502  const int y0_q4 = get_filter_offset(filter_y, filters_y);
503
504  (void)filter_x;
505  (void)x_step_q4;
506
507  highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
508                           y_step_q4, w, h, bd);
509}
510
511void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
512                            uint16_t *dst, ptrdiff_t dst_stride,
513                            const int16_t *filter_x, int x_step_q4,
514                            const int16_t *filter_y, int y_step_q4, int w,
515                            int h, int bd) {
516  const InterpKernel *const filters_x = get_filter_base(filter_x);
517  const int x0_q4 = get_filter_offset(filter_x, filters_x);
518  const InterpKernel *const filters_y = get_filter_base(filter_y);
519  const int y0_q4 = get_filter_offset(filter_y, filters_y);
520
521  highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
522                  filters_y, y0_q4, y_step_q4, w, h, bd);
523}
524
525void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
526                                uint16_t *dst, ptrdiff_t dst_stride,
527                                const int16_t *filter_x, int x_step_q4,
528                                const int16_t *filter_y, int y_step_q4, int w,
529                                int h, int bd) {
530  // Fixed size intermediate buffer places limits on parameters.
531  DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
532  assert(w <= 64);
533  assert(h <= 64);
534
535  vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4,
536                         filter_y, y_step_q4, w, h, bd);
537  vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h,
538                            bd);
539}
540
// Copies a w x h block of 16-bit samples from src to dst, one row at a time.
//
// src_stride / dst_stride are the distances, in samples, between the starts
// of consecutive rows. The filter arguments and bd are present only so the
// signature lines up with the filtering functions in this file; they are
// ignored. Rows are copied with memcpy(), so a source row must not overlap
// the destination row it is copied to.
//
// A non-positive w or h copies nothing.
void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
                                uint16_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  // The byte count is computed in size_t: a negative w would silently
  // convert to an enormous length, so reject it before the copy loop.
  if (w <= 0) return;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, (size_t)w * sizeof(*dst));
    src += src_stride;
    dst += dst_stride;
  }
}
560
// Blends a w x h block of 16-bit samples from src into dst: every
// destination sample becomes ROUND_POWER_OF_TWO(dst + src, 1) of the
// co-located pair.
//
// src_stride / dst_stride are the distances, in samples, between the starts
// of consecutive rows. The filter arguments and bd are not used.
void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride,
                               uint16_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col) {
      const int pair_sum = dst[col] + src[col];
      dst[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    src += src_stride;
    dst += dst_stride;
  }
}
580#endif
581