1/*
2 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <string.h>
13
14#include "./vpx_config.h"
15#include "./vpx_dsp_rtcd.h"
16#include "vpx/vpx_integer.h"
17#include "vpx_dsp/vpx_convolve.h"
18#include "vpx_dsp/vpx_dsp_common.h"
19#include "vpx_dsp/vpx_filter.h"
20#include "vpx_ports/mem.h"
21
22static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
23                           uint8_t *dst, ptrdiff_t dst_stride,
24                           const InterpKernel *x_filters,
25                           int x0_q4, int x_step_q4, int w, int h) {
26  int x, y;
27  src -= SUBPEL_TAPS / 2 - 1;
28  for (y = 0; y < h; ++y) {
29    int x_q4 = x0_q4;
30    for (x = 0; x < w; ++x) {
31      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
32      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
33      int k, sum = 0;
34      for (k = 0; k < SUBPEL_TAPS; ++k)
35        sum += src_x[k] * x_filter[k];
36      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
37      x_q4 += x_step_q4;
38    }
39    src += src_stride;
40    dst += dst_stride;
41  }
42}
43
44static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
45                               uint8_t *dst, ptrdiff_t dst_stride,
46                               const InterpKernel *x_filters,
47                               int x0_q4, int x_step_q4, int w, int h) {
48  int x, y;
49  src -= SUBPEL_TAPS / 2 - 1;
50  for (y = 0; y < h; ++y) {
51    int x_q4 = x0_q4;
52    for (x = 0; x < w; ++x) {
53      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
54      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
55      int k, sum = 0;
56      for (k = 0; k < SUBPEL_TAPS; ++k)
57        sum += src_x[k] * x_filter[k];
58      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
59          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
60      x_q4 += x_step_q4;
61    }
62    src += src_stride;
63    dst += dst_stride;
64  }
65}
66
67static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
68                          uint8_t *dst, ptrdiff_t dst_stride,
69                          const InterpKernel *y_filters,
70                          int y0_q4, int y_step_q4, int w, int h) {
71  int x, y;
72  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
73
74  for (x = 0; x < w; ++x) {
75    int y_q4 = y0_q4;
76    for (y = 0; y < h; ++y) {
77      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
78      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
79      int k, sum = 0;
80      for (k = 0; k < SUBPEL_TAPS; ++k)
81        sum += src_y[k * src_stride] * y_filter[k];
82      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
83      y_q4 += y_step_q4;
84    }
85    ++src;
86    ++dst;
87  }
88}
89
90static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
91                              uint8_t *dst, ptrdiff_t dst_stride,
92                              const InterpKernel *y_filters,
93                              int y0_q4, int y_step_q4, int w, int h) {
94  int x, y;
95  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
96
97  for (x = 0; x < w; ++x) {
98    int y_q4 = y0_q4;
99    for (y = 0; y < h; ++y) {
100      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
101      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
102      int k, sum = 0;
103      for (k = 0; k < SUBPEL_TAPS; ++k)
104        sum += src_y[k * src_stride] * y_filter[k];
105      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
106          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
107      y_q4 += y_step_q4;
108    }
109    ++src;
110    ++dst;
111  }
112}
113
114static void convolve(const uint8_t *src, ptrdiff_t src_stride,
115                     uint8_t *dst, ptrdiff_t dst_stride,
116                     const InterpKernel *const x_filters,
117                     int x0_q4, int x_step_q4,
118                     const InterpKernel *const y_filters,
119                     int y0_q4, int y_step_q4,
120                     int w, int h) {
121  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
122  // 2d filtering proceeds in 2 steps:
123  //   (1) Interpolate horizontally into an intermediate buffer, temp.
124  //   (2) Interpolate temp vertically to derive the sub-pixel result.
125  // Deriving the maximum number of rows in the temp buffer (135):
126  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
127  // --Largest block size is 64x64 pixels.
128  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
129  //   original frame (in 1/16th pixel units).
130  // --Must round-up because block may be located at sub-pixel position.
131  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
132  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
133  uint8_t temp[135 * 64];
134  int intermediate_height =
135          (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
136
137  assert(w <= 64);
138  assert(h <= 64);
139  assert(y_step_q4 <= 32);
140  assert(x_step_q4 <= 32);
141
142  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
143                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
144  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
145                y_filters, y0_q4, y_step_q4, w, h);
146}
147
148static const InterpKernel *get_filter_base(const int16_t *filter) {
149  // NOTE: This assumes that the filter table is 256-byte aligned.
150  // TODO(agrange) Modify to make independent of table alignment.
151  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
152}
153
154static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
155  return (int)((const InterpKernel *)(intptr_t)f - base);
156}
157
158void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
159                           uint8_t *dst, ptrdiff_t dst_stride,
160                           const int16_t *filter_x, int x_step_q4,
161                           const int16_t *filter_y, int y_step_q4,
162                           int w, int h) {
163  const InterpKernel *const filters_x = get_filter_base(filter_x);
164  const int x0_q4 = get_filter_offset(filter_x, filters_x);
165
166  (void)filter_y;
167  (void)y_step_q4;
168
169  convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
170                 x0_q4, x_step_q4, w, h);
171}
172
173void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
174                               uint8_t *dst, ptrdiff_t dst_stride,
175                               const int16_t *filter_x, int x_step_q4,
176                               const int16_t *filter_y, int y_step_q4,
177                               int w, int h) {
178  const InterpKernel *const filters_x = get_filter_base(filter_x);
179  const int x0_q4 = get_filter_offset(filter_x, filters_x);
180
181  (void)filter_y;
182  (void)y_step_q4;
183
184  convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
185                     x0_q4, x_step_q4, w, h);
186}
187
188void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
189                          uint8_t *dst, ptrdiff_t dst_stride,
190                          const int16_t *filter_x, int x_step_q4,
191                          const int16_t *filter_y, int y_step_q4,
192                          int w, int h) {
193  const InterpKernel *const filters_y = get_filter_base(filter_y);
194  const int y0_q4 = get_filter_offset(filter_y, filters_y);
195
196  (void)filter_x;
197  (void)x_step_q4;
198
199  convolve_vert(src, src_stride, dst, dst_stride, filters_y,
200                y0_q4, y_step_q4, w, h);
201}
202
203void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
204                              uint8_t *dst, ptrdiff_t dst_stride,
205                              const int16_t *filter_x, int x_step_q4,
206                              const int16_t *filter_y, int y_step_q4,
207                              int w, int h) {
208  const InterpKernel *const filters_y = get_filter_base(filter_y);
209  const int y0_q4 = get_filter_offset(filter_y, filters_y);
210
211  (void)filter_x;
212  (void)x_step_q4;
213
214  convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
215                    y0_q4, y_step_q4, w, h);
216}
217
218void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
219                     uint8_t *dst, ptrdiff_t dst_stride,
220                     const int16_t *filter_x, int x_step_q4,
221                     const int16_t *filter_y, int y_step_q4,
222                     int w, int h) {
223  const InterpKernel *const filters_x = get_filter_base(filter_x);
224  const int x0_q4 = get_filter_offset(filter_x, filters_x);
225
226  const InterpKernel *const filters_y = get_filter_base(filter_y);
227  const int y0_q4 = get_filter_offset(filter_y, filters_y);
228
229  convolve(src, src_stride, dst, dst_stride,
230           filters_x, x0_q4, x_step_q4,
231           filters_y, y0_q4, y_step_q4, w, h);
232}
233
234void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
235                         uint8_t *dst, ptrdiff_t dst_stride,
236                         const int16_t *filter_x, int x_step_q4,
237                         const int16_t *filter_y, int y_step_q4,
238                         int w, int h) {
239  /* Fixed size intermediate buffer places limits on parameters. */
240  DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
241  assert(w <= 64);
242  assert(h <= 64);
243
244  vpx_convolve8_c(src, src_stride, temp, 64,
245                  filter_x, x_step_q4, filter_y, y_step_q4, w, h);
246  vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
247}
248
// Copies a block of h rows, w bytes each, from src to dst. Successive rows are
// src_stride / dst_stride bytes apart. The filter arguments exist only to
// match the common convolve signature and are ignored.
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  const uint8_t *from = src;
  uint8_t *to = dst;
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row) {
    memcpy(to, from, w);
    from += src_stride;
    to += dst_stride;
  }
}
265
// Blends a w x h block of src into dst: every dst pixel becomes the rounded
// average (dst + src + 1) >> 1 of its old value and the matching src pixel.
// The filter arguments exist only to match the common convolve signature and
// are ignored.
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int row, col;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    for (col = 0; col < w; ++col) {
      const int pair_sum = dst[col] + src[col];
      dst[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
  }
}
284
// Scaled horizontal prediction: forwards all arguments unchanged to
// vpx_convolve8_horiz_c().
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int x_step_q4,
                        const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  vpx_convolve8_horiz_c(src, src_stride,
                        dst, dst_stride,
                        filter_x, x_step_q4,
                        filter_y, y_step_q4,
                        w, h);
}
293
// Scaled vertical prediction: forwards all arguments unchanged to
// vpx_convolve8_vert_c().
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  vpx_convolve8_vert_c(src, src_stride,
                       dst, dst_stride,
                       filter_x, x_step_q4,
                       filter_y, y_step_q4,
                       w, h);
}
302
// Scaled 2-D prediction: forwards all arguments unchanged to
// vpx_convolve8_c().
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  vpx_convolve8_c(src, src_stride,
                  dst, dst_stride,
                  filter_x, x_step_q4,
                  filter_y, y_step_q4,
                  w, h);
}
311
// Scaled, averaging horizontal prediction: forwards all arguments unchanged
// to vpx_convolve8_avg_horiz_c().
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
                            int w, int h) {
  vpx_convolve8_avg_horiz_c(src, src_stride,
                            dst, dst_stride,
                            filter_x, x_step_q4,
                            filter_y, y_step_q4,
                            w, h);
}
320
// Scaled, averaging vertical prediction: forwards all arguments unchanged to
// vpx_convolve8_avg_vert_c().
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  vpx_convolve8_avg_vert_c(src, src_stride,
                           dst, dst_stride,
                           filter_x, x_step_q4,
                           filter_y, y_step_q4,
                           w, h);
}
329
// Scaled, averaging 2-D prediction: forwards all arguments unchanged to
// vpx_convolve8_avg_c().
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  vpx_convolve8_avg_c(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
                      w, h);
}
338
339#if CONFIG_VP9_HIGHBITDEPTH
340static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
341                                  uint8_t *dst8, ptrdiff_t dst_stride,
342                                  const InterpKernel *x_filters,
343                                  int x0_q4, int x_step_q4,
344                                  int w, int h, int bd) {
345  int x, y;
346  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
347  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
348  src -= SUBPEL_TAPS / 2 - 1;
349  for (y = 0; y < h; ++y) {
350    int x_q4 = x0_q4;
351    for (x = 0; x < w; ++x) {
352      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
353      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
354      int k, sum = 0;
355      for (k = 0; k < SUBPEL_TAPS; ++k)
356        sum += src_x[k] * x_filter[k];
357      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
358      x_q4 += x_step_q4;
359    }
360    src += src_stride;
361    dst += dst_stride;
362  }
363}
364
365static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
366                                      uint8_t *dst8, ptrdiff_t dst_stride,
367                                      const InterpKernel *x_filters,
368                                      int x0_q4, int x_step_q4,
369                                      int w, int h, int bd) {
370  int x, y;
371  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
372  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
373  src -= SUBPEL_TAPS / 2 - 1;
374  for (y = 0; y < h; ++y) {
375    int x_q4 = x0_q4;
376    for (x = 0; x < w; ++x) {
377      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
378      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
379      int k, sum = 0;
380      for (k = 0; k < SUBPEL_TAPS; ++k)
381        sum += src_x[k] * x_filter[k];
382      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
383          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
384      x_q4 += x_step_q4;
385    }
386    src += src_stride;
387    dst += dst_stride;
388  }
389}
390
391static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
392                                 uint8_t *dst8, ptrdiff_t dst_stride,
393                                 const InterpKernel *y_filters,
394                                 int y0_q4, int y_step_q4, int w, int h,
395                                 int bd) {
396  int x, y;
397  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
398  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
399  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
400  for (x = 0; x < w; ++x) {
401    int y_q4 = y0_q4;
402    for (y = 0; y < h; ++y) {
403      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
404      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
405      int k, sum = 0;
406      for (k = 0; k < SUBPEL_TAPS; ++k)
407        sum += src_y[k * src_stride] * y_filter[k];
408      dst[y * dst_stride] = clip_pixel_highbd(
409          ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
410      y_q4 += y_step_q4;
411    }
412    ++src;
413    ++dst;
414  }
415}
416
417static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
418                                     uint8_t *dst8, ptrdiff_t dst_stride,
419                                     const InterpKernel *y_filters,
420                                     int y0_q4, int y_step_q4, int w, int h,
421                                     int bd) {
422  int x, y;
423  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
424  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
425  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
426  for (x = 0; x < w; ++x) {
427    int y_q4 = y0_q4;
428    for (y = 0; y < h; ++y) {
429      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
430      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
431      int k, sum = 0;
432      for (k = 0; k < SUBPEL_TAPS; ++k)
433        sum += src_y[k * src_stride] * y_filter[k];
434      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
435          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
436      y_q4 += y_step_q4;
437    }
438    ++src;
439    ++dst;
440  }
441}
442
443static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
444                            uint8_t *dst, ptrdiff_t dst_stride,
445                            const InterpKernel *const x_filters,
446                            int x0_q4, int x_step_q4,
447                            const InterpKernel *const y_filters,
448                            int y0_q4, int y_step_q4,
449                            int w, int h, int bd) {
450  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
451  // 2d filtering proceeds in 2 steps:
452  //   (1) Interpolate horizontally into an intermediate buffer, temp.
453  //   (2) Interpolate temp vertically to derive the sub-pixel result.
454  // Deriving the maximum number of rows in the temp buffer (135):
455  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
456  // --Largest block size is 64x64 pixels.
457  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
458  //   original frame (in 1/16th pixel units).
459  // --Must round-up because block may be located at sub-pixel position.
460  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
461  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
462  uint16_t temp[64 * 135];
463  int intermediate_height =
464          (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
465
466  assert(w <= 64);
467  assert(h <= 64);
468  assert(y_step_q4 <= 32);
469  assert(x_step_q4 <= 32);
470
471  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
472                        src_stride, CONVERT_TO_BYTEPTR(temp), 64,
473                        x_filters, x0_q4, x_step_q4, w,
474                        intermediate_height, bd);
475  highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
476                       64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
477                       w, h, bd);
478}
479
480
481void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
482                                  uint8_t *dst, ptrdiff_t dst_stride,
483                                  const int16_t *filter_x, int x_step_q4,
484                                  const int16_t *filter_y, int y_step_q4,
485                                  int w, int h, int bd) {
486  const InterpKernel *const filters_x = get_filter_base(filter_x);
487  const int x0_q4 = get_filter_offset(filter_x, filters_x);
488  (void)filter_y;
489  (void)y_step_q4;
490
491  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
492                        x0_q4, x_step_q4, w, h, bd);
493}
494
495void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
496                                      uint8_t *dst, ptrdiff_t dst_stride,
497                                      const int16_t *filter_x, int x_step_q4,
498                                      const int16_t *filter_y, int y_step_q4,
499                                      int w, int h, int bd) {
500  const InterpKernel *const filters_x = get_filter_base(filter_x);
501  const int x0_q4 = get_filter_offset(filter_x, filters_x);
502  (void)filter_y;
503  (void)y_step_q4;
504
505  highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
506                            x0_q4, x_step_q4, w, h, bd);
507}
508
509void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
510                                 uint8_t *dst, ptrdiff_t dst_stride,
511                                 const int16_t *filter_x, int x_step_q4,
512                                 const int16_t *filter_y, int y_step_q4,
513                                 int w, int h, int bd) {
514  const InterpKernel *const filters_y = get_filter_base(filter_y);
515  const int y0_q4 = get_filter_offset(filter_y, filters_y);
516  (void)filter_x;
517  (void)x_step_q4;
518
519  highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y,
520                       y0_q4, y_step_q4, w, h, bd);
521}
522
523void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
524                                     uint8_t *dst, ptrdiff_t dst_stride,
525                                     const int16_t *filter_x, int x_step_q4,
526                                     const int16_t *filter_y, int y_step_q4,
527                                     int w, int h, int bd) {
528  const InterpKernel *const filters_y = get_filter_base(filter_y);
529  const int y0_q4 = get_filter_offset(filter_y, filters_y);
530  (void)filter_x;
531  (void)x_step_q4;
532
533  highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
534                           y0_q4, y_step_q4, w, h, bd);
535}
536
537void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
538                            uint8_t *dst, ptrdiff_t dst_stride,
539                            const int16_t *filter_x, int x_step_q4,
540                            const int16_t *filter_y, int y_step_q4,
541                            int w, int h, int bd) {
542  const InterpKernel *const filters_x = get_filter_base(filter_x);
543  const int x0_q4 = get_filter_offset(filter_x, filters_x);
544
545  const InterpKernel *const filters_y = get_filter_base(filter_y);
546  const int y0_q4 = get_filter_offset(filter_y, filters_y);
547
548  highbd_convolve(src, src_stride, dst, dst_stride,
549                  filters_x, x0_q4, x_step_q4,
550                  filters_y, y0_q4, y_step_q4, w, h, bd);
551}
552
553void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
554                                uint8_t *dst, ptrdiff_t dst_stride,
555                                const int16_t *filter_x, int x_step_q4,
556                                const int16_t *filter_y, int y_step_q4,
557                                int w, int h, int bd) {
558  // Fixed size intermediate buffer places limits on parameters.
559  DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
560  assert(w <= 64);
561  assert(h <= 64);
562
563  vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
564                         filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
565  vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
566                            NULL, 0, NULL, 0, w, h, bd);
567}
568
// High-bit-depth block copy: copies h rows of w uint16_t samples from src8 to
// dst8. Both pointers are converted with CONVERT_TO_SHORTPTR and the strides
// are applied to the resulting uint16_t pointers, so they count samples. The
// filter arguments and bd are ignored.
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  const uint16_t *from = CONVERT_TO_SHORTPTR(src8);
  uint16_t *to = CONVERT_TO_SHORTPTR(dst8);
  int row;

  (void)filter_x;  (void)filter_x_stride;
  (void)filter_y;  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    memcpy(to, from, w * sizeof(*to));
    from += src_stride;
    to += dst_stride;
  }
}
589
// High-bit-depth block average: every destination sample becomes the rounded
// average (dst + src + 1) >> 1 of its old value and the matching source
// sample. Both pointers are converted with CONVERT_TO_SHORTPTR and the strides
// count uint16_t samples. The filter arguments and bd are ignored.
void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  const uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst_row = CONVERT_TO_SHORTPTR(dst8);
  int row, col;

  (void)filter_x;  (void)filter_x_stride;
  (void)filter_y;  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row, src_row += src_stride, dst_row += dst_stride) {
    for (col = 0; col < w; ++col) {
      const int pair_sum = dst_row[col] + src_row[col];
      dst_row[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
  }
}
612#endif
613