1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
#include <assert.h>
#include <string.h>
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
22
23namespace {
24
// Largest block edge, in pixels, accepted by the reference filters below; it
// also sizes their scratch buffers.
const unsigned int kMaxDimension = 64;
26
// Signature shared by every convolve routine exercised in this file: source
// pointer and stride, destination pointer and stride, the x and y filter
// kernels (each followed by its stride argument), then block width and height.
typedef void (*ConvolveFunc)(const uint8_t *src,
                             ptrdiff_t src_stride,
                             uint8_t *dst,
                             ptrdiff_t dst_stride,
                             const int16_t *filter_x,
                             int filter_x_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w,
                             int h);
32
33struct ConvolveFunctions {
34  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
35                    ConvolveFunc v8, ConvolveFunc v8_avg,
36                    ConvolveFunc hv8, ConvolveFunc hv8_avg,
37                    int bd)
38      : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
39        hv8_avg_(hv8_avg), use_high_bd_(bd) {}
40
41  ConvolveFunc h8_;
42  ConvolveFunc v8_;
43  ConvolveFunc hv8_;
44  ConvolveFunc h8_avg_;
45  ConvolveFunc v8_avg_;
46  ConvolveFunc hv8_avg_;
47  int use_high_bd_;  // 0 if high bitdepth not used, else the actual bit depth.
48};
49
50typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
51
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
//
// The reference filters add half of VP9_FILTER_WEIGHT to each sum of products
// for rounding and then shift the result right by VP9_FILTER_SHIFT bits.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
// Saturates x to the range of an 8-bit pixel, [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
60
61void filter_block2d_8_c(const uint8_t *src_ptr,
62                        const unsigned int src_stride,
63                        const int16_t *HFilter,
64                        const int16_t *VFilter,
65                        uint8_t *dst_ptr,
66                        unsigned int dst_stride,
67                        unsigned int output_width,
68                        unsigned int output_height) {
69  // Between passes, we use an intermediate buffer whose height is extended to
70  // have enough horizontally filtered values as input for the vertical pass.
71  // This buffer is allocated to be big enough for the largest block type we
72  // support.
73  const int kInterp_Extend = 4;
74  const unsigned int intermediate_height =
75      (kInterp_Extend - 1) + output_height + kInterp_Extend;
76  unsigned int i, j;
77
78  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
79  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
80  //                                 + kInterp_Extend
81  //                               = 3 + 16 + 4
82  //                               = 23
83  // and filter_max_width          = 16
84  //
85  uint8_t intermediate_buffer[71 * kMaxDimension];
86  const int intermediate_next_stride = 1 - intermediate_height * output_width;
87
88  // Horizontal pass (src -> transposed intermediate).
89  uint8_t *output_ptr = intermediate_buffer;
90  const int src_next_row_stride = src_stride - output_width;
91  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
92  for (i = 0; i < intermediate_height; ++i) {
93    for (j = 0; j < output_width; ++j) {
94      // Apply filter...
95      const int temp = (src_ptr[0] * HFilter[0]) +
96          (src_ptr[1] * HFilter[1]) +
97          (src_ptr[2] * HFilter[2]) +
98          (src_ptr[3] * HFilter[3]) +
99          (src_ptr[4] * HFilter[4]) +
100          (src_ptr[5] * HFilter[5]) +
101          (src_ptr[6] * HFilter[6]) +
102          (src_ptr[7] * HFilter[7]) +
103          (VP9_FILTER_WEIGHT >> 1);  // Rounding
104
105      // Normalize back to 0-255...
106      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
107      ++src_ptr;
108      output_ptr += intermediate_height;
109    }
110    src_ptr += src_next_row_stride;
111    output_ptr += intermediate_next_stride;
112  }
113
114  // Vertical pass (transposed intermediate -> dst).
115  src_ptr = intermediate_buffer;
116  const int dst_next_row_stride = dst_stride - output_width;
117  for (i = 0; i < output_height; ++i) {
118    for (j = 0; j < output_width; ++j) {
119      // Apply filter...
120      const int temp = (src_ptr[0] * VFilter[0]) +
121          (src_ptr[1] * VFilter[1]) +
122          (src_ptr[2] * VFilter[2]) +
123          (src_ptr[3] * VFilter[3]) +
124          (src_ptr[4] * VFilter[4]) +
125          (src_ptr[5] * VFilter[5]) +
126          (src_ptr[6] * VFilter[6]) +
127          (src_ptr[7] * VFilter[7]) +
128          (VP9_FILTER_WEIGHT >> 1);  // Rounding
129
130      // Normalize back to 0-255...
131      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
132      src_ptr += intermediate_height;
133    }
134    src_ptr += intermediate_next_stride;
135    dst_ptr += dst_next_row_stride;
136  }
137}
138
// Averages src into output_ptr in place: each output pixel becomes
// (output + src + 1) >> 1, i.e. the mean of the two with halves rounded up.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint8_t *const src_row = src + row * src_stride;
    uint8_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      out_row[col] = (out_row[col] + src_row[col] + 1) >> 1;
    }
  }
}
153
154void filter_average_block2d_8_c(const uint8_t *src_ptr,
155                                const unsigned int src_stride,
156                                const int16_t *HFilter,
157                                const int16_t *VFilter,
158                                uint8_t *dst_ptr,
159                                unsigned int dst_stride,
160                                unsigned int output_width,
161                                unsigned int output_height) {
162  uint8_t tmp[kMaxDimension * kMaxDimension];
163
164  assert(output_width <= kMaxDimension);
165  assert(output_height <= kMaxDimension);
166  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
167                     output_width, output_height);
168  block2d_average_c(tmp, 64, dst_ptr, dst_stride,
169                    output_width, output_height);
170}
171
172#if CONFIG_VP9_HIGHBITDEPTH
173void high_filter_block2d_8_c(const uint16_t *src_ptr,
174                             const unsigned int src_stride,
175                             const int16_t *HFilter,
176                             const int16_t *VFilter,
177                             uint16_t *dst_ptr,
178                             unsigned int dst_stride,
179                             unsigned int output_width,
180                             unsigned int output_height,
181                             int bd) {
182  // Between passes, we use an intermediate buffer whose height is extended to
183  // have enough horizontally filtered values as input for the vertical pass.
184  // This buffer is allocated to be big enough for the largest block type we
185  // support.
186  const int kInterp_Extend = 4;
187  const unsigned int intermediate_height =
188      (kInterp_Extend - 1) + output_height + kInterp_Extend;
189
190  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
191   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
192   *                                 + kInterp_Extend
193   *                               = 3 + 16 + 4
194   *                               = 23
195   * and filter_max_width = 16
196   */
197  uint16_t intermediate_buffer[71 * kMaxDimension];
198  const int intermediate_next_stride = 1 - intermediate_height * output_width;
199
200  // Horizontal pass (src -> transposed intermediate).
201  {
202    uint16_t *output_ptr = intermediate_buffer;
203    const int src_next_row_stride = src_stride - output_width;
204    unsigned int i, j;
205    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
206    for (i = 0; i < intermediate_height; ++i) {
207      for (j = 0; j < output_width; ++j) {
208        // Apply filter...
209        const int temp = (src_ptr[0] * HFilter[0]) +
210                         (src_ptr[1] * HFilter[1]) +
211                         (src_ptr[2] * HFilter[2]) +
212                         (src_ptr[3] * HFilter[3]) +
213                         (src_ptr[4] * HFilter[4]) +
214                         (src_ptr[5] * HFilter[5]) +
215                         (src_ptr[6] * HFilter[6]) +
216                         (src_ptr[7] * HFilter[7]) +
217                         (VP9_FILTER_WEIGHT >> 1);  // Rounding
218
219        // Normalize back to 0-255...
220        *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
221        ++src_ptr;
222        output_ptr += intermediate_height;
223      }
224      src_ptr += src_next_row_stride;
225      output_ptr += intermediate_next_stride;
226    }
227  }
228
229  // Vertical pass (transposed intermediate -> dst).
230  {
231    uint16_t *src_ptr = intermediate_buffer;
232    const int dst_next_row_stride = dst_stride - output_width;
233    unsigned int i, j;
234    for (i = 0; i < output_height; ++i) {
235      for (j = 0; j < output_width; ++j) {
236        // Apply filter...
237        const int temp = (src_ptr[0] * VFilter[0]) +
238                         (src_ptr[1] * VFilter[1]) +
239                         (src_ptr[2] * VFilter[2]) +
240                         (src_ptr[3] * VFilter[3]) +
241                         (src_ptr[4] * VFilter[4]) +
242                         (src_ptr[5] * VFilter[5]) +
243                         (src_ptr[6] * VFilter[6]) +
244                         (src_ptr[7] * VFilter[7]) +
245                         (VP9_FILTER_WEIGHT >> 1);  // Rounding
246
247        // Normalize back to 0-255...
248        *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
249        src_ptr += intermediate_height;
250      }
251      src_ptr += intermediate_next_stride;
252      dst_ptr += dst_next_row_stride;
253    }
254  }
255}
256
// 16-bit counterpart of block2d_average_c: each output sample becomes
// (output + src + 1) >> 1. The bd argument is accepted but not used here.
void high_block2d_average_c(uint16_t *src,
                            unsigned int src_stride,
                            uint16_t *output_ptr,
                            unsigned int output_stride,
                            unsigned int output_width,
                            unsigned int output_height,
                            int bd) {
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint16_t *const src_row = src + row * src_stride;
    uint16_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      out_row[col] = (out_row[col] + src_row[col] + 1) >> 1;
    }
  }
}
272
273void high_filter_average_block2d_8_c(const uint16_t *src_ptr,
274                                     const unsigned int src_stride,
275                                     const int16_t *HFilter,
276                                     const int16_t *VFilter,
277                                     uint16_t *dst_ptr,
278                                     unsigned int dst_stride,
279                                     unsigned int output_width,
280                                     unsigned int output_height,
281                                     int bd) {
282  uint16_t tmp[kMaxDimension * kMaxDimension];
283
284  assert(output_width <= kMaxDimension);
285  assert(output_height <= kMaxDimension);
286  high_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
287                          output_width, output_height, bd);
288  high_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
289                         output_width, output_height, bd);
290}
291#endif  // CONFIG_VP9_HIGHBITDEPTH
292
293class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
294 public:
295  static void SetUpTestCase() {
296    // Force input_ to be unaligned, output to be 16 byte aligned.
297    input_ = reinterpret_cast<uint8_t*>(
298        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
299    output_ = reinterpret_cast<uint8_t*>(
300        vpx_memalign(kDataAlignment, kOutputBufferSize));
301#if CONFIG_VP9_HIGHBITDEPTH
302    input16_ = reinterpret_cast<uint16_t*>(
303        vpx_memalign(kDataAlignment,
304                     (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
305    output16_ = reinterpret_cast<uint16_t*>(
306        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
307#endif
308  }
309
310  static void TearDownTestCase() {
311    vpx_free(input_ - 1);
312    input_ = NULL;
313    vpx_free(output_);
314    output_ = NULL;
315#if CONFIG_VP9_HIGHBITDEPTH
316    vpx_free(input16_ - 1);
317    input16_ = NULL;
318    vpx_free(output16_);
319    output16_ = NULL;
320#endif
321  }
322
323 protected:
324  static const int kDataAlignment = 16;
325  static const int kOuterBlockSize = 256;
326  static const int kInputStride = kOuterBlockSize;
327  static const int kOutputStride = kOuterBlockSize;
328  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
329  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
330
331  int Width() const { return GET_PARAM(0); }
332  int Height() const { return GET_PARAM(1); }
333  int BorderLeft() const {
334    const int center = (kOuterBlockSize - Width()) / 2;
335    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
336  }
337  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
338
339  bool IsIndexInBorder(int i) {
340    return (i < BorderTop() * kOuterBlockSize ||
341            i >= (BorderTop() + Height()) * kOuterBlockSize ||
342            i % kOuterBlockSize < BorderLeft() ||
343            i % kOuterBlockSize >= (BorderLeft() + Width()));
344  }
345
346  virtual void SetUp() {
347    UUT_ = GET_PARAM(2);
348#if CONFIG_VP9_HIGHBITDEPTH
349    if (UUT_->use_high_bd_ != 0)
350      mask_ = (1 << UUT_->use_high_bd_) - 1;
351    else
352      mask_ = 255;
353#endif
354    /* Set up guard blocks for an inner block centered in the outer block */
355    for (int i = 0; i < kOutputBufferSize; ++i) {
356      if (IsIndexInBorder(i))
357        output_[i] = 255;
358      else
359        output_[i] = 0;
360    }
361
362    ::libvpx_test::ACMRandom prng;
363    for (int i = 0; i < kInputBufferSize; ++i) {
364      if (i & 1) {
365        input_[i] = 255;
366#if CONFIG_VP9_HIGHBITDEPTH
367        input16_[i] = mask_;
368#endif
369      } else {
370        input_[i] = prng.Rand8Extremes();
371#if CONFIG_VP9_HIGHBITDEPTH
372        input16_[i] = prng.Rand16() & mask_;
373#endif
374      }
375    }
376  }
377
378  void SetConstantInput(int value) {
379    memset(input_, value, kInputBufferSize);
380#if CONFIG_VP9_HIGHBITDEPTH
381    vpx_memset16(input16_, value, kInputBufferSize);
382#endif
383  }
384
385  void CheckGuardBlocks() {
386    for (int i = 0; i < kOutputBufferSize; ++i) {
387      if (IsIndexInBorder(i))
388        EXPECT_EQ(255, output_[i]);
389    }
390  }
391
392  uint8_t *input() const {
393#if CONFIG_VP9_HIGHBITDEPTH
394    if (UUT_->use_high_bd_ == 0) {
395      return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
396    } else {
397      return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
398                                BorderLeft());
399    }
400#else
401    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
402#endif
403  }
404
405  uint8_t *output() const {
406#if CONFIG_VP9_HIGHBITDEPTH
407    if (UUT_->use_high_bd_ == 0) {
408      return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
409    } else {
410      return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
411                                BorderLeft());
412    }
413#else
414    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
415#endif
416  }
417
418  uint16_t lookup(uint8_t *list, int index) const {
419#if CONFIG_VP9_HIGHBITDEPTH
420    if (UUT_->use_high_bd_ == 0) {
421      return list[index];
422    } else {
423      return CONVERT_TO_SHORTPTR(list)[index];
424    }
425#else
426    return list[index];
427#endif
428  }
429
430  void assign_val(uint8_t *list, int index, uint16_t val) const {
431#if CONFIG_VP9_HIGHBITDEPTH
432    if (UUT_->use_high_bd_ == 0) {
433      list[index] = (uint8_t) val;
434    } else {
435      CONVERT_TO_SHORTPTR(list)[index] = val;
436    }
437#else
438    list[index] = (uint8_t) val;
439#endif
440  }
441
442  void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
443                                          const unsigned int src_stride,
444                                          const int16_t *HFilter,
445                                          const int16_t *VFilter,
446                                          uint8_t *dst_ptr,
447                                          unsigned int dst_stride,
448                                          unsigned int output_width,
449                                          unsigned int output_height) {
450#if CONFIG_VP9_HIGHBITDEPTH
451    if (UUT_->use_high_bd_ == 0) {
452      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
453                                 dst_ptr, dst_stride, output_width,
454                                 output_height);
455    } else {
456      high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
457                                      HFilter, VFilter,
458                                      CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
459                                      output_width, output_height,
460                                      UUT_->use_high_bd_);
461    }
462#else
463    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
464                               dst_ptr, dst_stride, output_width,
465                               output_height);
466#endif
467  }
468
469  void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
470                                  const unsigned int src_stride,
471                                  const int16_t *HFilter,
472                                  const int16_t *VFilter,
473                                  uint8_t *dst_ptr,
474                                  unsigned int dst_stride,
475                                  unsigned int output_width,
476                                  unsigned int output_height) {
477#if CONFIG_VP9_HIGHBITDEPTH
478    if (UUT_->use_high_bd_ == 0) {
479      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
480                         dst_ptr, dst_stride, output_width, output_height);
481    } else {
482      high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
483                              HFilter, VFilter,
484                              CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
485                              output_width, output_height, UUT_->use_high_bd_);
486    }
487#else
488    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
489                       dst_ptr, dst_stride, output_width, output_height);
490#endif
491  }
492
493  const ConvolveFunctions* UUT_;
494  static uint8_t* input_;
495  static uint8_t* output_;
496#if CONFIG_VP9_HIGHBITDEPTH
497  static uint16_t* input16_;
498  static uint16_t* output16_;
499  int mask_;
500#endif
501};
502
503uint8_t* ConvolveTest::input_ = NULL;
504uint8_t* ConvolveTest::output_ = NULL;
505#if CONFIG_VP9_HIGHBITDEPTH
506uint16_t* ConvolveTest::input16_ = NULL;
507uint16_t* ConvolveTest::output16_ = NULL;
508#endif
509
510TEST_P(ConvolveTest, GuardBlocks) {
511  CheckGuardBlocks();
512}
513
514TEST_P(ConvolveTest, CopyHoriz) {
515  uint8_t* const in = input();
516  uint8_t* const out = output();
517  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
518
519  ASM_REGISTER_STATE_CHECK(
520      UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
521                Width(), Height()));
522
523  CheckGuardBlocks();
524
525  for (int y = 0; y < Height(); ++y)
526    for (int x = 0; x < Width(); ++x)
527      ASSERT_EQ(lookup(out, y * kOutputStride + x),
528                lookup(in, y * kInputStride + x))
529          << "(" << x << "," << y << ")";
530}
531
532TEST_P(ConvolveTest, CopyVert) {
533  uint8_t* const in = input();
534  uint8_t* const out = output();
535  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
536
537  ASM_REGISTER_STATE_CHECK(
538      UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
539                Width(), Height()));
540
541  CheckGuardBlocks();
542
543  for (int y = 0; y < Height(); ++y)
544    for (int x = 0; x < Width(); ++x)
545      ASSERT_EQ(lookup(out, y * kOutputStride + x),
546                lookup(in, y * kInputStride + x))
547          << "(" << x << "," << y << ")";
548}
549
550TEST_P(ConvolveTest, Copy2D) {
551  uint8_t* const in = input();
552  uint8_t* const out = output();
553  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
554
555  ASM_REGISTER_STATE_CHECK(
556      UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
557                 Width(), Height()));
558
559  CheckGuardBlocks();
560
561  for (int y = 0; y < Height(); ++y)
562    for (int x = 0; x < Width(); ++x)
563      ASSERT_EQ(lookup(out, y * kOutputStride + x),
564                lookup(in, y * kInputStride + x))
565          << "(" << x << "," << y << ")";
566}
567
// Iteration bounds used by the tests below: the number of filter banks
// requested from vp9_get_interp_kernel() and the number of kernels read from
// each bank.
const int kNumFilterBanks = 4;
const int kNumFilters = 16;
570
571TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
572  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
573    const InterpKernel *filters =
574        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
575    for (int i = 0; i < kNumFilters; i++) {
576      const int p0 = filters[i][0] + filters[i][1];
577      const int p1 = filters[i][2] + filters[i][3];
578      const int p2 = filters[i][4] + filters[i][5];
579      const int p3 = filters[i][6] + filters[i][7];
580      EXPECT_LE(p0, 128);
581      EXPECT_LE(p1, 128);
582      EXPECT_LE(p2, 128);
583      EXPECT_LE(p3, 128);
584      EXPECT_LE(p0 + p3, 128);
585      EXPECT_LE(p0 + p3 + p1, 128);
586      EXPECT_LE(p0 + p3 + p1 + p2, 128);
587      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
588    }
589  }
590}
591
// All-zero kernel. The reference tests pass it as the x filter to v8_ and as
// the y filter to h8_, i.e. for the direction that call is not filtering in.
const int16_t kInvalidFilter[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
593
594TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
595  uint8_t* const in = input();
596  uint8_t* const out = output();
597#if CONFIG_VP9_HIGHBITDEPTH
598  uint8_t ref8[kOutputStride * kMaxDimension];
599  uint16_t ref16[kOutputStride * kMaxDimension];
600  uint8_t* ref;
601  if (UUT_->use_high_bd_ == 0) {
602    ref = ref8;
603  } else {
604    ref = CONVERT_TO_BYTEPTR(ref16);
605  }
606#else
607  uint8_t ref[kOutputStride * kMaxDimension];
608#endif
609
610  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
611    const InterpKernel *filters =
612        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
613    const InterpKernel *const eighttap_smooth =
614        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
615
616    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
617      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
618        wrapper_filter_block2d_8_c(in, kInputStride,
619                                   filters[filter_x], filters[filter_y],
620                                   ref, kOutputStride,
621                                   Width(), Height());
622
623        if (filters == eighttap_smooth || (filter_x && filter_y))
624          ASM_REGISTER_STATE_CHECK(
625              UUT_->hv8_(in, kInputStride, out, kOutputStride,
626                         filters[filter_x], 16, filters[filter_y], 16,
627                         Width(), Height()));
628        else if (filter_y)
629          ASM_REGISTER_STATE_CHECK(
630              UUT_->v8_(in, kInputStride, out, kOutputStride,
631                        kInvalidFilter, 16, filters[filter_y], 16,
632                        Width(), Height()));
633        else
634          ASM_REGISTER_STATE_CHECK(
635              UUT_->h8_(in, kInputStride, out, kOutputStride,
636                        filters[filter_x], 16, kInvalidFilter, 16,
637                        Width(), Height()));
638
639        CheckGuardBlocks();
640
641        for (int y = 0; y < Height(); ++y)
642          for (int x = 0; x < Width(); ++x)
643            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
644                      lookup(out, y * kOutputStride + x))
645                << "mismatch at (" << x << "," << y << "), "
646                << "filters (" << filter_bank << ","
647                << filter_x << "," << filter_y << ")";
648      }
649    }
650  }
651}
652
653TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
654  uint8_t* const in = input();
655  uint8_t* const out = output();
656#if CONFIG_VP9_HIGHBITDEPTH
657  uint8_t ref8[kOutputStride * kMaxDimension];
658  uint16_t ref16[kOutputStride * kMaxDimension];
659  uint8_t* ref;
660  if (UUT_->use_high_bd_ == 0) {
661    ref = ref8;
662  } else {
663    ref = CONVERT_TO_BYTEPTR(ref16);
664  }
665#else
666  uint8_t ref[kOutputStride * kMaxDimension];
667#endif
668
669  // Populate ref and out with some random data
670  ::libvpx_test::ACMRandom prng;
671  for (int y = 0; y < Height(); ++y) {
672    for (int x = 0; x < Width(); ++x) {
673      uint16_t r;
674#if CONFIG_VP9_HIGHBITDEPTH
675      if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
676        r = prng.Rand8Extremes();
677      } else {
678        r = prng.Rand16() & mask_;
679      }
680#else
681      r = prng.Rand8Extremes();
682#endif
683
684      assign_val(out, y * kOutputStride + x, r);
685      assign_val(ref, y * kOutputStride + x, r);
686    }
687  }
688
689  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
690    const InterpKernel *filters =
691        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
692    const InterpKernel *const eighttap_smooth =
693        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
694
695    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
696      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
697        wrapper_filter_average_block2d_8_c(in, kInputStride,
698                                           filters[filter_x], filters[filter_y],
699                                           ref, kOutputStride,
700                                           Width(), Height());
701
702        if (filters == eighttap_smooth || (filter_x && filter_y))
703          ASM_REGISTER_STATE_CHECK(
704              UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
705                             filters[filter_x], 16, filters[filter_y], 16,
706                             Width(), Height()));
707        else if (filter_y)
708          ASM_REGISTER_STATE_CHECK(
709              UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
710                            filters[filter_x], 16, filters[filter_y], 16,
711                            Width(), Height()));
712        else
713          ASM_REGISTER_STATE_CHECK(
714              UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
715                            filters[filter_x], 16, filters[filter_y], 16,
716                            Width(), Height()));
717
718        CheckGuardBlocks();
719
720        for (int y = 0; y < Height(); ++y)
721          for (int x = 0; x < Width(); ++x)
722            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
723                      lookup(out, y * kOutputStride + x))
724                << "mismatch at (" << x << "," << y << "), "
725                << "filters (" << filter_bank << ","
726                << filter_x << "," << filter_y << ")";
727      }
728    }
729  }
730}
731
732TEST_P(ConvolveTest, FilterExtremes) {
733  uint8_t *const in = input();
734  uint8_t *const out = output();
735#if CONFIG_VP9_HIGHBITDEPTH
736  uint8_t ref8[kOutputStride * kMaxDimension];
737  uint16_t ref16[kOutputStride * kMaxDimension];
738  uint8_t *ref;
739  if (UUT_->use_high_bd_ == 0) {
740    ref = ref8;
741  } else {
742    ref = CONVERT_TO_BYTEPTR(ref16);
743  }
744#else
745  uint8_t ref[kOutputStride * kMaxDimension];
746#endif
747
748  // Populate ref and out with some random data
749  ::libvpx_test::ACMRandom prng;
750  for (int y = 0; y < Height(); ++y) {
751    for (int x = 0; x < Width(); ++x) {
752      uint16_t r;
753#if CONFIG_VP9_HIGHBITDEPTH
754      if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
755        r = prng.Rand8Extremes();
756      } else {
757        r = prng.Rand16() & mask_;
758      }
759#else
760      r = prng.Rand8Extremes();
761#endif
762      assign_val(out, y * kOutputStride + x, r);
763      assign_val(ref, y * kOutputStride + x, r);
764    }
765  }
766
767  for (int axis = 0; axis < 2; axis++) {
768    int seed_val = 0;
769    while (seed_val < 256) {
770      for (int y = 0; y < 8; ++y) {
771        for (int x = 0; x < 8; ++x) {
772#if CONFIG_VP9_HIGHBITDEPTH
773            assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
774                       ((seed_val >> (axis ? y : x)) & 1) * mask_);
775#else
776            assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
777                       ((seed_val >> (axis ? y : x)) & 1) * 255);
778#endif
779          if (axis) seed_val++;
780        }
781        if (axis)
782          seed_val-= 8;
783        else
784          seed_val++;
785      }
786      if (axis) seed_val += 8;
787
788      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
789        const InterpKernel *filters =
790            vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
791        const InterpKernel *const eighttap_smooth =
792            vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
793        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
794          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
795            wrapper_filter_block2d_8_c(in, kInputStride,
796                                       filters[filter_x], filters[filter_y],
797                                       ref, kOutputStride,
798                                       Width(), Height());
799            if (filters == eighttap_smooth || (filter_x && filter_y))
800              ASM_REGISTER_STATE_CHECK(
801                  UUT_->hv8_(in, kInputStride, out, kOutputStride,
802                             filters[filter_x], 16, filters[filter_y], 16,
803                             Width(), Height()));
804            else if (filter_y)
805              ASM_REGISTER_STATE_CHECK(
806                  UUT_->v8_(in, kInputStride, out, kOutputStride,
807                            kInvalidFilter, 16, filters[filter_y], 16,
808                            Width(), Height()));
809            else
810              ASM_REGISTER_STATE_CHECK(
811                  UUT_->h8_(in, kInputStride, out, kOutputStride,
812                            filters[filter_x], 16, kInvalidFilter, 16,
813                            Width(), Height()));
814
815            for (int y = 0; y < Height(); ++y)
816              for (int x = 0; x < Width(); ++x)
817                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
818                          lookup(out, y * kOutputStride + x))
819                    << "mismatch at (" << x << "," << y << "), "
820                    << "filters (" << filter_bank << ","
821                    << filter_x << "," << filter_y << ")";
822          }
823        }
824      }
825    }
826  }
827}
828
// Table of sixteen 8-tap kernels used by ChangeFilterWorks. Every kernel has
// exactly one non-zero tap, with value 128; taps omitted from the end of an
// initializer list are zero-initialized. In rows 0-7 the non-zero tap moves
// from the last tap (index 7) down to the first tap (index 0), and rows 8-15
// repeat that same sequence. DECLARE_ALIGNED is asked for 256-byte alignment.
DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
    { 0,   0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0, 128},
    { 0,   0,   0, 128},
    { 0,   0, 128},
    { 0, 128},
    { 128},
    { 0,   0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0, 128},
    { 0,   0,   0, 128},
    { 0,   0, 128},
    { 0, 128},
    { 128}
};
847
848/* This test exercises the horizontal and vertical filter functions. */
849TEST_P(ConvolveTest, ChangeFilterWorks) {
850  uint8_t* const in = input();
851  uint8_t* const out = output();
852
853  /* Assume that the first input sample is at the 8/16th position. */
854  const int kInitialSubPelOffset = 8;
855
856  /* Filters are 8-tap, so the first filter tap will be applied to the pixel
857   * at position -3 with respect to the current filtering position. Since
858   * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
859   * which is non-zero only in the last tap. So, applying the filter at the
860   * current input position will result in an output equal to the pixel at
861   * offset +4 (-3 + 7) with respect to the current filtering position.
862   */
863  const int kPixelSelected = 4;
864
865  /* Assume that each output pixel requires us to step on by 17/16th pixels in
866   * the input.
867   */
868  const int kInputPixelStep = 17;
869
870  /* The filters are setup in such a way that the expected output produces
871   * sets of 8 identical output samples. As the filter position moves to the
872   * next 1/16th pixel position the only active (=128) filter tap moves one
873   * position to the left, resulting in the same input pixel being replicated
874   * in to the output for 8 consecutive samples. After each set of 8 positions
875   * the filters select a different input pixel. kFilterPeriodAdjust below
876   * computes which input pixel is written to the output for a specified
877   * x or y position.
878   */
879
880  /* Test the horizontal filter. */
881  ASM_REGISTER_STATE_CHECK(
882      UUT_->h8_(in, kInputStride, out, kOutputStride,
883                kChangeFilters[kInitialSubPelOffset],
884                kInputPixelStep, NULL, 0, Width(), Height()));
885
886  for (int x = 0; x < Width(); ++x) {
887    const int kFilterPeriodAdjust = (x >> 3) << 3;
888    const int ref_x =
889        kPixelSelected + ((kInitialSubPelOffset
890            + kFilterPeriodAdjust * kInputPixelStep)
891                          >> SUBPEL_BITS);
892    ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
893        << "x == " << x << "width = " << Width();
894  }
895
896  /* Test the vertical filter. */
897  ASM_REGISTER_STATE_CHECK(
898      UUT_->v8_(in, kInputStride, out, kOutputStride,
899                NULL, 0, kChangeFilters[kInitialSubPelOffset],
900                kInputPixelStep, Width(), Height()));
901
902  for (int y = 0; y < Height(); ++y) {
903    const int kFilterPeriodAdjust = (y >> 3) << 3;
904    const int ref_y =
905        kPixelSelected + ((kInitialSubPelOffset
906            + kFilterPeriodAdjust * kInputPixelStep)
907                          >> SUBPEL_BITS);
908    ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
909        << "y == " << y;
910  }
911
912  /* Test the horizontal and vertical filters in combination. */
913  ASM_REGISTER_STATE_CHECK(
914      UUT_->hv8_(in, kInputStride, out, kOutputStride,
915                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
916                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
917                 Width(), Height()));
918
919  for (int y = 0; y < Height(); ++y) {
920    const int kFilterPeriodAdjustY = (y >> 3) << 3;
921    const int ref_y =
922        kPixelSelected + ((kInitialSubPelOffset
923            + kFilterPeriodAdjustY * kInputPixelStep)
924                          >> SUBPEL_BITS);
925    for (int x = 0; x < Width(); ++x) {
926      const int kFilterPeriodAdjustX = (x >> 3) << 3;
927      const int ref_x =
928          kPixelSelected + ((kInitialSubPelOffset
929              + kFilterPeriodAdjustX * kInputPixelStep)
930                            >> SUBPEL_BITS);
931
932      ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
933                lookup(out, y * kOutputStride + x))
934          << "x == " << x << ", y == " << y;
935    }
936  }
937}
938
939/* This test exercises that enough rows and columns are filtered with every
940   possible initial fractional positions and scaling steps. */
941TEST_P(ConvolveTest, CheckScalingFiltering) {
942  uint8_t* const in = input();
943  uint8_t* const out = output();
944  const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
945
946  SetConstantInput(127);
947
948  for (int frac = 0; frac < 16; ++frac) {
949    for (int step = 1; step <= 32; ++step) {
950      /* Test the horizontal and vertical filters in combination. */
951      ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
952                                          eighttap[frac], step,
953                                          eighttap[frac], step,
954                                          Width(), Height()));
955
956      CheckGuardBlocks();
957
958      for (int y = 0; y < Height(); ++y) {
959        for (int x = 0; x < Width(); ++x) {
960          ASSERT_EQ(lookup(in, y * kInputStride + x),
961                    lookup(out, y * kOutputStride + x))
962              << "x == " << x << ", y == " << y
963              << ", frac == " << frac << ", step == " << step;
964        }
965      }
966    }
967  }
968}
969
970using std::tr1::make_tuple;
971
972#if CONFIG_VP9_HIGHBITDEPTH
973#if HAVE_SSE2 && ARCH_X86_64
974void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
975                                 uint8_t *dst, ptrdiff_t dst_stride,
976                                 const int16_t *filter_x,
977                                 int filter_x_stride,
978                                 const int16_t *filter_y,
979                                 int filter_y_stride,
980                                 int w, int h) {
981  vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
982                                filter_x_stride, filter_y, filter_y_stride,
983                                w, h, 8);
984}
985
986void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
987                                     uint8_t *dst, ptrdiff_t dst_stride,
988                                     const int16_t *filter_x,
989                                     int filter_x_stride,
990                                     const int16_t *filter_y,
991                                     int filter_y_stride,
992                                     int w, int h) {
993  vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
994    filter_x_stride, filter_y, filter_y_stride, w, h, 8);
995}
996
997void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
998                                uint8_t *dst, ptrdiff_t dst_stride,
999                                const int16_t *filter_x,
1000                                int filter_x_stride,
1001                                const int16_t *filter_y,
1002                                int filter_y_stride,
1003                                int w, int h) {
1004  vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1005    filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1006}
1007
1008void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1009                                    uint8_t *dst, ptrdiff_t dst_stride,
1010                                    const int16_t *filter_x,
1011                                    int filter_x_stride,
1012                                    const int16_t *filter_y,
1013                                    int filter_y_stride,
1014                                    int w, int h) {
1015  vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1016                                   filter_x_stride, filter_y, filter_y_stride,
1017                                   w, h, 8);
1018}
1019
1020void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1021                           uint8_t *dst, ptrdiff_t dst_stride,
1022                           const int16_t *filter_x,
1023                           int filter_x_stride,
1024                           const int16_t *filter_y,
1025                           int filter_y_stride,
1026                           int w, int h) {
1027  vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1028    filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1029}
1030
1031void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1032                               uint8_t *dst, ptrdiff_t dst_stride,
1033                               const int16_t *filter_x,
1034                               int filter_x_stride,
1035                               const int16_t *filter_y,
1036                               int filter_y_stride,
1037                               int w, int h) {
1038  vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1039    filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1040}
1041
1042void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1043                                  uint8_t *dst, ptrdiff_t dst_stride,
1044                                  const int16_t *filter_x,
1045                                  int filter_x_stride,
1046                                  const int16_t *filter_y,
1047                                  int filter_y_stride,
1048                                  int w, int h) {
1049  vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1050    filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1051}
1052
1053void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1054                                      uint8_t *dst, ptrdiff_t dst_stride,
1055                                      const int16_t *filter_x,
1056                                      int filter_x_stride,
1057                                      const int16_t *filter_y,
1058                                      int filter_y_stride,
1059                                      int w, int h) {
1060  vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1061    filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1062}
1063
1064void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1065                                 uint8_t *dst, ptrdiff_t dst_stride,
1066                                 const int16_t *filter_x,
1067                                 int filter_x_stride,
1068                                 const int16_t *filter_y,
1069                                 int filter_y_stride,
1070                                 int w, int h) {
1071  vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1072    filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1073}
1074
1075void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1076                                     uint8_t *dst, ptrdiff_t dst_stride,
1077                                     const int16_t *filter_x,
1078                                     int filter_x_stride,
1079                                     const int16_t *filter_y,
1080                                     int filter_y_stride,
1081                                     int w, int h) {
1082  vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1083    filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1084}
1085
1086void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1087                            uint8_t *dst, ptrdiff_t dst_stride,
1088                            const int16_t *filter_x,
1089                            int filter_x_stride,
1090                            const int16_t *filter_y,
1091                            int filter_y_stride,
1092                            int w, int h) {
1093  vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1094    filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1095}
1096
1097void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1098                                uint8_t *dst, ptrdiff_t dst_stride,
1099                                const int16_t *filter_x,
1100                                int filter_x_stride,
1101                                const int16_t *filter_y,
1102                                int filter_y_stride,
1103                                int w, int h) {
1104  vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1105                              filter_x_stride, filter_y, filter_y_stride,
1106                              w, h, 10);
1107}
1108
1109void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1110                                  uint8_t *dst, ptrdiff_t dst_stride,
1111                                  const int16_t *filter_x,
1112                                  int filter_x_stride,
1113                                  const int16_t *filter_y,
1114                                  int filter_y_stride,
1115                                  int w, int h) {
1116  vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1117                                filter_x_stride, filter_y, filter_y_stride,
1118                                w, h, 12);
1119}
1120
1121void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1122                                      uint8_t *dst, ptrdiff_t dst_stride,
1123                                      const int16_t *filter_x,
1124                                      int filter_x_stride,
1125                                      const int16_t *filter_y,
1126                                      int filter_y_stride,
1127                                      int w, int h) {
1128  vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1129                                    filter_x_stride, filter_y, filter_y_stride,
1130                                    w, h, 12);
1131}
1132
1133void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1134                                 uint8_t *dst, ptrdiff_t dst_stride,
1135                                 const int16_t *filter_x,
1136                                 int filter_x_stride,
1137                                 const int16_t *filter_y,
1138                                 int filter_y_stride,
1139                                 int w, int h) {
1140  vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1141                               filter_x_stride, filter_y, filter_y_stride,
1142                               w, h, 12);
1143}
1144
1145void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1146                                     uint8_t *dst, ptrdiff_t dst_stride,
1147                                     const int16_t *filter_x,
1148                                     int filter_x_stride,
1149                                     const int16_t *filter_y,
1150                                     int filter_y_stride,
1151                                     int w, int h) {
1152  vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1153                                   filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1154}
1155
1156void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1157                            uint8_t *dst, ptrdiff_t dst_stride,
1158                            const int16_t *filter_x,
1159                            int filter_x_stride,
1160                            const int16_t *filter_y,
1161                            int filter_y_stride,
1162                            int w, int h) {
1163  vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1164    filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1165}
1166
1167void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1168                                uint8_t *dst, ptrdiff_t dst_stride,
1169                                const int16_t *filter_x,
1170                                int filter_x_stride,
1171                                const int16_t *filter_y,
1172                                int filter_y_stride,
1173                                int w, int h) {
1174  vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1175                              filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1176}
1177#endif  // HAVE_SSE2 && ARCH_X86_64
1178
1179void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1180                              uint8_t *dst, ptrdiff_t dst_stride,
1181                              const int16_t *filter_x,
1182                              int filter_x_stride,
1183                              const int16_t *filter_y,
1184                              int filter_y_stride,
1185                              int w, int h) {
1186  vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1187                             filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1188}
1189
1190void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1191                                  uint8_t *dst, ptrdiff_t dst_stride,
1192                                  const int16_t *filter_x,
1193                                  int filter_x_stride,
1194                                  const int16_t *filter_y,
1195                                  int filter_y_stride,
1196                                  int w, int h) {
1197  vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1198                                 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1199}
1200
1201void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1202                             uint8_t *dst, ptrdiff_t dst_stride,
1203                             const int16_t *filter_x,
1204                             int filter_x_stride,
1205                             const int16_t *filter_y,
1206                             int filter_y_stride,
1207                             int w, int h) {
1208  vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1209                            filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1210}
1211
1212void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1213                                 uint8_t *dst, ptrdiff_t dst_stride,
1214                                 const int16_t *filter_x,
1215                                 int filter_x_stride,
1216                                 const int16_t *filter_y,
1217                                 int filter_y_stride,
1218                                 int w, int h) {
1219  vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1220                                filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1221}
1222
1223void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
1224                        uint8_t *dst, ptrdiff_t dst_stride,
1225                        const int16_t *filter_x,
1226                        int filter_x_stride,
1227                        const int16_t *filter_y,
1228                        int filter_y_stride,
1229                        int w, int h) {
1230  vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1231                       filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1232}
1233
1234void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1235                            uint8_t *dst, ptrdiff_t dst_stride,
1236                            const int16_t *filter_x,
1237                            int filter_x_stride,
1238                            const int16_t *filter_y,
1239                            int filter_y_stride,
1240                            int w, int h) {
1241  vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1242                           filter_x_stride, filter_y, filter_y_stride,
1243                           w, h, 8);
1244}
1245
1246void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1247                               uint8_t *dst, ptrdiff_t dst_stride,
1248                               const int16_t *filter_x,
1249                               int filter_x_stride,
1250                               const int16_t *filter_y,
1251                               int filter_y_stride,
1252                               int w, int h) {
1253  vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1254                             filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1255}
1256
1257void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1258                                   uint8_t *dst, ptrdiff_t dst_stride,
1259                                   const int16_t *filter_x,
1260                                   int filter_x_stride,
1261                                   const int16_t *filter_y,
1262                                   int filter_y_stride,
1263                                   int w, int h) {
1264  vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1265                                 filter_x_stride, filter_y, filter_y_stride,
1266                                 w, h, 10);
1267}
1268
1269void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1270                              uint8_t *dst, ptrdiff_t dst_stride,
1271                              const int16_t *filter_x,
1272                              int filter_x_stride,
1273                              const int16_t *filter_y,
1274                              int filter_y_stride,
1275                              int w, int h) {
1276  vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1277                            filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1278}
1279
1280void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1281                                  uint8_t *dst, ptrdiff_t dst_stride,
1282                                  const int16_t *filter_x,
1283                                  int filter_x_stride,
1284                                  const int16_t *filter_y,
1285                                  int filter_y_stride,
1286                                  int w, int h) {
1287  vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1288                                filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1289}
1290
1291void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
1292                         uint8_t *dst, ptrdiff_t dst_stride,
1293                         const int16_t *filter_x,
1294                         int filter_x_stride,
1295                         const int16_t *filter_y,
1296                         int filter_y_stride,
1297                         int w, int h) {
1298  vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1299    filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1300}
1301
1302void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1303                             uint8_t *dst, ptrdiff_t dst_stride,
1304                             const int16_t *filter_x,
1305                             int filter_x_stride,
1306                             const int16_t *filter_y,
1307                             int filter_y_stride,
1308                             int w, int h) {
1309  vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1310                           filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1311}
1312
1313void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1314                               uint8_t *dst, ptrdiff_t dst_stride,
1315                               const int16_t *filter_x,
1316                               int filter_x_stride,
1317                               const int16_t *filter_y,
1318                               int filter_y_stride,
1319                               int w, int h) {
1320  vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1321                             filter_x_stride, filter_y, filter_y_stride,
1322                             w, h, 12);
1323}
1324
1325void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1326                                   uint8_t *dst, ptrdiff_t dst_stride,
1327                                   const int16_t *filter_x,
1328                                   int filter_x_stride,
1329                                   const int16_t *filter_y,
1330                                   int filter_y_stride,
1331                                   int w, int h) {
1332  vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1333                                 filter_x_stride, filter_y, filter_y_stride,
1334                                 w, h, 12);
1335}
1336
1337void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1338                              uint8_t *dst, ptrdiff_t dst_stride,
1339                              const int16_t *filter_x,
1340                              int filter_x_stride,
1341                              const int16_t *filter_y,
1342                              int filter_y_stride,
1343                              int w, int h) {
1344  vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1345                            filter_x_stride, filter_y, filter_y_stride,
1346                            w, h, 12);
1347}
1348
1349void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1350                                  uint8_t *dst, ptrdiff_t dst_stride,
1351                                  const int16_t *filter_x,
1352                                  int filter_x_stride,
1353                                  const int16_t *filter_y,
1354                                  int filter_y_stride,
1355                                  int w, int h) {
1356  vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1357                                filter_x_stride, filter_y, filter_y_stride,
1358                                w, h, 12);
1359}
1360
1361void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
1362                         uint8_t *dst, ptrdiff_t dst_stride,
1363                         const int16_t *filter_x,
1364                         int filter_x_stride,
1365                         const int16_t *filter_y,
1366                         int filter_y_stride,
1367                         int w, int h) {
1368  vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1369                       filter_x_stride, filter_y, filter_y_stride,
1370                       w, h, 12);
1371}
1372
1373void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1374                             uint8_t *dst, ptrdiff_t dst_stride,
1375                             const int16_t *filter_x,
1376                             int filter_x_stride,
1377                             const int16_t *filter_y,
1378                             int filter_y_stride,
1379                             int w, int h) {
1380  vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1381                           filter_x_stride, filter_y, filter_y_stride,
1382                           w, h, 12);
1383}
1384
// Function table for the C high-bitdepth wrappers that pass a bit depth of 8.
// Arguments follow the ConvolveFunctions constructor order: horizontal,
// averaging horizontal, vertical, averaging vertical, 2-D, averaging 2-D, and
// finally the bit depth stored in use_high_bd_.
const ConvolveFunctions convolve8_c(
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
// Instantiates ConvolveTest under the prefix C_8, once per tuple below. Each
// tuple is a ConvolveParam: two ints (presumably block width and height --
// confirm against the fixture's accessors) and the function table to test,
// here always convolve8_c.
INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
    make_tuple(8, 4, &convolve8_c),
    make_tuple(4, 8, &convolve8_c),
    make_tuple(8, 8, &convolve8_c),
    make_tuple(16, 8, &convolve8_c),
    make_tuple(8, 16, &convolve8_c),
    make_tuple(16, 16, &convolve8_c),
    make_tuple(32, 16, &convolve8_c),
    make_tuple(16, 32, &convolve8_c),
    make_tuple(32, 32, &convolve8_c),
    make_tuple(64, 32, &convolve8_c),
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
// Function table for the C high-bitdepth wrappers that pass a bit depth of 10.
// Arguments follow the ConvolveFunctions constructor order: horizontal,
// averaging horizontal, vertical, averaging vertical, 2-D, averaging 2-D, and
// finally the bit depth stored in use_high_bd_.
const ConvolveFunctions convolve10_c(
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
// Instantiates ConvolveTest under the prefix C_10, once per tuple below. Each
// tuple is a ConvolveParam: two ints (presumably block width and height --
// confirm against the fixture's accessors) and the function table to test,
// here always convolve10_c.
INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve10_c),
    make_tuple(8, 4, &convolve10_c),
    make_tuple(4, 8, &convolve10_c),
    make_tuple(8, 8, &convolve10_c),
    make_tuple(16, 8, &convolve10_c),
    make_tuple(8, 16, &convolve10_c),
    make_tuple(16, 16, &convolve10_c),
    make_tuple(32, 16, &convolve10_c),
    make_tuple(16, 32, &convolve10_c),
    make_tuple(32, 32, &convolve10_c),
    make_tuple(64, 32, &convolve10_c),
    make_tuple(32, 64, &convolve10_c),
    make_tuple(64, 64, &convolve10_c)));
// Function table for the C high-bitdepth wrappers that pass a bit depth of 12.
// Arguments follow the ConvolveFunctions constructor order: horizontal,
// averaging horizontal, vertical, averaging vertical, 2-D, averaging 2-D, and
// finally the bit depth stored in use_high_bd_.
const ConvolveFunctions convolve12_c(
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
// Instantiates ConvolveTest under the prefix C_12, once per tuple below. Each
// tuple is a ConvolveParam: two ints (presumably block width and height --
// confirm against the fixture's accessors) and the function table to test,
// here always convolve12_c.
INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve12_c),
    make_tuple(8, 4, &convolve12_c),
    make_tuple(4, 8, &convolve12_c),
    make_tuple(8, 8, &convolve12_c),
    make_tuple(16, 8, &convolve12_c),
    make_tuple(8, 16, &convolve12_c),
    make_tuple(16, 16, &convolve12_c),
    make_tuple(32, 16, &convolve12_c),
    make_tuple(16, 32, &convolve12_c),
    make_tuple(32, 32, &convolve12_c),
    make_tuple(64, 32, &convolve12_c),
    make_tuple(32, 64, &convolve12_c),
    make_tuple(64, 64, &convolve12_c)));
1439
1440#else
1441
// Function table used when CONFIG_VP9_HIGHBITDEPTH is off: the plain C
// convolve functions are passed directly, without wrappers. The trailing 0 is
// stored in use_high_bd_, where 0 means high bitdepth is not used. Arguments
// follow the ConvolveFunctions constructor order: horizontal, averaging
// horizontal, vertical, averaging vertical, 2-D, averaging 2-D.
const ConvolveFunctions convolve8_c(
    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
1446
// Instantiates ConvolveTest under the prefix C, once per tuple below. Each
// tuple is a ConvolveParam: two ints (presumably block width and height --
// confirm against the fixture's accessors) and the function table to test,
// here always convolve8_c.
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
    make_tuple(8, 4, &convolve8_c),
    make_tuple(4, 8, &convolve8_c),
    make_tuple(8, 8, &convolve8_c),
    make_tuple(16, 8, &convolve8_c),
    make_tuple(8, 16, &convolve8_c),
    make_tuple(16, 16, &convolve8_c),
    make_tuple(32, 16, &convolve8_c),
    make_tuple(16, 32, &convolve8_c),
    make_tuple(32, 32, &convolve8_c),
    make_tuple(64, 32, &convolve8_c),
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
1461#endif
1462
1463#if HAVE_SSE2 && ARCH_X86_64
1464#if CONFIG_VP9_HIGHBITDEPTH
// Function table for the SSE2 high-bitdepth wrappers that pass a bit depth of
// 8. Arguments follow the ConvolveFunctions constructor order: horizontal,
// averaging horizontal, vertical, averaging vertical, 2-D, averaging 2-D, and
// finally the bit depth stored in use_high_bd_.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
1469INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
1470    make_tuple(4, 4, &convolve8_sse2),
1471    make_tuple(8, 4, &convolve8_sse2),
1472    make_tuple(4, 8, &convolve8_sse2),
1473    make_tuple(8, 8, &convolve8_sse2),
1474    make_tuple(16, 8, &convolve8_sse2),
1475    make_tuple(8, 16, &convolve8_sse2),
1476    make_tuple(16, 16, &convolve8_sse2),
1477    make_tuple(32, 16, &convolve8_sse2),
1478    make_tuple(16, 32, &convolve8_sse2),
1479    make_tuple(32, 32, &convolve8_sse2),
1480    make_tuple(64, 32, &convolve8_sse2),
1481    make_tuple(32, 64, &convolve8_sse2),
1482    make_tuple(64, 64, &convolve8_sse2)));
1483const ConvolveFunctions convolve10_sse2(
1484    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1485    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1486    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
1487INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
1488    make_tuple(4, 4, &convolve10_sse2),
1489    make_tuple(8, 4, &convolve10_sse2),
1490    make_tuple(4, 8, &convolve10_sse2),
1491    make_tuple(8, 8, &convolve10_sse2),
1492    make_tuple(16, 8, &convolve10_sse2),
1493    make_tuple(8, 16, &convolve10_sse2),
1494    make_tuple(16, 16, &convolve10_sse2),
1495    make_tuple(32, 16, &convolve10_sse2),
1496    make_tuple(16, 32, &convolve10_sse2),
1497    make_tuple(32, 32, &convolve10_sse2),
1498    make_tuple(64, 32, &convolve10_sse2),
1499    make_tuple(32, 64, &convolve10_sse2),
1500    make_tuple(64, 64, &convolve10_sse2)));
1501const ConvolveFunctions convolve12_sse2(
1502    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1503    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1504    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
1505INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
1506    make_tuple(4, 4, &convolve12_sse2),
1507    make_tuple(8, 4, &convolve12_sse2),
1508    make_tuple(4, 8, &convolve12_sse2),
1509    make_tuple(8, 8, &convolve12_sse2),
1510    make_tuple(16, 8, &convolve12_sse2),
1511    make_tuple(8, 16, &convolve12_sse2),
1512    make_tuple(16, 16, &convolve12_sse2),
1513    make_tuple(32, 16, &convolve12_sse2),
1514    make_tuple(16, 32, &convolve12_sse2),
1515    make_tuple(32, 32, &convolve12_sse2),
1516    make_tuple(64, 32, &convolve12_sse2),
1517    make_tuple(32, 64, &convolve12_sse2),
1518    make_tuple(64, 64, &convolve12_sse2)));
1519#else
1520const ConvolveFunctions convolve8_sse2(
1521    vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
1522    vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
1523    vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
1524
1525INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1526    make_tuple(4, 4, &convolve8_sse2),
1527    make_tuple(8, 4, &convolve8_sse2),
1528    make_tuple(4, 8, &convolve8_sse2),
1529    make_tuple(8, 8, &convolve8_sse2),
1530    make_tuple(16, 8, &convolve8_sse2),
1531    make_tuple(8, 16, &convolve8_sse2),
1532    make_tuple(16, 16, &convolve8_sse2),
1533    make_tuple(32, 16, &convolve8_sse2),
1534    make_tuple(16, 32, &convolve8_sse2),
1535    make_tuple(32, 32, &convolve8_sse2),
1536    make_tuple(64, 32, &convolve8_sse2),
1537    make_tuple(32, 64, &convolve8_sse2),
1538    make_tuple(64, 64, &convolve8_sse2)));
1539#endif  // CONFIG_VP9_HIGHBITDEPTH
1540#endif
1541
1542#if HAVE_SSSE3
1543const ConvolveFunctions convolve8_ssse3(
1544    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
1545    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
1546    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
1547
1548INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
1549    make_tuple(4, 4, &convolve8_ssse3),
1550    make_tuple(8, 4, &convolve8_ssse3),
1551    make_tuple(4, 8, &convolve8_ssse3),
1552    make_tuple(8, 8, &convolve8_ssse3),
1553    make_tuple(16, 8, &convolve8_ssse3),
1554    make_tuple(8, 16, &convolve8_ssse3),
1555    make_tuple(16, 16, &convolve8_ssse3),
1556    make_tuple(32, 16, &convolve8_ssse3),
1557    make_tuple(16, 32, &convolve8_ssse3),
1558    make_tuple(32, 32, &convolve8_ssse3),
1559    make_tuple(64, 32, &convolve8_ssse3),
1560    make_tuple(32, 64, &convolve8_ssse3),
1561    make_tuple(64, 64, &convolve8_ssse3)));
1562#endif
1563
1564#if HAVE_AVX2 && HAVE_SSSE3
1565const ConvolveFunctions convolve8_avx2(
1566    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
1567    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
1568    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
1569
1570INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
1571    make_tuple(4, 4, &convolve8_avx2),
1572    make_tuple(8, 4, &convolve8_avx2),
1573    make_tuple(4, 8, &convolve8_avx2),
1574    make_tuple(8, 8, &convolve8_avx2),
1575    make_tuple(8, 16, &convolve8_avx2),
1576    make_tuple(16, 8, &convolve8_avx2),
1577    make_tuple(16, 16, &convolve8_avx2),
1578    make_tuple(32, 16, &convolve8_avx2),
1579    make_tuple(16, 32, &convolve8_avx2),
1580    make_tuple(32, 32, &convolve8_avx2),
1581    make_tuple(64, 32, &convolve8_avx2),
1582    make_tuple(32, 64, &convolve8_avx2),
1583    make_tuple(64, 64, &convolve8_avx2)));
1584#endif  // HAVE_AVX2 && HAVE_SSSE3
1585
1586#if HAVE_NEON_ASM
1587const ConvolveFunctions convolve8_neon(
1588    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
1589    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
1590    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
1591
1592INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
1593    make_tuple(4, 4, &convolve8_neon),
1594    make_tuple(8, 4, &convolve8_neon),
1595    make_tuple(4, 8, &convolve8_neon),
1596    make_tuple(8, 8, &convolve8_neon),
1597    make_tuple(16, 8, &convolve8_neon),
1598    make_tuple(8, 16, &convolve8_neon),
1599    make_tuple(16, 16, &convolve8_neon),
1600    make_tuple(32, 16, &convolve8_neon),
1601    make_tuple(16, 32, &convolve8_neon),
1602    make_tuple(32, 32, &convolve8_neon),
1603    make_tuple(64, 32, &convolve8_neon),
1604    make_tuple(32, 64, &convolve8_neon),
1605    make_tuple(64, 64, &convolve8_neon)));
1606#endif
1607
1608#if HAVE_DSPR2
1609const ConvolveFunctions convolve8_dspr2(
1610    vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
1611    vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
1612    vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
1613
1614INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
1615    make_tuple(4, 4, &convolve8_dspr2),
1616    make_tuple(8, 4, &convolve8_dspr2),
1617    make_tuple(4, 8, &convolve8_dspr2),
1618    make_tuple(8, 8, &convolve8_dspr2),
1619    make_tuple(16, 8, &convolve8_dspr2),
1620    make_tuple(8, 16, &convolve8_dspr2),
1621    make_tuple(16, 16, &convolve8_dspr2),
1622    make_tuple(32, 16, &convolve8_dspr2),
1623    make_tuple(16, 32, &convolve8_dspr2),
1624    make_tuple(32, 32, &convolve8_dspr2),
1625    make_tuple(64, 32, &convolve8_dspr2),
1626    make_tuple(32, 64, &convolve8_dspr2),
1627    make_tuple(64, 64, &convolve8_dspr2)));
1628#endif
1629}  // namespace
1630