1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
#include <assert.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
28
29namespace {
30
// Largest block edge (in pixels) exercised by these tests; also sizes the
// reference-filter scratch buffers below.
static const unsigned int kMaxDimension = 64;

// Signature shared by all convolve/copy/avg predictors under test.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);

// Bundles one complete set of predictors (one per prediction mode) so a
// single test instantiation can exercise a whole implementation (C, SSE2,
// NEON, ...).
struct ConvolveFunctions {
  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
                    ConvolveFunc h8, ConvolveFunc h8_avg,
                    ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg,
                    ConvolveFunc sh8, ConvolveFunc sh8_avg,
                    ConvolveFunc sv8, ConvolveFunc sv8_avg,
                    ConvolveFunc shv8, ConvolveFunc shv8_avg,
                    int bd)
      : copy_(copy),
        avg_(avg),
        h8_(h8),
        v8_(v8),
        hv8_(hv8),
        h8_avg_(h8_avg),
        v8_avg_(v8_avg),
        hv8_avg_(hv8_avg),
        sh8_(sh8),
        sv8_(sv8),
        shv8_(shv8),
        sh8_avg_(sh8_avg),
        sv8_avg_(sv8_avg),
        shv8_avg_(shv8_avg),
        use_highbd_(bd) {}

  ConvolveFunc copy_;       // direct block copy
  ConvolveFunc avg_;        // copy averaged with destination
  ConvolveFunc h8_;         // horizontal 8-tap
  ConvolveFunc v8_;         // vertical 8-tap
  ConvolveFunc hv8_;        // horizontal + vertical 8-tap
  ConvolveFunc h8_avg_;     // averaging horizontal 8-tap
  ConvolveFunc v8_avg_;     // averaging vertical 8-tap
  ConvolveFunc hv8_avg_;    // averaging horizontal + vertical 8-tap
  ConvolveFunc sh8_;        // scaled horiz
  ConvolveFunc sv8_;        // scaled vert
  ConvolveFunc shv8_;       // scaled horiz/vert
  ConvolveFunc sh8_avg_;    // scaled avg horiz
  ConvolveFunc sv8_avg_;    // scaled avg vert
  ConvolveFunc shv8_avg_;   // scaled avg horiz/vert
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
};
69
70typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
71
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
// Clamp a filtered value back into the valid 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
80
81void filter_block2d_8_c(const uint8_t *src_ptr,
82                        const unsigned int src_stride,
83                        const int16_t *HFilter,
84                        const int16_t *VFilter,
85                        uint8_t *dst_ptr,
86                        unsigned int dst_stride,
87                        unsigned int output_width,
88                        unsigned int output_height) {
89  // Between passes, we use an intermediate buffer whose height is extended to
90  // have enough horizontally filtered values as input for the vertical pass.
91  // This buffer is allocated to be big enough for the largest block type we
92  // support.
93  const int kInterp_Extend = 4;
94  const unsigned int intermediate_height =
95      (kInterp_Extend - 1) + output_height + kInterp_Extend;
96  unsigned int i, j;
97
98  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
99  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
100  //                                 + kInterp_Extend
101  //                               = 3 + 16 + 4
102  //                               = 23
103  // and filter_max_width          = 16
104  //
105  uint8_t intermediate_buffer[71 * kMaxDimension];
106  const int intermediate_next_stride = 1 - intermediate_height * output_width;
107
108  // Horizontal pass (src -> transposed intermediate).
109  uint8_t *output_ptr = intermediate_buffer;
110  const int src_next_row_stride = src_stride - output_width;
111  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
112  for (i = 0; i < intermediate_height; ++i) {
113    for (j = 0; j < output_width; ++j) {
114      // Apply filter...
115      const int temp = (src_ptr[0] * HFilter[0]) +
116          (src_ptr[1] * HFilter[1]) +
117          (src_ptr[2] * HFilter[2]) +
118          (src_ptr[3] * HFilter[3]) +
119          (src_ptr[4] * HFilter[4]) +
120          (src_ptr[5] * HFilter[5]) +
121          (src_ptr[6] * HFilter[6]) +
122          (src_ptr[7] * HFilter[7]) +
123          (VP9_FILTER_WEIGHT >> 1);  // Rounding
124
125      // Normalize back to 0-255...
126      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
127      ++src_ptr;
128      output_ptr += intermediate_height;
129    }
130    src_ptr += src_next_row_stride;
131    output_ptr += intermediate_next_stride;
132  }
133
134  // Vertical pass (transposed intermediate -> dst).
135  src_ptr = intermediate_buffer;
136  const int dst_next_row_stride = dst_stride - output_width;
137  for (i = 0; i < output_height; ++i) {
138    for (j = 0; j < output_width; ++j) {
139      // Apply filter...
140      const int temp = (src_ptr[0] * VFilter[0]) +
141          (src_ptr[1] * VFilter[1]) +
142          (src_ptr[2] * VFilter[2]) +
143          (src_ptr[3] * VFilter[3]) +
144          (src_ptr[4] * VFilter[4]) +
145          (src_ptr[5] * VFilter[5]) +
146          (src_ptr[6] * VFilter[6]) +
147          (src_ptr[7] * VFilter[7]) +
148          (VP9_FILTER_WEIGHT >> 1);  // Rounding
149
150      // Normalize back to 0-255...
151      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
152      src_ptr += intermediate_height;
153    }
154    src_ptr += intermediate_next_stride;
155    dst_ptr += dst_next_row_stride;
156  }
157}
158
// Rounds (output + src + 1) >> 1 into output for every pixel of the block,
// i.e. averages the existing destination with the source, rounding up.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    uint8_t *const src_row = src + row * src_stride;
    uint8_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      out_row[col] = (out_row[col] + src_row[col] + 1) >> 1;
    }
  }
}
173
174void filter_average_block2d_8_c(const uint8_t *src_ptr,
175                                const unsigned int src_stride,
176                                const int16_t *HFilter,
177                                const int16_t *VFilter,
178                                uint8_t *dst_ptr,
179                                unsigned int dst_stride,
180                                unsigned int output_width,
181                                unsigned int output_height) {
182  uint8_t tmp[kMaxDimension * kMaxDimension];
183
184  assert(output_width <= kMaxDimension);
185  assert(output_height <= kMaxDimension);
186  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
187                     output_width, output_height);
188  block2d_average_c(tmp, 64, dst_ptr, dst_stride,
189                    output_width, output_height);
190}
191
192#if CONFIG_VP9_HIGHBITDEPTH
// High bitdepth reference two-pass 8-tap filter; mirrors filter_block2d_8_c()
// but operates on 16-bit pixels and clips to [0, (1 << bd) - 1].
void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
                               const int16_t *HFilter,
                               const int16_t *VFilter,
                               uint16_t *dst_ptr,
                               unsigned int dst_stride,
                               unsigned int output_width,
                               unsigned int output_height,
                               int bd) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;

  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                 + kInterp_Extend
   *                               = 3 + 64 + 4
   *                               = 71
   * and filter_max_width = kMaxDimension (64)
   */
  uint16_t intermediate_buffer[71 * kMaxDimension];
  const int intermediate_next_stride = 1 - intermediate_height * output_width;

  // Horizontal pass (src -> transposed intermediate).
  {
    uint16_t *output_ptr = intermediate_buffer;
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    // Rewind to the first pixel the 8-tap support region needs.
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * HFilter[0]) +
                         (src_ptr[1] * HFilter[1]) +
                         (src_ptr[2] * HFilter[2]) +
                         (src_ptr[3] * HFilter[3]) +
                         (src_ptr[4] * HFilter[4]) +
                         (src_ptr[5] * HFilter[5]) +
                         (src_ptr[6] * HFilter[6]) +
                         (src_ptr[7] * HFilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to [0, (1 << bd) - 1]...
        *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        ++src_ptr;
        // The intermediate buffer is written transposed: consecutive columns
        // are intermediate_height elements apart.
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
    // NOTE: this src_ptr intentionally shadows the function parameter; from
    // here on we read only from the intermediate buffer.
    uint16_t *src_ptr = intermediate_buffer;
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * VFilter[0]) +
                         (src_ptr[1] * VFilter[1]) +
                         (src_ptr[2] * VFilter[2]) +
                         (src_ptr[3] * VFilter[3]) +
                         (src_ptr[4] * VFilter[4]) +
                         (src_ptr[5] * VFilter[5]) +
                         (src_ptr[6] * VFilter[6]) +
                         (src_ptr[7] * VFilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to [0, (1 << bd) - 1]...
        *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        src_ptr += intermediate_height;
      }
      src_ptr += intermediate_next_stride;
      dst_ptr += dst_next_row_stride;
    }
  }
}
276
// 16-bit variant of block2d_average_c: averages each destination pixel with
// its source counterpart, rounding up. bd is unused here (averaging cannot
// leave the valid range) but kept for signature symmetry with the other
// highbd helpers.
void highbd_block2d_average_c(uint16_t *src,
                              unsigned int src_stride,
                              uint16_t *output_ptr,
                              unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height,
                              int bd) {
  for (unsigned int row = 0; row < output_height; ++row) {
    uint16_t *const src_row = src + row * src_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      output_ptr[col] = (output_ptr[col] + src_row[col] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
292
293void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
294                                       const unsigned int src_stride,
295                                       const int16_t *HFilter,
296                                       const int16_t *VFilter,
297                                       uint16_t *dst_ptr,
298                                       unsigned int dst_stride,
299                                       unsigned int output_width,
300                                       unsigned int output_height,
301                                       int bd) {
302  uint16_t tmp[kMaxDimension * kMaxDimension];
303
304  assert(output_width <= kMaxDimension);
305  assert(output_height <= kMaxDimension);
306  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
307                            output_width, output_height, bd);
308  highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
309                           output_width, output_height, bd);
310}
311#endif  // CONFIG_VP9_HIGHBITDEPTH
312
313class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
314 public:
315  static void SetUpTestCase() {
316    // Force input_ to be unaligned, output to be 16 byte aligned.
317    input_ = reinterpret_cast<uint8_t*>(
318        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
319    output_ = reinterpret_cast<uint8_t*>(
320        vpx_memalign(kDataAlignment, kOutputBufferSize));
321    output_ref_ = reinterpret_cast<uint8_t*>(
322        vpx_memalign(kDataAlignment, kOutputBufferSize));
323#if CONFIG_VP9_HIGHBITDEPTH
324    input16_ = reinterpret_cast<uint16_t*>(
325        vpx_memalign(kDataAlignment,
326                     (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
327    output16_ = reinterpret_cast<uint16_t*>(
328        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
329    output16_ref_ = reinterpret_cast<uint16_t*>(
330        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
331#endif
332  }
333
334  virtual void TearDown() { libvpx_test::ClearSystemState(); }
335
336  static void TearDownTestCase() {
337    vpx_free(input_ - 1);
338    input_ = NULL;
339    vpx_free(output_);
340    output_ = NULL;
341    vpx_free(output_ref_);
342    output_ref_ = NULL;
343#if CONFIG_VP9_HIGHBITDEPTH
344    vpx_free(input16_ - 1);
345    input16_ = NULL;
346    vpx_free(output16_);
347    output16_ = NULL;
348    vpx_free(output16_ref_);
349    output16_ref_ = NULL;
350#endif
351  }
352
353 protected:
354  static const int kDataAlignment = 16;
355  static const int kOuterBlockSize = 256;
356  static const int kInputStride = kOuterBlockSize;
357  static const int kOutputStride = kOuterBlockSize;
358  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
359  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
360
361  int Width() const { return GET_PARAM(0); }
362  int Height() const { return GET_PARAM(1); }
363  int BorderLeft() const {
364    const int center = (kOuterBlockSize - Width()) / 2;
365    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
366  }
367  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
368
369  bool IsIndexInBorder(int i) {
370    return (i < BorderTop() * kOuterBlockSize ||
371            i >= (BorderTop() + Height()) * kOuterBlockSize ||
372            i % kOuterBlockSize < BorderLeft() ||
373            i % kOuterBlockSize >= (BorderLeft() + Width()));
374  }
375
376  virtual void SetUp() {
377    UUT_ = GET_PARAM(2);
378#if CONFIG_VP9_HIGHBITDEPTH
379    if (UUT_->use_highbd_ != 0)
380      mask_ = (1 << UUT_->use_highbd_) - 1;
381    else
382      mask_ = 255;
383#endif
384    /* Set up guard blocks for an inner block centered in the outer block */
385    for (int i = 0; i < kOutputBufferSize; ++i) {
386      if (IsIndexInBorder(i))
387        output_[i] = 255;
388      else
389        output_[i] = 0;
390    }
391
392    ::libvpx_test::ACMRandom prng;
393    for (int i = 0; i < kInputBufferSize; ++i) {
394      if (i & 1) {
395        input_[i] = 255;
396#if CONFIG_VP9_HIGHBITDEPTH
397        input16_[i] = mask_;
398#endif
399      } else {
400        input_[i] = prng.Rand8Extremes();
401#if CONFIG_VP9_HIGHBITDEPTH
402        input16_[i] = prng.Rand16() & mask_;
403#endif
404      }
405    }
406  }
407
408  void SetConstantInput(int value) {
409    memset(input_, value, kInputBufferSize);
410#if CONFIG_VP9_HIGHBITDEPTH
411    vpx_memset16(input16_, value, kInputBufferSize);
412#endif
413  }
414
415  void CopyOutputToRef() {
416    memcpy(output_ref_, output_, kOutputBufferSize);
417#if CONFIG_VP9_HIGHBITDEPTH
418    memcpy(output16_ref_, output16_, kOutputBufferSize);
419#endif
420  }
421
422  void CheckGuardBlocks() {
423    for (int i = 0; i < kOutputBufferSize; ++i) {
424      if (IsIndexInBorder(i))
425        EXPECT_EQ(255, output_[i]);
426    }
427  }
428
429  uint8_t *input() const {
430#if CONFIG_VP9_HIGHBITDEPTH
431    if (UUT_->use_highbd_ == 0) {
432      return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
433    } else {
434      return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
435                                BorderLeft());
436    }
437#else
438    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
439#endif
440  }
441
442  uint8_t *output() const {
443#if CONFIG_VP9_HIGHBITDEPTH
444    if (UUT_->use_highbd_ == 0) {
445      return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
446    } else {
447      return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
448                                BorderLeft());
449    }
450#else
451    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
452#endif
453  }
454
455  uint8_t *output_ref() const {
456#if CONFIG_VP9_HIGHBITDEPTH
457    if (UUT_->use_highbd_ == 0) {
458      return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
459    } else {
460      return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
461                                BorderLeft());
462    }
463#else
464    return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
465#endif
466  }
467
468  uint16_t lookup(uint8_t *list, int index) const {
469#if CONFIG_VP9_HIGHBITDEPTH
470    if (UUT_->use_highbd_ == 0) {
471      return list[index];
472    } else {
473      return CONVERT_TO_SHORTPTR(list)[index];
474    }
475#else
476    return list[index];
477#endif
478  }
479
480  void assign_val(uint8_t *list, int index, uint16_t val) const {
481#if CONFIG_VP9_HIGHBITDEPTH
482    if (UUT_->use_highbd_ == 0) {
483      list[index] = (uint8_t) val;
484    } else {
485      CONVERT_TO_SHORTPTR(list)[index] = val;
486    }
487#else
488    list[index] = (uint8_t) val;
489#endif
490  }
491
492  void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
493                                          const unsigned int src_stride,
494                                          const int16_t *HFilter,
495                                          const int16_t *VFilter,
496                                          uint8_t *dst_ptr,
497                                          unsigned int dst_stride,
498                                          unsigned int output_width,
499                                          unsigned int output_height) {
500#if CONFIG_VP9_HIGHBITDEPTH
501    if (UUT_->use_highbd_ == 0) {
502      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
503                                 dst_ptr, dst_stride, output_width,
504                                 output_height);
505    } else {
506      highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr),
507                                        src_stride, HFilter, VFilter,
508                                        CONVERT_TO_SHORTPTR(dst_ptr),
509                                        dst_stride, output_width, output_height,
510                                        UUT_->use_highbd_);
511    }
512#else
513    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
514                               dst_ptr, dst_stride, output_width,
515                               output_height);
516#endif
517  }
518
519  void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
520                                  const unsigned int src_stride,
521                                  const int16_t *HFilter,
522                                  const int16_t *VFilter,
523                                  uint8_t *dst_ptr,
524                                  unsigned int dst_stride,
525                                  unsigned int output_width,
526                                  unsigned int output_height) {
527#if CONFIG_VP9_HIGHBITDEPTH
528    if (UUT_->use_highbd_ == 0) {
529      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
530                         dst_ptr, dst_stride, output_width, output_height);
531    } else {
532      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
533                                HFilter, VFilter,
534                                CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
535                                output_width, output_height, UUT_->use_highbd_);
536    }
537#else
538    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
539                       dst_ptr, dst_stride, output_width, output_height);
540#endif
541  }
542
543  const ConvolveFunctions* UUT_;
544  static uint8_t* input_;
545  static uint8_t* output_;
546  static uint8_t* output_ref_;
547#if CONFIG_VP9_HIGHBITDEPTH
548  static uint16_t* input16_;
549  static uint16_t* output16_;
550  static uint16_t* output16_ref_;
551  int mask_;
552#endif
553};
554
// Definitions of the fixture's shared buffers (allocated in SetUpTestCase,
// released in TearDownTestCase).
uint8_t* ConvolveTest::input_ = NULL;
uint8_t* ConvolveTest::output_ = NULL;
uint8_t* ConvolveTest::output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t* ConvolveTest::input16_ = NULL;
uint16_t* ConvolveTest::output16_ = NULL;
uint16_t* ConvolveTest::output16_ref_ = NULL;
#endif
563
// Sanity check: the guard border written by SetUp() must be intact before
// any predictor has run.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}
567
568TEST_P(ConvolveTest, Copy) {
569  uint8_t* const in = input();
570  uint8_t* const out = output();
571
572  ASM_REGISTER_STATE_CHECK(
573      UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
574                  Width(), Height()));
575
576  CheckGuardBlocks();
577
578  for (int y = 0; y < Height(); ++y)
579    for (int x = 0; x < Width(); ++x)
580      ASSERT_EQ(lookup(out, y * kOutputStride + x),
581                lookup(in, y * kInputStride + x))
582          << "(" << x << "," << y << ")";
583}
584
// The averaging predictor must produce ROUND_POWER_OF_TWO(src + dst, 1)
// for every pixel, where dst is the destination's prior contents (snapshotted
// into output_ref_ before the call).
TEST_P(ConvolveTest, Avg) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  uint8_t* const out_ref = output_ref();
  CopyOutputToRef();

  ASM_REGISTER_STATE_CHECK(
      UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
                Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
                                   lookup(out_ref, y * kOutputStride + x), 1))
          << "(" << x << "," << y << ")";
}
604
// A filter with all weight (128) on the center tap is an identity filter,
// so the scaled horizontal predictor must behave as a pure copy.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->sh8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}
622
// Identity filter (center tap = 128): the scaled vertical predictor must
// behave as a pure copy.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->sv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}
640
// Identity filter (center tap = 128) in both directions: the scaled 2D
// predictor must behave as a pure copy.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->shv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8,
                  16, Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}
658
const int kNumFilterBanks = 4;  // number of entries in vp9_filter_kernels
const int kNumFilters = 16;     // subpel positions per filter bank

// Every 8-tap kernel must sum to 128 (unity gain) and, when taps are added
// in the pairwise order used by SIMD implementations, no partial sum may
// exceed 128 — otherwise intermediate accumulators could saturate.
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
    for (int i = 0; i < kNumFilters; i++) {
      const int p0 = filters[i][0] + filters[i][1];
      const int p1 = filters[i][2] + filters[i][3];
      const int p2 = filters[i][4] + filters[i][5];
      const int p3 = filters[i][6] + filters[i][7];
      EXPECT_LE(p0, 128);
      EXPECT_LE(p1, 128);
      EXPECT_LE(p2, 128);
      EXPECT_LE(p3, 128);
      EXPECT_LE(p0 + p3, 128);
      EXPECT_LE(p0 + p3 + p1, 128);
      EXPECT_LE(p0 + p3 + p1 + p2, 128);
      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    }
  }
}
682
// All-zero filter passed in the slot a specialized predictor must ignore;
// any use of it would produce an all-zero (wrong) result and fail the test.
const int16_t kInvalidFilter[8] = { 0 };

// Compares each optimized predictor (copy/h8/v8/hv8) against the two-pass
// C reference for every subpel filter in every filter bank.
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  uint8_t* const in = input();
  uint8_t* const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // ref aliases ref8 or a CONVERT_TO_BYTEPTR view of ref16 depending on
  // the configured bit depth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t* ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
        // Compute the reference result.
        wrapper_filter_block2d_8_c(in, kInputStride,
                                   filters[filter_x], filters[filter_y],
                                   ref, kOutputStride,
                                   Width(), Height());

        // Filter index 0 means no subpel offset in that direction; pick the
        // matching specialized predictor and pass kInvalidFilter for taps
        // the predictor is required to ignore.
        if (filter_x && filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_(in, kInputStride, out, kOutputStride,
                         filters[filter_x], 16, filters[filter_y], 16,
                         Width(), Height()));
        else if (filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_(in, kInputStride, out, kOutputStride,
                        kInvalidFilter, 16, filters[filter_y], 16,
                        Width(), Height()));
        else if (filter_x)
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_(in, kInputStride, out, kOutputStride,
                        filters[filter_x], 16, kInvalidFilter, 16,
                        Width(), Height()));
        else
          ASM_REGISTER_STATE_CHECK(
              UUT_->copy_(in, kInputStride, out, kOutputStride,
                          kInvalidFilter, 0, kInvalidFilter, 0,
                          Width(), Height()));

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y)
          for (int x = 0; x < Width(); ++x)
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
                << "mismatch at (" << x << "," << y << "), "
                << "filters (" << filter_bank << ","
                << filter_x << "," << filter_y << ")";
      }
    }
  }
}
746
// Same as MatchesReferenceSubpixelFilter but for the averaging predictors:
// out and ref are pre-seeded with identical random data so the averaging
// step operates on the same prior contents in both paths.
TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
  uint8_t* const in = input();
  uint8_t* const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // ref aliases ref8 or a CONVERT_TO_BYTEPTR view of ref16 depending on
  // the configured bit depth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t* ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif

      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
        // Compute the reference result (filter + average into ref).
        wrapper_filter_average_block2d_8_c(in, kInputStride,
                                           filters[filter_x], filters[filter_y],
                                           ref, kOutputStride,
                                           Width(), Height());

        // Filter index 0 means no subpel offset in that direction; pick the
        // matching specialized averaging predictor.
        if (filter_x && filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
        else if (filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
                            kInvalidFilter, 16, filters[filter_y], 16,
                            Width(), Height()));
        else if (filter_x)
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, kInvalidFilter, 16,
                            Width(), Height()));
        else
          ASM_REGISTER_STATE_CHECK(
              UUT_->avg_(in, kInputStride, out, kOutputStride,
                          kInvalidFilter, 0, kInvalidFilter, 0,
                          Width(), Height()));

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y)
          for (int x = 0; x < Width(); ++x)
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
                << "mismatch at (" << x << "," << y << "), "
                << "filters (" << filter_bank << ","
                << filter_x << "," << filter_y << ")";
      }
    }
  }
}
828
// Feed the filters worst-case (all-zero / all-maximum) sample patterns to
// expose overflow or saturation bugs in the intermediate accumulators of the
// SIMD implementations, comparing each output against the C reference.
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth builds keep the reference data in a 16-bit buffer and hand
  // it to the wrapped functions through CONVERT_TO_BYTEPTR.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        // Rand8Extremes() biases values toward 0/255, the worst case for
        // 8-bit accumulation.
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  // Walk every 8-bit on/off pattern (seed_val = 0..255) across the 8x8 window
  // of taps feeding the first output pixel.  axis == 0 applies the pattern
  // along columns (bit x of seed_val selects column x), axis == 1 applies it
  // along rows.  The seed_val increment/decrement bookkeeping below advances
  // the pattern by one per completed 8x8 fill, in the appropriate direction.
  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
          // The "- SUBPEL_TAPS / 2 + 1" offset shifts the pattern so it
          // presumably covers the taps contributing to output pixel (0,0).
          // NOTE(review): 'in' is indexed with kOutputStride here; this is
          // only correct if kInputStride == kOutputStride — confirm, else it
          // should read kInputStride.
#if CONFIG_VP9_HIGHBITDEPTH
            assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                       ((seed_val >> (axis ? y : x)) & 1) * mask_);
#else
            assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                       ((seed_val >> (axis ? y : x)) & 1) * 255);
#endif
          if (axis) seed_val++;
        }
        if (axis)
          seed_val-= 8;
        else
          seed_val++;
      }
      if (axis) seed_val += 8;

      // For each extreme input pattern, check every (filter_bank, filter_x,
      // filter_y) combination against the C reference implementation.
      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride,
                                       filters[filter_x], filters[filter_y],
                                       ref, kOutputStride,
                                       Width(), Height());
            // Dispatch to the 2D, vertical-only, horizontal-only or plain
            // copy variant depending on which filters are non-trivial
            // (index 0 is the identity filter).
            if (filter_x && filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->hv8_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
            else if (filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->v8_(in, kInputStride, out, kOutputStride,
                            kInvalidFilter, 16, filters[filter_y], 16,
                            Width(), Height()));
            else if (filter_x)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->h8_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, kInvalidFilter, 16,
                            Width(), Height()));
            else
              ASM_REGISTER_STATE_CHECK(
                  UUT_->copy_(in, kInputStride, out, kOutputStride,
                              kInvalidFilter, 0, kInvalidFilter, 0,
                              Width(), Height()));

            for (int y = 0; y < Height(); ++y)
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << ","
                    << filter_x << "," << filter_y << ")";
          }
        }
      }
    }
  }
}
928
929/* This test exercises that enough rows and columns are filtered with every
930   possible initial fractional positions and scaling steps. */
931TEST_P(ConvolveTest, CheckScalingFiltering) {
932  uint8_t* const in = input();
933  uint8_t* const out = output();
934  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
935
936  SetConstantInput(127);
937
938  for (int frac = 0; frac < 16; ++frac) {
939    for (int step = 1; step <= 32; ++step) {
940      /* Test the horizontal and vertical filters in combination. */
941      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
942                                           eighttap[frac], step,
943                                           eighttap[frac], step,
944                                           Width(), Height()));
945
946      CheckGuardBlocks();
947
948      for (int y = 0; y < Height(); ++y) {
949        for (int x = 0; x < Width(); ++x) {
950          ASSERT_EQ(lookup(in, y * kInputStride + x),
951                    lookup(out, y * kOutputStride + x))
952              << "x == " << x << ", y == " << y
953              << ", frac == " << frac << ", step == " << step;
954        }
955      }
956    }
957  }
958}
959
using std::tr1::make_tuple;

#if CONFIG_VP9_HIGHBITDEPTH
// WRAP(func, bd) generates a wrapper named wrap_<func>_<bd> that adapts the
// high-bitdepth function vpx_highbd_<func> (which takes a trailing bit-depth
// argument) to the bit-depth-free ConvolveFunc signature used by the tests.
#define WRAP(func, bd) \
void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \
                              uint8_t *dst, ptrdiff_t dst_stride, \
                              const int16_t *filter_x, \
                              int filter_x_stride, \
                              const int16_t *filter_y, \
                              int filter_y_stride, \
                              int w, int h) { \
  vpx_highbd_ ## func(src, src_stride, dst, dst_stride, filter_x, \
                      filter_x_stride, filter_y, filter_y_stride, \
                      w, h, bd); \
}
// SSE2 wrappers for bit depths 8, 10 and 12.  The copy/avg variants only
// exist when the x86 assembly (CONFIG_USE_X86INC) is built in.
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_USE_X86INC
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
#endif  // CONFIG_USE_X86INC
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64

// C reference wrappers for bit depths 8, 10 and 12.
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
#undef WRAP
1029
// High-bitdepth C reference function table for 8-bit content.  The scaled
// predictor slots (arguments 9-14) reuse the non-scaled wrappers — presumably
// because no high-bitdepth scaled variants are wrapped above; confirm if
// scaled coverage is expected here.
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
    make_tuple(8, 4, &convolve8_c),
    make_tuple(4, 8, &convolve8_c),
    make_tuple(8, 8, &convolve8_c),
    make_tuple(16, 8, &convolve8_c),
    make_tuple(8, 16, &convolve8_c),
    make_tuple(16, 16, &convolve8_c),
    make_tuple(32, 16, &convolve8_c),
    make_tuple(16, 32, &convolve8_c),
    make_tuple(32, 32, &convolve8_c),
    make_tuple(64, 32, &convolve8_c),
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
// 10-bit C reference function table.
const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve10_c),
    make_tuple(8, 4, &convolve10_c),
    make_tuple(4, 8, &convolve10_c),
    make_tuple(8, 8, &convolve10_c),
    make_tuple(16, 8, &convolve10_c),
    make_tuple(8, 16, &convolve10_c),
    make_tuple(16, 16, &convolve10_c),
    make_tuple(32, 16, &convolve10_c),
    make_tuple(16, 32, &convolve10_c),
    make_tuple(32, 32, &convolve10_c),
    make_tuple(64, 32, &convolve10_c),
    make_tuple(32, 64, &convolve10_c),
    make_tuple(64, 64, &convolve10_c)));
// 12-bit C reference function table.
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve12_c),
    make_tuple(8, 4, &convolve12_c),
    make_tuple(4, 8, &convolve12_c),
    make_tuple(8, 8, &convolve12_c),
    make_tuple(16, 8, &convolve12_c),
    make_tuple(8, 16, &convolve12_c),
    make_tuple(16, 16, &convolve12_c),
    make_tuple(32, 16, &convolve12_c),
    make_tuple(16, 32, &convolve12_c),
    make_tuple(32, 32, &convolve12_c),
    make_tuple(64, 32, &convolve12_c),
    make_tuple(32, 64, &convolve12_c),
    make_tuple(64, 64, &convolve12_c)));
1096
#else

// Low-bitdepth build: plain C reference functions; the scaled predictor
// slots use the dedicated vpx_scaled_*_c implementations.
const ConvolveFunctions convolve8_c(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
    vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
    vpx_convolve8_c, vpx_convolve8_avg_c,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
    make_tuple(8, 4, &convolve8_c),
    make_tuple(4, 8, &convolve8_c),
    make_tuple(8, 8, &convolve8_c),
    make_tuple(16, 8, &convolve8_c),
    make_tuple(8, 16, &convolve8_c),
    make_tuple(16, 16, &convolve8_c),
    make_tuple(32, 16, &convolve8_c),
    make_tuple(16, 32, &convolve8_c),
    make_tuple(32, 32, &convolve8_c),
    make_tuple(64, 32, &convolve8_c),
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
#endif
1123
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
// SSE2 high-bitdepth tables for 8/10/12-bit.  copy/avg fall back to the C
// wrappers when the x86 assembly (CONFIG_USE_X86INC) is not built; the
// scaled slots reuse the non-scaled SSE2 wrappers.
const ConvolveFunctions convolve8_sse2(
#if CONFIG_USE_X86INC
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
#else
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
#endif  // CONFIG_USE_X86INC
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
#if CONFIG_USE_X86INC
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
#else
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
#endif  // CONFIG_USE_X86INC
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
#if CONFIG_USE_X86INC
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
#else
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
#endif  // CONFIG_USE_X86INC
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
    make_tuple(8, 4, &convolve8_sse2),
    make_tuple(4, 8, &convolve8_sse2),
    make_tuple(8, 8, &convolve8_sse2),
    make_tuple(16, 8, &convolve8_sse2),
    make_tuple(8, 16, &convolve8_sse2),
    make_tuple(16, 16, &convolve8_sse2),
    make_tuple(32, 16, &convolve8_sse2),
    make_tuple(16, 32, &convolve8_sse2),
    make_tuple(32, 32, &convolve8_sse2),
    make_tuple(64, 32, &convolve8_sse2),
    make_tuple(32, 64, &convolve8_sse2),
    make_tuple(64, 64, &convolve8_sse2),
    make_tuple(4, 4, &convolve10_sse2),
    make_tuple(8, 4, &convolve10_sse2),
    make_tuple(4, 8, &convolve10_sse2),
    make_tuple(8, 8, &convolve10_sse2),
    make_tuple(16, 8, &convolve10_sse2),
    make_tuple(8, 16, &convolve10_sse2),
    make_tuple(16, 16, &convolve10_sse2),
    make_tuple(32, 16, &convolve10_sse2),
    make_tuple(16, 32, &convolve10_sse2),
    make_tuple(32, 32, &convolve10_sse2),
    make_tuple(64, 32, &convolve10_sse2),
    make_tuple(32, 64, &convolve10_sse2),
    make_tuple(64, 64, &convolve10_sse2),
    make_tuple(4, 4, &convolve12_sse2),
    make_tuple(8, 4, &convolve12_sse2),
    make_tuple(4, 8, &convolve12_sse2),
    make_tuple(8, 8, &convolve12_sse2),
    make_tuple(16, 8, &convolve12_sse2),
    make_tuple(8, 16, &convolve12_sse2),
    make_tuple(16, 16, &convolve12_sse2),
    make_tuple(32, 16, &convolve12_sse2),
    make_tuple(16, 32, &convolve12_sse2),
    make_tuple(32, 32, &convolve12_sse2),
    make_tuple(64, 32, &convolve12_sse2),
    make_tuple(32, 64, &convolve12_sse2),
    make_tuple(64, 64, &convolve12_sse2)));
#else
// Low-bitdepth SSE2 table; scaled slots fall back to C.
const ConvolveFunctions convolve8_sse2(
#if CONFIG_USE_X86INC
    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
#else
    vpx_convolve_copy_c, vpx_convolve_avg_c,
#endif  // CONFIG_USE_X86INC
    vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
    vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
    vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
    make_tuple(8, 4, &convolve8_sse2),
    make_tuple(4, 8, &convolve8_sse2),
    make_tuple(8, 8, &convolve8_sse2),
    make_tuple(16, 8, &convolve8_sse2),
    make_tuple(8, 16, &convolve8_sse2),
    make_tuple(16, 16, &convolve8_sse2),
    make_tuple(32, 16, &convolve8_sse2),
    make_tuple(16, 32, &convolve8_sse2),
    make_tuple(32, 32, &convolve8_sse2),
    make_tuple(64, 32, &convolve8_sse2),
    make_tuple(32, 64, &convolve8_sse2),
    make_tuple(64, 64, &convolve8_sse2)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif
1232
#if HAVE_SSSE3
// SSSE3 8-tap filters; copy/avg and the scaled variants fall back to C.
const ConvolveFunctions convolve8_ssse3(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
    vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
    vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_ssse3),
    make_tuple(8, 4, &convolve8_ssse3),
    make_tuple(4, 8, &convolve8_ssse3),
    make_tuple(8, 8, &convolve8_ssse3),
    make_tuple(16, 8, &convolve8_ssse3),
    make_tuple(8, 16, &convolve8_ssse3),
    make_tuple(16, 16, &convolve8_ssse3),
    make_tuple(32, 16, &convolve8_ssse3),
    make_tuple(16, 32, &convolve8_ssse3),
    make_tuple(32, 32, &convolve8_ssse3),
    make_tuple(64, 32, &convolve8_ssse3),
    make_tuple(32, 64, &convolve8_ssse3),
    make_tuple(64, 64, &convolve8_ssse3)));
#endif
1258
#if HAVE_AVX2 && HAVE_SSSE3
// AVX2 horiz/vert/2D filters; the avg variants come from SSSE3 (no AVX2
// versions listed), and copy/avg plus the scaled variants fall back to C.
const ConvolveFunctions convolve8_avx2(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
    vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
    vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_avx2),
    make_tuple(8, 4, &convolve8_avx2),
    make_tuple(4, 8, &convolve8_avx2),
    make_tuple(8, 8, &convolve8_avx2),
    make_tuple(8, 16, &convolve8_avx2),
    make_tuple(16, 8, &convolve8_avx2),
    make_tuple(16, 16, &convolve8_avx2),
    make_tuple(32, 16, &convolve8_avx2),
    make_tuple(16, 32, &convolve8_avx2),
    make_tuple(32, 32, &convolve8_avx2),
    make_tuple(64, 32, &convolve8_avx2),
    make_tuple(32, 64, &convolve8_avx2),
    make_tuple(64, 64, &convolve8_avx2)));
#endif  // HAVE_AVX2 && HAVE_SSSE3
1284
#if HAVE_NEON
// The HAVE_NEON_ASM and intrinsics NEON builds provide the same vpx_*_neon
// symbol names, so a single function table covers both configurations.  (The
// previous #if HAVE_NEON_ASM / #else branches were byte-identical, and the
// #else was mislabeled "// HAVE_NEON".)  The scaled predictor slots fall back
// to the C implementations.
const ConvolveFunctions convolve8_neon(
    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
    vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_neon),
    make_tuple(8, 4, &convolve8_neon),
    make_tuple(4, 8, &convolve8_neon),
    make_tuple(8, 8, &convolve8_neon),
    make_tuple(16, 8, &convolve8_neon),
    make_tuple(8, 16, &convolve8_neon),
    make_tuple(16, 16, &convolve8_neon),
    make_tuple(32, 16, &convolve8_neon),
    make_tuple(16, 32, &convolve8_neon),
    make_tuple(32, 32, &convolve8_neon),
    make_tuple(64, 32, &convolve8_neon),
    make_tuple(32, 64, &convolve8_neon),
    make_tuple(64, 64, &convolve8_neon)));
#endif  // HAVE_NEON
1321
#if HAVE_DSPR2
// MIPS DSPr2 implementations; the scaled variants fall back to C.
const ConvolveFunctions convolve8_dspr2(
    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
    vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
    vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
    vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_dspr2),
    make_tuple(8, 4, &convolve8_dspr2),
    make_tuple(4, 8, &convolve8_dspr2),
    make_tuple(8, 8, &convolve8_dspr2),
    make_tuple(16, 8, &convolve8_dspr2),
    make_tuple(8, 16, &convolve8_dspr2),
    make_tuple(16, 16, &convolve8_dspr2),
    make_tuple(32, 16, &convolve8_dspr2),
    make_tuple(16, 32, &convolve8_dspr2),
    make_tuple(32, 32, &convolve8_dspr2),
    make_tuple(64, 32, &convolve8_dspr2),
    make_tuple(32, 64, &convolve8_dspr2),
    make_tuple(64, 64, &convolve8_dspr2)));
#endif
1347
#if HAVE_MSA
// MIPS MSA (SIMD) implementations; the scaled variants fall back to C.
const ConvolveFunctions convolve8_msa(
    vpx_convolve_copy_msa, vpx_convolve_avg_msa,
    vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
    vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
    vpx_convolve8_msa, vpx_convolve8_avg_msa,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_msa),
    make_tuple(8, 4, &convolve8_msa),
    make_tuple(4, 8, &convolve8_msa),
    make_tuple(8, 8, &convolve8_msa),
    make_tuple(16, 8, &convolve8_msa),
    make_tuple(8, 16, &convolve8_msa),
    make_tuple(16, 16, &convolve8_msa),
    make_tuple(32, 16, &convolve8_msa),
    make_tuple(16, 32, &convolve8_msa),
    make_tuple(32, 32, &convolve8_msa),
    make_tuple(64, 32, &convolve8_msa),
    make_tuple(32, 64, &convolve8_msa),
    make_tuple(64, 64, &convolve8_msa)));
#endif  // HAVE_MSA
1373}  // namespace
1374