1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <string.h> 12#include "test/acm_random.h" 13#include "test/register_state_check.h" 14#include "test/util.h" 15#include "third_party/googletest/src/include/gtest/gtest.h" 16 17#include "./vpx_config.h" 18#include "./vp9_rtcd.h" 19#include "vp9/common/vp9_filter.h" 20#include "vpx_mem/vpx_mem.h" 21#include "vpx_ports/mem.h" 22 23namespace { 24 25static const unsigned int kMaxDimension = 64; 26 27typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, 28 uint8_t *dst, ptrdiff_t dst_stride, 29 const int16_t *filter_x, int filter_x_stride, 30 const int16_t *filter_y, int filter_y_stride, 31 int w, int h); 32 33struct ConvolveFunctions { 34 ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg, 35 ConvolveFunc v8, ConvolveFunc v8_avg, 36 ConvolveFunc hv8, ConvolveFunc hv8_avg, 37 int bd) 38 : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg), 39 hv8_avg_(hv8_avg), use_high_bd_(bd) {} 40 41 ConvolveFunc h8_; 42 ConvolveFunc v8_; 43 ConvolveFunc hv8_; 44 ConvolveFunc h8_avg_; 45 ConvolveFunc v8_avg_; 46 ConvolveFunc hv8_avg_; 47 int use_high_bd_; // 0 if high bitdepth not used, else the actual bit depth. 48}; 49 50typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; 51 52// Reference 8-tap subpixel filter, slightly modified to fit into this test. 53#define VP9_FILTER_WEIGHT 128 54#define VP9_FILTER_SHIFT 7 55uint8_t clip_pixel(int x) { 56 return x < 0 ? 0 : 57 x > 255 ? 
255 : 58 x; 59} 60 61void filter_block2d_8_c(const uint8_t *src_ptr, 62 const unsigned int src_stride, 63 const int16_t *HFilter, 64 const int16_t *VFilter, 65 uint8_t *dst_ptr, 66 unsigned int dst_stride, 67 unsigned int output_width, 68 unsigned int output_height) { 69 // Between passes, we use an intermediate buffer whose height is extended to 70 // have enough horizontally filtered values as input for the vertical pass. 71 // This buffer is allocated to be big enough for the largest block type we 72 // support. 73 const int kInterp_Extend = 4; 74 const unsigned int intermediate_height = 75 (kInterp_Extend - 1) + output_height + kInterp_Extend; 76 unsigned int i, j; 77 78 // Size of intermediate_buffer is max_intermediate_height * filter_max_width, 79 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 80 // + kInterp_Extend 81 // = 3 + 16 + 4 82 // = 23 83 // and filter_max_width = 16 84 // 85 uint8_t intermediate_buffer[71 * kMaxDimension]; 86 const int intermediate_next_stride = 1 - intermediate_height * output_width; 87 88 // Horizontal pass (src -> transposed intermediate). 89 uint8_t *output_ptr = intermediate_buffer; 90 const int src_next_row_stride = src_stride - output_width; 91 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 92 for (i = 0; i < intermediate_height; ++i) { 93 for (j = 0; j < output_width; ++j) { 94 // Apply filter... 95 const int temp = (src_ptr[0] * HFilter[0]) + 96 (src_ptr[1] * HFilter[1]) + 97 (src_ptr[2] * HFilter[2]) + 98 (src_ptr[3] * HFilter[3]) + 99 (src_ptr[4] * HFilter[4]) + 100 (src_ptr[5] * HFilter[5]) + 101 (src_ptr[6] * HFilter[6]) + 102 (src_ptr[7] * HFilter[7]) + 103 (VP9_FILTER_WEIGHT >> 1); // Rounding 104 105 // Normalize back to 0-255... 
      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
      ++src_ptr;
      // Transposed write: consecutive j advance by one column.
      output_ptr += intermediate_height;
    }
    src_ptr += src_next_row_stride;
    output_ptr += intermediate_next_stride;
  }

  // Vertical pass (transposed intermediate -> dst).
  src_ptr = intermediate_buffer;
  const int dst_next_row_stride = dst_stride - output_width;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * VFilter[0]) +
                       (src_ptr[1] * VFilter[1]) +
                       (src_ptr[2] * VFilter[2]) +
                       (src_ptr[3] * VFilter[3]) +
                       (src_ptr[4] * VFilter[4]) +
                       (src_ptr[5] * VFilter[5]) +
                       (src_ptr[6] * VFilter[6]) +
                       (src_ptr[7] * VFilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
      src_ptr += intermediate_height;
    }
    src_ptr += intermediate_next_stride;
    dst_ptr += dst_next_row_stride;
  }
}

// In-place rounded average: output_ptr[i] = (output_ptr[i] + src[i] + 1) / 2.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

// Reference for the "avg" convolve variants: 8-tap filter into a temporary,
// then average the result with the existing dst contents.
void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *HFilter,
                                const int16_t *VFilter,
                                uint8_t *dst_ptr,
                                unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height) {
  uint8_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
                     output_width, output_height);
  block2d_average_c(tmp, 64, dst_ptr, dst_stride,
                    output_width, output_height);
}

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth twin of filter_block2d_8_c: same two-pass 8-tap reference
// filter over uint16_t pixels, clipped to [0, 2^bd - 1].
void high_filter_block2d_8_c(const uint16_t *src_ptr,
                             const unsigned int src_stride,
                             const int16_t *HFilter,
                             const int16_t *VFilter,
                             uint16_t *dst_ptr,
                             unsigned int dst_stride,
                             unsigned int output_width,
                             unsigned int output_height,
                             int bd) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;

  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                 + kInterp_Extend
   *                               = 3 + 64 + 4
   *                               = 71
   * and filter_max_width          = kMaxDimension = 64
   */
  uint16_t intermediate_buffer[71 * kMaxDimension];
  // The intermediate buffer is written transposed (column-major); this stride
  // steps from the bottom of one column to the top of the next.
  const int intermediate_next_stride = 1 - intermediate_height * output_width;

  // Horizontal pass (src -> transposed intermediate).
  {
    uint16_t *output_ptr = intermediate_buffer;
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    // Back up to the first tap: 3 rows above and 3 columns left of the output
    // position.
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * HFilter[0]) +
                         (src_ptr[1] * HFilter[1]) +
                         (src_ptr[2] * HFilter[2]) +
                         (src_ptr[3] * HFilter[3]) +
                         (src_ptr[4] * HFilter[4]) +
                         (src_ptr[5] * HFilter[5]) +
                         (src_ptr[6] * HFilter[6]) +
                         (src_ptr[7] * HFilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to the [0, 2^bd - 1] pixel range...
        *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
        ++src_ptr;
        // Transposed write: consecutive j advance by one column.
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
    uint16_t *src_ptr = intermediate_buffer;
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * VFilter[0]) +
                         (src_ptr[1] * VFilter[1]) +
                         (src_ptr[2] * VFilter[2]) +
                         (src_ptr[3] * VFilter[3]) +
                         (src_ptr[4] * VFilter[4]) +
                         (src_ptr[5] * VFilter[5]) +
                         (src_ptr[6] * VFilter[6]) +
                         (src_ptr[7] * VFilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to the [0, 2^bd - 1] pixel range...
        *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
        src_ptr += intermediate_height;
      }
      src_ptr += intermediate_next_stride;
      dst_ptr += dst_next_row_stride;
    }
  }
}

// High-bitdepth in-place rounded average (see block2d_average_c).
void high_block2d_average_c(uint16_t *src,
                            unsigned int src_stride,
                            uint16_t *output_ptr,
                            unsigned int output_stride,
                            unsigned int output_width,
                            unsigned int output_height,
                            int bd) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

// High-bitdepth reference for the "avg" convolve variants.
void high_filter_average_block2d_8_c(const uint16_t *src_ptr,
                                     const unsigned int src_stride,
                                     const int16_t *HFilter,
                                     const int16_t *VFilter,
                                     uint16_t *dst_ptr,
                                     unsigned int dst_stride,
                                     unsigned int output_width,
                                     unsigned int output_height,
                                     int bd) {
  uint16_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
  high_filter_block2d_8_c(src_ptr, src_stride,
                          HFilter, VFilter, tmp, 64,
                          output_width, output_height, bd);
  high_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
                         output_width, output_height, bd);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Parameterized fixture: allocates shared input/output buffers with guard
// borders and provides bitdepth-agnostic access helpers.
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
    input_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
    output_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
#if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment,
                     (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
    output16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
#endif
  }

  static void TearDownTestCase() {
    // input_/input16_ were advanced by one byte/element at allocation time;
    // free the original pointers.
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
#endif
  }

 protected:
  static const int kDataAlignment = 16;
  static const int kOuterBlockSize = 256;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }
  // Left border of the centered inner block, rounded up to the alignment.
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }

  // True when flat index i lies in the guard border around the inner block.
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i
            % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }

  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
#if CONFIG_VP9_HIGHBITDEPTH
    // mask_ is the max pixel value for the configured bit depth.
    if (UUT_->use_high_bd_ != 0)
      mask_ = (1 << UUT_->use_high_bd_) - 1;
    else
      mask_ = 255;
#endif
    /* Set up guard blocks for an inner block centered in the outer block */
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i))
        output_[i] = 255;
      else
        output_[i] = 0;
    }

    // Fill the input with alternating max values and random extremes to
    // stress rounding/saturation in the filters.
    ::libvpx_test::ACMRandom prng;
    for (int i = 0; i < kInputBufferSize; ++i) {
      if (i & 1) {
        input_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = mask_;
#endif
      } else {
        input_[i] = prng.Rand8Extremes();
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = prng.Rand16() & mask_;
#endif
      }
    }
  }

  // Overwrite the whole input buffer with a single pixel value.
  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_memset16(input16_, value, kInputBufferSize);
#endif
  }

  // Verify no convolve call wrote outside the inner block.
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i))
        EXPECT_EQ(255, output_[i]);
    }
  }

  // Pointer to the top-left pixel of the inner input block (byte pointer even
  // in high-bitdepth mode, via CONVERT_TO_BYTEPTR).
  uint8_t *input() const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_high_bd_ == 0) {
      return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
    } else {
      return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
                                BorderLeft());
    }
#else
    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
#endif
  }

  // Pointer to the top-left pixel of the inner output block.
  uint8_t *output() const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_high_bd_ == 0) {
      return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
    } else {
      return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
                                BorderLeft());
    }
#else
    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
#endif
  }

  // Bitdepth-agnostic read of pixel list[index].
  uint16_t lookup(uint8_t *list, int index) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_high_bd_ == 0) {
      return list[index];
    } else {
      return CONVERT_TO_SHORTPTR(list)[index];
    }
#else
    return list[index];
#endif
  }

  // Bitdepth-agnostic write of pixel list[index] = val.
  void assign_val(uint8_t *list, int index, uint16_t val) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_high_bd_ == 0) {
      list[index] = (uint8_t) val;
    } else {
      CONVERT_TO_SHORTPTR(list)[index] = val;
    }
#else
    list[index] = (uint8_t) val;
#endif
  }

  // Dispatch to the 8-bit or high-bitdepth averaging reference filter.
  void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
                                          const unsigned int src_stride,
                                          const int16_t *HFilter,
                                          const int16_t *VFilter,
                                          uint8_t *dst_ptr,
                                          unsigned int dst_stride,
                                          unsigned int output_width,
                                          unsigned int output_height) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_high_bd_ == 0) {
      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                                 dst_ptr, dst_stride, output_width,
                                 output_height);
    } else {
      high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                                      HFilter, VFilter,
                                      CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                                      output_width, output_height,
                                      UUT_->use_high_bd_);
    }
#else
    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                               dst_ptr, dst_stride, output_width,
                               output_height);
#endif
  }

  // Dispatch to the 8-bit or high-bitdepth reference filter.
  void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
                                  const unsigned int src_stride,
                                  const int16_t *HFilter,
                                  const int16_t *VFilter,
                                  uint8_t *dst_ptr,
                                  unsigned int dst_stride,
                                  unsigned int output_width,
                                  unsigned int output_height) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_high_bd_ == 0) {
      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                         dst_ptr, dst_stride, output_width, output_height);
    } else {
      high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                              HFilter, VFilter,
                              CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                              output_width, output_height, UUT_->use_high_bd_);
    }
#else
    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                       dst_ptr, dst_stride, output_width, output_height);
#endif
  }

  const ConvolveFunctions* UUT_;  // Functions under test for this instance.
  static uint8_t* input_;
  static uint8_t* output_;
#if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t* input16_;
  static uint16_t* output16_;
  int mask_;  // Max pixel value for the configured bit depth.
#endif
};

uint8_t* ConvolveTest::input_ = NULL;
uint8_t* ConvolveTest::output_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t* ConvolveTest::input16_ = NULL;
uint16_t* ConvolveTest::output16_ = NULL;
#endif

// The guard border must be untouched before any filtering runs.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}

// An all-pass filter (single 128 tap) applied horizontally must copy input
// to output unchanged.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}

// Same all-pass check for the vertical filter.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}

// Same all-pass check for the combined 2D filter.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}

const int kNumFilterBanks = 4;
const int kNumFilters = 16;

// Every kernel's taps must sum to 128, and pairwise partial sums must stay
// within 128 so intermediate accumulations cannot saturate.
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
    for (int i = 0; i < kNumFilters; i++) {
      const int p0 = filters[i][0] + filters[i][1];
      const int p1 = filters[i][2] + filters[i][3];
      const int p2 = filters[i][4] + filters[i][5];
      const int p3 = filters[i][6] + filters[i][7];
      EXPECT_LE(p0, 128);
      EXPECT_LE(p1, 128);
      EXPECT_LE(p2, 128);
      EXPECT_LE(p3, 128);
      EXPECT_LE(p0 + p3, 128);
      EXPECT_LE(p0 + p3 + p1, 128);
      EXPECT_LE(p0 + p3 + p1 + p2, 128);
      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    }
  }
}

// Passed for the unused filter argument; implementations must not read it.
const int16_t kInvalidFilter[8] = { 0 };

// The optimized convolve output must match the reference filter bit-exactly
// for every filter bank and every subpel position.
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  uint8_t* const in = input();
  uint8_t* const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t* ref;
  if (UUT_->use_high_bd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
    const InterpKernel *const eighttap_smooth =
        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);

    for (int filter_x = 0; filter_x <
         kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
        wrapper_filter_block2d_8_c(in, kInputStride,
                                   filters[filter_x], filters[filter_y],
                                   ref, kOutputStride,
                                   Width(), Height());

        // Pick the variant (2D / vertical-only / horizontal-only) the codec
        // would dispatch to for this subpel position.
        if (filters == eighttap_smooth || (filter_x && filter_y))
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_(in, kInputStride, out, kOutputStride,
                         filters[filter_x], 16, filters[filter_y], 16,
                         Width(), Height()));
        else if (filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_(in, kInputStride, out, kOutputStride,
                        kInvalidFilter, 16, filters[filter_y], 16,
                        Width(), Height()));
        else
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_(in, kInputStride, out, kOutputStride,
                        filters[filter_x], 16, kInvalidFilter, 16,
                        Width(), Height()));

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y)
          for (int x = 0; x < Width(); ++x)
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
                << "mismatch at (" << x << "," << y << "), "
                << "filters (" << filter_bank << ","
                << filter_x << "," << filter_y << ")";
      }
    }
  }
}

// Same as MatchesReferenceSubpixelFilter but for the averaging variants,
// which blend the filtered result with existing dst contents.
TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
  uint8_t* const in = input();
  uint8_t* const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t* ref;
  if (UUT_->use_high_bd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r =
          prng.Rand8Extremes();
#endif

      // out and ref must start identical so the average is comparable.
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
    const InterpKernel *const eighttap_smooth =
        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
        wrapper_filter_average_block2d_8_c(in, kInputStride,
                                           filters[filter_x], filters[filter_y],
                                           ref, kOutputStride,
                                           Width(), Height());

        if (filters == eighttap_smooth || (filter_x && filter_y))
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
        else if (filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, filters[filter_y], 16,
                            Width(), Height()));
        else
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, filters[filter_y], 16,
                            Width(), Height()));

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y)
          for (int x = 0; x < Width(); ++x)
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
                << "mismatch at (" << x << "," << y << "), "
                << "filters (" << filter_bank << ","
                << filter_x << "," << filter_y << ")";
      }
    }
  }
}

// Drive the filters with worst-case (extreme bit-pattern) inputs and verify
// the optimized output still matches the reference exactly.
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_high_bd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  // Sweep every 8-bit on/off pattern of min/max pixels through the 8x8
  // filter support, along columns (axis 0) and rows (axis 1).
  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
#if CONFIG_VP9_HIGHBITDEPTH
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * mask_);
#else
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * 255);
#endif
          if (axis) seed_val++;
        }
        if (axis)
          seed_val-= 8;
        else
          seed_val++;
      }
      if (axis) seed_val += 8;

      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
        const InterpKernel *const eighttap_smooth =
            vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride,
                                       filters[filter_x], filters[filter_y],
                                       ref, kOutputStride,
                                       Width(), Height());
            if (filters == eighttap_smooth || (filter_x && filter_y))
              ASM_REGISTER_STATE_CHECK(
                  UUT_->hv8_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
            else if (filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->v8_(in,
                            kInputStride, out, kOutputStride,
                            kInvalidFilter, 16, filters[filter_y], 16,
                            Width(), Height()));
            else
              ASM_REGISTER_STATE_CHECK(
                  UUT_->h8_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, kInvalidFilter, 16,
                            Width(), Height()));

            for (int y = 0; y < Height(); ++y)
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << ","
                    << filter_x << "," << filter_y << ")";
          }
        }
      }
    }
  }
}

// 16 single-tap (value 128) filters; the active tap moves one position left
// per entry. Unspecified trailing taps are zero-initialized by the aggregate
// initializer rules.
DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
    { 0, 0, 0, 0, 0, 0, 0, 128},
    { 0, 0, 0, 0, 0, 0, 128},
    { 0, 0, 0, 0, 0, 128},
    { 0, 0, 0, 0, 128},
    { 0, 0, 0, 128},
    { 0, 0, 128},
    { 0, 128},
    { 128},
    { 0, 0, 0, 0, 0, 0, 0, 128},
    { 0, 0, 0, 0, 0, 0, 128},
    { 0, 0, 0, 0, 0, 128},
    { 0, 0, 0, 0, 128},
    { 0, 0, 0, 128},
    { 0, 0, 128},
    { 0, 128},
    { 128}
};

/* This test exercises the horizontal and vertical filter functions. */
TEST_P(ConvolveTest, ChangeFilterWorks) {
  uint8_t* const in = input();
  uint8_t* const out = output();

  /* Assume that the first input sample is at the 8/16th position. */
  const int kInitialSubPelOffset = 8;

  /* Filters are 8-tap, so the first filter tap will be applied to the pixel
   * at position -3 with respect to the current filtering position. Since
   * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
   * which is non-zero only in the last tap. So, applying the filter at the
   * current input position will result in an output equal to the pixel at
   * offset +4 (-3 + 7) with respect to the current filtering position.
   */
  const int kPixelSelected = 4;

  /* Assume that each output pixel requires us to step on by 17/16th pixels in
   * the input.
867 */ 868 const int kInputPixelStep = 17; 869 870 /* The filters are setup in such a way that the expected output produces 871 * sets of 8 identical output samples. As the filter position moves to the 872 * next 1/16th pixel position the only active (=128) filter tap moves one 873 * position to the left, resulting in the same input pixel being replicated 874 * in to the output for 8 consecutive samples. After each set of 8 positions 875 * the filters select a different input pixel. kFilterPeriodAdjust below 876 * computes which input pixel is written to the output for a specified 877 * x or y position. 878 */ 879 880 /* Test the horizontal filter. */ 881 ASM_REGISTER_STATE_CHECK( 882 UUT_->h8_(in, kInputStride, out, kOutputStride, 883 kChangeFilters[kInitialSubPelOffset], 884 kInputPixelStep, NULL, 0, Width(), Height())); 885 886 for (int x = 0; x < Width(); ++x) { 887 const int kFilterPeriodAdjust = (x >> 3) << 3; 888 const int ref_x = 889 kPixelSelected + ((kInitialSubPelOffset 890 + kFilterPeriodAdjust * kInputPixelStep) 891 >> SUBPEL_BITS); 892 ASSERT_EQ(lookup(in, ref_x), lookup(out, x)) 893 << "x == " << x << "width = " << Width(); 894 } 895 896 /* Test the vertical filter. */ 897 ASM_REGISTER_STATE_CHECK( 898 UUT_->v8_(in, kInputStride, out, kOutputStride, 899 NULL, 0, kChangeFilters[kInitialSubPelOffset], 900 kInputPixelStep, Width(), Height())); 901 902 for (int y = 0; y < Height(); ++y) { 903 const int kFilterPeriodAdjust = (y >> 3) << 3; 904 const int ref_y = 905 kPixelSelected + ((kInitialSubPelOffset 906 + kFilterPeriodAdjust * kInputPixelStep) 907 >> SUBPEL_BITS); 908 ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride)) 909 << "y == " << y; 910 } 911 912 /* Test the horizontal and vertical filters in combination. 
   */
  ASM_REGISTER_STATE_CHECK(
      UUT_->hv8_(in, kInputStride, out, kOutputStride,
                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
                 Width(), Height()));

  for (int y = 0; y < Height(); ++y) {
    const int kFilterPeriodAdjustY = (y >> 3) << 3;
    const int ref_y =
        kPixelSelected + ((kInitialSubPelOffset
            + kFilterPeriodAdjustY * kInputPixelStep)
                          >> SUBPEL_BITS);
    for (int x = 0; x < Width(); ++x) {
      const int kFilterPeriodAdjustX = (x >> 3) << 3;
      const int ref_x =
          kPixelSelected + ((kInitialSubPelOffset
              + kFilterPeriodAdjustX * kInputPixelStep)
                            >> SUBPEL_BITS);

      ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
                lookup(out, y * kOutputStride + x))
          << "x == " << x << ", y == " << y;
    }
  }
}

/* This test exercises that enough rows and columns are filtered with every
   possible initial fractional positions and scaling steps. */
TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);

  // With a constant input, any correct filter/step combination must
  // reproduce the constant everywhere in the output.
  SetConstantInput(127);

  for (int frac = 0; frac < 16; ++frac) {
    for (int step = 1; step <= 32; ++step) {
      /* Test the horizontal and vertical filters in combination.
       */
      ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
                                          eighttap[frac], step,
                                          eighttap[frac], step,
                                          Width(), Height()));

      CheckGuardBlocks();

      for (int y = 0; y < Height(); ++y) {
        for (int x = 0; x < Width(); ++x) {
          ASSERT_EQ(lookup(in, y * kInputStride + x),
                    lookup(out, y * kOutputStride + x))
              << "x == " << x << ", y == " << y
              << ", frac == " << frac << ", step == " << step;
        }
      }
    }
  }
}

using std::tr1::make_tuple;

#if CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && ARCH_X86_64
// Adapters that bind a fixed bit depth (8/10/12) to the high-bitdepth SSE2
// convolve entry points so they match the ConvolveFunc signature.
void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x,
                                 int filter_x_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
  vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
                                filter_x_stride, filter_y, filter_y_stride,
                                w, h, 8);
}

void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x,
                                     int filter_x_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
  vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
                                    filter_x_stride, filter_y, filter_y_stride,
                                    w, h, 8);
}

void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x,
                                int filter_x_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
  vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
                               filter_x_stride, filter_y, filter_y_stride,
                               w, h, 8);
}

void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const int16_t *filter_x,
                                    int filter_x_stride,
                                    const int16_t *filter_y,
                                    int
filter_y_stride, 1014 int w, int h) { 1015 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, 1016 filter_x_stride, filter_y, filter_y_stride, 1017 w, h, 8); 1018} 1019 1020void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride, 1021 uint8_t *dst, ptrdiff_t dst_stride, 1022 const int16_t *filter_x, 1023 int filter_x_stride, 1024 const int16_t *filter_y, 1025 int filter_y_stride, 1026 int w, int h) { 1027 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, 1028 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1029} 1030 1031void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride, 1032 uint8_t *dst, ptrdiff_t dst_stride, 1033 const int16_t *filter_x, 1034 int filter_x_stride, 1035 const int16_t *filter_y, 1036 int filter_y_stride, 1037 int w, int h) { 1038 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, 1039 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1040} 1041 1042void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, 1043 uint8_t *dst, ptrdiff_t dst_stride, 1044 const int16_t *filter_x, 1045 int filter_x_stride, 1046 const int16_t *filter_y, 1047 int filter_y_stride, 1048 int w, int h) { 1049 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, 1050 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1051} 1052 1053void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, 1054 uint8_t *dst, ptrdiff_t dst_stride, 1055 const int16_t *filter_x, 1056 int filter_x_stride, 1057 const int16_t *filter_y, 1058 int filter_y_stride, 1059 int w, int h) { 1060 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, 1061 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1062} 1063 1064void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, 1065 uint8_t *dst, ptrdiff_t dst_stride, 1066 const int16_t *filter_x, 1067 int filter_x_stride, 1068 const int16_t 
*filter_y, 1069 int filter_y_stride, 1070 int w, int h) { 1071 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, 1072 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1073} 1074 1075void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, 1076 uint8_t *dst, ptrdiff_t dst_stride, 1077 const int16_t *filter_x, 1078 int filter_x_stride, 1079 const int16_t *filter_y, 1080 int filter_y_stride, 1081 int w, int h) { 1082 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, 1083 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1084} 1085 1086void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride, 1087 uint8_t *dst, ptrdiff_t dst_stride, 1088 const int16_t *filter_x, 1089 int filter_x_stride, 1090 const int16_t *filter_y, 1091 int filter_y_stride, 1092 int w, int h) { 1093 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, 1094 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1095} 1096 1097void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride, 1098 uint8_t *dst, ptrdiff_t dst_stride, 1099 const int16_t *filter_x, 1100 int filter_x_stride, 1101 const int16_t *filter_y, 1102 int filter_y_stride, 1103 int w, int h) { 1104 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, 1105 filter_x_stride, filter_y, filter_y_stride, 1106 w, h, 10); 1107} 1108 1109void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, 1110 uint8_t *dst, ptrdiff_t dst_stride, 1111 const int16_t *filter_x, 1112 int filter_x_stride, 1113 const int16_t *filter_y, 1114 int filter_y_stride, 1115 int w, int h) { 1116 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, 1117 filter_x_stride, filter_y, filter_y_stride, 1118 w, h, 12); 1119} 1120 1121void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, 1122 uint8_t *dst, ptrdiff_t dst_stride, 1123 const int16_t *filter_x, 1124 int 
filter_x_stride, 1125 const int16_t *filter_y, 1126 int filter_y_stride, 1127 int w, int h) { 1128 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, 1129 filter_x_stride, filter_y, filter_y_stride, 1130 w, h, 12); 1131} 1132 1133void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, 1134 uint8_t *dst, ptrdiff_t dst_stride, 1135 const int16_t *filter_x, 1136 int filter_x_stride, 1137 const int16_t *filter_y, 1138 int filter_y_stride, 1139 int w, int h) { 1140 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, 1141 filter_x_stride, filter_y, filter_y_stride, 1142 w, h, 12); 1143} 1144 1145void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, 1146 uint8_t *dst, ptrdiff_t dst_stride, 1147 const int16_t *filter_x, 1148 int filter_x_stride, 1149 const int16_t *filter_y, 1150 int filter_y_stride, 1151 int w, int h) { 1152 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, 1153 filter_x_stride, filter_y, filter_y_stride, w, h, 12); 1154} 1155 1156void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride, 1157 uint8_t *dst, ptrdiff_t dst_stride, 1158 const int16_t *filter_x, 1159 int filter_x_stride, 1160 const int16_t *filter_y, 1161 int filter_y_stride, 1162 int w, int h) { 1163 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, 1164 filter_x_stride, filter_y, filter_y_stride, w, h, 12); 1165} 1166 1167void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride, 1168 uint8_t *dst, ptrdiff_t dst_stride, 1169 const int16_t *filter_x, 1170 int filter_x_stride, 1171 const int16_t *filter_y, 1172 int filter_y_stride, 1173 int w, int h) { 1174 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, 1175 filter_x_stride, filter_y, filter_y_stride, w, h, 12); 1176} 1177#endif // HAVE_SSE2 && ARCH_X86_64 1178 1179void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, 1180 uint8_t *dst, 
ptrdiff_t dst_stride, 1181 const int16_t *filter_x, 1182 int filter_x_stride, 1183 const int16_t *filter_y, 1184 int filter_y_stride, 1185 int w, int h) { 1186 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, 1187 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1188} 1189 1190void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, 1191 uint8_t *dst, ptrdiff_t dst_stride, 1192 const int16_t *filter_x, 1193 int filter_x_stride, 1194 const int16_t *filter_y, 1195 int filter_y_stride, 1196 int w, int h) { 1197 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, 1198 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1199} 1200 1201void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, 1202 uint8_t *dst, ptrdiff_t dst_stride, 1203 const int16_t *filter_x, 1204 int filter_x_stride, 1205 const int16_t *filter_y, 1206 int filter_y_stride, 1207 int w, int h) { 1208 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, 1209 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1210} 1211 1212void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, 1213 uint8_t *dst, ptrdiff_t dst_stride, 1214 const int16_t *filter_x, 1215 int filter_x_stride, 1216 const int16_t *filter_y, 1217 int filter_y_stride, 1218 int w, int h) { 1219 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, 1220 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1221} 1222 1223void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride, 1224 uint8_t *dst, ptrdiff_t dst_stride, 1225 const int16_t *filter_x, 1226 int filter_x_stride, 1227 const int16_t *filter_y, 1228 int filter_y_stride, 1229 int w, int h) { 1230 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x, 1231 filter_x_stride, filter_y, filter_y_stride, w, h, 8); 1232} 1233 1234void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride, 1235 uint8_t *dst, ptrdiff_t dst_stride, 
1236 const int16_t *filter_x, 1237 int filter_x_stride, 1238 const int16_t *filter_y, 1239 int filter_y_stride, 1240 int w, int h) { 1241 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, 1242 filter_x_stride, filter_y, filter_y_stride, 1243 w, h, 8); 1244} 1245 1246void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, 1247 uint8_t *dst, ptrdiff_t dst_stride, 1248 const int16_t *filter_x, 1249 int filter_x_stride, 1250 const int16_t *filter_y, 1251 int filter_y_stride, 1252 int w, int h) { 1253 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, 1254 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1255} 1256 1257void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, 1258 uint8_t *dst, ptrdiff_t dst_stride, 1259 const int16_t *filter_x, 1260 int filter_x_stride, 1261 const int16_t *filter_y, 1262 int filter_y_stride, 1263 int w, int h) { 1264 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, 1265 filter_x_stride, filter_y, filter_y_stride, 1266 w, h, 10); 1267} 1268 1269void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride, 1270 uint8_t *dst, ptrdiff_t dst_stride, 1271 const int16_t *filter_x, 1272 int filter_x_stride, 1273 const int16_t *filter_y, 1274 int filter_y_stride, 1275 int w, int h) { 1276 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, 1277 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1278} 1279 1280void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride, 1281 uint8_t *dst, ptrdiff_t dst_stride, 1282 const int16_t *filter_x, 1283 int filter_x_stride, 1284 const int16_t *filter_y, 1285 int filter_y_stride, 1286 int w, int h) { 1287 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, 1288 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1289} 1290 1291void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride, 1292 uint8_t *dst, ptrdiff_t 
dst_stride, 1293 const int16_t *filter_x, 1294 int filter_x_stride, 1295 const int16_t *filter_y, 1296 int filter_y_stride, 1297 int w, int h) { 1298 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x, 1299 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1300} 1301 1302void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride, 1303 uint8_t *dst, ptrdiff_t dst_stride, 1304 const int16_t *filter_x, 1305 int filter_x_stride, 1306 const int16_t *filter_y, 1307 int filter_y_stride, 1308 int w, int h) { 1309 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, 1310 filter_x_stride, filter_y, filter_y_stride, w, h, 10); 1311} 1312 1313void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, 1314 uint8_t *dst, ptrdiff_t dst_stride, 1315 const int16_t *filter_x, 1316 int filter_x_stride, 1317 const int16_t *filter_y, 1318 int filter_y_stride, 1319 int w, int h) { 1320 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, 1321 filter_x_stride, filter_y, filter_y_stride, 1322 w, h, 12); 1323} 1324 1325void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, 1326 uint8_t *dst, ptrdiff_t dst_stride, 1327 const int16_t *filter_x, 1328 int filter_x_stride, 1329 const int16_t *filter_y, 1330 int filter_y_stride, 1331 int w, int h) { 1332 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, 1333 filter_x_stride, filter_y, filter_y_stride, 1334 w, h, 12); 1335} 1336 1337void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride, 1338 uint8_t *dst, ptrdiff_t dst_stride, 1339 const int16_t *filter_x, 1340 int filter_x_stride, 1341 const int16_t *filter_y, 1342 int filter_y_stride, 1343 int w, int h) { 1344 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, 1345 filter_x_stride, filter_y, filter_y_stride, 1346 w, h, 12); 1347} 1348 1349void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride, 1350 uint8_t *dst, 
ptrdiff_t dst_stride, 1351 const int16_t *filter_x, 1352 int filter_x_stride, 1353 const int16_t *filter_y, 1354 int filter_y_stride, 1355 int w, int h) { 1356 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, 1357 filter_x_stride, filter_y, filter_y_stride, 1358 w, h, 12); 1359} 1360 1361void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride, 1362 uint8_t *dst, ptrdiff_t dst_stride, 1363 const int16_t *filter_x, 1364 int filter_x_stride, 1365 const int16_t *filter_y, 1366 int filter_y_stride, 1367 int w, int h) { 1368 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x, 1369 filter_x_stride, filter_y, filter_y_stride, 1370 w, h, 12); 1371} 1372 1373void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride, 1374 uint8_t *dst, ptrdiff_t dst_stride, 1375 const int16_t *filter_x, 1376 int filter_x_stride, 1377 const int16_t *filter_y, 1378 int filter_y_stride, 1379 int w, int h) { 1380 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, 1381 filter_x_stride, filter_y, filter_y_stride, 1382 w, h, 12); 1383} 1384 1385const ConvolveFunctions convolve8_c( 1386 wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, 1387 wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, 1388 wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); 1389INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values( 1390 make_tuple(4, 4, &convolve8_c), 1391 make_tuple(8, 4, &convolve8_c), 1392 make_tuple(4, 8, &convolve8_c), 1393 make_tuple(8, 8, &convolve8_c), 1394 make_tuple(16, 8, &convolve8_c), 1395 make_tuple(8, 16, &convolve8_c), 1396 make_tuple(16, 16, &convolve8_c), 1397 make_tuple(32, 16, &convolve8_c), 1398 make_tuple(16, 32, &convolve8_c), 1399 make_tuple(32, 32, &convolve8_c), 1400 make_tuple(64, 32, &convolve8_c), 1401 make_tuple(32, 64, &convolve8_c), 1402 make_tuple(64, 64, &convolve8_c))); 1403const ConvolveFunctions convolve10_c( 1404 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, 1405 
wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, 1406 wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10); 1407INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values( 1408 make_tuple(4, 4, &convolve10_c), 1409 make_tuple(8, 4, &convolve10_c), 1410 make_tuple(4, 8, &convolve10_c), 1411 make_tuple(8, 8, &convolve10_c), 1412 make_tuple(16, 8, &convolve10_c), 1413 make_tuple(8, 16, &convolve10_c), 1414 make_tuple(16, 16, &convolve10_c), 1415 make_tuple(32, 16, &convolve10_c), 1416 make_tuple(16, 32, &convolve10_c), 1417 make_tuple(32, 32, &convolve10_c), 1418 make_tuple(64, 32, &convolve10_c), 1419 make_tuple(32, 64, &convolve10_c), 1420 make_tuple(64, 64, &convolve10_c))); 1421const ConvolveFunctions convolve12_c( 1422 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, 1423 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, 1424 wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); 1425INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values( 1426 make_tuple(4, 4, &convolve12_c), 1427 make_tuple(8, 4, &convolve12_c), 1428 make_tuple(4, 8, &convolve12_c), 1429 make_tuple(8, 8, &convolve12_c), 1430 make_tuple(16, 8, &convolve12_c), 1431 make_tuple(8, 16, &convolve12_c), 1432 make_tuple(16, 16, &convolve12_c), 1433 make_tuple(32, 16, &convolve12_c), 1434 make_tuple(16, 32, &convolve12_c), 1435 make_tuple(32, 32, &convolve12_c), 1436 make_tuple(64, 32, &convolve12_c), 1437 make_tuple(32, 64, &convolve12_c), 1438 make_tuple(64, 64, &convolve12_c))); 1439 1440#else 1441 1442const ConvolveFunctions convolve8_c( 1443 vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c, 1444 vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c, 1445 vp9_convolve8_c, vp9_convolve8_avg_c, 0); 1446 1447INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( 1448 make_tuple(4, 4, &convolve8_c), 1449 make_tuple(8, 4, &convolve8_c), 1450 make_tuple(4, 8, &convolve8_c), 1451 make_tuple(8, 8, &convolve8_c), 1452 make_tuple(16, 8, &convolve8_c), 1453 make_tuple(8, 16, 
&convolve8_c), 1454 make_tuple(16, 16, &convolve8_c), 1455 make_tuple(32, 16, &convolve8_c), 1456 make_tuple(16, 32, &convolve8_c), 1457 make_tuple(32, 32, &convolve8_c), 1458 make_tuple(64, 32, &convolve8_c), 1459 make_tuple(32, 64, &convolve8_c), 1460 make_tuple(64, 64, &convolve8_c))); 1461#endif 1462 1463#if HAVE_SSE2 && ARCH_X86_64 1464#if CONFIG_VP9_HIGHBITDEPTH 1465const ConvolveFunctions convolve8_sse2( 1466 wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, 1467 wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, 1468 wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); 1469INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values( 1470 make_tuple(4, 4, &convolve8_sse2), 1471 make_tuple(8, 4, &convolve8_sse2), 1472 make_tuple(4, 8, &convolve8_sse2), 1473 make_tuple(8, 8, &convolve8_sse2), 1474 make_tuple(16, 8, &convolve8_sse2), 1475 make_tuple(8, 16, &convolve8_sse2), 1476 make_tuple(16, 16, &convolve8_sse2), 1477 make_tuple(32, 16, &convolve8_sse2), 1478 make_tuple(16, 32, &convolve8_sse2), 1479 make_tuple(32, 32, &convolve8_sse2), 1480 make_tuple(64, 32, &convolve8_sse2), 1481 make_tuple(32, 64, &convolve8_sse2), 1482 make_tuple(64, 64, &convolve8_sse2))); 1483const ConvolveFunctions convolve10_sse2( 1484 wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, 1485 wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, 1486 wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); 1487INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values( 1488 make_tuple(4, 4, &convolve10_sse2), 1489 make_tuple(8, 4, &convolve10_sse2), 1490 make_tuple(4, 8, &convolve10_sse2), 1491 make_tuple(8, 8, &convolve10_sse2), 1492 make_tuple(16, 8, &convolve10_sse2), 1493 make_tuple(8, 16, &convolve10_sse2), 1494 make_tuple(16, 16, &convolve10_sse2), 1495 make_tuple(32, 16, &convolve10_sse2), 1496 make_tuple(16, 32, &convolve10_sse2), 1497 make_tuple(32, 32, &convolve10_sse2), 1498 make_tuple(64, 32, &convolve10_sse2), 1499 
make_tuple(32, 64, &convolve10_sse2), 1500 make_tuple(64, 64, &convolve10_sse2))); 1501const ConvolveFunctions convolve12_sse2( 1502 wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, 1503 wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, 1504 wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); 1505INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values( 1506 make_tuple(4, 4, &convolve12_sse2), 1507 make_tuple(8, 4, &convolve12_sse2), 1508 make_tuple(4, 8, &convolve12_sse2), 1509 make_tuple(8, 8, &convolve12_sse2), 1510 make_tuple(16, 8, &convolve12_sse2), 1511 make_tuple(8, 16, &convolve12_sse2), 1512 make_tuple(16, 16, &convolve12_sse2), 1513 make_tuple(32, 16, &convolve12_sse2), 1514 make_tuple(16, 32, &convolve12_sse2), 1515 make_tuple(32, 32, &convolve12_sse2), 1516 make_tuple(64, 32, &convolve12_sse2), 1517 make_tuple(32, 64, &convolve12_sse2), 1518 make_tuple(64, 64, &convolve12_sse2))); 1519#else 1520const ConvolveFunctions convolve8_sse2( 1521 vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2, 1522 vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2, 1523 vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0); 1524 1525INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values( 1526 make_tuple(4, 4, &convolve8_sse2), 1527 make_tuple(8, 4, &convolve8_sse2), 1528 make_tuple(4, 8, &convolve8_sse2), 1529 make_tuple(8, 8, &convolve8_sse2), 1530 make_tuple(16, 8, &convolve8_sse2), 1531 make_tuple(8, 16, &convolve8_sse2), 1532 make_tuple(16, 16, &convolve8_sse2), 1533 make_tuple(32, 16, &convolve8_sse2), 1534 make_tuple(16, 32, &convolve8_sse2), 1535 make_tuple(32, 32, &convolve8_sse2), 1536 make_tuple(64, 32, &convolve8_sse2), 1537 make_tuple(32, 64, &convolve8_sse2), 1538 make_tuple(64, 64, &convolve8_sse2))); 1539#endif // CONFIG_VP9_HIGHBITDEPTH 1540#endif 1541 1542#if HAVE_SSSE3 1543const ConvolveFunctions convolve8_ssse3( 1544 vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, 1545 vp9_convolve8_vert_ssse3, 
vp9_convolve8_avg_vert_ssse3, 1546 vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0); 1547 1548INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( 1549 make_tuple(4, 4, &convolve8_ssse3), 1550 make_tuple(8, 4, &convolve8_ssse3), 1551 make_tuple(4, 8, &convolve8_ssse3), 1552 make_tuple(8, 8, &convolve8_ssse3), 1553 make_tuple(16, 8, &convolve8_ssse3), 1554 make_tuple(8, 16, &convolve8_ssse3), 1555 make_tuple(16, 16, &convolve8_ssse3), 1556 make_tuple(32, 16, &convolve8_ssse3), 1557 make_tuple(16, 32, &convolve8_ssse3), 1558 make_tuple(32, 32, &convolve8_ssse3), 1559 make_tuple(64, 32, &convolve8_ssse3), 1560 make_tuple(32, 64, &convolve8_ssse3), 1561 make_tuple(64, 64, &convolve8_ssse3))); 1562#endif 1563 1564#if HAVE_AVX2 && HAVE_SSSE3 1565const ConvolveFunctions convolve8_avx2( 1566 vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, 1567 vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, 1568 vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0); 1569 1570INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( 1571 make_tuple(4, 4, &convolve8_avx2), 1572 make_tuple(8, 4, &convolve8_avx2), 1573 make_tuple(4, 8, &convolve8_avx2), 1574 make_tuple(8, 8, &convolve8_avx2), 1575 make_tuple(8, 16, &convolve8_avx2), 1576 make_tuple(16, 8, &convolve8_avx2), 1577 make_tuple(16, 16, &convolve8_avx2), 1578 make_tuple(32, 16, &convolve8_avx2), 1579 make_tuple(16, 32, &convolve8_avx2), 1580 make_tuple(32, 32, &convolve8_avx2), 1581 make_tuple(64, 32, &convolve8_avx2), 1582 make_tuple(32, 64, &convolve8_avx2), 1583 make_tuple(64, 64, &convolve8_avx2))); 1584#endif // HAVE_AVX2 && HAVE_SSSE3 1585 1586#if HAVE_NEON_ASM 1587const ConvolveFunctions convolve8_neon( 1588 vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, 1589 vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, 1590 vp9_convolve8_neon, vp9_convolve8_avg_neon, 0); 1591 1592INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( 1593 make_tuple(4, 4, &convolve8_neon), 1594 make_tuple(8, 
4, &convolve8_neon), 1595 make_tuple(4, 8, &convolve8_neon), 1596 make_tuple(8, 8, &convolve8_neon), 1597 make_tuple(16, 8, &convolve8_neon), 1598 make_tuple(8, 16, &convolve8_neon), 1599 make_tuple(16, 16, &convolve8_neon), 1600 make_tuple(32, 16, &convolve8_neon), 1601 make_tuple(16, 32, &convolve8_neon), 1602 make_tuple(32, 32, &convolve8_neon), 1603 make_tuple(64, 32, &convolve8_neon), 1604 make_tuple(32, 64, &convolve8_neon), 1605 make_tuple(64, 64, &convolve8_neon))); 1606#endif 1607 1608#if HAVE_DSPR2 1609const ConvolveFunctions convolve8_dspr2( 1610 vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2, 1611 vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2, 1612 vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0); 1613 1614INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values( 1615 make_tuple(4, 4, &convolve8_dspr2), 1616 make_tuple(8, 4, &convolve8_dspr2), 1617 make_tuple(4, 8, &convolve8_dspr2), 1618 make_tuple(8, 8, &convolve8_dspr2), 1619 make_tuple(16, 8, &convolve8_dspr2), 1620 make_tuple(8, 16, &convolve8_dspr2), 1621 make_tuple(16, 16, &convolve8_dspr2), 1622 make_tuple(32, 16, &convolve8_dspr2), 1623 make_tuple(16, 32, &convolve8_dspr2), 1624 make_tuple(32, 32, &convolve8_dspr2), 1625 make_tuple(64, 32, &convolve8_dspr2), 1626 make_tuple(32, 64, &convolve8_dspr2), 1627 make_tuple(64, 64, &convolve8_dspr2))); 1628#endif 1629} // namespace 1630