1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <string.h> 12 13#include "third_party/googletest/src/include/gtest/gtest.h" 14 15#include "./vpx_config.h" 16#include "./vp9_rtcd.h" 17#include "./vpx_dsp_rtcd.h" 18#include "test/acm_random.h" 19#include "test/clear_system_state.h" 20#include "test/register_state_check.h" 21#include "test/util.h" 22#include "vp9/common/vp9_common.h" 23#include "vp9/common/vp9_filter.h" 24#include "vpx_dsp/vpx_dsp_common.h" 25#include "vpx_dsp/vpx_filter.h" 26#include "vpx_mem/vpx_mem.h" 27#include "vpx_ports/mem.h" 28 29namespace { 30 31static const unsigned int kMaxDimension = 64; 32 33typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, 34 uint8_t *dst, ptrdiff_t dst_stride, 35 const int16_t *filter_x, int filter_x_stride, 36 const int16_t *filter_y, int filter_y_stride, 37 int w, int h); 38 39struct ConvolveFunctions { 40 ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, 41 ConvolveFunc h8, ConvolveFunc h8_avg, 42 ConvolveFunc v8, ConvolveFunc v8_avg, 43 ConvolveFunc hv8, ConvolveFunc hv8_avg, 44 ConvolveFunc sh8, ConvolveFunc sh8_avg, 45 ConvolveFunc sv8, ConvolveFunc sv8_avg, 46 ConvolveFunc shv8, ConvolveFunc shv8_avg, 47 int bd) 48 : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), 49 v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8), 50 sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg), 51 use_highbd_(bd) {} 52 53 ConvolveFunc copy_; 54 ConvolveFunc avg_; 55 ConvolveFunc h8_; 56 ConvolveFunc v8_; 57 ConvolveFunc hv8_; 58 ConvolveFunc h8_avg_; 59 ConvolveFunc v8_avg_; 60 ConvolveFunc 
hv8_avg_; 61 ConvolveFunc sh8_; // scaled horiz 62 ConvolveFunc sv8_; // scaled vert 63 ConvolveFunc shv8_; // scaled horiz/vert 64 ConvolveFunc sh8_avg_; // scaled avg horiz 65 ConvolveFunc sv8_avg_; // scaled avg vert 66 ConvolveFunc shv8_avg_; // scaled avg horiz/vert 67 int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth. 68}; 69 70typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; 71 72// Reference 8-tap subpixel filter, slightly modified to fit into this test. 73#define VP9_FILTER_WEIGHT 128 74#define VP9_FILTER_SHIFT 7 75uint8_t clip_pixel(int x) { 76 return x < 0 ? 0 : 77 x > 255 ? 255 : 78 x; 79} 80 81void filter_block2d_8_c(const uint8_t *src_ptr, 82 const unsigned int src_stride, 83 const int16_t *HFilter, 84 const int16_t *VFilter, 85 uint8_t *dst_ptr, 86 unsigned int dst_stride, 87 unsigned int output_width, 88 unsigned int output_height) { 89 // Between passes, we use an intermediate buffer whose height is extended to 90 // have enough horizontally filtered values as input for the vertical pass. 91 // This buffer is allocated to be big enough for the largest block type we 92 // support. 93 const int kInterp_Extend = 4; 94 const unsigned int intermediate_height = 95 (kInterp_Extend - 1) + output_height + kInterp_Extend; 96 unsigned int i, j; 97 98 // Size of intermediate_buffer is max_intermediate_height * filter_max_width, 99 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 100 // + kInterp_Extend 101 // = 3 + 16 + 4 102 // = 23 103 // and filter_max_width = 16 104 // 105 uint8_t intermediate_buffer[71 * kMaxDimension]; 106 const int intermediate_next_stride = 1 - intermediate_height * output_width; 107 108 // Horizontal pass (src -> transposed intermediate). 
109 uint8_t *output_ptr = intermediate_buffer; 110 const int src_next_row_stride = src_stride - output_width; 111 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 112 for (i = 0; i < intermediate_height; ++i) { 113 for (j = 0; j < output_width; ++j) { 114 // Apply filter... 115 const int temp = (src_ptr[0] * HFilter[0]) + 116 (src_ptr[1] * HFilter[1]) + 117 (src_ptr[2] * HFilter[2]) + 118 (src_ptr[3] * HFilter[3]) + 119 (src_ptr[4] * HFilter[4]) + 120 (src_ptr[5] * HFilter[5]) + 121 (src_ptr[6] * HFilter[6]) + 122 (src_ptr[7] * HFilter[7]) + 123 (VP9_FILTER_WEIGHT >> 1); // Rounding 124 125 // Normalize back to 0-255... 126 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); 127 ++src_ptr; 128 output_ptr += intermediate_height; 129 } 130 src_ptr += src_next_row_stride; 131 output_ptr += intermediate_next_stride; 132 } 133 134 // Vertical pass (transposed intermediate -> dst). 135 src_ptr = intermediate_buffer; 136 const int dst_next_row_stride = dst_stride - output_width; 137 for (i = 0; i < output_height; ++i) { 138 for (j = 0; j < output_width; ++j) { 139 // Apply filter... 140 const int temp = (src_ptr[0] * VFilter[0]) + 141 (src_ptr[1] * VFilter[1]) + 142 (src_ptr[2] * VFilter[2]) + 143 (src_ptr[3] * VFilter[3]) + 144 (src_ptr[4] * VFilter[4]) + 145 (src_ptr[5] * VFilter[5]) + 146 (src_ptr[6] * VFilter[6]) + 147 (src_ptr[7] * VFilter[7]) + 148 (VP9_FILTER_WEIGHT >> 1); // Rounding 149 150 // Normalize back to 0-255... 
151 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); 152 src_ptr += intermediate_height; 153 } 154 src_ptr += intermediate_next_stride; 155 dst_ptr += dst_next_row_stride; 156 } 157} 158 159void block2d_average_c(uint8_t *src, 160 unsigned int src_stride, 161 uint8_t *output_ptr, 162 unsigned int output_stride, 163 unsigned int output_width, 164 unsigned int output_height) { 165 unsigned int i, j; 166 for (i = 0; i < output_height; ++i) { 167 for (j = 0; j < output_width; ++j) { 168 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; 169 } 170 output_ptr += output_stride; 171 } 172} 173 174void filter_average_block2d_8_c(const uint8_t *src_ptr, 175 const unsigned int src_stride, 176 const int16_t *HFilter, 177 const int16_t *VFilter, 178 uint8_t *dst_ptr, 179 unsigned int dst_stride, 180 unsigned int output_width, 181 unsigned int output_height) { 182 uint8_t tmp[kMaxDimension * kMaxDimension]; 183 184 assert(output_width <= kMaxDimension); 185 assert(output_height <= kMaxDimension); 186 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, 187 output_width, output_height); 188 block2d_average_c(tmp, 64, dst_ptr, dst_stride, 189 output_width, output_height); 190} 191 192#if CONFIG_VP9_HIGHBITDEPTH 193void highbd_filter_block2d_8_c(const uint16_t *src_ptr, 194 const unsigned int src_stride, 195 const int16_t *HFilter, 196 const int16_t *VFilter, 197 uint16_t *dst_ptr, 198 unsigned int dst_stride, 199 unsigned int output_width, 200 unsigned int output_height, 201 int bd) { 202 // Between passes, we use an intermediate buffer whose height is extended to 203 // have enough horizontally filtered values as input for the vertical pass. 204 // This buffer is allocated to be big enough for the largest block type we 205 // support. 
206 const int kInterp_Extend = 4; 207 const unsigned int intermediate_height = 208 (kInterp_Extend - 1) + output_height + kInterp_Extend; 209 210 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, 211 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 212 * + kInterp_Extend 213 * = 3 + 16 + 4 214 * = 23 215 * and filter_max_width = 16 216 */ 217 uint16_t intermediate_buffer[71 * kMaxDimension]; 218 const int intermediate_next_stride = 1 - intermediate_height * output_width; 219 220 // Horizontal pass (src -> transposed intermediate). 221 { 222 uint16_t *output_ptr = intermediate_buffer; 223 const int src_next_row_stride = src_stride - output_width; 224 unsigned int i, j; 225 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 226 for (i = 0; i < intermediate_height; ++i) { 227 for (j = 0; j < output_width; ++j) { 228 // Apply filter... 229 const int temp = (src_ptr[0] * HFilter[0]) + 230 (src_ptr[1] * HFilter[1]) + 231 (src_ptr[2] * HFilter[2]) + 232 (src_ptr[3] * HFilter[3]) + 233 (src_ptr[4] * HFilter[4]) + 234 (src_ptr[5] * HFilter[5]) + 235 (src_ptr[6] * HFilter[6]) + 236 (src_ptr[7] * HFilter[7]) + 237 (VP9_FILTER_WEIGHT >> 1); // Rounding 238 239 // Normalize back to 0-255... 240 *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd); 241 ++src_ptr; 242 output_ptr += intermediate_height; 243 } 244 src_ptr += src_next_row_stride; 245 output_ptr += intermediate_next_stride; 246 } 247 } 248 249 // Vertical pass (transposed intermediate -> dst). 250 { 251 uint16_t *src_ptr = intermediate_buffer; 252 const int dst_next_row_stride = dst_stride - output_width; 253 unsigned int i, j; 254 for (i = 0; i < output_height; ++i) { 255 for (j = 0; j < output_width; ++j) { 256 // Apply filter... 
257 const int temp = (src_ptr[0] * VFilter[0]) + 258 (src_ptr[1] * VFilter[1]) + 259 (src_ptr[2] * VFilter[2]) + 260 (src_ptr[3] * VFilter[3]) + 261 (src_ptr[4] * VFilter[4]) + 262 (src_ptr[5] * VFilter[5]) + 263 (src_ptr[6] * VFilter[6]) + 264 (src_ptr[7] * VFilter[7]) + 265 (VP9_FILTER_WEIGHT >> 1); // Rounding 266 267 // Normalize back to 0-255... 268 *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd); 269 src_ptr += intermediate_height; 270 } 271 src_ptr += intermediate_next_stride; 272 dst_ptr += dst_next_row_stride; 273 } 274 } 275} 276 277void highbd_block2d_average_c(uint16_t *src, 278 unsigned int src_stride, 279 uint16_t *output_ptr, 280 unsigned int output_stride, 281 unsigned int output_width, 282 unsigned int output_height, 283 int bd) { 284 unsigned int i, j; 285 for (i = 0; i < output_height; ++i) { 286 for (j = 0; j < output_width; ++j) { 287 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; 288 } 289 output_ptr += output_stride; 290 } 291} 292 293void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr, 294 const unsigned int src_stride, 295 const int16_t *HFilter, 296 const int16_t *VFilter, 297 uint16_t *dst_ptr, 298 unsigned int dst_stride, 299 unsigned int output_width, 300 unsigned int output_height, 301 int bd) { 302 uint16_t tmp[kMaxDimension * kMaxDimension]; 303 304 assert(output_width <= kMaxDimension); 305 assert(output_height <= kMaxDimension); 306 highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, 307 output_width, output_height, bd); 308 highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, 309 output_width, output_height, bd); 310} 311#endif // CONFIG_VP9_HIGHBITDEPTH 312 313class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { 314 public: 315 static void SetUpTestCase() { 316 // Force input_ to be unaligned, output to be 16 byte aligned. 
317 input_ = reinterpret_cast<uint8_t*>( 318 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1; 319 output_ = reinterpret_cast<uint8_t*>( 320 vpx_memalign(kDataAlignment, kOutputBufferSize)); 321 output_ref_ = reinterpret_cast<uint8_t*>( 322 vpx_memalign(kDataAlignment, kOutputBufferSize)); 323#if CONFIG_VP9_HIGHBITDEPTH 324 input16_ = reinterpret_cast<uint16_t*>( 325 vpx_memalign(kDataAlignment, 326 (kInputBufferSize + 1) * sizeof(uint16_t))) + 1; 327 output16_ = reinterpret_cast<uint16_t*>( 328 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); 329 output16_ref_ = reinterpret_cast<uint16_t*>( 330 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); 331#endif 332 } 333 334 virtual void TearDown() { libvpx_test::ClearSystemState(); } 335 336 static void TearDownTestCase() { 337 vpx_free(input_ - 1); 338 input_ = NULL; 339 vpx_free(output_); 340 output_ = NULL; 341 vpx_free(output_ref_); 342 output_ref_ = NULL; 343#if CONFIG_VP9_HIGHBITDEPTH 344 vpx_free(input16_ - 1); 345 input16_ = NULL; 346 vpx_free(output16_); 347 output16_ = NULL; 348 vpx_free(output16_ref_); 349 output16_ref_ = NULL; 350#endif 351 } 352 353 protected: 354 static const int kDataAlignment = 16; 355 static const int kOuterBlockSize = 256; 356 static const int kInputStride = kOuterBlockSize; 357 static const int kOutputStride = kOuterBlockSize; 358 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; 359 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; 360 361 int Width() const { return GET_PARAM(0); } 362 int Height() const { return GET_PARAM(1); } 363 int BorderLeft() const { 364 const int center = (kOuterBlockSize - Width()) / 2; 365 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); 366 } 367 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } 368 369 bool IsIndexInBorder(int i) { 370 return (i < BorderTop() * kOuterBlockSize || 371 i >= (BorderTop() + Height()) * kOuterBlockSize || 
372 i % kOuterBlockSize < BorderLeft() || 373 i % kOuterBlockSize >= (BorderLeft() + Width())); 374 } 375 376 virtual void SetUp() { 377 UUT_ = GET_PARAM(2); 378#if CONFIG_VP9_HIGHBITDEPTH 379 if (UUT_->use_highbd_ != 0) 380 mask_ = (1 << UUT_->use_highbd_) - 1; 381 else 382 mask_ = 255; 383#endif 384 /* Set up guard blocks for an inner block centered in the outer block */ 385 for (int i = 0; i < kOutputBufferSize; ++i) { 386 if (IsIndexInBorder(i)) 387 output_[i] = 255; 388 else 389 output_[i] = 0; 390 } 391 392 ::libvpx_test::ACMRandom prng; 393 for (int i = 0; i < kInputBufferSize; ++i) { 394 if (i & 1) { 395 input_[i] = 255; 396#if CONFIG_VP9_HIGHBITDEPTH 397 input16_[i] = mask_; 398#endif 399 } else { 400 input_[i] = prng.Rand8Extremes(); 401#if CONFIG_VP9_HIGHBITDEPTH 402 input16_[i] = prng.Rand16() & mask_; 403#endif 404 } 405 } 406 } 407 408 void SetConstantInput(int value) { 409 memset(input_, value, kInputBufferSize); 410#if CONFIG_VP9_HIGHBITDEPTH 411 vpx_memset16(input16_, value, kInputBufferSize); 412#endif 413 } 414 415 void CopyOutputToRef() { 416 memcpy(output_ref_, output_, kOutputBufferSize); 417#if CONFIG_VP9_HIGHBITDEPTH 418 memcpy(output16_ref_, output16_, kOutputBufferSize); 419#endif 420 } 421 422 void CheckGuardBlocks() { 423 for (int i = 0; i < kOutputBufferSize; ++i) { 424 if (IsIndexInBorder(i)) 425 EXPECT_EQ(255, output_[i]); 426 } 427 } 428 429 uint8_t *input() const { 430#if CONFIG_VP9_HIGHBITDEPTH 431 if (UUT_->use_highbd_ == 0) { 432 return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); 433 } else { 434 return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize + 435 BorderLeft()); 436 } 437#else 438 return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); 439#endif 440 } 441 442 uint8_t *output() const { 443#if CONFIG_VP9_HIGHBITDEPTH 444 if (UUT_->use_highbd_ == 0) { 445 return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); 446 } else { 447 return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * 
kOuterBlockSize + 448 BorderLeft()); 449 } 450#else 451 return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); 452#endif 453 } 454 455 uint8_t *output_ref() const { 456#if CONFIG_VP9_HIGHBITDEPTH 457 if (UUT_->use_highbd_ == 0) { 458 return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft(); 459 } else { 460 return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize + 461 BorderLeft()); 462 } 463#else 464 return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft(); 465#endif 466 } 467 468 uint16_t lookup(uint8_t *list, int index) const { 469#if CONFIG_VP9_HIGHBITDEPTH 470 if (UUT_->use_highbd_ == 0) { 471 return list[index]; 472 } else { 473 return CONVERT_TO_SHORTPTR(list)[index]; 474 } 475#else 476 return list[index]; 477#endif 478 } 479 480 void assign_val(uint8_t *list, int index, uint16_t val) const { 481#if CONFIG_VP9_HIGHBITDEPTH 482 if (UUT_->use_highbd_ == 0) { 483 list[index] = (uint8_t) val; 484 } else { 485 CONVERT_TO_SHORTPTR(list)[index] = val; 486 } 487#else 488 list[index] = (uint8_t) val; 489#endif 490 } 491 492 void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr, 493 const unsigned int src_stride, 494 const int16_t *HFilter, 495 const int16_t *VFilter, 496 uint8_t *dst_ptr, 497 unsigned int dst_stride, 498 unsigned int output_width, 499 unsigned int output_height) { 500#if CONFIG_VP9_HIGHBITDEPTH 501 if (UUT_->use_highbd_ == 0) { 502 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, 503 dst_ptr, dst_stride, output_width, 504 output_height); 505 } else { 506 highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), 507 src_stride, HFilter, VFilter, 508 CONVERT_TO_SHORTPTR(dst_ptr), 509 dst_stride, output_width, output_height, 510 UUT_->use_highbd_); 511 } 512#else 513 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, 514 dst_ptr, dst_stride, output_width, 515 output_height); 516#endif 517 } 518 519 void wrapper_filter_block2d_8_c(const uint8_t *src_ptr, 520 const 
unsigned int src_stride, 521 const int16_t *HFilter, 522 const int16_t *VFilter, 523 uint8_t *dst_ptr, 524 unsigned int dst_stride, 525 unsigned int output_width, 526 unsigned int output_height) { 527#if CONFIG_VP9_HIGHBITDEPTH 528 if (UUT_->use_highbd_ == 0) { 529 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, 530 dst_ptr, dst_stride, output_width, output_height); 531 } else { 532 highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, 533 HFilter, VFilter, 534 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, 535 output_width, output_height, UUT_->use_highbd_); 536 } 537#else 538 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, 539 dst_ptr, dst_stride, output_width, output_height); 540#endif 541 } 542 543 const ConvolveFunctions* UUT_; 544 static uint8_t* input_; 545 static uint8_t* output_; 546 static uint8_t* output_ref_; 547#if CONFIG_VP9_HIGHBITDEPTH 548 static uint16_t* input16_; 549 static uint16_t* output16_; 550 static uint16_t* output16_ref_; 551 int mask_; 552#endif 553}; 554 555uint8_t* ConvolveTest::input_ = NULL; 556uint8_t* ConvolveTest::output_ = NULL; 557uint8_t* ConvolveTest::output_ref_ = NULL; 558#if CONFIG_VP9_HIGHBITDEPTH 559uint16_t* ConvolveTest::input16_ = NULL; 560uint16_t* ConvolveTest::output16_ = NULL; 561uint16_t* ConvolveTest::output16_ref_ = NULL; 562#endif 563 564TEST_P(ConvolveTest, GuardBlocks) { 565 CheckGuardBlocks(); 566} 567 568TEST_P(ConvolveTest, Copy) { 569 uint8_t* const in = input(); 570 uint8_t* const out = output(); 571 572 ASM_REGISTER_STATE_CHECK( 573 UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, 574 Width(), Height())); 575 576 CheckGuardBlocks(); 577 578 for (int y = 0; y < Height(); ++y) 579 for (int x = 0; x < Width(); ++x) 580 ASSERT_EQ(lookup(out, y * kOutputStride + x), 581 lookup(in, y * kInputStride + x)) 582 << "(" << x << "," << y << ")"; 583} 584 585TEST_P(ConvolveTest, Avg) { 586 uint8_t* const in = input(); 587 uint8_t* const out = output(); 588 uint8_t* 
const out_ref = output_ref(); 589 CopyOutputToRef(); 590 591 ASM_REGISTER_STATE_CHECK( 592 UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, 593 Width(), Height())); 594 595 CheckGuardBlocks(); 596 597 for (int y = 0; y < Height(); ++y) 598 for (int x = 0; x < Width(); ++x) 599 ASSERT_EQ(lookup(out, y * kOutputStride + x), 600 ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) + 601 lookup(out_ref, y * kOutputStride + x), 1)) 602 << "(" << x << "," << y << ")"; 603} 604 605TEST_P(ConvolveTest, CopyHoriz) { 606 uint8_t* const in = input(); 607 uint8_t* const out = output(); 608 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; 609 610 ASM_REGISTER_STATE_CHECK( 611 UUT_->sh8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, 612 Width(), Height())); 613 614 CheckGuardBlocks(); 615 616 for (int y = 0; y < Height(); ++y) 617 for (int x = 0; x < Width(); ++x) 618 ASSERT_EQ(lookup(out, y * kOutputStride + x), 619 lookup(in, y * kInputStride + x)) 620 << "(" << x << "," << y << ")"; 621} 622 623TEST_P(ConvolveTest, CopyVert) { 624 uint8_t* const in = input(); 625 uint8_t* const out = output(); 626 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; 627 628 ASM_REGISTER_STATE_CHECK( 629 UUT_->sv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, 630 Width(), Height())); 631 632 CheckGuardBlocks(); 633 634 for (int y = 0; y < Height(); ++y) 635 for (int x = 0; x < Width(); ++x) 636 ASSERT_EQ(lookup(out, y * kOutputStride + x), 637 lookup(in, y * kInputStride + x)) 638 << "(" << x << "," << y << ")"; 639} 640 641TEST_P(ConvolveTest, Copy2D) { 642 uint8_t* const in = input(); 643 uint8_t* const out = output(); 644 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; 645 646 ASM_REGISTER_STATE_CHECK( 647 UUT_->shv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 648 16, Width(), Height())); 649 650 CheckGuardBlocks(); 651 652 for (int y = 
0; y < Height(); ++y) 653 for (int x = 0; x < Width(); ++x) 654 ASSERT_EQ(lookup(out, y * kOutputStride + x), 655 lookup(in, y * kInputStride + x)) 656 << "(" << x << "," << y << ")"; 657} 658 659const int kNumFilterBanks = 4; 660const int kNumFilters = 16; 661 662TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { 663 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 664 const InterpKernel *filters = 665 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 666 for (int i = 0; i < kNumFilters; i++) { 667 const int p0 = filters[i][0] + filters[i][1]; 668 const int p1 = filters[i][2] + filters[i][3]; 669 const int p2 = filters[i][4] + filters[i][5]; 670 const int p3 = filters[i][6] + filters[i][7]; 671 EXPECT_LE(p0, 128); 672 EXPECT_LE(p1, 128); 673 EXPECT_LE(p2, 128); 674 EXPECT_LE(p3, 128); 675 EXPECT_LE(p0 + p3, 128); 676 EXPECT_LE(p0 + p3 + p1, 128); 677 EXPECT_LE(p0 + p3 + p1 + p2, 128); 678 EXPECT_EQ(p0 + p1 + p2 + p3, 128); 679 } 680 } 681} 682 683const int16_t kInvalidFilter[8] = { 0 }; 684 685TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { 686 uint8_t* const in = input(); 687 uint8_t* const out = output(); 688#if CONFIG_VP9_HIGHBITDEPTH 689 uint8_t ref8[kOutputStride * kMaxDimension]; 690 uint16_t ref16[kOutputStride * kMaxDimension]; 691 uint8_t* ref; 692 if (UUT_->use_highbd_ == 0) { 693 ref = ref8; 694 } else { 695 ref = CONVERT_TO_BYTEPTR(ref16); 696 } 697#else 698 uint8_t ref[kOutputStride * kMaxDimension]; 699#endif 700 701 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 702 const InterpKernel *filters = 703 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 704 705 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 706 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 707 wrapper_filter_block2d_8_c(in, kInputStride, 708 filters[filter_x], filters[filter_y], 709 ref, kOutputStride, 710 Width(), Height()); 711 712 if (filter_x && filter_y) 713 
ASM_REGISTER_STATE_CHECK( 714 UUT_->hv8_(in, kInputStride, out, kOutputStride, 715 filters[filter_x], 16, filters[filter_y], 16, 716 Width(), Height())); 717 else if (filter_y) 718 ASM_REGISTER_STATE_CHECK( 719 UUT_->v8_(in, kInputStride, out, kOutputStride, 720 kInvalidFilter, 16, filters[filter_y], 16, 721 Width(), Height())); 722 else if (filter_x) 723 ASM_REGISTER_STATE_CHECK( 724 UUT_->h8_(in, kInputStride, out, kOutputStride, 725 filters[filter_x], 16, kInvalidFilter, 16, 726 Width(), Height())); 727 else 728 ASM_REGISTER_STATE_CHECK( 729 UUT_->copy_(in, kInputStride, out, kOutputStride, 730 kInvalidFilter, 0, kInvalidFilter, 0, 731 Width(), Height())); 732 733 CheckGuardBlocks(); 734 735 for (int y = 0; y < Height(); ++y) 736 for (int x = 0; x < Width(); ++x) 737 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 738 lookup(out, y * kOutputStride + x)) 739 << "mismatch at (" << x << "," << y << "), " 740 << "filters (" << filter_bank << "," 741 << filter_x << "," << filter_y << ")"; 742 } 743 } 744 } 745} 746 747TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) { 748 uint8_t* const in = input(); 749 uint8_t* const out = output(); 750#if CONFIG_VP9_HIGHBITDEPTH 751 uint8_t ref8[kOutputStride * kMaxDimension]; 752 uint16_t ref16[kOutputStride * kMaxDimension]; 753 uint8_t* ref; 754 if (UUT_->use_highbd_ == 0) { 755 ref = ref8; 756 } else { 757 ref = CONVERT_TO_BYTEPTR(ref16); 758 } 759#else 760 uint8_t ref[kOutputStride * kMaxDimension]; 761#endif 762 763 // Populate ref and out with some random data 764 ::libvpx_test::ACMRandom prng; 765 for (int y = 0; y < Height(); ++y) { 766 for (int x = 0; x < Width(); ++x) { 767 uint16_t r; 768#if CONFIG_VP9_HIGHBITDEPTH 769 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { 770 r = prng.Rand8Extremes(); 771 } else { 772 r = prng.Rand16() & mask_; 773 } 774#else 775 r = prng.Rand8Extremes(); 776#endif 777 778 assign_val(out, y * kOutputStride + x, r); 779 assign_val(ref, y * kOutputStride + x, r); 780 } 781 } 
782 783 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 784 const InterpKernel *filters = 785 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 786 787 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 788 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 789 wrapper_filter_average_block2d_8_c(in, kInputStride, 790 filters[filter_x], filters[filter_y], 791 ref, kOutputStride, 792 Width(), Height()); 793 794 if (filter_x && filter_y) 795 ASM_REGISTER_STATE_CHECK( 796 UUT_->hv8_avg_(in, kInputStride, out, kOutputStride, 797 filters[filter_x], 16, filters[filter_y], 16, 798 Width(), Height())); 799 else if (filter_y) 800 ASM_REGISTER_STATE_CHECK( 801 UUT_->v8_avg_(in, kInputStride, out, kOutputStride, 802 kInvalidFilter, 16, filters[filter_y], 16, 803 Width(), Height())); 804 else if (filter_x) 805 ASM_REGISTER_STATE_CHECK( 806 UUT_->h8_avg_(in, kInputStride, out, kOutputStride, 807 filters[filter_x], 16, kInvalidFilter, 16, 808 Width(), Height())); 809 else 810 ASM_REGISTER_STATE_CHECK( 811 UUT_->avg_(in, kInputStride, out, kOutputStride, 812 kInvalidFilter, 0, kInvalidFilter, 0, 813 Width(), Height())); 814 815 CheckGuardBlocks(); 816 817 for (int y = 0; y < Height(); ++y) 818 for (int x = 0; x < Width(); ++x) 819 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 820 lookup(out, y * kOutputStride + x)) 821 << "mismatch at (" << x << "," << y << "), " 822 << "filters (" << filter_bank << "," 823 << filter_x << "," << filter_y << ")"; 824 } 825 } 826 } 827} 828 829TEST_P(ConvolveTest, FilterExtremes) { 830 uint8_t *const in = input(); 831 uint8_t *const out = output(); 832#if CONFIG_VP9_HIGHBITDEPTH 833 uint8_t ref8[kOutputStride * kMaxDimension]; 834 uint16_t ref16[kOutputStride * kMaxDimension]; 835 uint8_t *ref; 836 if (UUT_->use_highbd_ == 0) { 837 ref = ref8; 838 } else { 839 ref = CONVERT_TO_BYTEPTR(ref16); 840 } 841#else 842 uint8_t ref[kOutputStride * kMaxDimension]; 843#endif 844 845 // Populate ref and 
out with some random data 846 ::libvpx_test::ACMRandom prng; 847 for (int y = 0; y < Height(); ++y) { 848 for (int x = 0; x < Width(); ++x) { 849 uint16_t r; 850#if CONFIG_VP9_HIGHBITDEPTH 851 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { 852 r = prng.Rand8Extremes(); 853 } else { 854 r = prng.Rand16() & mask_; 855 } 856#else 857 r = prng.Rand8Extremes(); 858#endif 859 assign_val(out, y * kOutputStride + x, r); 860 assign_val(ref, y * kOutputStride + x, r); 861 } 862 } 863 864 for (int axis = 0; axis < 2; axis++) { 865 int seed_val = 0; 866 while (seed_val < 256) { 867 for (int y = 0; y < 8; ++y) { 868 for (int x = 0; x < 8; ++x) { 869#if CONFIG_VP9_HIGHBITDEPTH 870 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, 871 ((seed_val >> (axis ? y : x)) & 1) * mask_); 872#else 873 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, 874 ((seed_val >> (axis ? y : x)) & 1) * 255); 875#endif 876 if (axis) seed_val++; 877 } 878 if (axis) 879 seed_val-= 8; 880 else 881 seed_val++; 882 } 883 if (axis) seed_val += 8; 884 885 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 886 const InterpKernel *filters = 887 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 888 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 889 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 890 wrapper_filter_block2d_8_c(in, kInputStride, 891 filters[filter_x], filters[filter_y], 892 ref, kOutputStride, 893 Width(), Height()); 894 if (filter_x && filter_y) 895 ASM_REGISTER_STATE_CHECK( 896 UUT_->hv8_(in, kInputStride, out, kOutputStride, 897 filters[filter_x], 16, filters[filter_y], 16, 898 Width(), Height())); 899 else if (filter_y) 900 ASM_REGISTER_STATE_CHECK( 901 UUT_->v8_(in, kInputStride, out, kOutputStride, 902 kInvalidFilter, 16, filters[filter_y], 16, 903 Width(), Height())); 904 else if (filter_x) 905 ASM_REGISTER_STATE_CHECK( 906 UUT_->h8_(in, kInputStride, out, kOutputStride, 907 filters[filter_x], 16, 
kInvalidFilter, 16, 908 Width(), Height())); 909 else 910 ASM_REGISTER_STATE_CHECK( 911 UUT_->copy_(in, kInputStride, out, kOutputStride, 912 kInvalidFilter, 0, kInvalidFilter, 0, 913 Width(), Height())); 914 915 for (int y = 0; y < Height(); ++y) 916 for (int x = 0; x < Width(); ++x) 917 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 918 lookup(out, y * kOutputStride + x)) 919 << "mismatch at (" << x << "," << y << "), " 920 << "filters (" << filter_bank << "," 921 << filter_x << "," << filter_y << ")"; 922 } 923 } 924 } 925 } 926 } 927} 928 929/* This test exercises that enough rows and columns are filtered with every 930 possible initial fractional positions and scaling steps. */ 931TEST_P(ConvolveTest, CheckScalingFiltering) { 932 uint8_t* const in = input(); 933 uint8_t* const out = output(); 934 const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP]; 935 936 SetConstantInput(127); 937 938 for (int frac = 0; frac < 16; ++frac) { 939 for (int step = 1; step <= 32; ++step) { 940 /* Test the horizontal and vertical filters in combination. 
*/ 941 ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride, 942 eighttap[frac], step, 943 eighttap[frac], step, 944 Width(), Height())); 945 946 CheckGuardBlocks(); 947 948 for (int y = 0; y < Height(); ++y) { 949 for (int x = 0; x < Width(); ++x) { 950 ASSERT_EQ(lookup(in, y * kInputStride + x), 951 lookup(out, y * kOutputStride + x)) 952 << "x == " << x << ", y == " << y 953 << ", frac == " << frac << ", step == " << step; 954 } 955 } 956 } 957 } 958} 959 960using std::tr1::make_tuple; 961 962#if CONFIG_VP9_HIGHBITDEPTH 963#define WRAP(func, bd) \ 964void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \ 965 uint8_t *dst, ptrdiff_t dst_stride, \ 966 const int16_t *filter_x, \ 967 int filter_x_stride, \ 968 const int16_t *filter_y, \ 969 int filter_y_stride, \ 970 int w, int h) { \ 971 vpx_highbd_ ## func(src, src_stride, dst, dst_stride, filter_x, \ 972 filter_x_stride, filter_y, filter_y_stride, \ 973 w, h, bd); \ 974} 975#if HAVE_SSE2 && ARCH_X86_64 976#if CONFIG_USE_X86INC 977WRAP(convolve_copy_sse2, 8) 978WRAP(convolve_avg_sse2, 8) 979WRAP(convolve_copy_sse2, 10) 980WRAP(convolve_avg_sse2, 10) 981WRAP(convolve_copy_sse2, 12) 982WRAP(convolve_avg_sse2, 12) 983#endif // CONFIG_USE_X86INC 984WRAP(convolve8_horiz_sse2, 8) 985WRAP(convolve8_avg_horiz_sse2, 8) 986WRAP(convolve8_vert_sse2, 8) 987WRAP(convolve8_avg_vert_sse2, 8) 988WRAP(convolve8_sse2, 8) 989WRAP(convolve8_avg_sse2, 8) 990WRAP(convolve8_horiz_sse2, 10) 991WRAP(convolve8_avg_horiz_sse2, 10) 992WRAP(convolve8_vert_sse2, 10) 993WRAP(convolve8_avg_vert_sse2, 10) 994WRAP(convolve8_sse2, 10) 995WRAP(convolve8_avg_sse2, 10) 996WRAP(convolve8_horiz_sse2, 12) 997WRAP(convolve8_avg_horiz_sse2, 12) 998WRAP(convolve8_vert_sse2, 12) 999WRAP(convolve8_avg_vert_sse2, 12) 1000WRAP(convolve8_sse2, 12) 1001WRAP(convolve8_avg_sse2, 12) 1002#endif // HAVE_SSE2 && ARCH_X86_64 1003 1004WRAP(convolve_copy_c, 8) 1005WRAP(convolve_avg_c, 8) 1006WRAP(convolve8_horiz_c, 8) 
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
#undef WRAP

// High-bitdepth builds have no dedicated scaled-convolve implementations,
// so the scaled (sh8/sv8/shv8) slots reuse the unscaled wrappers below.
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
    make_tuple(8, 4, &convolve8_c),
    make_tuple(4, 8, &convolve8_c),
    make_tuple(8, 8, &convolve8_c),
    make_tuple(16, 8, &convolve8_c),
    make_tuple(8, 16, &convolve8_c),
    make_tuple(16, 16, &convolve8_c),
    make_tuple(32, 16, &convolve8_c),
    make_tuple(16, 32, &convolve8_c),
    make_tuple(32, 32, &convolve8_c),
    make_tuple(64, 32, &convolve8_c),
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve10_c),
    make_tuple(8, 4, &convolve10_c),
    make_tuple(4, 8, &convolve10_c),
    make_tuple(8, 8, &convolve10_c),
    make_tuple(16, 8, &convolve10_c),
    make_tuple(8, 16, &convolve10_c),
    make_tuple(16, 16, &convolve10_c),
    make_tuple(32, 16, &convolve10_c),
    make_tuple(16, 32, &convolve10_c),
    make_tuple(32, 32, &convolve10_c),
    make_tuple(64, 32, &convolve10_c),
    make_tuple(32, 64, &convolve10_c),
    make_tuple(64, 64, &convolve10_c)));
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve12_c),
    make_tuple(8, 4, &convolve12_c),
    make_tuple(4, 8, &convolve12_c),
    make_tuple(8, 8, &convolve12_c),
    make_tuple(16, 8, &convolve12_c),
    make_tuple(8, 16, &convolve12_c),
    make_tuple(16, 16, &convolve12_c),
    make_tuple(32, 16, &convolve12_c),
    make_tuple(16, 32, &convolve12_c),
    make_tuple(32, 32, &convolve12_c),
    make_tuple(64, 32, &convolve12_c),
    make_tuple(32, 64, &convolve12_c),
    make_tuple(64, 64, &convolve12_c)));

#else // !CONFIG_VP9_HIGHBITDEPTH

// 8-bit-only build: use the vpx_dsp entry points directly, including the
// dedicated scaled-convolve variants.
const ConvolveFunctions convolve8_c(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
    vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
    vpx_convolve8_c, vpx_convolve8_avg_c,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
    make_tuple(8, 4, &convolve8_c),
    make_tuple(4, 8, &convolve8_c),
    make_tuple(8, 8, &convolve8_c),
    make_tuple(16, 8, &convolve8_c),
    make_tuple(8, 16, &convolve8_c),
    make_tuple(16, 16, &convolve8_c),
    make_tuple(32, 16, &convolve8_c),
    make_tuple(16, 32, &convolve8_c),
    make_tuple(32, 32, &convolve8_c),
    make_tuple(64, 32, &convolve8_c),
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
#endif // CONFIG_VP9_HIGHBITDEPTH

#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
// copy/avg fall back to the C wrappers when x86inc assembly is disabled.
const ConvolveFunctions convolve8_sse2(
#if CONFIG_USE_X86INC
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
#else
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
#endif  // CONFIG_USE_X86INC
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
#if CONFIG_USE_X86INC
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
#else
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
#endif  // CONFIG_USE_X86INC
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
#if CONFIG_USE_X86INC
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
#else
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
#endif  // CONFIG_USE_X86INC
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
// All three bit depths share one instantiation so each runs every size.
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
    make_tuple(8, 4, &convolve8_sse2),
    make_tuple(4, 8, &convolve8_sse2),
    make_tuple(8, 8, &convolve8_sse2),
    make_tuple(16, 8, &convolve8_sse2),
    make_tuple(8, 16, &convolve8_sse2),
    make_tuple(16, 16, &convolve8_sse2),
    make_tuple(32, 16, &convolve8_sse2),
    make_tuple(16, 32, &convolve8_sse2),
    make_tuple(32, 32, &convolve8_sse2),
    make_tuple(64, 32, &convolve8_sse2),
    make_tuple(32, 64, &convolve8_sse2),
    make_tuple(64, 64, &convolve8_sse2),
    make_tuple(4, 4, &convolve10_sse2),
    make_tuple(8, 4, &convolve10_sse2),
    make_tuple(4, 8, &convolve10_sse2),
    make_tuple(8, 8, &convolve10_sse2),
    make_tuple(16, 8, &convolve10_sse2),
    make_tuple(8, 16, &convolve10_sse2),
    make_tuple(16, 16, &convolve10_sse2),
    make_tuple(32, 16, &convolve10_sse2),
    make_tuple(16, 32, &convolve10_sse2),
    make_tuple(32, 32, &convolve10_sse2),
    make_tuple(64, 32, &convolve10_sse2),
    make_tuple(32, 64, &convolve10_sse2),
    make_tuple(64, 64, &convolve10_sse2),
    make_tuple(4, 4, &convolve12_sse2),
    make_tuple(8, 4, &convolve12_sse2),
    make_tuple(4, 8, &convolve12_sse2),
    make_tuple(8, 8, &convolve12_sse2),
    make_tuple(16, 8, &convolve12_sse2),
    make_tuple(8, 16, &convolve12_sse2),
    make_tuple(16, 16, &convolve12_sse2),
    make_tuple(32, 16, &convolve12_sse2),
    make_tuple(16, 32, &convolve12_sse2),
    make_tuple(32, 32, &convolve12_sse2),
    make_tuple(64, 32, &convolve12_sse2),
    make_tuple(32, 64, &convolve12_sse2),
    make_tuple(64, 64, &convolve12_sse2)));
#else // !CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_sse2(
#if CONFIG_USE_X86INC
    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
#else
    vpx_convolve_copy_c, vpx_convolve_avg_c,
#endif  // CONFIG_USE_X86INC
    vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
    vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
    vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
    make_tuple(8, 4, &convolve8_sse2),
    make_tuple(4, 8, &convolve8_sse2),
    make_tuple(8, 8, &convolve8_sse2),
    make_tuple(16, 8, &convolve8_sse2),
    make_tuple(8, 16, &convolve8_sse2),
    make_tuple(16, 16, &convolve8_sse2),
    make_tuple(32, 16, &convolve8_sse2),
    make_tuple(16, 32, &convolve8_sse2),
    make_tuple(32, 32, &convolve8_sse2),
    make_tuple(64, 32, &convolve8_sse2),
    make_tuple(32, 64, &convolve8_sse2),
    make_tuple(64, 64, &convolve8_sse2)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2 && ARCH_X86_64

#if HAVE_SSSE3
// SSSE3 provides only the 8-tap filters; copy/avg and scaled use C.
const ConvolveFunctions convolve8_ssse3(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
    vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
    vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

1243INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( 1244 make_tuple(4, 4, &convolve8_ssse3), 1245 make_tuple(8, 4, &convolve8_ssse3), 1246 make_tuple(4, 8, &convolve8_ssse3), 1247 make_tuple(8, 8, &convolve8_ssse3), 1248 make_tuple(16, 8, &convolve8_ssse3), 1249 make_tuple(8, 16, &convolve8_ssse3), 1250 make_tuple(16, 16, &convolve8_ssse3), 1251 make_tuple(32, 16, &convolve8_ssse3), 1252 make_tuple(16, 32, &convolve8_ssse3), 1253 make_tuple(32, 32, &convolve8_ssse3), 1254 make_tuple(64, 32, &convolve8_ssse3), 1255 make_tuple(32, 64, &convolve8_ssse3), 1256 make_tuple(64, 64, &convolve8_ssse3))); 1257#endif 1258 1259#if HAVE_AVX2 && HAVE_SSSE3 1260const ConvolveFunctions convolve8_avx2( 1261 vpx_convolve_copy_c, vpx_convolve_avg_c, 1262 vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3, 1263 vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3, 1264 vpx_convolve8_avx2, vpx_convolve8_avg_ssse3, 1265 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, 1266 vpx_scaled_vert_c, vpx_scaled_avg_vert_c, 1267 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1268 1269INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( 1270 make_tuple(4, 4, &convolve8_avx2), 1271 make_tuple(8, 4, &convolve8_avx2), 1272 make_tuple(4, 8, &convolve8_avx2), 1273 make_tuple(8, 8, &convolve8_avx2), 1274 make_tuple(8, 16, &convolve8_avx2), 1275 make_tuple(16, 8, &convolve8_avx2), 1276 make_tuple(16, 16, &convolve8_avx2), 1277 make_tuple(32, 16, &convolve8_avx2), 1278 make_tuple(16, 32, &convolve8_avx2), 1279 make_tuple(32, 32, &convolve8_avx2), 1280 make_tuple(64, 32, &convolve8_avx2), 1281 make_tuple(32, 64, &convolve8_avx2), 1282 make_tuple(64, 64, &convolve8_avx2))); 1283#endif // HAVE_AVX2 && HAVE_SSSE3 1284 1285#if HAVE_NEON 1286#if HAVE_NEON_ASM 1287const ConvolveFunctions convolve8_neon( 1288 vpx_convolve_copy_neon, vpx_convolve_avg_neon, 1289 vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon, 1290 vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon, 1291 
vpx_convolve8_neon, vpx_convolve8_avg_neon, 1292 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, 1293 vpx_scaled_vert_c, vpx_scaled_avg_vert_c, 1294 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1295#else // HAVE_NEON 1296const ConvolveFunctions convolve8_neon( 1297 vpx_convolve_copy_neon, vpx_convolve_avg_neon, 1298 vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon, 1299 vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon, 1300 vpx_convolve8_neon, vpx_convolve8_avg_neon, 1301 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, 1302 vpx_scaled_vert_c, vpx_scaled_avg_vert_c, 1303 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1304#endif // HAVE_NEON_ASM 1305 1306INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( 1307 make_tuple(4, 4, &convolve8_neon), 1308 make_tuple(8, 4, &convolve8_neon), 1309 make_tuple(4, 8, &convolve8_neon), 1310 make_tuple(8, 8, &convolve8_neon), 1311 make_tuple(16, 8, &convolve8_neon), 1312 make_tuple(8, 16, &convolve8_neon), 1313 make_tuple(16, 16, &convolve8_neon), 1314 make_tuple(32, 16, &convolve8_neon), 1315 make_tuple(16, 32, &convolve8_neon), 1316 make_tuple(32, 32, &convolve8_neon), 1317 make_tuple(64, 32, &convolve8_neon), 1318 make_tuple(32, 64, &convolve8_neon), 1319 make_tuple(64, 64, &convolve8_neon))); 1320#endif // HAVE_NEON 1321 1322#if HAVE_DSPR2 1323const ConvolveFunctions convolve8_dspr2( 1324 vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2, 1325 vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2, 1326 vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2, 1327 vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2, 1328 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, 1329 vpx_scaled_vert_c, vpx_scaled_avg_vert_c, 1330 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1331 1332INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values( 1333 make_tuple(4, 4, &convolve8_dspr2), 1334 make_tuple(8, 4, &convolve8_dspr2), 1335 make_tuple(4, 8, &convolve8_dspr2), 1336 make_tuple(8, 8, &convolve8_dspr2), 1337 make_tuple(16, 8, &convolve8_dspr2), 
1338 make_tuple(8, 16, &convolve8_dspr2), 1339 make_tuple(16, 16, &convolve8_dspr2), 1340 make_tuple(32, 16, &convolve8_dspr2), 1341 make_tuple(16, 32, &convolve8_dspr2), 1342 make_tuple(32, 32, &convolve8_dspr2), 1343 make_tuple(64, 32, &convolve8_dspr2), 1344 make_tuple(32, 64, &convolve8_dspr2), 1345 make_tuple(64, 64, &convolve8_dspr2))); 1346#endif 1347 1348#if HAVE_MSA 1349const ConvolveFunctions convolve8_msa( 1350 vpx_convolve_copy_msa, vpx_convolve_avg_msa, 1351 vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa, 1352 vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa, 1353 vpx_convolve8_msa, vpx_convolve8_avg_msa, 1354 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, 1355 vpx_scaled_vert_c, vpx_scaled_avg_vert_c, 1356 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1357 1358INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values( 1359 make_tuple(4, 4, &convolve8_msa), 1360 make_tuple(8, 4, &convolve8_msa), 1361 make_tuple(4, 8, &convolve8_msa), 1362 make_tuple(8, 8, &convolve8_msa), 1363 make_tuple(16, 8, &convolve8_msa), 1364 make_tuple(8, 16, &convolve8_msa), 1365 make_tuple(16, 16, &convolve8_msa), 1366 make_tuple(32, 16, &convolve8_msa), 1367 make_tuple(16, 32, &convolve8_msa), 1368 make_tuple(32, 32, &convolve8_msa), 1369 make_tuple(64, 32, &convolve8_msa), 1370 make_tuple(32, 64, &convolve8_msa), 1371 make_tuple(64, 64, &convolve8_msa))); 1372#endif // HAVE_MSA 1373} // namespace 1374