1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <string.h> 12 13#include "third_party/googletest/src/include/gtest/gtest.h" 14 15#include "./vp9_rtcd.h" 16#include "./vpx_config.h" 17#include "./vpx_dsp_rtcd.h" 18#include "test/acm_random.h" 19#include "test/clear_system_state.h" 20#include "test/register_state_check.h" 21#include "test/util.h" 22#include "vp9/common/vp9_common.h" 23#include "vp9/common/vp9_filter.h" 24#include "vpx_dsp/vpx_dsp_common.h" 25#include "vpx_dsp/vpx_filter.h" 26#include "vpx_mem/vpx_mem.h" 27#include "vpx_ports/mem.h" 28#include "vpx_ports/vpx_timer.h" 29 30namespace { 31 32static const unsigned int kMaxDimension = 64; 33 34typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, 35 uint8_t *dst, ptrdiff_t dst_stride, 36 const int16_t *filter_x, int filter_x_stride, 37 const int16_t *filter_y, int filter_y_stride, 38 int w, int h); 39 40typedef void (*WrapperFilterBlock2d8Func)( 41 const uint8_t *src_ptr, const unsigned int src_stride, 42 const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, 43 unsigned int dst_stride, unsigned int output_width, 44 unsigned int output_height, int use_highbd); 45 46struct ConvolveFunctions { 47 ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8, 48 ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg, 49 ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8, 50 ConvolveFunc sh8_avg, ConvolveFunc sv8, 51 ConvolveFunc sv8_avg, ConvolveFunc shv8, 52 ConvolveFunc shv8_avg, int bd) 53 : use_highbd_(bd) { 54 copy_[0] = copy; 55 copy_[1] = avg; 56 h8_[0] = h8; 57 h8_[1] = h8_avg; 58 
v8_[0] = v8; 59 v8_[1] = v8_avg; 60 hv8_[0] = hv8; 61 hv8_[1] = hv8_avg; 62 sh8_[0] = sh8; 63 sh8_[1] = sh8_avg; 64 sv8_[0] = sv8; 65 sv8_[1] = sv8_avg; 66 shv8_[0] = shv8; 67 shv8_[1] = shv8_avg; 68 } 69 70 ConvolveFunc copy_[2]; 71 ConvolveFunc h8_[2]; 72 ConvolveFunc v8_[2]; 73 ConvolveFunc hv8_[2]; 74 ConvolveFunc sh8_[2]; // scaled horiz 75 ConvolveFunc sv8_[2]; // scaled vert 76 ConvolveFunc shv8_[2]; // scaled horiz/vert 77 int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth. 78}; 79 80typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; 81 82#define ALL_SIZES(convolve_fn) \ 83 make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \ 84 make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \ 85 make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \ 86 make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \ 87 make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \ 88 make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \ 89 make_tuple(64, 64, &convolve_fn) 90 91// Reference 8-tap subpixel filter, slightly modified to fit into this test. 92#define VP9_FILTER_WEIGHT 128 93#define VP9_FILTER_SHIFT 7 94uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; } 95 96void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride, 97 const int16_t *hfilter, const int16_t *vfilter, 98 uint8_t *dst_ptr, unsigned int dst_stride, 99 unsigned int output_width, unsigned int output_height) { 100 // Between passes, we use an intermediate buffer whose height is extended to 101 // have enough horizontally filtered values as input for the vertical pass. 102 // This buffer is allocated to be big enough for the largest block type we 103 // support. 
104 const int kInterp_Extend = 4; 105 const unsigned int intermediate_height = 106 (kInterp_Extend - 1) + output_height + kInterp_Extend; 107 unsigned int i, j; 108 109 // Size of intermediate_buffer is max_intermediate_height * filter_max_width, 110 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 111 // + kInterp_Extend 112 // = 3 + 16 + 4 113 // = 23 114 // and filter_max_width = 16 115 // 116 uint8_t intermediate_buffer[71 * kMaxDimension]; 117 const int intermediate_next_stride = 118 1 - static_cast<int>(intermediate_height * output_width); 119 120 // Horizontal pass (src -> transposed intermediate). 121 uint8_t *output_ptr = intermediate_buffer; 122 const int src_next_row_stride = src_stride - output_width; 123 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 124 for (i = 0; i < intermediate_height; ++i) { 125 for (j = 0; j < output_width; ++j) { 126 // Apply filter... 127 const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) + 128 (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) + 129 (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) + 130 (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) + 131 (VP9_FILTER_WEIGHT >> 1); // Rounding 132 133 // Normalize back to 0-255... 134 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); 135 ++src_ptr; 136 output_ptr += intermediate_height; 137 } 138 src_ptr += src_next_row_stride; 139 output_ptr += intermediate_next_stride; 140 } 141 142 // Vertical pass (transposed intermediate -> dst). 143 src_ptr = intermediate_buffer; 144 const int dst_next_row_stride = dst_stride - output_width; 145 for (i = 0; i < output_height; ++i) { 146 for (j = 0; j < output_width; ++j) { 147 // Apply filter... 
148 const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) + 149 (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) + 150 (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) + 151 (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) + 152 (VP9_FILTER_WEIGHT >> 1); // Rounding 153 154 // Normalize back to 0-255... 155 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); 156 src_ptr += intermediate_height; 157 } 158 src_ptr += intermediate_next_stride; 159 dst_ptr += dst_next_row_stride; 160 } 161} 162 163void block2d_average_c(uint8_t *src, unsigned int src_stride, 164 uint8_t *output_ptr, unsigned int output_stride, 165 unsigned int output_width, unsigned int output_height) { 166 unsigned int i, j; 167 for (i = 0; i < output_height; ++i) { 168 for (j = 0; j < output_width; ++j) { 169 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; 170 } 171 output_ptr += output_stride; 172 } 173} 174 175void filter_average_block2d_8_c(const uint8_t *src_ptr, 176 const unsigned int src_stride, 177 const int16_t *hfilter, const int16_t *vfilter, 178 uint8_t *dst_ptr, unsigned int dst_stride, 179 unsigned int output_width, 180 unsigned int output_height) { 181 uint8_t tmp[kMaxDimension * kMaxDimension]; 182 183 assert(output_width <= kMaxDimension); 184 assert(output_height <= kMaxDimension); 185 filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64, 186 output_width, output_height); 187 block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height); 188} 189 190#if CONFIG_VP9_HIGHBITDEPTH 191void highbd_filter_block2d_8_c(const uint16_t *src_ptr, 192 const unsigned int src_stride, 193 const int16_t *hfilter, const int16_t *vfilter, 194 uint16_t *dst_ptr, unsigned int dst_stride, 195 unsigned int output_width, 196 unsigned int output_height, int bd) { 197 // Between passes, we use an intermediate buffer whose height is extended to 198 // have enough horizontally filtered values as input for the vertical pass. 
199 // This buffer is allocated to be big enough for the largest block type we 200 // support. 201 const int kInterp_Extend = 4; 202 const unsigned int intermediate_height = 203 (kInterp_Extend - 1) + output_height + kInterp_Extend; 204 205 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, 206 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 207 * + kInterp_Extend 208 * = 3 + 16 + 4 209 * = 23 210 * and filter_max_width = 16 211 */ 212 uint16_t intermediate_buffer[71 * kMaxDimension]; 213 const int intermediate_next_stride = 214 1 - static_cast<int>(intermediate_height * output_width); 215 216 // Horizontal pass (src -> transposed intermediate). 217 { 218 uint16_t *output_ptr = intermediate_buffer; 219 const int src_next_row_stride = src_stride - output_width; 220 unsigned int i, j; 221 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 222 for (i = 0; i < intermediate_height; ++i) { 223 for (j = 0; j < output_width; ++j) { 224 // Apply filter... 225 const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) + 226 (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) + 227 (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) + 228 (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) + 229 (VP9_FILTER_WEIGHT >> 1); // Rounding 230 231 // Normalize back to 0-255... 232 *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd); 233 ++src_ptr; 234 output_ptr += intermediate_height; 235 } 236 src_ptr += src_next_row_stride; 237 output_ptr += intermediate_next_stride; 238 } 239 } 240 241 // Vertical pass (transposed intermediate -> dst). 242 { 243 uint16_t *src_ptr = intermediate_buffer; 244 const int dst_next_row_stride = dst_stride - output_width; 245 unsigned int i, j; 246 for (i = 0; i < output_height; ++i) { 247 for (j = 0; j < output_width; ++j) { 248 // Apply filter... 
249 const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) + 250 (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) + 251 (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) + 252 (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) + 253 (VP9_FILTER_WEIGHT >> 1); // Rounding 254 255 // Normalize back to 0-255... 256 *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd); 257 src_ptr += intermediate_height; 258 } 259 src_ptr += intermediate_next_stride; 260 dst_ptr += dst_next_row_stride; 261 } 262 } 263} 264 265void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride, 266 uint16_t *output_ptr, unsigned int output_stride, 267 unsigned int output_width, 268 unsigned int output_height) { 269 unsigned int i, j; 270 for (i = 0; i < output_height; ++i) { 271 for (j = 0; j < output_width; ++j) { 272 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; 273 } 274 output_ptr += output_stride; 275 } 276} 277 278void highbd_filter_average_block2d_8_c( 279 const uint16_t *src_ptr, const unsigned int src_stride, 280 const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr, 281 unsigned int dst_stride, unsigned int output_width, 282 unsigned int output_height, int bd) { 283 uint16_t tmp[kMaxDimension * kMaxDimension]; 284 285 assert(output_width <= kMaxDimension); 286 assert(output_height <= kMaxDimension); 287 highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64, 288 output_width, output_height, bd); 289 highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, 290 output_height); 291} 292#endif // CONFIG_VP9_HIGHBITDEPTH 293 294void wrapper_filter_average_block2d_8_c( 295 const uint8_t *src_ptr, const unsigned int src_stride, 296 const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, 297 unsigned int dst_stride, unsigned int output_width, 298 unsigned int output_height, int use_highbd) { 299#if CONFIG_VP9_HIGHBITDEPTH 300 if (use_highbd == 0) { 301 
filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, 302 dst_stride, output_width, output_height); 303 } else { 304 highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, 305 hfilter, vfilter, 306 CAST_TO_SHORTPTR(dst_ptr), dst_stride, 307 output_width, output_height, use_highbd); 308 } 309#else 310 ASSERT_EQ(0, use_highbd); 311 filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, 312 dst_stride, output_width, output_height); 313#endif 314} 315 316void wrapper_filter_block2d_8_c(const uint8_t *src_ptr, 317 const unsigned int src_stride, 318 const int16_t *hfilter, const int16_t *vfilter, 319 uint8_t *dst_ptr, unsigned int dst_stride, 320 unsigned int output_width, 321 unsigned int output_height, int use_highbd) { 322#if CONFIG_VP9_HIGHBITDEPTH 323 if (use_highbd == 0) { 324 filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, 325 dst_stride, output_width, output_height); 326 } else { 327 highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter, 328 vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride, 329 output_width, output_height, use_highbd); 330 } 331#else 332 ASSERT_EQ(0, use_highbd); 333 filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride, 334 output_width, output_height); 335#endif 336} 337 338class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { 339 public: 340 static void SetUpTestCase() { 341 // Force input_ to be unaligned, output to be 16 byte aligned. 
342 input_ = reinterpret_cast<uint8_t *>( 343 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 344 1; 345 output_ = reinterpret_cast<uint8_t *>( 346 vpx_memalign(kDataAlignment, kOutputBufferSize)); 347 output_ref_ = reinterpret_cast<uint8_t *>( 348 vpx_memalign(kDataAlignment, kOutputBufferSize)); 349#if CONFIG_VP9_HIGHBITDEPTH 350 input16_ = reinterpret_cast<uint16_t *>(vpx_memalign( 351 kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) + 352 1; 353 output16_ = reinterpret_cast<uint16_t *>( 354 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); 355 output16_ref_ = reinterpret_cast<uint16_t *>( 356 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); 357#endif 358 } 359 360 virtual void TearDown() { libvpx_test::ClearSystemState(); } 361 362 static void TearDownTestCase() { 363 vpx_free(input_ - 1); 364 input_ = NULL; 365 vpx_free(output_); 366 output_ = NULL; 367 vpx_free(output_ref_); 368 output_ref_ = NULL; 369#if CONFIG_VP9_HIGHBITDEPTH 370 vpx_free(input16_ - 1); 371 input16_ = NULL; 372 vpx_free(output16_); 373 output16_ = NULL; 374 vpx_free(output16_ref_); 375 output16_ref_ = NULL; 376#endif 377 } 378 379 protected: 380 static const int kDataAlignment = 16; 381 static const int kOuterBlockSize = 256; 382 static const int kInputStride = kOuterBlockSize; 383 static const int kOutputStride = kOuterBlockSize; 384 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; 385 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; 386 387 int Width() const { return GET_PARAM(0); } 388 int Height() const { return GET_PARAM(1); } 389 int BorderLeft() const { 390 const int center = (kOuterBlockSize - Width()) / 2; 391 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); 392 } 393 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } 394 395 bool IsIndexInBorder(int i) { 396 return (i < BorderTop() * kOuterBlockSize || 397 i >= (BorderTop() + Height()) * 
kOuterBlockSize || 398 i % kOuterBlockSize < BorderLeft() || 399 i % kOuterBlockSize >= (BorderLeft() + Width())); 400 } 401 402 virtual void SetUp() { 403 UUT_ = GET_PARAM(2); 404#if CONFIG_VP9_HIGHBITDEPTH 405 if (UUT_->use_highbd_ != 0) { 406 mask_ = (1 << UUT_->use_highbd_) - 1; 407 } else { 408 mask_ = 255; 409 } 410#endif 411 /* Set up guard blocks for an inner block centered in the outer block */ 412 for (int i = 0; i < kOutputBufferSize; ++i) { 413 if (IsIndexInBorder(i)) { 414 output_[i] = 255; 415 } else { 416 output_[i] = 0; 417 } 418 } 419 420 ::libvpx_test::ACMRandom prng; 421 for (int i = 0; i < kInputBufferSize; ++i) { 422 if (i & 1) { 423 input_[i] = 255; 424#if CONFIG_VP9_HIGHBITDEPTH 425 input16_[i] = mask_; 426#endif 427 } else { 428 input_[i] = prng.Rand8Extremes(); 429#if CONFIG_VP9_HIGHBITDEPTH 430 input16_[i] = prng.Rand16() & mask_; 431#endif 432 } 433 } 434 } 435 436 void SetConstantInput(int value) { 437 memset(input_, value, kInputBufferSize); 438#if CONFIG_VP9_HIGHBITDEPTH 439 vpx_memset16(input16_, value, kInputBufferSize); 440#endif 441 } 442 443 void CopyOutputToRef() { 444 memcpy(output_ref_, output_, kOutputBufferSize); 445#if CONFIG_VP9_HIGHBITDEPTH 446 memcpy(output16_ref_, output16_, 447 kOutputBufferSize * sizeof(output16_ref_[0])); 448#endif 449 } 450 451 void CheckGuardBlocks() { 452 for (int i = 0; i < kOutputBufferSize; ++i) { 453 if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]); 454 } 455 } 456 457 uint8_t *input() const { 458 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 459#if CONFIG_VP9_HIGHBITDEPTH 460 if (UUT_->use_highbd_ == 0) { 461 return input_ + offset; 462 } else { 463 return CAST_TO_BYTEPTR(input16_ + offset); 464 } 465#else 466 return input_ + offset; 467#endif 468 } 469 470 uint8_t *output() const { 471 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 472#if CONFIG_VP9_HIGHBITDEPTH 473 if (UUT_->use_highbd_ == 0) { 474 return output_ + offset; 475 } else { 476 return 
CAST_TO_BYTEPTR(output16_ + offset); 477 } 478#else 479 return output_ + offset; 480#endif 481 } 482 483 uint8_t *output_ref() const { 484 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 485#if CONFIG_VP9_HIGHBITDEPTH 486 if (UUT_->use_highbd_ == 0) { 487 return output_ref_ + offset; 488 } else { 489 return CAST_TO_BYTEPTR(output16_ref_ + offset); 490 } 491#else 492 return output_ref_ + offset; 493#endif 494 } 495 496 uint16_t lookup(uint8_t *list, int index) const { 497#if CONFIG_VP9_HIGHBITDEPTH 498 if (UUT_->use_highbd_ == 0) { 499 return list[index]; 500 } else { 501 return CAST_TO_SHORTPTR(list)[index]; 502 } 503#else 504 return list[index]; 505#endif 506 } 507 508 void assign_val(uint8_t *list, int index, uint16_t val) const { 509#if CONFIG_VP9_HIGHBITDEPTH 510 if (UUT_->use_highbd_ == 0) { 511 list[index] = (uint8_t)val; 512 } else { 513 CAST_TO_SHORTPTR(list)[index] = val; 514 } 515#else 516 list[index] = (uint8_t)val; 517#endif 518 } 519 520 const ConvolveFunctions *UUT_; 521 static uint8_t *input_; 522 static uint8_t *output_; 523 static uint8_t *output_ref_; 524#if CONFIG_VP9_HIGHBITDEPTH 525 static uint16_t *input16_; 526 static uint16_t *output16_; 527 static uint16_t *output16_ref_; 528 int mask_; 529#endif 530}; 531 532uint8_t *ConvolveTest::input_ = NULL; 533uint8_t *ConvolveTest::output_ = NULL; 534uint8_t *ConvolveTest::output_ref_ = NULL; 535#if CONFIG_VP9_HIGHBITDEPTH 536uint16_t *ConvolveTest::input16_ = NULL; 537uint16_t *ConvolveTest::output16_ = NULL; 538uint16_t *ConvolveTest::output16_ref_ = NULL; 539#endif 540 541TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); } 542 543TEST_P(ConvolveTest, DISABLED_Copy_Speed) { 544 const uint8_t *const in = input(); 545 uint8_t *const out = output(); 546 const int kNumTests = 5000000; 547 const int width = Width(); 548 const int height = Height(); 549 vpx_usec_timer timer; 550 551 vpx_usec_timer_start(&timer); 552 for (int n = 0; n < kNumTests; ++n) { 553 UUT_->copy_[0](in, 
kInputStride, out, kOutputStride, NULL, 0, NULL, 0, 554 width, height); 555 } 556 vpx_usec_timer_mark(&timer); 557 558 const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); 559 printf("convolve_copy_%dx%d_%d: %d us\n", width, height, 560 UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); 561} 562 563TEST_P(ConvolveTest, DISABLED_Avg_Speed) { 564 const uint8_t *const in = input(); 565 uint8_t *const out = output(); 566 const int kNumTests = 5000000; 567 const int width = Width(); 568 const int height = Height(); 569 vpx_usec_timer timer; 570 571 vpx_usec_timer_start(&timer); 572 for (int n = 0; n < kNumTests; ++n) { 573 UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, 574 width, height); 575 } 576 vpx_usec_timer_mark(&timer); 577 578 const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); 579 printf("convolve_avg_%dx%d_%d: %d us\n", width, height, 580 UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); 581} 582 583TEST_P(ConvolveTest, Copy) { 584 uint8_t *const in = input(); 585 uint8_t *const out = output(); 586 587 ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride, 588 NULL, 0, NULL, 0, Width(), Height())); 589 590 CheckGuardBlocks(); 591 592 for (int y = 0; y < Height(); ++y) { 593 for (int x = 0; x < Width(); ++x) 594 ASSERT_EQ(lookup(out, y * kOutputStride + x), 595 lookup(in, y * kInputStride + x)) 596 << "(" << x << "," << y << ")"; 597 } 598} 599 600TEST_P(ConvolveTest, Avg) { 601 uint8_t *const in = input(); 602 uint8_t *const out = output(); 603 uint8_t *const out_ref = output_ref(); 604 CopyOutputToRef(); 605 606 ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride, 607 NULL, 0, NULL, 0, Width(), Height())); 608 609 CheckGuardBlocks(); 610 611 for (int y = 0; y < Height(); ++y) { 612 for (int x = 0; x < Width(); ++x) 613 ASSERT_EQ(lookup(out, y * kOutputStride + x), 614 ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) + 615 
lookup(out_ref, y * kOutputStride + x), 616 1)) 617 << "(" << x << "," << y << ")"; 618 } 619} 620 621TEST_P(ConvolveTest, CopyHoriz) { 622 uint8_t *const in = input(); 623 uint8_t *const out = output(); 624 DECLARE_ALIGNED(256, const int16_t, 625 filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 }; 626 627 ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride, 628 filter8, 16, filter8, 16, Width(), 629 Height())); 630 631 CheckGuardBlocks(); 632 633 for (int y = 0; y < Height(); ++y) { 634 for (int x = 0; x < Width(); ++x) 635 ASSERT_EQ(lookup(out, y * kOutputStride + x), 636 lookup(in, y * kInputStride + x)) 637 << "(" << x << "," << y << ")"; 638 } 639} 640 641TEST_P(ConvolveTest, CopyVert) { 642 uint8_t *const in = input(); 643 uint8_t *const out = output(); 644 DECLARE_ALIGNED(256, const int16_t, 645 filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 }; 646 647 ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride, 648 filter8, 16, filter8, 16, Width(), 649 Height())); 650 651 CheckGuardBlocks(); 652 653 for (int y = 0; y < Height(); ++y) { 654 for (int x = 0; x < Width(); ++x) 655 ASSERT_EQ(lookup(out, y * kOutputStride + x), 656 lookup(in, y * kInputStride + x)) 657 << "(" << x << "," << y << ")"; 658 } 659} 660 661TEST_P(ConvolveTest, Copy2D) { 662 uint8_t *const in = input(); 663 uint8_t *const out = output(); 664 DECLARE_ALIGNED(256, const int16_t, 665 filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 }; 666 667 ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride, 668 filter8, 16, filter8, 16, Width(), 669 Height())); 670 671 CheckGuardBlocks(); 672 673 for (int y = 0; y < Height(); ++y) { 674 for (int x = 0; x < Width(); ++x) 675 ASSERT_EQ(lookup(out, y * kOutputStride + x), 676 lookup(in, y * kInputStride + x)) 677 << "(" << x << "," << y << ")"; 678 } 679} 680 681const int kNumFilterBanks = 4; 682const int kNumFilters = 16; 683 684TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { 685 for (int 
filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 686 const InterpKernel *filters = 687 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 688 for (int i = 0; i < kNumFilters; i++) { 689 const int p0 = filters[i][0] + filters[i][1]; 690 const int p1 = filters[i][2] + filters[i][3]; 691 const int p2 = filters[i][4] + filters[i][5]; 692 const int p3 = filters[i][6] + filters[i][7]; 693 EXPECT_LE(p0, 128); 694 EXPECT_LE(p1, 128); 695 EXPECT_LE(p2, 128); 696 EXPECT_LE(p3, 128); 697 EXPECT_LE(p0 + p3, 128); 698 EXPECT_LE(p0 + p3 + p1, 128); 699 EXPECT_LE(p0 + p3 + p1 + p2, 128); 700 EXPECT_EQ(p0 + p1 + p2 + p3, 128); 701 } 702 } 703} 704 705const int16_t kInvalidFilter[8] = { 0 }; 706const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = { 707 wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c 708}; 709 710TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { 711 for (int i = 0; i < 2; ++i) { 712 uint8_t *const in = input(); 713 uint8_t *const out = output(); 714#if CONFIG_VP9_HIGHBITDEPTH 715 uint8_t ref8[kOutputStride * kMaxDimension]; 716 uint16_t ref16[kOutputStride * kMaxDimension]; 717 uint8_t *ref; 718 if (UUT_->use_highbd_ == 0) { 719 ref = ref8; 720 } else { 721 ref = CAST_TO_BYTEPTR(ref16); 722 } 723#else 724 uint8_t ref[kOutputStride * kMaxDimension]; 725#endif 726 727 // Populate ref and out with some random data 728 ::libvpx_test::ACMRandom prng; 729 for (int y = 0; y < Height(); ++y) { 730 for (int x = 0; x < Width(); ++x) { 731 uint16_t r; 732#if CONFIG_VP9_HIGHBITDEPTH 733 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { 734 r = prng.Rand8Extremes(); 735 } else { 736 r = prng.Rand16() & mask_; 737 } 738#else 739 r = prng.Rand8Extremes(); 740#endif 741 742 assign_val(out, y * kOutputStride + x, r); 743 assign_val(ref, y * kOutputStride + x, r); 744 } 745 } 746 747 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 748 const InterpKernel *filters = 749 
vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 750 751 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 752 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 753 wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x], 754 filters[filter_y], ref, kOutputStride, 755 Width(), Height(), UUT_->use_highbd_); 756 757 if (filter_x && filter_y) 758 ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i]( 759 in, kInputStride, out, kOutputStride, filters[filter_x], 16, 760 filters[filter_y], 16, Width(), Height())); 761 else if (filter_y) 762 ASM_REGISTER_STATE_CHECK(UUT_->v8_[i]( 763 in, kInputStride, out, kOutputStride, kInvalidFilter, 16, 764 filters[filter_y], 16, Width(), Height())); 765 else if (filter_x) 766 ASM_REGISTER_STATE_CHECK(UUT_->h8_[i]( 767 in, kInputStride, out, kOutputStride, filters[filter_x], 16, 768 kInvalidFilter, 16, Width(), Height())); 769 else 770 ASM_REGISTER_STATE_CHECK(UUT_->copy_[i]( 771 in, kInputStride, out, kOutputStride, kInvalidFilter, 0, 772 kInvalidFilter, 0, Width(), Height())); 773 774 CheckGuardBlocks(); 775 776 for (int y = 0; y < Height(); ++y) { 777 for (int x = 0; x < Width(); ++x) 778 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 779 lookup(out, y * kOutputStride + x)) 780 << "mismatch at (" << x << "," << y << "), " 781 << "filters (" << filter_bank << "," << filter_x << "," 782 << filter_y << ")"; 783 } 784 } 785 } 786 } 787 } 788} 789 790TEST_P(ConvolveTest, FilterExtremes) { 791 uint8_t *const in = input(); 792 uint8_t *const out = output(); 793#if CONFIG_VP9_HIGHBITDEPTH 794 uint8_t ref8[kOutputStride * kMaxDimension]; 795 uint16_t ref16[kOutputStride * kMaxDimension]; 796 uint8_t *ref; 797 if (UUT_->use_highbd_ == 0) { 798 ref = ref8; 799 } else { 800 ref = CAST_TO_BYTEPTR(ref16); 801 } 802#else 803 uint8_t ref[kOutputStride * kMaxDimension]; 804#endif 805 806 // Populate ref and out with some random data 807 ::libvpx_test::ACMRandom prng; 808 for (int y = 0; y < Height(); ++y) { 809 for (int x 
= 0; x < Width(); ++x) { 810 uint16_t r; 811#if CONFIG_VP9_HIGHBITDEPTH 812 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { 813 r = prng.Rand8Extremes(); 814 } else { 815 r = prng.Rand16() & mask_; 816 } 817#else 818 r = prng.Rand8Extremes(); 819#endif 820 assign_val(out, y * kOutputStride + x, r); 821 assign_val(ref, y * kOutputStride + x, r); 822 } 823 } 824 825 for (int axis = 0; axis < 2; axis++) { 826 int seed_val = 0; 827 while (seed_val < 256) { 828 for (int y = 0; y < 8; ++y) { 829 for (int x = 0; x < 8; ++x) { 830#if CONFIG_VP9_HIGHBITDEPTH 831 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, 832 ((seed_val >> (axis ? y : x)) & 1) * mask_); 833#else 834 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, 835 ((seed_val >> (axis ? y : x)) & 1) * 255); 836#endif 837 if (axis) seed_val++; 838 } 839 if (axis) { 840 seed_val -= 8; 841 } else { 842 seed_val++; 843 } 844 } 845 if (axis) seed_val += 8; 846 847 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 848 const InterpKernel *filters = 849 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)]; 850 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 851 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 852 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], 853 filters[filter_y], ref, kOutputStride, 854 Width(), Height(), UUT_->use_highbd_); 855 if (filter_x && filter_y) 856 ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0]( 857 in, kInputStride, out, kOutputStride, filters[filter_x], 16, 858 filters[filter_y], 16, Width(), Height())); 859 else if (filter_y) 860 ASM_REGISTER_STATE_CHECK(UUT_->v8_[0]( 861 in, kInputStride, out, kOutputStride, kInvalidFilter, 16, 862 filters[filter_y], 16, Width(), Height())); 863 else if (filter_x) 864 ASM_REGISTER_STATE_CHECK(UUT_->h8_[0]( 865 in, kInputStride, out, kOutputStride, filters[filter_x], 16, 866 kInvalidFilter, 16, Width(), Height())); 867 else 868 
ASM_REGISTER_STATE_CHECK(UUT_->copy_[0]( 869 in, kInputStride, out, kOutputStride, kInvalidFilter, 0, 870 kInvalidFilter, 0, Width(), Height())); 871 872 for (int y = 0; y < Height(); ++y) { 873 for (int x = 0; x < Width(); ++x) 874 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 875 lookup(out, y * kOutputStride + x)) 876 << "mismatch at (" << x << "," << y << "), " 877 << "filters (" << filter_bank << "," << filter_x << "," 878 << filter_y << ")"; 879 } 880 } 881 } 882 } 883 } 884 } 885} 886 887/* This test exercises that enough rows and columns are filtered with every 888 possible initial fractional positions and scaling steps. */ 889TEST_P(ConvolveTest, CheckScalingFiltering) { 890 uint8_t *const in = input(); 891 uint8_t *const out = output(); 892 const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP]; 893 894 SetConstantInput(127); 895 896 for (int frac = 0; frac < 16; ++frac) { 897 for (int step = 1; step <= 32; ++step) { 898 /* Test the horizontal and vertical filters in combination. 
*/ 899 ASM_REGISTER_STATE_CHECK( 900 UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac], 901 step, eighttap[frac], step, Width(), Height())); 902 903 CheckGuardBlocks(); 904 905 for (int y = 0; y < Height(); ++y) { 906 for (int x = 0; x < Width(); ++x) { 907 ASSERT_EQ(lookup(in, y * kInputStride + x), 908 lookup(out, y * kOutputStride + x)) 909 << "x == " << x << ", y == " << y << ", frac == " << frac 910 << ", step == " << step; 911 } 912 } 913 } 914 } 915} 916 917using std::tr1::make_tuple; 918 919#if CONFIG_VP9_HIGHBITDEPTH 920#define WRAP(func, bd) \ 921 void wrap_##func##_##bd( \ 922 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 923 ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \ 924 const int16_t *filter_y, int filter_y_stride, int w, int h) { \ 925 vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride, \ 926 reinterpret_cast<uint16_t *>(dst), dst_stride, filter_x, \ 927 filter_x_stride, filter_y, filter_y_stride, w, h, bd); \ 928 } 929 930#if HAVE_SSE2 && ARCH_X86_64 931WRAP(convolve_copy_sse2, 8) 932WRAP(convolve_avg_sse2, 8) 933WRAP(convolve_copy_sse2, 10) 934WRAP(convolve_avg_sse2, 10) 935WRAP(convolve_copy_sse2, 12) 936WRAP(convolve_avg_sse2, 12) 937WRAP(convolve8_horiz_sse2, 8) 938WRAP(convolve8_avg_horiz_sse2, 8) 939WRAP(convolve8_vert_sse2, 8) 940WRAP(convolve8_avg_vert_sse2, 8) 941WRAP(convolve8_sse2, 8) 942WRAP(convolve8_avg_sse2, 8) 943WRAP(convolve8_horiz_sse2, 10) 944WRAP(convolve8_avg_horiz_sse2, 10) 945WRAP(convolve8_vert_sse2, 10) 946WRAP(convolve8_avg_vert_sse2, 10) 947WRAP(convolve8_sse2, 10) 948WRAP(convolve8_avg_sse2, 10) 949WRAP(convolve8_horiz_sse2, 12) 950WRAP(convolve8_avg_horiz_sse2, 12) 951WRAP(convolve8_vert_sse2, 12) 952WRAP(convolve8_avg_vert_sse2, 12) 953WRAP(convolve8_sse2, 12) 954WRAP(convolve8_avg_sse2, 12) 955#endif // HAVE_SSE2 && ARCH_X86_64 956 957#if HAVE_AVX2 958WRAP(convolve_copy_avx2, 8) 959WRAP(convolve_avg_avx2, 8) 960WRAP(convolve8_horiz_avx2, 8) 
// Remaining 8-bit AVX2 adapters. Each WRAP(func, bd) defines
// wrap_<func>_<bd>(), which forwards to vpx_highbd_<func>() with bit depth bd.
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

// 10-bit AVX2 adapters.
WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)

// 12-bit AVX2 adapters.
WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON adapters: copy/avg for 8, 10 and 12 bit, then the 8-tap filters per
// bit depth.
WRAP(convolve_copy_neon, 8)
WRAP(convolve_avg_neon, 8)
WRAP(convolve_copy_neon, 10)
WRAP(convolve_avg_neon, 10)
WRAP(convolve_copy_neon, 12)
WRAP(convolve_avg_neon, 12)
WRAP(convolve8_horiz_neon, 8)
WRAP(convolve8_avg_horiz_neon, 8)
WRAP(convolve8_vert_neon, 8)
WRAP(convolve8_avg_vert_neon, 8)
WRAP(convolve8_neon, 8)
WRAP(convolve8_avg_neon, 8)
WRAP(convolve8_horiz_neon, 10)
WRAP(convolve8_avg_horiz_neon, 10)
WRAP(convolve8_vert_neon, 10)
WRAP(convolve8_avg_vert_neon, 10)
WRAP(convolve8_neon, 10)
WRAP(convolve8_avg_neon, 10)
WRAP(convolve8_horiz_neon, 12)
WRAP(convolve8_avg_horiz_neon, 12)
WRAP(convolve8_vert_neon, 12)
WRAP(convolve8_avg_vert_neon, 12)
WRAP(convolve8_neon, 12)
WRAP(convolve8_avg_neon, 12)
#endif  // HAVE_NEON

// C adapters; these are not guarded by any HAVE_* check.
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
1026WRAP(convolve8_avg_vert_c, 10) 1027WRAP(convolve8_c, 10) 1028WRAP(convolve8_avg_c, 10) 1029WRAP(convolve_copy_c, 12) 1030WRAP(convolve_avg_c, 12) 1031WRAP(convolve8_horiz_c, 12) 1032WRAP(convolve8_avg_horiz_c, 12) 1033WRAP(convolve8_vert_c, 12) 1034WRAP(convolve8_avg_vert_c, 12) 1035WRAP(convolve8_c, 12) 1036WRAP(convolve8_avg_c, 12) 1037#undef WRAP 1038 1039const ConvolveFunctions convolve8_c( 1040 wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8, 1041 wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, 1042 wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 1043 wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, 1044 wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, 1045 wrap_convolve8_avg_c_8, 8); 1046const ConvolveFunctions convolve10_c( 1047 wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10, 1048 wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10, 1049 wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 1050 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, 1051 wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, 1052 wrap_convolve8_avg_c_10, 10); 1053const ConvolveFunctions convolve12_c( 1054 wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12, 1055 wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, 1056 wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 1057 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, 1058 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, 1059 wrap_convolve8_avg_c_12, 12); 1060const ConvolveParam kArrayConvolve_c[] = { 1061 ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c) 1062}; 1063 1064#else 1065const ConvolveFunctions convolve8_c( 1066 vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c, 1067 vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, 
vpx_convolve8_avg_vert_c, 1068 vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c, 1069 vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, 1070 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1071const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; 1072#endif 1073INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c)); 1074 1075#if HAVE_SSE2 && ARCH_X86_64 1076#if CONFIG_VP9_HIGHBITDEPTH 1077const ConvolveFunctions convolve8_sse2( 1078 wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8, 1079 wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, 1080 wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, 1081 wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 1082 wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, 1083 wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, 1084 wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); 1085const ConvolveFunctions convolve10_sse2( 1086 wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10, 1087 wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, 1088 wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, 1089 wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 1090 wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, 1091 wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, 1092 wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); 1093const ConvolveFunctions convolve12_sse2( 1094 wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12, 1095 wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, 1096 wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, 1097 wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 1098 wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, 1099 wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, 1100 wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); 1101const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2), 1102 
ALL_SIZES(convolve10_sse2), 1103 ALL_SIZES(convolve12_sse2) }; 1104#else 1105const ConvolveFunctions convolve8_sse2( 1106 vpx_convolve_copy_sse2, vpx_convolve_avg_sse2, vpx_convolve8_horiz_sse2, 1107 vpx_convolve8_avg_horiz_sse2, vpx_convolve8_vert_sse2, 1108 vpx_convolve8_avg_vert_sse2, vpx_convolve8_sse2, vpx_convolve8_avg_sse2, 1109 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1110 vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1111 1112const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) }; 1113#endif // CONFIG_VP9_HIGHBITDEPTH 1114INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, 1115 ::testing::ValuesIn(kArrayConvolve_sse2)); 1116#endif 1117 1118#if HAVE_SSSE3 1119const ConvolveFunctions convolve8_ssse3( 1120 vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_ssse3, 1121 vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_ssse3, 1122 vpx_convolve8_avg_vert_ssse3, vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3, 1123 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1124 vpx_scaled_avg_vert_c, vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0); 1125 1126const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) }; 1127INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, 1128 ::testing::ValuesIn(kArrayConvolve8_ssse3)); 1129#endif 1130 1131#if HAVE_AVX2 1132#if CONFIG_VP9_HIGHBITDEPTH 1133const ConvolveFunctions convolve8_avx2( 1134 wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8, 1135 wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8, 1136 wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8, 1137 wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8, 1138 wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, 1139 wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); 1140const ConvolveFunctions convolve10_avx2( 1141 wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10, 1142 wrap_convolve8_horiz_avx2_10, 
wrap_convolve8_avg_horiz_avx2_10, 1143 wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10, 1144 wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10, 1145 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, 1146 wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, 1147 wrap_convolve8_avg_c_10, 10); 1148const ConvolveFunctions convolve12_avx2( 1149 wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12, 1150 wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12, 1151 wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12, 1152 wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12, 1153 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, 1154 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, 1155 wrap_convolve8_avg_c_12, 12); 1156const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2), 1157 ALL_SIZES(convolve10_avx2), 1158 ALL_SIZES(convolve12_avx2) }; 1159INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, 1160 ::testing::ValuesIn(kArrayConvolve8_avx2)); 1161#else // !CONFIG_VP9_HIGHBITDEPTH 1162const ConvolveFunctions convolve8_avx2( 1163 vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_avx2, 1164 vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_avx2, 1165 vpx_convolve8_avg_vert_ssse3, vpx_convolve8_avx2, vpx_convolve8_avg_ssse3, 1166 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1167 vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1168const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) }; 1169INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, 1170 ::testing::ValuesIn(kArrayConvolve8_avx2)); 1171#endif // CONFIG_VP9_HIGHBITDEPTH 1172#endif // HAVE_AVX2 1173 1174#if HAVE_NEON 1175#if CONFIG_VP9_HIGHBITDEPTH 1176const ConvolveFunctions convolve8_neon( 1177 wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8, 1178 wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8, 1179 wrap_convolve8_vert_neon_8, 
wrap_convolve8_avg_vert_neon_8, 1180 wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 1181 wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8, 1182 wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8, 1183 wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 8); 1184const ConvolveFunctions convolve10_neon( 1185 wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10, 1186 wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10, 1187 wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10, 1188 wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, 1189 wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10, 1190 wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10, 1191 wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, 10); 1192const ConvolveFunctions convolve12_neon( 1193 wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12, 1194 wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12, 1195 wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12, 1196 wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 1197 wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12, 1198 wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12, 1199 wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 12); 1200const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon), 1201 ALL_SIZES(convolve10_neon), 1202 ALL_SIZES(convolve12_neon) }; 1203#else 1204const ConvolveFunctions convolve8_neon( 1205 vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon, 1206 vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon, 1207 vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon, 1208 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1209 vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1210 1211const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) }; 1212#endif // CONFIG_VP9_HIGHBITDEPTH 1213INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, 1214 
::testing::ValuesIn(kArrayConvolve_neon)); 1215#endif // HAVE_NEON 1216 1217#if HAVE_DSPR2 1218const ConvolveFunctions convolve8_dspr2( 1219 vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2, vpx_convolve8_horiz_dspr2, 1220 vpx_convolve8_avg_horiz_dspr2, vpx_convolve8_vert_dspr2, 1221 vpx_convolve8_avg_vert_dspr2, vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2, 1222 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1223 vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1224 1225const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) }; 1226INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, 1227 ::testing::ValuesIn(kArrayConvolve8_dspr2)); 1228#endif // HAVE_DSPR2 1229 1230#if HAVE_MSA 1231const ConvolveFunctions convolve8_msa( 1232 vpx_convolve_copy_msa, vpx_convolve_avg_msa, vpx_convolve8_horiz_msa, 1233 vpx_convolve8_avg_horiz_msa, vpx_convolve8_vert_msa, 1234 vpx_convolve8_avg_vert_msa, vpx_convolve8_msa, vpx_convolve8_avg_msa, 1235 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1236 vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1237 1238const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) }; 1239INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, 1240 ::testing::ValuesIn(kArrayConvolve8_msa)); 1241#endif // HAVE_MSA 1242 1243#if HAVE_VSX 1244const ConvolveFunctions convolve8_vsx( 1245 vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_vsx, 1246 vpx_convolve8_avg_horiz_vsx, vpx_convolve8_vert_vsx, 1247 vpx_convolve8_avg_vert_vsx, vpx_convolve8_vsx, vpx_convolve8_avg_vsx, 1248 vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 1249 vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); 1250const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) }; 1251INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest, 1252 ::testing::ValuesIn(kArrayConvolve_vsx)); 1253#endif // HAVE_VSX 1254} // namespace 1255