1/* 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <math.h> 12#include <stdlib.h> 13#include <string.h> 14 15#include "third_party/googletest/src/include/gtest/gtest.h" 16#include "test/acm_random.h" 17#include "test/clear_system_state.h" 18#include "test/register_state_check.h" 19#include "test/util.h" 20 21#include "./vp9_rtcd.h" 22#include "vp9/common/vp9_entropy.h" 23#include "vpx/vpx_codec.h" 24#include "vpx/vpx_integer.h" 25 26using libvpx_test::ACMRandom; 27 28namespace { 29const int kNumCoeffs = 16; 30typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); 31typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); 32typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, 33 int tx_type); 34typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, 35 int tx_type); 36 37typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param; 38typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param; 39 40void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride, 41 int tx_type) { 42 vp9_fdct4x4_c(in, out, stride); 43} 44 45void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { 46 vp9_fht4x4_c(in, out, stride, tx_type); 47} 48 49void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride, 50 int tx_type) { 51 vp9_fwht4x4_c(in, out, stride); 52} 53 54#if CONFIG_VP9_HIGHBITDEPTH 55void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) { 56 vp9_high_idct4x4_16_add_c(in, out, stride, 10); 57} 58 59void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) { 60 vp9_high_idct4x4_16_add_c(in, out, stride, 12); 61} 62 63void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { 64 vp9_high_iht4x4_16_add_c(in, out, stride, tx_type, 10); 65} 66 67void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { 68 vp9_high_iht4x4_16_add_c(in, out, stride, tx_type, 12); 69} 70 71void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) { 72 vp9_high_iwht4x4_16_add_c(in, out, stride, 10); 73} 74 75void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) { 76 vp9_high_iwht4x4_16_add_c(in, out, stride, 12); 77} 78#endif 79 80class Trans4x4TestBase { 81 public: 82 virtual ~Trans4x4TestBase() {} 83 84 protected: 85 virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0; 86 87 virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0; 88 89 void RunAccuracyCheck(int limit) { 90 ACMRandom rnd(ACMRandom::DeterministicSeed()); 91 uint32_t max_error = 0; 92 int64_t total_error = 0; 93 const int count_test_block = 10000; 94 for (int i = 0; i < count_test_block; ++i) { 95 DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs); 96 DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs); 97 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); 98 DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs); 99#if CONFIG_VP9_HIGHBITDEPTH 100 DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs); 101 DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs); 102#endif 103 104 // Initialize a test block with input range [-255, 255]. 105 for (int j = 0; j < kNumCoeffs; ++j) { 106 if (bit_depth_ == VPX_BITS_8) { 107 src[j] = rnd.Rand8(); 108 dst[j] = rnd.Rand8(); 109 test_input_block[j] = src[j] - dst[j]; 110#if CONFIG_VP9_HIGHBITDEPTH 111 } else { 112 src16[j] = rnd.Rand16() & mask_; 113 dst16[j] = rnd.Rand16() & mask_; 114 test_input_block[j] = src16[j] - dst16[j]; 115#endif 116 } 117 } 118 119 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, 120 test_temp_block, pitch_)); 121 if (bit_depth_ == VPX_BITS_8) { 122 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); 123#if CONFIG_VP9_HIGHBITDEPTH 124 } else { 125 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, 126 CONVERT_TO_BYTEPTR(dst16), pitch_)); 127#endif 128 } 129 130 for (int j = 0; j < kNumCoeffs; ++j) { 131#if CONFIG_VP9_HIGHBITDEPTH 132 const uint32_t diff = 133 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; 134#else 135 const uint32_t diff = dst[j] - src[j]; 136#endif 137 const uint32_t error = diff * diff; 138 if (max_error < error) 139 max_error = error; 140 total_error += error; 141 } 142 } 143 144 EXPECT_GE(static_cast<uint32_t>(limit), max_error) 145 << "Error: 4x4 FHT/IHT has an individual round trip error > " 146 << limit; 147 148 EXPECT_GE(count_test_block * limit, total_error) 149 << "Error: 4x4 FHT/IHT has average round trip error > " << limit 150 << " per block"; 151 } 152 153 void RunCoeffCheck() { 154 ACMRandom rnd(ACMRandom::DeterministicSeed()); 155 const int count_test_block = 5000; 156 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); 157 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs); 158 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs); 159 160 for (int i = 0; i < count_test_block; ++i) { 161 // Initialize a test block with input range [-mask_, mask_]. 162 for (int j = 0; j < kNumCoeffs; ++j) 163 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); 164 165 fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_); 166 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_)); 167 168 // The minimum quant value is 4. 169 for (int j = 0; j < kNumCoeffs; ++j) 170 EXPECT_EQ(output_block[j], output_ref_block[j]); 171 } 172 } 173 174 void RunMemCheck() { 175 ACMRandom rnd(ACMRandom::DeterministicSeed()); 176 const int count_test_block = 5000; 177 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); 178 DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs); 179 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs); 180 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs); 181 182 for (int i = 0; i < count_test_block; ++i) { 183 // Initialize a test block with input range [-mask_, mask_]. 184 for (int j = 0; j < kNumCoeffs; ++j) { 185 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); 186 input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_; 187 } 188 if (i == 0) { 189 for (int j = 0; j < kNumCoeffs; ++j) 190 input_extreme_block[j] = mask_; 191 } else if (i == 1) { 192 for (int j = 0; j < kNumCoeffs; ++j) 193 input_extreme_block[j] = -mask_; 194 } 195 196 fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); 197 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, 198 output_block, pitch_)); 199 200 // The minimum quant value is 4. 201 for (int j = 0; j < kNumCoeffs; ++j) { 202 EXPECT_EQ(output_block[j], output_ref_block[j]); 203 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j])) 204 << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE"; 205 } 206 } 207 } 208 209 void RunInvAccuracyCheck(int limit) { 210 ACMRandom rnd(ACMRandom::DeterministicSeed()); 211 const int count_test_block = 1000; 212 DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs); 213 DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs); 214 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); 215 DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs); 216#if CONFIG_VP9_HIGHBITDEPTH 217 DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs); 218 DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs); 219#endif 220 221 for (int i = 0; i < count_test_block; ++i) { 222 // Initialize a test block with input range [-mask_, mask_]. 223 for (int j = 0; j < kNumCoeffs; ++j) { 224 if (bit_depth_ == VPX_BITS_8) { 225 src[j] = rnd.Rand8(); 226 dst[j] = rnd.Rand8(); 227 in[j] = src[j] - dst[j]; 228#if CONFIG_VP9_HIGHBITDEPTH 229 } else { 230 src16[j] = rnd.Rand16() & mask_; 231 dst16[j] = rnd.Rand16() & mask_; 232 in[j] = src16[j] - dst16[j]; 233#endif 234 } 235 } 236 237 fwd_txfm_ref(in, coeff, pitch_, tx_type_); 238 239 if (bit_depth_ == VPX_BITS_8) { 240 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); 241#if CONFIG_VP9_HIGHBITDEPTH 242 } else { 243 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 244 pitch_)); 245#endif 246 } 247 248 for (int j = 0; j < kNumCoeffs; ++j) { 249#if CONFIG_VP9_HIGHBITDEPTH 250 const uint32_t diff = 251 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; 252#else 253 const uint32_t diff = dst[j] - src[j]; 254#endif 255 const uint32_t error = diff * diff; 256 EXPECT_GE(static_cast<uint32_t>(limit), error) 257 << "Error: 4x4 IDCT has error " << error 258 << " at index " << j; 259 } 260 } 261 } 262 263 int pitch_; 264 int tx_type_; 265 FhtFunc fwd_txfm_ref; 266 vpx_bit_depth_t bit_depth_; 267 int mask_; 268}; 269 270class Trans4x4DCT 271 : public Trans4x4TestBase, 272 public ::testing::TestWithParam<Dct4x4Param> { 273 public: 274 virtual ~Trans4x4DCT() {} 275 276 virtual void SetUp() { 277 fwd_txfm_ = GET_PARAM(0); 278 inv_txfm_ = GET_PARAM(1); 279 tx_type_ = GET_PARAM(2); 280 pitch_ = 4; 281 fwd_txfm_ref = fdct4x4_ref; 282 bit_depth_ = GET_PARAM(3); 283 mask_ = (1 << bit_depth_) - 1; 284 } 285 virtual void TearDown() { libvpx_test::ClearSystemState(); } 286 287 protected: 288 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { 289 fwd_txfm_(in, out, stride); 290 } 291 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { 292 inv_txfm_(out, dst, stride); 293 } 294 295 FdctFunc fwd_txfm_; 296 IdctFunc inv_txfm_; 297}; 298 299TEST_P(Trans4x4DCT, AccuracyCheck) { 300 RunAccuracyCheck(1); 301} 302 303TEST_P(Trans4x4DCT, CoeffCheck) { 304 RunCoeffCheck(); 305} 306 307TEST_P(Trans4x4DCT, MemCheck) { 308 RunMemCheck(); 309} 310 311TEST_P(Trans4x4DCT, InvAccuracyCheck) { 312 RunInvAccuracyCheck(1); 313} 314 315class Trans4x4HT 316 : public Trans4x4TestBase, 317 public ::testing::TestWithParam<Ht4x4Param> { 318 public: 319 virtual ~Trans4x4HT() {} 320 321 virtual void SetUp() { 322 fwd_txfm_ = GET_PARAM(0); 323 inv_txfm_ = GET_PARAM(1); 324 tx_type_ = GET_PARAM(2); 325 pitch_ = 4; 326 fwd_txfm_ref = fht4x4_ref; 327 bit_depth_ = GET_PARAM(3); 328 mask_ = (1 << bit_depth_) - 1; 329 } 330 virtual void TearDown() { libvpx_test::ClearSystemState(); } 331 332 protected: 333 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { 334 fwd_txfm_(in, out, stride, tx_type_); 335 } 336 337 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { 338 inv_txfm_(out, dst, stride, tx_type_); 339 } 340 341 FhtFunc fwd_txfm_; 342 IhtFunc inv_txfm_; 343}; 344 345TEST_P(Trans4x4HT, AccuracyCheck) { 346 RunAccuracyCheck(1); 347} 348 349TEST_P(Trans4x4HT, CoeffCheck) { 350 RunCoeffCheck(); 351} 352 353TEST_P(Trans4x4HT, MemCheck) { 354 RunMemCheck(); 355} 356 357TEST_P(Trans4x4HT, InvAccuracyCheck) { 358 RunInvAccuracyCheck(1); 359} 360 361class Trans4x4WHT 362 : public Trans4x4TestBase, 363 public ::testing::TestWithParam<Dct4x4Param> { 364 public: 365 virtual ~Trans4x4WHT() {} 366 367 virtual void SetUp() { 368 fwd_txfm_ = GET_PARAM(0); 369 inv_txfm_ = GET_PARAM(1); 370 tx_type_ = GET_PARAM(2); 371 pitch_ = 4; 372 fwd_txfm_ref = fwht4x4_ref; 373 bit_depth_ = GET_PARAM(3); 374 mask_ = (1 << bit_depth_) - 1; 375 } 376 virtual void TearDown() { libvpx_test::ClearSystemState(); } 377 378 protected: 379 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) { 380 fwd_txfm_(in, out, stride); 381 } 382 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) { 383 inv_txfm_(out, dst, stride); 384 } 385 386 FdctFunc fwd_txfm_; 387 IdctFunc inv_txfm_; 388}; 389 390TEST_P(Trans4x4WHT, AccuracyCheck) { 391 RunAccuracyCheck(0); 392} 393 394TEST_P(Trans4x4WHT, CoeffCheck) { 395 RunCoeffCheck(); 396} 397 398TEST_P(Trans4x4WHT, MemCheck) { 399 RunMemCheck(); 400} 401 402TEST_P(Trans4x4WHT, InvAccuracyCheck) { 403 RunInvAccuracyCheck(0); 404} 405using std::tr1::make_tuple; 406 407#if CONFIG_VP9_HIGHBITDEPTH 408INSTANTIATE_TEST_CASE_P( 409 C, Trans4x4DCT, 410 ::testing::Values( 411 make_tuple(&vp9_high_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10), 412 make_tuple(&vp9_high_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12), 413 make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8))); 414#else 415INSTANTIATE_TEST_CASE_P( 416 C, Trans4x4DCT, 417 ::testing::Values( 418 make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8))); 419#endif 420 421#if CONFIG_VP9_HIGHBITDEPTH 422INSTANTIATE_TEST_CASE_P( 423 C, Trans4x4HT, 424 ::testing::Values( 425 make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10), 426 make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10), 427 make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10), 428 make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10), 429 make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12), 430 make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12), 431 make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12), 432 make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12), 433 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8), 434 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8), 435 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8), 436 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8))); 437#else 438INSTANTIATE_TEST_CASE_P( 439 C, Trans4x4HT, 440 ::testing::Values( 441 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8), 442 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8), 443 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8), 444 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8))); 445#endif 446 447#if CONFIG_VP9_HIGHBITDEPTH 448INSTANTIATE_TEST_CASE_P( 449 C, Trans4x4WHT, 450 ::testing::Values( 451 make_tuple(&vp9_high_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10), 452 make_tuple(&vp9_high_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12), 453 make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8))); 454#else 455INSTANTIATE_TEST_CASE_P( 456 C, Trans4x4WHT, 457 ::testing::Values( 458 make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8))); 459#endif 460 461#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH 462INSTANTIATE_TEST_CASE_P( 463 NEON, Trans4x4DCT, 464 ::testing::Values( 465 make_tuple(&vp9_fdct4x4_c, 466 &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8))); 467INSTANTIATE_TEST_CASE_P( 468 DISABLED_NEON, Trans4x4HT, 469 ::testing::Values( 470 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8), 471 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8), 472 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8), 473 make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8))); 474#endif 475 476#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH 477INSTANTIATE_TEST_CASE_P( 478 MMX, Trans4x4WHT, 479 ::testing::Values( 480 make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8))); 481#endif 482 483#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH 484INSTANTIATE_TEST_CASE_P( 485 SSE2, Trans4x4DCT, 486 ::testing::Values( 487 make_tuple(&vp9_fdct4x4_sse2, 488 &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8))); 489INSTANTIATE_TEST_CASE_P( 490 SSE2, Trans4x4HT, 491 ::testing::Values( 492 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8), 493 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8), 494 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8), 495 make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8))); 496#endif 497 498} // namespace 499