1/* 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <math.h> 12#include <stdlib.h> 13#include <string.h> 14 15#include "third_party/googletest/src/include/gtest/gtest.h" 16 17#include "./vp9_rtcd.h" 18#include "./vpx_config.h" 19#include "./vpx_dsp_rtcd.h" 20#include "test/acm_random.h" 21#include "test/clear_system_state.h" 22#include "test/register_state_check.h" 23#include "test/util.h" 24#include "vp9/common/vp9_entropy.h" 25#include "vpx/vpx_codec.h" 26#include "vpx/vpx_integer.h" 27#include "vpx_ports/mem.h" 28 29using libvpx_test::ACMRandom; 30 31namespace { 32#ifdef _MSC_VER 33static int round(double x) { 34 if (x < 0) 35 return static_cast<int>(ceil(x - 0.5)); 36 else 37 return static_cast<int>(floor(x + 0.5)); 38} 39#endif 40 41const int kNumCoeffs = 1024; 42const double kPi = 3.141592653589793238462643383279502884; 43void reference_32x32_dct_1d(const double in[32], double out[32]) { 44 const double kInvSqrt2 = 0.707106781186547524400844362104; 45 for (int k = 0; k < 32; k++) { 46 out[k] = 0.0; 47 for (int n = 0; n < 32; n++) 48 out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0); 49 if (k == 0) 50 out[k] = out[k] * kInvSqrt2; 51 } 52} 53 54void reference_32x32_dct_2d(const int16_t input[kNumCoeffs], 55 double output[kNumCoeffs]) { 56 // First transform columns 57 for (int i = 0; i < 32; ++i) { 58 double temp_in[32], temp_out[32]; 59 for (int j = 0; j < 32; ++j) 60 temp_in[j] = input[j*32 + i]; 61 reference_32x32_dct_1d(temp_in, temp_out); 62 for (int j = 0; j < 32; ++j) 63 output[j * 32 + i] = temp_out[j]; 64 } 65 // Then transform rows 66 for (int i = 0; i < 32; ++i) { 67 double temp_in[32], temp_out[32]; 68 for (int j = 0; j < 32; ++j) 69 temp_in[j] = output[j + i*32]; 70 reference_32x32_dct_1d(temp_in, temp_out); 71 // Scale by some magic number 72 for (int j = 0; j < 32; ++j) 73 output[j + i * 32] = temp_out[j] / 4; 74 } 75} 76 77typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); 78typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); 79 80typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t> 81 Trans32x32Param; 82 83#if CONFIG_VP9_HIGHBITDEPTH 84void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) { 85 vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8); 86} 87 88void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) { 89 vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10); 90} 91 92void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) { 93 vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12); 94} 95#endif // CONFIG_VP9_HIGHBITDEPTH 96 97class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> { 98 public: 99 virtual ~Trans32x32Test() {} 100 virtual void SetUp() { 101 fwd_txfm_ = GET_PARAM(0); 102 inv_txfm_ = GET_PARAM(1); 103 version_ = GET_PARAM(2); // 0: high precision forward transform 104 // 1: low precision version for rd loop 105 bit_depth_ = GET_PARAM(3); 106 mask_ = (1 << bit_depth_) - 1; 107 } 108 109 virtual void TearDown() { libvpx_test::ClearSystemState(); } 110 111 protected: 112 int version_; 113 vpx_bit_depth_t bit_depth_; 114 int mask_; 115 FwdTxfmFunc fwd_txfm_; 116 InvTxfmFunc inv_txfm_; 117}; 118 119TEST_P(Trans32x32Test, AccuracyCheck) { 120 ACMRandom rnd(ACMRandom::DeterministicSeed()); 121 uint32_t max_error = 0; 122 int64_t total_error = 0; 123 const int count_test_block = 10000; 124 DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]); 125 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]); 126 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); 127 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); 128#if CONFIG_VP9_HIGHBITDEPTH 129 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); 130 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); 131#endif 132 133 for (int i = 0; i < count_test_block; ++i) { 134 // Initialize a test block with input range [-mask_, mask_]. 135 for (int j = 0; j < kNumCoeffs; ++j) { 136 if (bit_depth_ == VPX_BITS_8) { 137 src[j] = rnd.Rand8(); 138 dst[j] = rnd.Rand8(); 139 test_input_block[j] = src[j] - dst[j]; 140#if CONFIG_VP9_HIGHBITDEPTH 141 } else { 142 src16[j] = rnd.Rand16() & mask_; 143 dst16[j] = rnd.Rand16() & mask_; 144 test_input_block[j] = src16[j] - dst16[j]; 145#endif 146 } 147 } 148 149 ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32)); 150 if (bit_depth_ == VPX_BITS_8) { 151 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32)); 152#if CONFIG_VP9_HIGHBITDEPTH 153 } else { 154 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, 155 CONVERT_TO_BYTEPTR(dst16), 32)); 156#endif 157 } 158 159 for (int j = 0; j < kNumCoeffs; ++j) { 160#if CONFIG_VP9_HIGHBITDEPTH 161 const uint32_t diff = 162 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; 163#else 164 const uint32_t diff = dst[j] - src[j]; 165#endif 166 const uint32_t error = diff * diff; 167 if (max_error < error) 168 max_error = error; 169 total_error += error; 170 } 171 } 172 173 if (version_ == 1) { 174 max_error /= 2; 175 total_error /= 45; 176 } 177 178 EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error) 179 << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1"; 180 181 EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error) 182 << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block"; 183} 184 185TEST_P(Trans32x32Test, CoeffCheck) { 186 ACMRandom rnd(ACMRandom::DeterministicSeed()); 187 const int count_test_block = 1000; 188 189 DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]); 190 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); 191 DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); 192 193 for (int i = 0; i < count_test_block; ++i) { 194 for (int j = 0; j < kNumCoeffs; ++j) 195 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); 196 197 const int stride = 32; 198 vpx_fdct32x32_c(input_block, output_ref_block, stride); 199 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride)); 200 201 if (version_ == 0) { 202 for (int j = 0; j < kNumCoeffs; ++j) 203 EXPECT_EQ(output_block[j], output_ref_block[j]) 204 << "Error: 32x32 FDCT versions have mismatched coefficients"; 205 } else { 206 for (int j = 0; j < kNumCoeffs; ++j) 207 EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) 208 << "Error: 32x32 FDCT rd has mismatched coefficients"; 209 } 210 } 211} 212 213TEST_P(Trans32x32Test, MemCheck) { 214 ACMRandom rnd(ACMRandom::DeterministicSeed()); 215 const int count_test_block = 2000; 216 217 DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); 218 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); 219 DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); 220 221 for (int i = 0; i < count_test_block; ++i) { 222 // Initialize a test block with input range [-mask_, mask_]. 223 for (int j = 0; j < kNumCoeffs; ++j) { 224 input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_; 225 } 226 if (i == 0) { 227 for (int j = 0; j < kNumCoeffs; ++j) 228 input_extreme_block[j] = mask_; 229 } else if (i == 1) { 230 for (int j = 0; j < kNumCoeffs; ++j) 231 input_extreme_block[j] = -mask_; 232 } 233 234 const int stride = 32; 235 vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride); 236 ASM_REGISTER_STATE_CHECK( 237 fwd_txfm_(input_extreme_block, output_block, stride)); 238 239 // The minimum quant value is 4. 240 for (int j = 0; j < kNumCoeffs; ++j) { 241 if (version_ == 0) { 242 EXPECT_EQ(output_block[j], output_ref_block[j]) 243 << "Error: 32x32 FDCT versions have mismatched coefficients"; 244 } else { 245 EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) 246 << "Error: 32x32 FDCT rd has mismatched coefficients"; 247 } 248 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j])) 249 << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE"; 250 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j])) 251 << "Error: 32x32 FDCT has coefficient larger than " 252 << "4*DCT_MAX_VALUE"; 253 } 254 } 255} 256 257TEST_P(Trans32x32Test, InverseAccuracy) { 258 ACMRandom rnd(ACMRandom::DeterministicSeed()); 259 const int count_test_block = 1000; 260 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); 261 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); 262 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); 263 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); 264#if CONFIG_VP9_HIGHBITDEPTH 265 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); 266 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); 267#endif 268 269 for (int i = 0; i < count_test_block; ++i) { 270 double out_r[kNumCoeffs]; 271 272 // Initialize a test block with input range [-255, 255] 273 for (int j = 0; j < kNumCoeffs; ++j) { 274 if (bit_depth_ == VPX_BITS_8) { 275 src[j] = rnd.Rand8(); 276 dst[j] = rnd.Rand8(); 277 in[j] = src[j] - dst[j]; 278#if CONFIG_VP9_HIGHBITDEPTH 279 } else { 280 src16[j] = rnd.Rand16() & mask_; 281 dst16[j] = rnd.Rand16() & mask_; 282 in[j] = src16[j] - dst16[j]; 283#endif 284 } 285 } 286 287 reference_32x32_dct_2d(in, out_r); 288 for (int j = 0; j < kNumCoeffs; ++j) 289 coeff[j] = static_cast<tran_low_t>(round(out_r[j])); 290 if (bit_depth_ == VPX_BITS_8) { 291 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32)); 292#if CONFIG_VP9_HIGHBITDEPTH 293 } else { 294 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32)); 295#endif 296 } 297 for (int j = 0; j < kNumCoeffs; ++j) { 298#if CONFIG_VP9_HIGHBITDEPTH 299 const int diff = 300 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; 301#else 302 const int diff = dst[j] - src[j]; 303#endif 304 const int error = diff * diff; 305 EXPECT_GE(1, error) 306 << "Error: 32x32 IDCT has error " << error 307 << " at index " << j; 308 } 309 } 310} 311 312using std::tr1::make_tuple; 313 314#if CONFIG_VP9_HIGHBITDEPTH 315INSTANTIATE_TEST_CASE_P( 316 C, Trans32x32Test, 317 ::testing::Values( 318 make_tuple(&vpx_highbd_fdct32x32_c, 319 &idct32x32_10, 0, VPX_BITS_10), 320 make_tuple(&vpx_highbd_fdct32x32_rd_c, 321 &idct32x32_10, 1, VPX_BITS_10), 322 make_tuple(&vpx_highbd_fdct32x32_c, 323 &idct32x32_12, 0, VPX_BITS_12), 324 make_tuple(&vpx_highbd_fdct32x32_rd_c, 325 &idct32x32_12, 1, VPX_BITS_12), 326 make_tuple(&vpx_fdct32x32_c, 327 &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), 328 make_tuple(&vpx_fdct32x32_rd_c, 329 &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); 330#else 331INSTANTIATE_TEST_CASE_P( 332 C, Trans32x32Test, 333 ::testing::Values( 334 make_tuple(&vpx_fdct32x32_c, 335 &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), 336 make_tuple(&vpx_fdct32x32_rd_c, 337 &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); 338#endif // CONFIG_VP9_HIGHBITDEPTH 339 340#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 341INSTANTIATE_TEST_CASE_P( 342 NEON, Trans32x32Test, 343 ::testing::Values( 344 make_tuple(&vpx_fdct32x32_c, 345 &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8), 346 make_tuple(&vpx_fdct32x32_rd_c, 347 &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8))); 348#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 349 350#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 351INSTANTIATE_TEST_CASE_P( 352 SSE2, Trans32x32Test, 353 ::testing::Values( 354 make_tuple(&vpx_fdct32x32_sse2, 355 &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), 356 make_tuple(&vpx_fdct32x32_rd_sse2, 357 &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); 358#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 359 360#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 361INSTANTIATE_TEST_CASE_P( 362 SSE2, Trans32x32Test, 363 ::testing::Values( 364 make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10), 365 make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1, 366 VPX_BITS_10), 367 make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12), 368 make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1, 369 VPX_BITS_12), 370 make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0, 371 VPX_BITS_8), 372 make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1, 373 VPX_BITS_8))); 374#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 375 376#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 377INSTANTIATE_TEST_CASE_P( 378 AVX2, Trans32x32Test, 379 ::testing::Values( 380 make_tuple(&vpx_fdct32x32_avx2, 381 &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), 382 make_tuple(&vpx_fdct32x32_rd_avx2, 383 &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); 384#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 385 386#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 387INSTANTIATE_TEST_CASE_P( 388 MSA, Trans32x32Test, 389 ::testing::Values( 390 make_tuple(&vpx_fdct32x32_msa, 391 &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8), 392 make_tuple(&vpx_fdct32x32_rd_msa, 393 &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8))); 394#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE 395} // namespace 396