1/* 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <math.h> 12#include <stdlib.h> 13#include <string.h> 14 15#include "third_party/googletest/src/include/gtest/gtest.h" 16#include "test/acm_random.h" 17#include "test/clear_system_state.h" 18#include "test/register_state_check.h" 19#include "test/util.h" 20 21#include "./vpx_config.h" 22#include "./vp9_rtcd.h" 23#include "vp9/common/vp9_entropy.h" 24#include "vpx/vpx_codec.h" 25#include "vpx/vpx_integer.h" 26 27using libvpx_test::ACMRandom; 28 29namespace { 30#ifdef _MSC_VER 31static int round(double x) { 32 if (x < 0) 33 return static_cast<int>(ceil(x - 0.5)); 34 else 35 return static_cast<int>(floor(x + 0.5)); 36} 37#endif 38 39const int kNumCoeffs = 1024; 40const double kPi = 3.141592653589793238462643383279502884; 41void reference_32x32_dct_1d(const double in[32], double out[32]) { 42 const double kInvSqrt2 = 0.707106781186547524400844362104; 43 for (int k = 0; k < 32; k++) { 44 out[k] = 0.0; 45 for (int n = 0; n < 32; n++) 46 out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0); 47 if (k == 0) 48 out[k] = out[k] * kInvSqrt2; 49 } 50} 51 52void reference_32x32_dct_2d(const int16_t input[kNumCoeffs], 53 double output[kNumCoeffs]) { 54 // First transform columns 55 for (int i = 0; i < 32; ++i) { 56 double temp_in[32], temp_out[32]; 57 for (int j = 0; j < 32; ++j) 58 temp_in[j] = input[j*32 + i]; 59 reference_32x32_dct_1d(temp_in, temp_out); 60 for (int j = 0; j < 32; ++j) 61 output[j * 32 + i] = temp_out[j]; 62 } 63 // Then transform rows 64 for (int i = 0; i < 32; ++i) { 65 double temp_in[32], temp_out[32]; 66 for (int j = 0; j < 32; ++j) 67 temp_in[j] = output[j + i*32]; 68 reference_32x32_dct_1d(temp_in, temp_out); 69 // Scale by some magic number 70 for (int j = 0; j < 32; ++j) 71 output[j + i * 32] = temp_out[j] / 4; 72 } 73} 74 75typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); 76typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); 77 78typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t> 79 Trans32x32Param; 80 81#if CONFIG_VP9_HIGHBITDEPTH 82void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) { 83 vp9_high_idct32x32_1024_add_c(in, out, stride, 10); 84} 85 86void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) { 87 vp9_high_idct32x32_1024_add_c(in, out, stride, 12); 88} 89#endif 90 91class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> { 92 public: 93 virtual ~Trans32x32Test() {} 94 virtual void SetUp() { 95 fwd_txfm_ = GET_PARAM(0); 96 inv_txfm_ = GET_PARAM(1); 97 version_ = GET_PARAM(2); // 0: high precision forward transform 98 // 1: low precision version for rd loop 99 bit_depth_ = GET_PARAM(3); 100 mask_ = (1 << bit_depth_) - 1; 101 } 102 103 virtual void TearDown() { libvpx_test::ClearSystemState(); } 104 105 protected: 106 int version_; 107 vpx_bit_depth_t bit_depth_; 108 int mask_; 109 FwdTxfmFunc fwd_txfm_; 110 InvTxfmFunc inv_txfm_; 111}; 112 113TEST_P(Trans32x32Test, AccuracyCheck) { 114 ACMRandom rnd(ACMRandom::DeterministicSeed()); 115 uint32_t max_error = 0; 116 int64_t total_error = 0; 117 const int count_test_block = 1000; 118 DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs); 119 DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs); 120 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); 121 DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs); 122#if CONFIG_VP9_HIGHBITDEPTH 123 DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs); 124 DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs); 125#endif 126 127 for (int i = 0; i < count_test_block; ++i) { 128 // Initialize a test block with input range [-mask_, mask_]. 129 for (int j = 0; j < kNumCoeffs; ++j) { 130 if (bit_depth_ == 8) { 131 src[j] = rnd.Rand8(); 132 dst[j] = rnd.Rand8(); 133 test_input_block[j] = src[j] - dst[j]; 134#if CONFIG_VP9_HIGHBITDEPTH 135 } else { 136 src16[j] = rnd.Rand16() & mask_; 137 dst16[j] = rnd.Rand16() & mask_; 138 test_input_block[j] = src16[j] - dst16[j]; 139#endif 140 } 141 } 142 143 ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32)); 144 if (bit_depth_ == VPX_BITS_8) { 145 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32)); 146#if CONFIG_VP9_HIGHBITDEPTH 147 } else { 148 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, 149 CONVERT_TO_BYTEPTR(dst16), 32)); 150#endif 151 } 152 153 for (int j = 0; j < kNumCoeffs; ++j) { 154#if CONFIG_VP9_HIGHBITDEPTH 155 const uint32_t diff = 156 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; 157#else 158 const uint32_t diff = dst[j] - src[j]; 159#endif 160 const uint32_t error = diff * diff; 161 if (max_error < error) 162 max_error = error; 163 total_error += error; 164 } 165 } 166 167 if (version_ == 1) { 168 max_error /= 2; 169 total_error /= 45; 170 } 171 172 EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error) 173 << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1"; 174 175 EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error) 176 << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block"; 177} 178 179TEST_P(Trans32x32Test, CoeffCheck) { 180 ACMRandom rnd(ACMRandom::DeterministicSeed()); 181 const int count_test_block = 1000; 182 183 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); 184 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs); 185 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs); 186 187 for (int i = 0; i < count_test_block; ++i) { 188 for (int j = 0; j < kNumCoeffs; ++j) 189 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); 190 191 const int stride = 32; 192 vp9_fdct32x32_c(input_block, output_ref_block, stride); 193 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride)); 194 195 if (version_ == 0) { 196 for (int j = 0; j < kNumCoeffs; ++j) 197 EXPECT_EQ(output_block[j], output_ref_block[j]) 198 << "Error: 32x32 FDCT versions have mismatched coefficients"; 199 } else { 200 for (int j = 0; j < kNumCoeffs; ++j) 201 EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) 202 << "Error: 32x32 FDCT rd has mismatched coefficients"; 203 } 204 } 205} 206 207TEST_P(Trans32x32Test, MemCheck) { 208 ACMRandom rnd(ACMRandom::DeterministicSeed()); 209 const int count_test_block = 2000; 210 211 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); 212 DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs); 213 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs); 214 DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs); 215 216 for (int i = 0; i < count_test_block; ++i) { 217 // Initialize a test block with input range [-mask_, mask_]. 218 for (int j = 0; j < kNumCoeffs; ++j) { 219 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); 220 input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_; 221 } 222 if (i == 0) { 223 for (int j = 0; j < kNumCoeffs; ++j) 224 input_extreme_block[j] = mask_; 225 } else if (i == 1) { 226 for (int j = 0; j < kNumCoeffs; ++j) 227 input_extreme_block[j] = -mask_; 228 } 229 230 const int stride = 32; 231 vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride); 232 ASM_REGISTER_STATE_CHECK( 233 fwd_txfm_(input_extreme_block, output_block, stride)); 234 235 // The minimum quant value is 4. 236 for (int j = 0; j < kNumCoeffs; ++j) { 237 if (version_ == 0) { 238 EXPECT_EQ(output_block[j], output_ref_block[j]) 239 << "Error: 32x32 FDCT versions have mismatched coefficients"; 240 } else { 241 EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) 242 << "Error: 32x32 FDCT rd has mismatched coefficients"; 243 } 244 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j])) 245 << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE"; 246 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j])) 247 << "Error: 32x32 FDCT has coefficient larger than " 248 << "4*DCT_MAX_VALUE"; 249 } 250 } 251} 252 253TEST_P(Trans32x32Test, InverseAccuracy) { 254 ACMRandom rnd(ACMRandom::DeterministicSeed()); 255 const int count_test_block = 1000; 256 DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs); 257 DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs); 258 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); 259 DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs); 260#if CONFIG_VP9_HIGHBITDEPTH 261 DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs); 262 DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs); 263#endif 264 265 for (int i = 0; i < count_test_block; ++i) { 266 double out_r[kNumCoeffs]; 267 268 // Initialize a test block with input range [-255, 255] 269 for (int j = 0; j < kNumCoeffs; ++j) { 270 if (bit_depth_ == VPX_BITS_8) { 271 src[j] = rnd.Rand8(); 272 dst[j] = rnd.Rand8(); 273 in[j] = src[j] - dst[j]; 274#if CONFIG_VP9_HIGHBITDEPTH 275 } else { 276 src16[j] = rnd.Rand16() & mask_; 277 dst16[j] = rnd.Rand16() & mask_; 278 in[j] = src16[j] - dst16[j]; 279#endif 280 } 281 } 282 283 reference_32x32_dct_2d(in, out_r); 284 for (int j = 0; j < kNumCoeffs; ++j) 285 coeff[j] = round(out_r[j]); 286 if (bit_depth_ == VPX_BITS_8) { 287 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32)); 288#if CONFIG_VP9_HIGHBITDEPTH 289 } else { 290 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32)); 291#endif 292 } 293 for (int j = 0; j < kNumCoeffs; ++j) { 294#if CONFIG_VP9_HIGHBITDEPTH 295 const int diff = 296 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; 297#else 298 const int diff = dst[j] - src[j]; 299#endif 300 const int error = diff * diff; 301 EXPECT_GE(1, error) 302 << "Error: 32x32 IDCT has error " << error 303 << " at index " << j; 304 } 305 } 306} 307 308using std::tr1::make_tuple; 309 310#if CONFIG_VP9_HIGHBITDEPTH 311INSTANTIATE_TEST_CASE_P( 312 C, Trans32x32Test, 313 ::testing::Values( 314 make_tuple(&vp9_high_fdct32x32_c, 315 &idct32x32_10, 0, VPX_BITS_10), 316 make_tuple(&vp9_high_fdct32x32_rd_c, 317 &idct32x32_10, 1, VPX_BITS_10), 318 make_tuple(&vp9_high_fdct32x32_c, 319 &idct32x32_12, 0, VPX_BITS_12), 320 make_tuple(&vp9_high_fdct32x32_rd_c, 321 &idct32x32_12, 1, VPX_BITS_12), 322 make_tuple(&vp9_fdct32x32_c, 323 &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8), 324 make_tuple(&vp9_fdct32x32_rd_c, 325 &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8))); 326#else 327INSTANTIATE_TEST_CASE_P( 328 C, Trans32x32Test, 329 ::testing::Values( 330 make_tuple(&vp9_fdct32x32_c, 331 &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8), 332 make_tuple(&vp9_fdct32x32_rd_c, 333 &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8))); 334#endif 335 336#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH 337INSTANTIATE_TEST_CASE_P( 338 NEON, Trans32x32Test, 339 ::testing::Values( 340 make_tuple(&vp9_fdct32x32_c, 341 &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8), 342 make_tuple(&vp9_fdct32x32_rd_c, 343 &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8))); 344#endif 345 346#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH 347INSTANTIATE_TEST_CASE_P( 348 SSE2, Trans32x32Test, 349 ::testing::Values( 350 make_tuple(&vp9_fdct32x32_sse2, 351 &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8), 352 make_tuple(&vp9_fdct32x32_rd_sse2, 353 &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); 354#endif 355 356#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH 357INSTANTIATE_TEST_CASE_P( 358 AVX2, Trans32x32Test, 359 ::testing::Values( 360 make_tuple(&vp9_fdct32x32_avx2, 361 &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8), 362 make_tuple(&vp9_fdct32x32_rd_avx2, 363 &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); 364#endif 365} // namespace 366