1/*
2 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <math.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include "third_party/googletest/src/include/gtest/gtest.h"
16
17#include "./vp9_rtcd.h"
18#include "./vpx_config.h"
19#include "./vpx_dsp_rtcd.h"
20#include "test/acm_random.h"
21#include "test/clear_system_state.h"
22#include "test/register_state_check.h"
23#include "test/util.h"
24#include "vp9/common/vp9_entropy.h"
25#include "vpx/vpx_codec.h"
26#include "vpx/vpx_integer.h"
27#include "vpx_ports/mem.h"
28
29using libvpx_test::ACMRandom;
30
31namespace {
#ifdef _MSC_VER
// Local round() for MSVC builds: returns the integer nearest to |x|, with
// halfway cases rounded away from zero.
static int round(double x) {
  const double nearest = (x < 0) ? ceil(x - 0.5) : floor(x + 0.5);
  return static_cast<int>(nearest);
}
#endif  // _MSC_VER
40
// Number of samples (and coefficients) in one 32x32 block.
const int kNumCoeffs = 32 * 32;
const double kPi = 3.141592653589793238462643383279502884;

// Double-precision reference for a 32-point 1-D forward DCT.
//   out[k] = sum over n of in[n] * cos(pi * (2n + 1) * k / 64)
// with the k == 0 term additionally multiplied by 1/sqrt(2).
void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int freq = 0; freq < 32; ++freq) {
    double &acc = out[freq];
    acc = 0.0;
    for (int sample = 0; sample < 32; ++sample) {
      const double angle = kPi * (2 * sample + 1) * freq / 64.0;
      acc += in[sample] * cos(angle);
    }
    // Only the DC term carries the extra 1/sqrt(2) factor.
    if (freq == 0) acc = acc * kInvSqrt2;
  }
}
53
54void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
55                            double output[kNumCoeffs]) {
56  // First transform columns
57  for (int i = 0; i < 32; ++i) {
58    double temp_in[32], temp_out[32];
59    for (int j = 0; j < 32; ++j)
60      temp_in[j] = input[j*32 + i];
61    reference_32x32_dct_1d(temp_in, temp_out);
62    for (int j = 0; j < 32; ++j)
63      output[j * 32 + i] = temp_out[j];
64  }
65  // Then transform rows
66  for (int i = 0; i < 32; ++i) {
67    double temp_in[32], temp_out[32];
68    for (int j = 0; j < 32; ++j)
69      temp_in[j] = output[j + i*32];
70    reference_32x32_dct_1d(temp_in, temp_out);
71    // Scale by some magic number
72    for (int j = 0; j < 32; ++j)
73      output[j + i * 32] = temp_out[j] / 4;
74  }
75}
76
77typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
78typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
79
80typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
81    Trans32x32Param;
82
83#if CONFIG_VP9_HIGHBITDEPTH
84void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
85  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);
86}
87
88void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
89  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
90}
91
92void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
93  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
94}
95#endif  // CONFIG_VP9_HIGHBITDEPTH
96
97class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
98 public:
99  virtual ~Trans32x32Test() {}
100  virtual void SetUp() {
101    fwd_txfm_ = GET_PARAM(0);
102    inv_txfm_ = GET_PARAM(1);
103    version_  = GET_PARAM(2);  // 0: high precision forward transform
104                               // 1: low precision version for rd loop
105    bit_depth_ = GET_PARAM(3);
106    mask_ = (1 << bit_depth_) - 1;
107  }
108
109  virtual void TearDown() { libvpx_test::ClearSystemState(); }
110
111 protected:
112  int version_;
113  vpx_bit_depth_t bit_depth_;
114  int mask_;
115  FwdTxfmFunc fwd_txfm_;
116  InvTxfmFunc inv_txfm_;
117};
118
119TEST_P(Trans32x32Test, AccuracyCheck) {
120  ACMRandom rnd(ACMRandom::DeterministicSeed());
121  uint32_t max_error = 0;
122  int64_t total_error = 0;
123  const int count_test_block = 10000;
124  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
125  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
126  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
127  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
128#if CONFIG_VP9_HIGHBITDEPTH
129  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
130  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
131#endif
132
133  for (int i = 0; i < count_test_block; ++i) {
134    // Initialize a test block with input range [-mask_, mask_].
135    for (int j = 0; j < kNumCoeffs; ++j) {
136      if (bit_depth_ == VPX_BITS_8) {
137        src[j] = rnd.Rand8();
138        dst[j] = rnd.Rand8();
139        test_input_block[j] = src[j] - dst[j];
140#if CONFIG_VP9_HIGHBITDEPTH
141      } else {
142        src16[j] = rnd.Rand16() & mask_;
143        dst16[j] = rnd.Rand16() & mask_;
144        test_input_block[j] = src16[j] - dst16[j];
145#endif
146      }
147    }
148
149    ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
150    if (bit_depth_ == VPX_BITS_8) {
151      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
152#if CONFIG_VP9_HIGHBITDEPTH
153    } else {
154      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
155                                         CONVERT_TO_BYTEPTR(dst16), 32));
156#endif
157    }
158
159    for (int j = 0; j < kNumCoeffs; ++j) {
160#if CONFIG_VP9_HIGHBITDEPTH
161      const uint32_t diff =
162          bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
163#else
164      const uint32_t diff = dst[j] - src[j];
165#endif
166      const uint32_t error = diff * diff;
167      if (max_error < error)
168        max_error = error;
169      total_error += error;
170    }
171  }
172
173  if (version_ == 1) {
174    max_error /= 2;
175    total_error /= 45;
176  }
177
178  EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
179      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
180
181  EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
182      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
183}
184
185TEST_P(Trans32x32Test, CoeffCheck) {
186  ACMRandom rnd(ACMRandom::DeterministicSeed());
187  const int count_test_block = 1000;
188
189  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
190  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
191  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
192
193  for (int i = 0; i < count_test_block; ++i) {
194    for (int j = 0; j < kNumCoeffs; ++j)
195      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
196
197    const int stride = 32;
198    vpx_fdct32x32_c(input_block, output_ref_block, stride);
199    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
200
201    if (version_ == 0) {
202      for (int j = 0; j < kNumCoeffs; ++j)
203        EXPECT_EQ(output_block[j], output_ref_block[j])
204            << "Error: 32x32 FDCT versions have mismatched coefficients";
205    } else {
206      for (int j = 0; j < kNumCoeffs; ++j)
207        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
208            << "Error: 32x32 FDCT rd has mismatched coefficients";
209    }
210  }
211}
212
213TEST_P(Trans32x32Test, MemCheck) {
214  ACMRandom rnd(ACMRandom::DeterministicSeed());
215  const int count_test_block = 2000;
216
217  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
218  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
219  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
220
221  for (int i = 0; i < count_test_block; ++i) {
222    // Initialize a test block with input range [-mask_, mask_].
223    for (int j = 0; j < kNumCoeffs; ++j) {
224      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
225    }
226    if (i == 0) {
227      for (int j = 0; j < kNumCoeffs; ++j)
228        input_extreme_block[j] = mask_;
229    } else if (i == 1) {
230      for (int j = 0; j < kNumCoeffs; ++j)
231        input_extreme_block[j] = -mask_;
232    }
233
234    const int stride = 32;
235    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
236    ASM_REGISTER_STATE_CHECK(
237        fwd_txfm_(input_extreme_block, output_block, stride));
238
239    // The minimum quant value is 4.
240    for (int j = 0; j < kNumCoeffs; ++j) {
241      if (version_ == 0) {
242        EXPECT_EQ(output_block[j], output_ref_block[j])
243            << "Error: 32x32 FDCT versions have mismatched coefficients";
244      } else {
245        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
246            << "Error: 32x32 FDCT rd has mismatched coefficients";
247      }
248      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
249          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
250      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
251          << "Error: 32x32 FDCT has coefficient larger than "
252          << "4*DCT_MAX_VALUE";
253    }
254  }
255}
256
257TEST_P(Trans32x32Test, InverseAccuracy) {
258  ACMRandom rnd(ACMRandom::DeterministicSeed());
259  const int count_test_block = 1000;
260  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
261  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
262  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
263  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
264#if CONFIG_VP9_HIGHBITDEPTH
265  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
266  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
267#endif
268
269  for (int i = 0; i < count_test_block; ++i) {
270    double out_r[kNumCoeffs];
271
272    // Initialize a test block with input range [-255, 255]
273    for (int j = 0; j < kNumCoeffs; ++j) {
274      if (bit_depth_ == VPX_BITS_8) {
275        src[j] = rnd.Rand8();
276        dst[j] = rnd.Rand8();
277        in[j] = src[j] - dst[j];
278#if CONFIG_VP9_HIGHBITDEPTH
279      } else {
280        src16[j] = rnd.Rand16() & mask_;
281        dst16[j] = rnd.Rand16() & mask_;
282        in[j] = src16[j] - dst16[j];
283#endif
284      }
285    }
286
287    reference_32x32_dct_2d(in, out_r);
288    for (int j = 0; j < kNumCoeffs; ++j)
289      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
290    if (bit_depth_ == VPX_BITS_8) {
291      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
292#if CONFIG_VP9_HIGHBITDEPTH
293    } else {
294      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
295#endif
296    }
297    for (int j = 0; j < kNumCoeffs; ++j) {
298#if CONFIG_VP9_HIGHBITDEPTH
299      const int diff =
300          bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
301#else
302      const int diff = dst[j] - src[j];
303#endif
304      const int error = diff * diff;
305      EXPECT_GE(1, error)
306          << "Error: 32x32 IDCT has error " << error
307          << " at index " << j;
308    }
309  }
310}
311
312using std::tr1::make_tuple;
313
314#if CONFIG_VP9_HIGHBITDEPTH
315INSTANTIATE_TEST_CASE_P(
316    C, Trans32x32Test,
317    ::testing::Values(
318        make_tuple(&vpx_highbd_fdct32x32_c,
319                   &idct32x32_10, 0, VPX_BITS_10),
320        make_tuple(&vpx_highbd_fdct32x32_rd_c,
321                   &idct32x32_10, 1, VPX_BITS_10),
322        make_tuple(&vpx_highbd_fdct32x32_c,
323                   &idct32x32_12, 0, VPX_BITS_12),
324        make_tuple(&vpx_highbd_fdct32x32_rd_c,
325                   &idct32x32_12, 1, VPX_BITS_12),
326        make_tuple(&vpx_fdct32x32_c,
327                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
328        make_tuple(&vpx_fdct32x32_rd_c,
329                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
330#else
331INSTANTIATE_TEST_CASE_P(
332    C, Trans32x32Test,
333    ::testing::Values(
334        make_tuple(&vpx_fdct32x32_c,
335                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
336        make_tuple(&vpx_fdct32x32_rd_c,
337                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
338#endif  // CONFIG_VP9_HIGHBITDEPTH
339
// NEON inverse transform, paired with the C forward transforms (8-bit only).
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_neon, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_neon, 1,
                   VPX_BITS_8)));
#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
349
// SSE2 forward and inverse transforms (8-bit only, non-high-bitdepth builds).
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
359
// SSE2 forward transforms in high-bitdepth builds. Only the forward side is
// SSE2 here: every tuple pairs it with a C inverse transform (the
// idct32x32_10/idct32x32_12 wrappers or vpx_idct32x32_1024_add_c).
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        // 10-bit: high precision, then rd variant.
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
        // 12-bit: high precision, then rd variant.
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
        // 8-bit: high precision, then rd variant.
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
375
// AVX2 forward transforms, paired with the SSE2 inverse transform
// (8-bit only).
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_avx2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_avx2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
385
// MSA forward and inverse transforms (8-bit only).
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
395}  // namespace
396