1/*
2 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <math.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include "third_party/googletest/src/include/gtest/gtest.h"
16#include "test/acm_random.h"
17#include "test/clear_system_state.h"
18#include "test/register_state_check.h"
19#include "test/util.h"
20
21#include "./vp9_rtcd.h"
22#include "vp9/common/vp9_entropy.h"
23#include "vpx/vpx_codec.h"
24#include "vpx/vpx_integer.h"
25
26using libvpx_test::ACMRandom;
27
28namespace {
29const int kNumCoeffs = 16;
30typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
31typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
32typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
33                        int tx_type);
34typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
35                        int tx_type);
36
37typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
38typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
39
40void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
41                 int tx_type) {
42  vp9_fdct4x4_c(in, out, stride);
43}
44
45void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
46  vp9_fht4x4_c(in, out, stride, tx_type);
47}
48
49void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
50                 int tx_type) {
51  vp9_fwht4x4_c(in, out, stride);
52}
53
54#if CONFIG_VP9_HIGHBITDEPTH
55void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
56  vp9_high_idct4x4_16_add_c(in, out, stride, 10);
57}
58
59void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
60  vp9_high_idct4x4_16_add_c(in, out, stride, 12);
61}
62
63void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
64  vp9_high_iht4x4_16_add_c(in, out, stride, tx_type, 10);
65}
66
67void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
68  vp9_high_iht4x4_16_add_c(in, out, stride, tx_type, 12);
69}
70
71void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
72  vp9_high_iwht4x4_16_add_c(in, out, stride, 10);
73}
74
75void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
76  vp9_high_iwht4x4_16_add_c(in, out, stride, 12);
77}
78#endif
79
80class Trans4x4TestBase {
81 public:
82  virtual ~Trans4x4TestBase() {}
83
84 protected:
85  virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
86
87  virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
88
89  void RunAccuracyCheck(int limit) {
90    ACMRandom rnd(ACMRandom::DeterministicSeed());
91    uint32_t max_error = 0;
92    int64_t total_error = 0;
93    const int count_test_block = 10000;
94    for (int i = 0; i < count_test_block; ++i) {
95      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
96      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
97      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
98      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
99#if CONFIG_VP9_HIGHBITDEPTH
100      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
101      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
102#endif
103
104      // Initialize a test block with input range [-255, 255].
105      for (int j = 0; j < kNumCoeffs; ++j) {
106        if (bit_depth_ == VPX_BITS_8) {
107          src[j] = rnd.Rand8();
108          dst[j] = rnd.Rand8();
109          test_input_block[j] = src[j] - dst[j];
110#if CONFIG_VP9_HIGHBITDEPTH
111        } else {
112          src16[j] = rnd.Rand16() & mask_;
113          dst16[j] = rnd.Rand16() & mask_;
114          test_input_block[j] = src16[j] - dst16[j];
115#endif
116        }
117      }
118
119      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
120                                          test_temp_block, pitch_));
121      if (bit_depth_ == VPX_BITS_8) {
122        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
123#if CONFIG_VP9_HIGHBITDEPTH
124      } else {
125        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block,
126                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
127#endif
128      }
129
130      for (int j = 0; j < kNumCoeffs; ++j) {
131#if CONFIG_VP9_HIGHBITDEPTH
132        const uint32_t diff =
133            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
134#else
135        const uint32_t diff = dst[j] - src[j];
136#endif
137        const uint32_t error = diff * diff;
138        if (max_error < error)
139          max_error = error;
140        total_error += error;
141      }
142    }
143
144    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
145        << "Error: 4x4 FHT/IHT has an individual round trip error > "
146        << limit;
147
148    EXPECT_GE(count_test_block * limit, total_error)
149        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
150        << " per block";
151  }
152
153  void RunCoeffCheck() {
154    ACMRandom rnd(ACMRandom::DeterministicSeed());
155    const int count_test_block = 5000;
156    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
157    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
158    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
159
160    for (int i = 0; i < count_test_block; ++i) {
161      // Initialize a test block with input range [-mask_, mask_].
162      for (int j = 0; j < kNumCoeffs; ++j)
163        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
164
165      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
166      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
167
168      // The minimum quant value is 4.
169      for (int j = 0; j < kNumCoeffs; ++j)
170        EXPECT_EQ(output_block[j], output_ref_block[j]);
171    }
172  }
173
174  void RunMemCheck() {
175    ACMRandom rnd(ACMRandom::DeterministicSeed());
176    const int count_test_block = 5000;
177    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
178    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
179    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
180    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
181
182    for (int i = 0; i < count_test_block; ++i) {
183      // Initialize a test block with input range [-mask_, mask_].
184      for (int j = 0; j < kNumCoeffs; ++j) {
185        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
186        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
187      }
188      if (i == 0) {
189        for (int j = 0; j < kNumCoeffs; ++j)
190          input_extreme_block[j] = mask_;
191      } else if (i == 1) {
192        for (int j = 0; j < kNumCoeffs; ++j)
193          input_extreme_block[j] = -mask_;
194      }
195
196      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
197      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
198                                          output_block, pitch_));
199
200      // The minimum quant value is 4.
201      for (int j = 0; j < kNumCoeffs; ++j) {
202        EXPECT_EQ(output_block[j], output_ref_block[j]);
203        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
204            << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
205      }
206    }
207  }
208
209  void RunInvAccuracyCheck(int limit) {
210    ACMRandom rnd(ACMRandom::DeterministicSeed());
211    const int count_test_block = 1000;
212    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
213    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
214    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
215    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
216#if CONFIG_VP9_HIGHBITDEPTH
217    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
218    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
219#endif
220
221    for (int i = 0; i < count_test_block; ++i) {
222      // Initialize a test block with input range [-mask_, mask_].
223      for (int j = 0; j < kNumCoeffs; ++j) {
224        if (bit_depth_ == VPX_BITS_8) {
225          src[j] = rnd.Rand8();
226          dst[j] = rnd.Rand8();
227          in[j] = src[j] - dst[j];
228#if CONFIG_VP9_HIGHBITDEPTH
229        } else {
230          src16[j] = rnd.Rand16() & mask_;
231          dst16[j] = rnd.Rand16() & mask_;
232          in[j] = src16[j] - dst16[j];
233#endif
234        }
235      }
236
237      fwd_txfm_ref(in, coeff, pitch_, tx_type_);
238
239      if (bit_depth_ == VPX_BITS_8) {
240        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
241#if CONFIG_VP9_HIGHBITDEPTH
242      } else {
243        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
244                                            pitch_));
245#endif
246      }
247
248      for (int j = 0; j < kNumCoeffs; ++j) {
249#if CONFIG_VP9_HIGHBITDEPTH
250        const uint32_t diff =
251            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
252#else
253        const uint32_t diff = dst[j] - src[j];
254#endif
255        const uint32_t error = diff * diff;
256        EXPECT_GE(static_cast<uint32_t>(limit), error)
257            << "Error: 4x4 IDCT has error " << error
258            << " at index " << j;
259      }
260    }
261  }
262
263  int pitch_;
264  int tx_type_;
265  FhtFunc fwd_txfm_ref;
266  vpx_bit_depth_t bit_depth_;
267  int mask_;
268};
269
270class Trans4x4DCT
271    : public Trans4x4TestBase,
272      public ::testing::TestWithParam<Dct4x4Param> {
273 public:
274  virtual ~Trans4x4DCT() {}
275
276  virtual void SetUp() {
277    fwd_txfm_ = GET_PARAM(0);
278    inv_txfm_ = GET_PARAM(1);
279    tx_type_  = GET_PARAM(2);
280    pitch_    = 4;
281    fwd_txfm_ref = fdct4x4_ref;
282    bit_depth_ = GET_PARAM(3);
283    mask_ = (1 << bit_depth_) - 1;
284  }
285  virtual void TearDown() { libvpx_test::ClearSystemState(); }
286
287 protected:
288  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
289    fwd_txfm_(in, out, stride);
290  }
291  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
292    inv_txfm_(out, dst, stride);
293  }
294
295  FdctFunc fwd_txfm_;
296  IdctFunc inv_txfm_;
297};
298
299TEST_P(Trans4x4DCT, AccuracyCheck) {
300  RunAccuracyCheck(1);
301}
302
303TEST_P(Trans4x4DCT, CoeffCheck) {
304  RunCoeffCheck();
305}
306
307TEST_P(Trans4x4DCT, MemCheck) {
308  RunMemCheck();
309}
310
311TEST_P(Trans4x4DCT, InvAccuracyCheck) {
312  RunInvAccuracyCheck(1);
313}
314
315class Trans4x4HT
316    : public Trans4x4TestBase,
317      public ::testing::TestWithParam<Ht4x4Param> {
318 public:
319  virtual ~Trans4x4HT() {}
320
321  virtual void SetUp() {
322    fwd_txfm_ = GET_PARAM(0);
323    inv_txfm_ = GET_PARAM(1);
324    tx_type_  = GET_PARAM(2);
325    pitch_    = 4;
326    fwd_txfm_ref = fht4x4_ref;
327    bit_depth_ = GET_PARAM(3);
328    mask_ = (1 << bit_depth_) - 1;
329  }
330  virtual void TearDown() { libvpx_test::ClearSystemState(); }
331
332 protected:
333  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
334    fwd_txfm_(in, out, stride, tx_type_);
335  }
336
337  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
338    inv_txfm_(out, dst, stride, tx_type_);
339  }
340
341  FhtFunc fwd_txfm_;
342  IhtFunc inv_txfm_;
343};
344
345TEST_P(Trans4x4HT, AccuracyCheck) {
346  RunAccuracyCheck(1);
347}
348
349TEST_P(Trans4x4HT, CoeffCheck) {
350  RunCoeffCheck();
351}
352
353TEST_P(Trans4x4HT, MemCheck) {
354  RunMemCheck();
355}
356
357TEST_P(Trans4x4HT, InvAccuracyCheck) {
358  RunInvAccuracyCheck(1);
359}
360
361class Trans4x4WHT
362    : public Trans4x4TestBase,
363      public ::testing::TestWithParam<Dct4x4Param> {
364 public:
365  virtual ~Trans4x4WHT() {}
366
367  virtual void SetUp() {
368    fwd_txfm_ = GET_PARAM(0);
369    inv_txfm_ = GET_PARAM(1);
370    tx_type_  = GET_PARAM(2);
371    pitch_    = 4;
372    fwd_txfm_ref = fwht4x4_ref;
373    bit_depth_ = GET_PARAM(3);
374    mask_ = (1 << bit_depth_) - 1;
375  }
376  virtual void TearDown() { libvpx_test::ClearSystemState(); }
377
378 protected:
379  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
380    fwd_txfm_(in, out, stride);
381  }
382  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
383    inv_txfm_(out, dst, stride);
384  }
385
386  FdctFunc fwd_txfm_;
387  IdctFunc inv_txfm_;
388};
389
390TEST_P(Trans4x4WHT, AccuracyCheck) {
391  RunAccuracyCheck(0);
392}
393
394TEST_P(Trans4x4WHT, CoeffCheck) {
395  RunCoeffCheck();
396}
397
398TEST_P(Trans4x4WHT, MemCheck) {
399  RunMemCheck();
400}
401
402TEST_P(Trans4x4WHT, InvAccuracyCheck) {
403  RunInvAccuracyCheck(0);
404}
405using std::tr1::make_tuple;
406
407#if CONFIG_VP9_HIGHBITDEPTH
408INSTANTIATE_TEST_CASE_P(
409    C, Trans4x4DCT,
410    ::testing::Values(
411        make_tuple(&vp9_high_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
412        make_tuple(&vp9_high_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
413        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
414#else
415INSTANTIATE_TEST_CASE_P(
416    C, Trans4x4DCT,
417    ::testing::Values(
418        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
419#endif
420
421#if CONFIG_VP9_HIGHBITDEPTH
422INSTANTIATE_TEST_CASE_P(
423    C, Trans4x4HT,
424    ::testing::Values(
425        make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10),
426        make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10),
427        make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10),
428        make_tuple(&vp9_high_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10),
429        make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12),
430        make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12),
431        make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12),
432        make_tuple(&vp9_high_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12),
433        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
434        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
435        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
436        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
437#else
438INSTANTIATE_TEST_CASE_P(
439    C, Trans4x4HT,
440    ::testing::Values(
441        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
442        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
443        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
444        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
445#endif
446
447#if CONFIG_VP9_HIGHBITDEPTH
448INSTANTIATE_TEST_CASE_P(
449    C, Trans4x4WHT,
450    ::testing::Values(
451        make_tuple(&vp9_high_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
452        make_tuple(&vp9_high_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
453        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
454#else
455INSTANTIATE_TEST_CASE_P(
456    C, Trans4x4WHT,
457    ::testing::Values(
458        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
459#endif
460
// NEON inverse transforms paired with the C forward transforms; 8-bit builds
// only. The hybrid-transform set is registered under a DISABLED_ prefix.
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));

INSTANTIATE_TEST_CASE_P(
    DISABLED_NEON, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
475
// MMX forward Walsh-Hadamard transform paired with the C inverse; 8-bit
// builds only.
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    MMX, Trans4x4WHT,
    ::testing::Values(
        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0,
                   VPX_BITS_8)));
#endif  // CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
482
// SSE2 forward and inverse transforms; 8-bit builds only. The hybrid
// transform covers transform types 0 through 3.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct4x4_sse2, &vp9_idct4x4_16_add_sse2, 0,
                   VPX_BITS_8)));

INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
497
498}  // namespace
499