1/*
2 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <algorithm>
12
13#include "third_party/googletest/src/include/gtest/gtest.h"
14
15#include "./vpx_dsp_rtcd.h"
16#include "vpx_ports/vpx_timer.h"
17
18#include "test/acm_random.h"
19#include "test/register_state_check.h"
20
21namespace {
22
23using ::libvpx_test::ACMRandom;
24
25typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
26                             tran_low_t *b);
27
// One-dimensional 8-point Hadamard butterfly used by the reference transform.
//
// a        - first input sample.
// a_stride - distance, in elements, between consecutive input samples.
// out      - receives the eight results (contiguous). The results are stored
//            in a permuted order rather than natural sequency order.
void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  // Stage 1: sum and difference of each adjacent pair of samples.
  int16_t stage1[8];
  for (int pair = 0; pair < 4; ++pair) {
    const int16_t first = a[(2 * pair) * a_stride];
    const int16_t second = a[(2 * pair + 1) * a_stride];
    stage1[2 * pair] = first + second;
    stage1[2 * pair + 1] = first - second;
  }

  // Stage 2: the same butterfly applied within each group of four.
  int16_t stage2[8];
  for (int half = 0; half < 2; ++half) {
    const int16_t *in = stage1 + 4 * half;
    int16_t *res = stage2 + 4 * half;
    res[0] = in[0] + in[2];
    res[1] = in[1] + in[3];
    res[2] = in[0] - in[2];
    res[3] = in[1] - in[3];
  }

  // Stage 3: combine the two halves. The tables give the output slot that
  // receives the sum and the difference of stage2[k] and stage2[k + 4].
  static const int kSumSlot[4] = { 0, 7, 3, 4 };
  static const int kDiffSlot[4] = { 2, 6, 1, 5 };
  for (int k = 0; k < 4; ++k) {
    out[kSumSlot[k]] = stage2[k] + stage2[k + 4];
    out[kDiffSlot[k]] = stage2[k] - stage2[k + 4];
  }
}
50
51void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
52  int16_t buf[64];
53  int16_t buf2[64];
54  for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8);
55  for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8);
56
57  for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i];
58}
59
60void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
61  /* The source is a 16x16 block. The destination is rearranged to 8x32.
62   * Input is 9 bit. */
63  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
64  reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
65  reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
66  reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
67
68  /* Overlay the 8x8 blocks and combine. */
69  for (int i = 0; i < 64; ++i) {
70    /* 8x8 steps the range up to 15 bits. */
71    const tran_low_t a0 = b[0];
72    const tran_low_t a1 = b[64];
73    const tran_low_t a2 = b[128];
74    const tran_low_t a3 = b[192];
75
76    /* Prevent the result from escaping int16_t. */
77    const tran_low_t b0 = (a0 + a1) >> 1;
78    const tran_low_t b1 = (a0 - a1) >> 1;
79    const tran_low_t b2 = (a2 + a3) >> 1;
80    const tran_low_t b3 = (a2 - a3) >> 1;
81
82    /* Store a 16 bit value. */
83    b[0] = b0 + b2;
84    b[64] = b1 + b3;
85    b[128] = b0 - b2;
86    b[192] = b1 - b3;
87
88    ++b;
89  }
90}
91
92class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
93 public:
94  virtual void SetUp() {
95    h_func_ = GetParam();
96    rnd_.Reset(ACMRandom::DeterministicSeed());
97  }
98
99 protected:
100  HadamardFunc h_func_;
101  ACMRandom rnd_;
102};
103
104void HadamardSpeedTest(const char *name, HadamardFunc const func,
105                       const int16_t *input, int stride, tran_low_t *output,
106                       int times) {
107  int i;
108  vpx_usec_timer timer;
109
110  vpx_usec_timer_start(&timer);
111  for (i = 0; i < times; ++i) {
112    func(input, stride, output);
113  }
114  vpx_usec_timer_mark(&timer);
115
116  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
117  printf("%s[%12d runs]: %d us\n", name, times, elapsed_time);
118}
119
120class Hadamard8x8Test : public HadamardTestBase {};
121
122void HadamardSpeedTest8x8(HadamardFunc const func, int times) {
123  DECLARE_ALIGNED(16, int16_t, input[64]);
124  DECLARE_ALIGNED(16, tran_low_t, output[64]);
125  memset(input, 1, sizeof(input));
126  HadamardSpeedTest("Hadamard8x8", func, input, 8, output, times);
127}
128
129TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
130  DECLARE_ALIGNED(16, int16_t, a[64]);
131  DECLARE_ALIGNED(16, tran_low_t, b[64]);
132  tran_low_t b_ref[64];
133  for (int i = 0; i < 64; ++i) {
134    a[i] = rnd_.Rand9Signed();
135  }
136  memset(b, 0, sizeof(b));
137  memset(b_ref, 0, sizeof(b_ref));
138
139  reference_hadamard8x8(a, 8, b_ref);
140  ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
141
142  // The order of the output is not important. Sort before checking.
143  std::sort(b, b + 64);
144  std::sort(b_ref, b_ref + 64);
145  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
146}
147
148TEST_P(Hadamard8x8Test, VaryStride) {
149  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
150  DECLARE_ALIGNED(16, tran_low_t, b[64]);
151  tran_low_t b_ref[64];
152  for (int i = 0; i < 64 * 8; ++i) {
153    a[i] = rnd_.Rand9Signed();
154  }
155
156  for (int i = 8; i < 64; i += 8) {
157    memset(b, 0, sizeof(b));
158    memset(b_ref, 0, sizeof(b_ref));
159
160    reference_hadamard8x8(a, i, b_ref);
161    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
162
163    // The order of the output is not important. Sort before checking.
164    std::sort(b, b + 64);
165    std::sort(b_ref, b_ref + 64);
166    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
167  }
168}
169
170TEST_P(Hadamard8x8Test, DISABLED_Speed) {
171  HadamardSpeedTest8x8(h_func_, 10);
172  HadamardSpeedTest8x8(h_func_, 10000);
173  HadamardSpeedTest8x8(h_func_, 10000000);
174}
175
176INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
177                        ::testing::Values(&vpx_hadamard_8x8_c));
178
179#if HAVE_SSE2
180INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
181                        ::testing::Values(&vpx_hadamard_8x8_sse2));
182#endif  // HAVE_SSE2
183
184#if HAVE_SSSE3 && ARCH_X86_64
185INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
186                        ::testing::Values(&vpx_hadamard_8x8_ssse3));
187#endif  // HAVE_SSSE3 && ARCH_X86_64
188
189#if HAVE_NEON
190INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
191                        ::testing::Values(&vpx_hadamard_8x8_neon));
192#endif  // HAVE_NEON
193
194// TODO(jingning): Remove highbitdepth flag when the SIMD functions are
195// in place and turn on the unit test.
196#if !CONFIG_VP9_HIGHBITDEPTH
197#if HAVE_MSA
198INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test,
199                        ::testing::Values(&vpx_hadamard_8x8_msa));
200#endif  // HAVE_MSA
201#endif  // !CONFIG_VP9_HIGHBITDEPTH
202
203#if HAVE_VSX
204INSTANTIATE_TEST_CASE_P(VSX, Hadamard8x8Test,
205                        ::testing::Values(&vpx_hadamard_8x8_vsx));
206#endif  // HAVE_VSX
207
208class Hadamard16x16Test : public HadamardTestBase {};
209
210void HadamardSpeedTest16x16(HadamardFunc const func, int times) {
211  DECLARE_ALIGNED(16, int16_t, input[256]);
212  DECLARE_ALIGNED(16, tran_low_t, output[256]);
213  memset(input, 1, sizeof(input));
214  HadamardSpeedTest("Hadamard16x16", func, input, 16, output, times);
215}
216
217TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
218  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
219  DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
220  tran_low_t b_ref[16 * 16];
221  for (int i = 0; i < 16 * 16; ++i) {
222    a[i] = rnd_.Rand9Signed();
223  }
224  memset(b, 0, sizeof(b));
225  memset(b_ref, 0, sizeof(b_ref));
226
227  reference_hadamard16x16(a, 16, b_ref);
228  ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
229
230  // The order of the output is not important. Sort before checking.
231  std::sort(b, b + 16 * 16);
232  std::sort(b_ref, b_ref + 16 * 16);
233  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
234}
235
236TEST_P(Hadamard16x16Test, VaryStride) {
237  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
238  DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
239  tran_low_t b_ref[16 * 16];
240  for (int i = 0; i < 16 * 16 * 8; ++i) {
241    a[i] = rnd_.Rand9Signed();
242  }
243
244  for (int i = 8; i < 64; i += 8) {
245    memset(b, 0, sizeof(b));
246    memset(b_ref, 0, sizeof(b_ref));
247
248    reference_hadamard16x16(a, i, b_ref);
249    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
250
251    // The order of the output is not important. Sort before checking.
252    std::sort(b, b + 16 * 16);
253    std::sort(b_ref, b_ref + 16 * 16);
254    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
255  }
256}
257
258TEST_P(Hadamard16x16Test, DISABLED_Speed) {
259  HadamardSpeedTest16x16(h_func_, 10);
260  HadamardSpeedTest16x16(h_func_, 10000);
261  HadamardSpeedTest16x16(h_func_, 10000000);
262}
263
264INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
265                        ::testing::Values(&vpx_hadamard_16x16_c));
266
267#if HAVE_SSE2
268INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
269                        ::testing::Values(&vpx_hadamard_16x16_sse2));
270#endif  // HAVE_SSE2
271
272#if HAVE_AVX2
273INSTANTIATE_TEST_CASE_P(AVX2, Hadamard16x16Test,
274                        ::testing::Values(&vpx_hadamard_16x16_avx2));
275#endif  // HAVE_AVX2
276
277#if HAVE_VSX
278INSTANTIATE_TEST_CASE_P(VSX, Hadamard16x16Test,
279                        ::testing::Values(&vpx_hadamard_16x16_vsx));
280#endif  // HAVE_VSX
281
282#if HAVE_NEON
283INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
284                        ::testing::Values(&vpx_hadamard_16x16_neon));
285#endif  // HAVE_NEON
286
287#if !CONFIG_VP9_HIGHBITDEPTH
288#if HAVE_MSA
289INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test,
290                        ::testing::Values(&vpx_hadamard_16x16_msa));
291#endif  // HAVE_MSA
292#endif  // !CONFIG_VP9_HIGHBITDEPTH
293}  // namespace
294