1/* 2 * Copyright (c) 2016 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <algorithm> 12 13#include "third_party/googletest/src/include/gtest/gtest.h" 14 15#include "./vpx_dsp_rtcd.h" 16#include "vpx_ports/vpx_timer.h" 17 18#include "test/acm_random.h" 19#include "test/register_state_check.h" 20 21namespace { 22 23using ::libvpx_test::ACMRandom; 24 25typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride, 26 tran_low_t *b); 27 28void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) { 29 int16_t b[8]; 30 for (int i = 0; i < 8; i += 2) { 31 b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride]; 32 b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride]; 33 } 34 int16_t c[8]; 35 for (int i = 0; i < 8; i += 4) { 36 c[i + 0] = b[i + 0] + b[i + 2]; 37 c[i + 1] = b[i + 1] + b[i + 3]; 38 c[i + 2] = b[i + 0] - b[i + 2]; 39 c[i + 3] = b[i + 1] - b[i + 3]; 40 } 41 out[0] = c[0] + c[4]; 42 out[7] = c[1] + c[5]; 43 out[3] = c[2] + c[6]; 44 out[4] = c[3] + c[7]; 45 out[2] = c[0] - c[4]; 46 out[6] = c[1] - c[5]; 47 out[1] = c[2] - c[6]; 48 out[5] = c[3] - c[7]; 49} 50 51void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) { 52 int16_t buf[64]; 53 int16_t buf2[64]; 54 for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8); 55 for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8); 56 57 for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i]; 58} 59 60void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) { 61 /* The source is a 16x16 block. The destination is rearranged to 8x32. 62 * Input is 9 bit. */ 63 reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0); 64 reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64); 65 reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128); 66 reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192); 67 68 /* Overlay the 8x8 blocks and combine. */ 69 for (int i = 0; i < 64; ++i) { 70 /* 8x8 steps the range up to 15 bits. */ 71 const tran_low_t a0 = b[0]; 72 const tran_low_t a1 = b[64]; 73 const tran_low_t a2 = b[128]; 74 const tran_low_t a3 = b[192]; 75 76 /* Prevent the result from escaping int16_t. */ 77 const tran_low_t b0 = (a0 + a1) >> 1; 78 const tran_low_t b1 = (a0 - a1) >> 1; 79 const tran_low_t b2 = (a2 + a3) >> 1; 80 const tran_low_t b3 = (a2 - a3) >> 1; 81 82 /* Store a 16 bit value. */ 83 b[0] = b0 + b2; 84 b[64] = b1 + b3; 85 b[128] = b0 - b2; 86 b[192] = b1 - b3; 87 88 ++b; 89 } 90} 91 92class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> { 93 public: 94 virtual void SetUp() { 95 h_func_ = GetParam(); 96 rnd_.Reset(ACMRandom::DeterministicSeed()); 97 } 98 99 protected: 100 HadamardFunc h_func_; 101 ACMRandom rnd_; 102}; 103 104void HadamardSpeedTest(const char *name, HadamardFunc const func, 105 const int16_t *input, int stride, tran_low_t *output, 106 int times) { 107 int i; 108 vpx_usec_timer timer; 109 110 vpx_usec_timer_start(&timer); 111 for (i = 0; i < times; ++i) { 112 func(input, stride, output); 113 } 114 vpx_usec_timer_mark(&timer); 115 116 const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); 117 printf("%s[%12d runs]: %d us\n", name, times, elapsed_time); 118} 119 120class Hadamard8x8Test : public HadamardTestBase {}; 121 122void HadamardSpeedTest8x8(HadamardFunc const func, int times) { 123 DECLARE_ALIGNED(16, int16_t, input[64]); 124 DECLARE_ALIGNED(16, tran_low_t, output[64]); 125 memset(input, 1, sizeof(input)); 126 HadamardSpeedTest("Hadamard8x8", func, input, 8, output, times); 127} 128 129TEST_P(Hadamard8x8Test, CompareReferenceRandom) { 130 DECLARE_ALIGNED(16, int16_t, a[64]); 131 DECLARE_ALIGNED(16, tran_low_t, b[64]); 132 tran_low_t b_ref[64]; 133 for (int i = 0; i < 64; ++i) { 134 a[i] = rnd_.Rand9Signed(); 135 } 136 memset(b, 0, sizeof(b)); 137 memset(b_ref, 0, sizeof(b_ref)); 138 139 reference_hadamard8x8(a, 8, b_ref); 140 ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b)); 141 142 // The order of the output is not important. Sort before checking. 143 std::sort(b, b + 64); 144 std::sort(b_ref, b_ref + 64); 145 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); 146} 147 148TEST_P(Hadamard8x8Test, VaryStride) { 149 DECLARE_ALIGNED(16, int16_t, a[64 * 8]); 150 DECLARE_ALIGNED(16, tran_low_t, b[64]); 151 tran_low_t b_ref[64]; 152 for (int i = 0; i < 64 * 8; ++i) { 153 a[i] = rnd_.Rand9Signed(); 154 } 155 156 for (int i = 8; i < 64; i += 8) { 157 memset(b, 0, sizeof(b)); 158 memset(b_ref, 0, sizeof(b_ref)); 159 160 reference_hadamard8x8(a, i, b_ref); 161 ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); 162 163 // The order of the output is not important. Sort before checking. 164 std::sort(b, b + 64); 165 std::sort(b_ref, b_ref + 64); 166 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); 167 } 168} 169 170TEST_P(Hadamard8x8Test, DISABLED_Speed) { 171 HadamardSpeedTest8x8(h_func_, 10); 172 HadamardSpeedTest8x8(h_func_, 10000); 173 HadamardSpeedTest8x8(h_func_, 10000000); 174} 175 176INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test, 177 ::testing::Values(&vpx_hadamard_8x8_c)); 178 179#if HAVE_SSE2 180INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test, 181 ::testing::Values(&vpx_hadamard_8x8_sse2)); 182#endif // HAVE_SSE2 183 184#if HAVE_SSSE3 && ARCH_X86_64 185INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test, 186 ::testing::Values(&vpx_hadamard_8x8_ssse3)); 187#endif // HAVE_SSSE3 && ARCH_X86_64 188 189#if HAVE_NEON 190INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, 191 ::testing::Values(&vpx_hadamard_8x8_neon)); 192#endif // HAVE_NEON 193 194// TODO(jingning): Remove highbitdepth flag when the SIMD functions are 195// in place and turn on the unit test. 196#if !CONFIG_VP9_HIGHBITDEPTH 197#if HAVE_MSA 198INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test, 199 ::testing::Values(&vpx_hadamard_8x8_msa)); 200#endif // HAVE_MSA 201#endif // !CONFIG_VP9_HIGHBITDEPTH 202 203#if HAVE_VSX 204INSTANTIATE_TEST_CASE_P(VSX, Hadamard8x8Test, 205 ::testing::Values(&vpx_hadamard_8x8_vsx)); 206#endif // HAVE_VSX 207 208class Hadamard16x16Test : public HadamardTestBase {}; 209 210void HadamardSpeedTest16x16(HadamardFunc const func, int times) { 211 DECLARE_ALIGNED(16, int16_t, input[256]); 212 DECLARE_ALIGNED(16, tran_low_t, output[256]); 213 memset(input, 1, sizeof(input)); 214 HadamardSpeedTest("Hadamard16x16", func, input, 16, output, times); 215} 216 217TEST_P(Hadamard16x16Test, CompareReferenceRandom) { 218 DECLARE_ALIGNED(16, int16_t, a[16 * 16]); 219 DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]); 220 tran_low_t b_ref[16 * 16]; 221 for (int i = 0; i < 16 * 16; ++i) { 222 a[i] = rnd_.Rand9Signed(); 223 } 224 memset(b, 0, sizeof(b)); 225 memset(b_ref, 0, sizeof(b_ref)); 226 227 reference_hadamard16x16(a, 16, b_ref); 228 ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b)); 229 230 // The order of the output is not important. Sort before checking. 231 std::sort(b, b + 16 * 16); 232 std::sort(b_ref, b_ref + 16 * 16); 233 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); 234} 235 236TEST_P(Hadamard16x16Test, VaryStride) { 237 DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]); 238 DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]); 239 tran_low_t b_ref[16 * 16]; 240 for (int i = 0; i < 16 * 16 * 8; ++i) { 241 a[i] = rnd_.Rand9Signed(); 242 } 243 244 for (int i = 8; i < 64; i += 8) { 245 memset(b, 0, sizeof(b)); 246 memset(b_ref, 0, sizeof(b_ref)); 247 248 reference_hadamard16x16(a, i, b_ref); 249 ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); 250 251 // The order of the output is not important. Sort before checking. 252 std::sort(b, b + 16 * 16); 253 std::sort(b_ref, b_ref + 16 * 16); 254 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); 255 } 256} 257 258TEST_P(Hadamard16x16Test, DISABLED_Speed) { 259 HadamardSpeedTest16x16(h_func_, 10); 260 HadamardSpeedTest16x16(h_func_, 10000); 261 HadamardSpeedTest16x16(h_func_, 10000000); 262} 263 264INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test, 265 ::testing::Values(&vpx_hadamard_16x16_c)); 266 267#if HAVE_SSE2 268INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test, 269 ::testing::Values(&vpx_hadamard_16x16_sse2)); 270#endif // HAVE_SSE2 271 272#if HAVE_AVX2 273INSTANTIATE_TEST_CASE_P(AVX2, Hadamard16x16Test, 274 ::testing::Values(&vpx_hadamard_16x16_avx2)); 275#endif // HAVE_AVX2 276 277#if HAVE_VSX 278INSTANTIATE_TEST_CASE_P(VSX, Hadamard16x16Test, 279 ::testing::Values(&vpx_hadamard_16x16_vsx)); 280#endif // HAVE_VSX 281 282#if HAVE_NEON 283INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test, 284 ::testing::Values(&vpx_hadamard_16x16_neon)); 285#endif // HAVE_NEON 286 287#if !CONFIG_VP9_HIGHBITDEPTH 288#if HAVE_MSA 289INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test, 290 ::testing::Values(&vpx_hadamard_16x16_msa)); 291#endif // HAVE_MSA 292#endif // !CONFIG_VP9_HIGHBITDEPTH 293} // namespace 294