11e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved.
21e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
31e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// found in the LICENSE file.
41e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
51e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "base/memory/aligned_memory.h"
61e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "base/memory/scoped_ptr.h"
71e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "base/time/time.h"
81e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "media/base/vector_math.h"
91e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "media/base/vector_math_testing.h"
101e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "testing/gtest/include/gtest/gtest.h"
111e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "testing/perf/perf_test.h"
121e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
131e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)using base::TimeTicks;
141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)using std::fill;
151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)namespace media {
171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
namespace {

// Element count of the input and output buffers used by every benchmark.
const int kVectorSize = 8192;

// Scale factor handed to the FMAC/FMUL functions under test.
const float kScale = 0.5f;

// Number of timed calls made by the FMAC/FMUL benchmark loop.
const int kBenchmarkIterations = 200000;

// Number of timed calls made by the EWMAAndMaxPower benchmark loop.
const int kEWMABenchmarkIterations = 50000;

}  // namespace
221e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)class VectorMathPerfTest : public testing::Test {
241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) public:
251e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  VectorMathPerfTest() {
261e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    // Initialize input and output vectors.
271e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    input_vector_.reset(static_cast<float*>(base::AlignedAlloc(
281e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)        sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    output_vector_.reset(static_cast<float*>(base::AlignedAlloc(
301e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)        sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
311e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f);
321e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);
331e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  }
341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
351e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  void RunBenchmark(void (*fn)(const float[], float, int, float[]),
361e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                    bool aligned,
371e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                    const std::string& test_name,
381e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                    const std::string& trace_name) {
391e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    TimeTicks start = TimeTicks::HighResNow();
401e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    for (int i = 0; i < kBenchmarkIterations; ++i) {
411e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)      fn(input_vector_.get(),
421e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)         kScale,
431e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)         kVectorSize - (aligned ? 0 : 1),
441e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)         output_vector_.get());
451e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    }
46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    double total_time_milliseconds =
47f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        (TimeTicks::HighResNow() - start).InMillisecondsF();
481e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)    perf_test::PrintResult(test_name,
491e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                           "",
501e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                           trace_name,
51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           kBenchmarkIterations / total_time_milliseconds,
52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           "runs/ms",
53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           true);
54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void RunBenchmark(
57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      std::pair<float, float> (*fn)(float, const float[], int, float),
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      int len,
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      const std::string& test_name,
60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      const std::string& trace_name) {
61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    TimeTicks start = TimeTicks::HighResNow();
62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    for (int i = 0; i < kEWMABenchmarkIterations; ++i) {
63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      fn(0.5f, input_vector_.get(), len, 0.1f);
64f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
65f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    double total_time_milliseconds =
66f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        (TimeTicks::HighResNow() - start).InMillisecondsF();
67f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    perf_test::PrintResult(test_name,
68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           "",
69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           trace_name,
70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           kEWMABenchmarkIterations / total_time_milliseconds,
71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                           "runs/ms",
721e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                           true);
731e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  }
741e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
751e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) protected:
765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  scoped_ptr<float, base::AlignedFreeDeleter> input_vector_;
775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  scoped_ptr<float, base::AlignedFreeDeleter> output_vector_;
781e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
791e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest);
801e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)};
811e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
// Define platform dependent function names for SIMD optimized methods.
// On x86-family CPUs the *_FUNC macros name the _SSE variants; on ARM-family
// CPUs built with USE_NEON they name the _NEON variants.  On any other
// configuration the macros are left undefined, and the tests below use
// "#if defined(..._FUNC)" to skip the optimized runs.
#if defined(ARCH_CPU_X86_FAMILY)
#define FMAC_FUNC FMAC_SSE
#define FMUL_FUNC FMUL_SSE
#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#define FMAC_FUNC FMAC_NEON
#define FMUL_FUNC FMUL_NEON
#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON
#endif
921e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
931e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// Benchmark for each optimized vector_math::FMAC() method.
941e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)TEST_F(VectorMathPerfTest, FMAC) {
951e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Benchmark FMAC_C().
961e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  RunBenchmark(
971e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)      vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");
981e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#if defined(FMAC_FUNC)
991e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Benchmark FMAC_FUNC() with unaligned size.
1001e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
1011e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                                 sizeof(float)), 0U);
1021e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  RunBenchmark(
1031e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)      vector_math::FMAC_FUNC, false, "vector_math_fmac", "optimized_unaligned");
1041e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Benchmark FMAC_FUNC() with aligned size.
1051e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
1061e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)            0U);
1071e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  RunBenchmark(
1081e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)      vector_math::FMAC_FUNC, true, "vector_math_fmac", "optimized_aligned");
1091e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#endif
1101e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)}
1111e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
1121e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// Benchmark for each optimized vector_math::FMUL() method.
1131e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)TEST_F(VectorMathPerfTest, FMUL) {
1141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Benchmark FMUL_C().
1151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  RunBenchmark(
1161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)      vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");
1171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#if defined(FMUL_FUNC)
1181e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Benchmark FMUL_FUNC() with unaligned size.
1191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
1201e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)                                 sizeof(float)), 0U);
1211e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  RunBenchmark(
122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      vector_math::FMUL_FUNC, false, "vector_math_fmul", "optimized_unaligned");
1231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Benchmark FMUL_FUNC() with aligned size.
1241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
1251e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)            0U);
1261e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  RunBenchmark(
127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      vector_math::FMUL_FUNC, true, "vector_math_fmul", "optimized_aligned");
1281e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#endif
1291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)}
1301e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)
131f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Benchmark for each optimized vector_math::EWMAAndMaxPower() method.
132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {
133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Benchmark EWMAAndMaxPower_C().
134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  RunBenchmark(vector_math::EWMAAndMaxPower_C,
135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               kVectorSize,
136f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               "vector_math_ewma_and_max_power",
137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               "unoptimized");
138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#if defined(EWMAAndMaxPower_FUNC)
139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Benchmark EWMAAndMaxPower_FUNC() with unaligned size.
140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
141f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                 sizeof(float)), 0U);
142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,
143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               kVectorSize - 1,
144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               "vector_math_ewma_and_max_power",
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               "optimized_unaligned");
146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Benchmark EWMAAndMaxPower_FUNC() with aligned size.
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            0U);
149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,
150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               kVectorSize,
151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               "vector_math_ewma_and_max_power",
152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)               "optimized_aligned");
153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
155f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
1561e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} // namespace media
157