// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <string>
#include <utility>

#include "base/memory/aligned_memory.h"
#include "base/memory/scoped_ptr.h"
#include "base/time/time.h"
#include "media/base/vector_math.h"
#include "media/base/vector_math_testing.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_test.h"

using base::TimeTicks;
using std::fill;

namespace media {

static const int kBenchmarkIterations = 200000;
static const int kEWMABenchmarkIterations = 50000;
static const float kScale = 0.5;
static const int kVectorSize = 8192;
221e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)class VectorMathPerfTest : public testing::Test { 241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) public: 251e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) VectorMathPerfTest() { 261e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Initialize input and output vectors. 271e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) input_vector_.reset(static_cast<float*>(base::AlignedAlloc( 281e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); 291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) output_vector_.reset(static_cast<float*>(base::AlignedAlloc( 301e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); 311e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f); 321e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f); 331e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 351e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) void RunBenchmark(void (*fn)(const float[], float, int, float[]), 361e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) bool aligned, 371e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) const std::string& test_name, 381e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) const std::string& trace_name) { 391e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) TimeTicks start = TimeTicks::HighResNow(); 401e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) for (int i = 0; i < kBenchmarkIterations; ++i) { 
411e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) fn(input_vector_.get(), 421e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) kScale, 431e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) kVectorSize - (aligned ? 0 : 1), 441e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) output_vector_.get()); 451e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) double total_time_milliseconds = 47f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) (TimeTicks::HighResNow() - start).InMillisecondsF(); 481e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) perf_test::PrintResult(test_name, 491e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) "", 501e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) trace_name, 51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) kBenchmarkIterations / total_time_milliseconds, 52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "runs/ms", 53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) true); 54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void RunBenchmark( 57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) std::pair<float, float> (*fn)(float, const float[], int, float), 58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int len, 59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const std::string& test_name, 60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const std::string& trace_name) { 61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) TimeTicks start = TimeTicks::HighResNow(); 62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for (int i = 0; i < kEWMABenchmarkIterations; ++i) { 
63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fn(0.5f, input_vector_.get(), len, 0.1f); 64f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 65f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) double total_time_milliseconds = 66f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) (TimeTicks::HighResNow() - start).InMillisecondsF(); 67f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) perf_test::PrintResult(test_name, 68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "", 69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) trace_name, 70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) kEWMABenchmarkIterations / total_time_milliseconds, 71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "runs/ms", 721e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) true); 731e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 741e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 751e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) protected: 765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) scoped_ptr<float, base::AlignedFreeDeleter> input_vector_; 775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) scoped_ptr<float, base::AlignedFreeDeleter> output_vector_; 781e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 791e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest); 801e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)}; 811e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 8246d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)// Define platform dependent function names for SIMD optimized methods. 
831e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#if defined(ARCH_CPU_X86_FAMILY) 841e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#define FMAC_FUNC FMAC_SSE 8546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#define FMUL_FUNC FMUL_SSE 8646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE 871e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) 881e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#define FMAC_FUNC FMAC_NEON 8946d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#define FMUL_FUNC FMUL_NEON 9046d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON 911e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#endif 921e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 931e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// Benchmark for each optimized vector_math::FMAC() method. 941e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)TEST_F(VectorMathPerfTest, FMAC) { 951e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Benchmark FMAC_C(). 961e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) RunBenchmark( 971e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized"); 981e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#if defined(FMAC_FUNC) 991e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Benchmark FMAC_FUNC() with unaligned size. 
1001e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / 1011e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) sizeof(float)), 0U); 1021e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) RunBenchmark( 1031e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) vector_math::FMAC_FUNC, false, "vector_math_fmac", "optimized_unaligned"); 1041e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Benchmark FMAC_FUNC() with aligned size. 1051e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), 1061e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 0U); 1071e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) RunBenchmark( 1081e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) vector_math::FMAC_FUNC, true, "vector_math_fmac", "optimized_aligned"); 1091e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#endif 1101e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 1111e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1121e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)// Benchmark for each optimized vector_math::FMUL() method. 1131e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)TEST_F(VectorMathPerfTest, FMUL) { 1141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Benchmark FMUL_C(). 1151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) RunBenchmark( 1161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized"); 1171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#if defined(FMUL_FUNC) 1181e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Benchmark FMUL_FUNC() with unaligned size. 
1191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / 1201e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) sizeof(float)), 0U); 1211e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) RunBenchmark( 122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) vector_math::FMUL_FUNC, false, "vector_math_fmul", "optimized_unaligned"); 1231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Benchmark FMUL_FUNC() with aligned size. 1241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), 1251e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 0U); 1261e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) RunBenchmark( 127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) vector_math::FMUL_FUNC, true, "vector_math_fmul", "optimized_aligned"); 1281e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#endif 1291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 1301e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 131f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Benchmark for each optimized vector_math::EWMAAndMaxPower() method. 132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)TEST_F(VectorMathPerfTest, EWMAAndMaxPower) { 133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Benchmark EWMAAndMaxPower_C(). 
134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) RunBenchmark(vector_math::EWMAAndMaxPower_C, 135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) kVectorSize, 136f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "vector_math_ewma_and_max_power", 137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "unoptimized"); 138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#if defined(EWMAAndMaxPower_FUNC) 139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Benchmark EWMAAndMaxPower_FUNC() with unaligned size. 140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / 141f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) sizeof(float)), 0U); 142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, 143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) kVectorSize - 1, 144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "vector_math_ewma_and_max_power", 145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "optimized_unaligned"); 146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Benchmark EWMAAndMaxPower_FUNC() with aligned size. 
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), 148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 0U); 149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, 150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) kVectorSize, 151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "vector_math_ewma_and_max_power", 152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "optimized_aligned"); 153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif 154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 155f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 1561e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} // namespace media 157