1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/base_switches.h"
6#include "base/bind.h"
7#include "base/command_line.h"
8#include "base/memory/scoped_vector.h"
9#include "base/synchronization/condition_variable.h"
10#include "base/synchronization/lock.h"
11#include "base/synchronization/waitable_event.h"
12#include "base/threading/thread.h"
13#include "base/time/time.h"
14#include "build/build_config.h"
15#include "testing/gtest/include/gtest/gtest.h"
16#include "testing/perf/perf_test.h"
17
18#if defined(OS_POSIX)
19#include <pthread.h>
20#endif
21
22namespace base {
23
24namespace {
25
// Number of hops performed by every ping-pong measurement. The reported
// timings are divided by this count to obtain a per-hop figure.
const int kNumRuns = 100 * 1000;
27
28// Base class for a threading perf-test. This sets up some threads for the
29// test and measures the clock-time in addition to time spent on each thread.
30class ThreadPerfTest : public testing::Test {
31 public:
32  ThreadPerfTest()
33      : done_(false, false) {
34    // Disable the task profiler as it adds significant cost!
35    CommandLine::Init(0, NULL);
36    CommandLine::ForCurrentProcess()->AppendSwitchASCII(
37        switches::kProfilerTiming,
38        switches::kProfilerTimingDisabledValue);
39  }
40
41  // To be implemented by each test. Subclass must uses threads_ such that
42  // their cpu-time can be measured. Test must return from PingPong() _and_
43  // call FinishMeasurement from any thread to complete the test.
44  virtual void Init() {}
45  virtual void PingPong(int hops) = 0;
46  virtual void Reset() {}
47
48  void TimeOnThread(base::TimeTicks* ticks, base::WaitableEvent* done) {
49    *ticks = base::TimeTicks::ThreadNow();
50    done->Signal();
51  }
52
53  base::TimeTicks ThreadNow(base::Thread* thread) {
54    base::WaitableEvent done(false, false);
55    base::TimeTicks ticks;
56    thread->message_loop_proxy()->PostTask(
57        FROM_HERE,
58        base::Bind(&ThreadPerfTest::TimeOnThread,
59                   base::Unretained(this),
60                   &ticks,
61                   &done));
62    done.Wait();
63    return ticks;
64  }
65
66  void RunPingPongTest(const std::string& name, unsigned num_threads) {
67    // Create threads and collect starting cpu-time for each thread.
68    std::vector<base::TimeTicks> thread_starts;
69    while (threads_.size() < num_threads) {
70      threads_.push_back(new base::Thread("PingPonger"));
71      threads_.back()->Start();
72      if (base::TimeTicks::IsThreadNowSupported())
73        thread_starts.push_back(ThreadNow(threads_.back()));
74    }
75
76    Init();
77
78    base::TimeTicks start = base::TimeTicks::HighResNow();
79    PingPong(kNumRuns);
80    done_.Wait();
81    base::TimeTicks end = base::TimeTicks::HighResNow();
82
83    // Gather the cpu-time spent on each thread. This does one extra tasks,
84    // but that should be in the noise given enough runs.
85    base::TimeDelta thread_time;
86    while (threads_.size()) {
87      if (base::TimeTicks::IsThreadNowSupported()) {
88        thread_time += ThreadNow(threads_.back()) - thread_starts.back();
89        thread_starts.pop_back();
90      }
91      threads_.pop_back();
92    }
93
94    Reset();
95
96    double num_runs = static_cast<double>(kNumRuns);
97    double us_per_task_clock = (end - start).InMicroseconds() / num_runs;
98    double us_per_task_cpu = thread_time.InMicroseconds() / num_runs;
99
100    // Clock time per task.
101    perf_test::PrintResult(
102        "task", "", name + "_time ", us_per_task_clock, "us/hop", true);
103
104    // Total utilization across threads if available (likely higher).
105    if (base::TimeTicks::IsThreadNowSupported()) {
106      perf_test::PrintResult(
107          "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true);
108    }
109  }
110
111 protected:
112  void FinishMeasurement() { done_.Signal(); }
113  ScopedVector<base::Thread> threads_;
114
115 private:
116  base::WaitableEvent done_;
117};
118
119// Class to test task performance by posting empty tasks back and forth.
120class TaskPerfTest : public ThreadPerfTest {
121  base::Thread* NextThread(int count) {
122    return threads_[count % threads_.size()];
123  }
124
125  virtual void PingPong(int hops) OVERRIDE {
126    if (!hops) {
127      FinishMeasurement();
128      return;
129    }
130    NextThread(hops)->message_loop_proxy()->PostTask(
131        FROM_HERE,
132        base::Bind(
133            &ThreadPerfTest::PingPong, base::Unretained(this), hops - 1));
134  }
135};
136
137// This tries to test the 'best-case' as well as the 'worst-case' task posting
138// performance. The best-case keeps one thread alive such that it never yeilds,
139// while the worse-case forces a context switch for every task. Four threads are
140// used to ensure the threads do yeild (with just two it might be possible for
141// both threads to stay awake if they can signal each other fast enough).
142TEST_F(TaskPerfTest, TaskPingPong) {
143  RunPingPongTest("1_Task_Threads", 1);
144  RunPingPongTest("4_Task_Threads", 4);
145}
146
147
148// Same as above, but add observers to test their perf impact.
149class MessageLoopObserver : public base::MessageLoop::TaskObserver {
150 public:
151  virtual void WillProcessTask(const base::PendingTask& pending_task) OVERRIDE {
152  }
153  virtual void DidProcessTask(const base::PendingTask& pending_task) OVERRIDE {
154  }
155};
156MessageLoopObserver message_loop_observer;
157
158class TaskObserverPerfTest : public TaskPerfTest {
159 public:
160  virtual void Init() OVERRIDE {
161    TaskPerfTest::Init();
162    for (size_t i = 0; i < threads_.size(); i++) {
163      threads_[i]->message_loop()->AddTaskObserver(&message_loop_observer);
164    }
165  }
166};
167
168TEST_F(TaskObserverPerfTest, TaskPingPong) {
169  RunPingPongTest("1_Task_Threads_With_Observer", 1);
170  RunPingPongTest("4_Task_Threads_With_Observer", 4);
171}
172
173// Class to test our WaitableEvent performance by signaling back and fort.
174// WaitableEvent is templated so we can also compare with other versions.
175template <typename WaitableEventType>
176class EventPerfTest : public ThreadPerfTest {
177 public:
178  virtual void Init() OVERRIDE {
179    for (size_t i = 0; i < threads_.size(); i++)
180      events_.push_back(new WaitableEventType(false, false));
181  }
182
183  virtual void Reset() OVERRIDE { events_.clear(); }
184
185  void WaitAndSignalOnThread(size_t event) {
186    size_t next_event = (event + 1) % events_.size();
187    int my_hops = 0;
188    do {
189      events_[event]->Wait();
190      my_hops = --remaining_hops_;  // We own 'hops' between Wait and Signal.
191      events_[next_event]->Signal();
192    } while (my_hops > 0);
193    // Once we are done, all threads will signal as hops passes zero.
194    // We only signal completion once, on the thread that reaches zero.
195    if (!my_hops)
196      FinishMeasurement();
197  }
198
199  virtual void PingPong(int hops) OVERRIDE {
200    remaining_hops_ = hops;
201    for (size_t i = 0; i < threads_.size(); i++) {
202      threads_[i]->message_loop_proxy()->PostTask(
203          FROM_HERE,
204          base::Bind(&EventPerfTest::WaitAndSignalOnThread,
205                     base::Unretained(this),
206                     i));
207    }
208
209    // Kick off the Signal ping-ponging.
210    events_.front()->Signal();
211  }
212
213  int remaining_hops_;
214  ScopedVector<WaitableEventType> events_;
215};
216
217// Similar to the task posting test, this just tests similar functionality
218// using WaitableEvents. We only test four threads (worst-case), but we
219// might want to craft a way to test the best-case (where the thread doesn't
220// end up blocking because the event is already signalled).
221typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest;
222TEST_F(WaitableEventPerfTest, EventPingPong) {
223  RunPingPongTest("4_WaitableEvent_Threads", 4);
224}
225
226// Build a minimal event using ConditionVariable.
227class ConditionVariableEvent {
228 public:
229  ConditionVariableEvent(bool manual_reset, bool initially_signaled)
230      : cond_(&lock_), signaled_(false) {
231    DCHECK(!manual_reset);
232    DCHECK(!initially_signaled);
233  }
234
235  void Signal() {
236    {
237      base::AutoLock scoped_lock(lock_);
238      signaled_ = true;
239    }
240    cond_.Signal();
241  }
242
243  void Wait() {
244    base::AutoLock scoped_lock(lock_);
245    while (!signaled_)
246      cond_.Wait();
247    signaled_ = false;
248  }
249
250 private:
251  base::Lock lock_;
252  base::ConditionVariable cond_;
253  bool signaled_;
254};
255
256// This is meant to test the absolute minimal context switching time
257// using our own base synchronization code.
258typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
259TEST_F(ConditionVariablePerfTest, EventPingPong) {
260  RunPingPongTest("4_ConditionVariable_Threads", 4);
261}
262
263#if defined(OS_POSIX)
264
265// Absolutely 100% minimal posix waitable event. If there is a better/faster
266// way to force a context switch, we should use that instead.
267class PthreadEvent {
268 public:
269  PthreadEvent(bool manual_reset, bool initially_signaled) {
270    DCHECK(!manual_reset);
271    DCHECK(!initially_signaled);
272    pthread_mutex_init(&mutex_, 0);
273    pthread_cond_init(&cond_, 0);
274    signaled_ = false;
275  }
276
277  ~PthreadEvent() {
278    pthread_cond_destroy(&cond_);
279    pthread_mutex_destroy(&mutex_);
280  }
281
282  void Signal() {
283    pthread_mutex_lock(&mutex_);
284    signaled_ = true;
285    pthread_mutex_unlock(&mutex_);
286    pthread_cond_signal(&cond_);
287  }
288
289  void Wait() {
290    pthread_mutex_lock(&mutex_);
291    while (!signaled_)
292      pthread_cond_wait(&cond_, &mutex_);
293    signaled_ = false;
294    pthread_mutex_unlock(&mutex_);
295  }
296
297 private:
298  bool signaled_;
299  pthread_mutex_t mutex_;
300  pthread_cond_t cond_;
301};
302
303// This is meant to test the absolute minimal context switching time.
304// If there is any faster way to do this we should substitute it in.
305typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
306TEST_F(PthreadEventPerfTest, EventPingPong) {
307  RunPingPongTest("4_PthreadCondVar_Threads", 4);
308}
309
310#endif
311
312}  // namespace
313
314}  // namespace base
315