benchmark_api.h revision 75712367c115500bf24b13396c36926df1ed1556
1// Support for registering benchmarks for functions.
2
3/* Example usage:
4// Define a function that executes the code to be measured a
5// specified number of times:
6static void BM_StringCreation(benchmark::State& state) {
7  while (state.KeepRunning())
8    std::string empty_string;
9}
10
11// Register the function as a benchmark
12BENCHMARK(BM_StringCreation);
13
14// Define another benchmark
15static void BM_StringCopy(benchmark::State& state) {
16  std::string x = "hello";
17  while (state.KeepRunning())
18    std::string copy(x);
19}
20BENCHMARK(BM_StringCopy);
21
22// Augment the main() program to invoke benchmarks if specified
// via the --benchmark_filter command line flag.  E.g.,
24//       my_unittest --benchmark_filter=all
25//       my_unittest --benchmark_filter=BM_StringCreation
26//       my_unittest --benchmark_filter=String
27//       my_unittest --benchmark_filter='Copy|Creation'
int main(int argc, const char** argv) {
29  benchmark::Initialize(&argc, argv);
30  benchmark::RunSpecifiedBenchmarks();
31  return 0;
32}
33
34// Sometimes a family of microbenchmarks can be implemented with
35// just one routine that takes an extra argument to specify which
36// one of the family of benchmarks to run.  For example, the following
37// code defines a family of microbenchmarks for measuring the speed
38// of memcpy() calls of different lengths:
39
40static void BM_memcpy(benchmark::State& state) {
41  char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
42  memset(src, 'x', state.range_x());
43  while (state.KeepRunning())
44    memcpy(dst, src, state.range_x());
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(state.range_x()));
46  delete[] src; delete[] dst;
47}
48BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
49
50// The preceding code is quite repetitive, and can be replaced with the
51// following short-hand.  The following invocation will pick a few
52// appropriate arguments in the specified range and will generate a
53// microbenchmark for each such argument.
54BENCHMARK(BM_memcpy)->Range(8, 8<<10);
55
56// You might have a microbenchmark that depends on two inputs.  For
57// example, the following code defines a family of microbenchmarks for
58// measuring the speed of set insertion.
59static void BM_SetInsert(benchmark::State& state) {
60  while (state.KeepRunning()) {
61    state.PauseTiming();
62    set<int> data = ConstructRandomSet(state.range_x());
63    state.ResumeTiming();
    for (int j = 0; j < state.range_y(); ++j)
65      data.insert(RandomNumber());
66  }
67}
68BENCHMARK(BM_SetInsert)
69   ->ArgPair(1<<10, 1)
70   ->ArgPair(1<<10, 8)
71   ->ArgPair(1<<10, 64)
72   ->ArgPair(1<<10, 512)
73   ->ArgPair(8<<10, 1)
74   ->ArgPair(8<<10, 8)
75   ->ArgPair(8<<10, 64)
76   ->ArgPair(8<<10, 512);
77
78// The preceding code is quite repetitive, and can be replaced with
79// the following short-hand.  The following macro will pick a few
80// appropriate arguments in the product of the two specified ranges
81// and will generate a microbenchmark for each such pair.
82BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
83
84// For more complex patterns of inputs, passing a custom function
85// to Apply allows programmatic specification of an
86// arbitrary set of arguments to run the microbenchmark on.
87// The following example enumerates a dense range on
88// one parameter, and a sparse range on the second.
static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->ArgPair(i, j);
}
96BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
97
98// Templated microbenchmarks work the same way:
99// Produce then consume 'size' messages 'iters' times
100// Measures throughput in the absence of multiprogramming.
template <class Q> void BM_Sequential(benchmark::State& state) {
102  Q q;
103  typename Q::value_type v;
104  while (state.KeepRunning()) {
105    for (int i = state.range_x(); i--; )
106      q.push(v);
107    for (int e = state.range_x(); e--; )
108      q.Wait(&v);
109  }
110  // actually messages, not bytes:
111  state.SetBytesProcessed(
112      static_cast<int64_t>(state.iterations())*state.range_x());
113}
114BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
115
116In a multithreaded test, it is guaranteed that none of the threads will start
117until all have called KeepRunning, and all will have finished before KeepRunning
118returns false. As such, any global setup or teardown you want to do can be
119wrapped in a check against the thread index:
120
121static void BM_MultiThreaded(benchmark::State& state) {
122  if (state.thread_index == 0) {
123    // Setup code here.
124  }
125  while (state.KeepRunning()) {
126    // Run the test as normal.
127  }
128  if (state.thread_index == 0) {
129    // Teardown code here.
130  }
131}
132BENCHMARK(BM_MultiThreaded)->Threads(4);
133*/
134
135#ifndef BENCHMARK_BENCHMARK_API_H_
136#define BENCHMARK_BENCHMARK_API_H_
137
138#include <assert.h>
139#include <stddef.h>
140#include <stdint.h>
141
142#include "macros.h"
143
144namespace benchmark {
145class BenchmarkReporter;
146
// Library initialization. The usage example at the top of this file calls it
// from main() with &argc and argv before RunSpecifiedBenchmarks().
// NOTE(review): argc is taken by pointer, so presumably recognized
// --benchmark_* flags are consumed from the argument list -- confirm against
// the implementation.
void Initialize(int* argc, const char** argv);

// Run all benchmarks specified by the --benchmark_filter flag,
// and exit after running the benchmarks.
// NOTE(review): the overload taking a BenchmarkReporter presumably sends the
// results through that reporter instead of a default one -- confirm.
void RunSpecifiedBenchmarks();
void RunSpecifiedBenchmarks(BenchmarkReporter* reporter);
153
154// If this routine is called, peak memory allocation past this point in the
155// benchmark is reported at the end of the benchmark report line. (It is
156// computed by running the benchmark once with a single iteration and a memory
157// tracer.)
158// TODO(dominic)
159// void MemoryUsage();
160
161namespace internal {
162class Benchmark;
163class BenchmarkImp;
164
// Maps any type to `void`. Naming `Voider<X>::type` is only well-formed when
// X itself is well-formed, which is what the partial specialization below
// relies on.
template <class Ignored>
struct Voider {
  typedef void type;
};

// Primary template: has no nested `type`, so using
// `EnableIfString<T>::type` in a signature removes that overload for
// types that are not string-like.
template <class Candidate, class Enable = void>
struct EnableIfString {};

// Chosen only when `Candidate::basic_string` names a type; in that case
// `type` is `int`.
template <class Candidate>
struct EnableIfString<
    Candidate, typename Voider<typename Candidate::basic_string>::type> {
  typedef int type;
};
176
177} // end namespace internal
178
179// State is passed to a running Benchmark and contains state for the
180// benchmark to use.
181class State {
182public:
183  State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i);
184
185  // Returns true iff the benchmark should continue through another iteration.
186  // NOTE: A benchmark may not return from the test until KeepRunning() has
187  // returned false.
188  bool KeepRunning() {
189    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
190        ResumeTiming();
191        started_ = true;
192    }
193    bool const res = total_iterations_++ < max_iterations;
194    if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
195        assert(started_);
196        PauseTiming();
197        // Total iterations now is one greater than max iterations. Fix this.
198        total_iterations_ = max_iterations;
199    }
200    return res;
201  }
202
203  // REQUIRES: timer is running
204  // Stop the benchmark timer.  If not called, the timer will be
205  // automatically stopped after KeepRunning() returns false for the first time.
206  //
207  // For threaded benchmarks the PauseTiming() function acts
208  // like a barrier.  I.e., the ith call by a particular thread to this
209  // function will block until all threads have made their ith call.
210  // The timer will stop when the last thread has called this function.
211  //
212  // NOTE: PauseTiming()/ResumeTiming() are relatively
213  // heavyweight, and so their use should generally be avoided
214  // within each benchmark iteration, if possible.
215  void PauseTiming();
216
217  // REQUIRES: timer is not running
218  // Start the benchmark timer.  The timer is NOT running on entrance to the
219  // benchmark function. It begins running after the first call to KeepRunning()
220  //
221  // For threaded benchmarks the ResumeTiming() function acts
222  // like a barrier.  I.e., the ith call by a particular thread to this
223  // function will block until all threads have made their ith call.
224  // The timer will start when the last thread has called this function.
225  //
226  // NOTE: PauseTiming()/ResumeTiming() are relatively
227  // heavyweight, and so their use should generally be avoided
228  // within each benchmark iteration, if possible.
229  void ResumeTiming();
230
231  // If a particular benchmark is I/O bound, or if for some reason CPU
232  // timings are not representative, call this method from within the
233  // benchmark routine.  If called, the elapsed time will be used to
234  // control how many iterations are run, and in the printing of
235  // items/second or MB/seconds values.  If not called, the cpu time
236  // used by the benchmark will be used.
237  void UseRealTime();
238
239  // Set the number of bytes processed by the current benchmark
240  // execution.  This routine is typically called once at the end of a
241  // throughput oriented benchmark.  If this routine is called with a
242  // value > 0, the report is printed in MB/sec instead of nanoseconds
243  // per iteration.
244  //
245  // REQUIRES: a benchmark has exited its KeepRunning loop.
246  BENCHMARK_ALWAYS_INLINE
247  void SetBytesProcessed(size_t bytes) {
248    bytes_processed_ = bytes;
249  }
250
251  BENCHMARK_ALWAYS_INLINE
252  size_t bytes_processed() const {
253    return bytes_processed_;
254  }
255
256  // If this routine is called with items > 0, then an items/s
257  // label is printed on the benchmark report line for the currently
258  // executing benchmark. It is typically called at the end of a processing
259  // benchmark where a processing items/second output is desired.
260  //
261  // REQUIRES: a benchmark has exited its KeepRunning loop.
262  BENCHMARK_ALWAYS_INLINE
263  void SetItemsProcessed(size_t items) {
264    items_processed_ = items;
265  }
266
267  BENCHMARK_ALWAYS_INLINE
268  size_t items_processed() const {
269    return items_processed_;
270  }
271
272  // If this routine is called, the specified label is printed at the
273  // end of the benchmark report line for the currently executing
274  // benchmark.  Example:
275  //  static void BM_Compress(int iters) {
276  //    ...
277  //    double compress = input_size / output_size;
278  //    benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
279  //  }
280  // Produces output that looks like:
281  //  BM_Compress   50         50   14115038  compress:27.3%
282  //
283  // REQUIRES: a benchmark has exited its KeepRunning loop.
284  void SetLabel(const char* label);
285
286  // Allow the use of std::string without actually including <string>.
287  // This function does not participate in overload resolution unless StringType
288  // has the nested typename `basic_string`. This typename should be provided
289  // as an injected class name in the case of std::string.
290  template <class StringType>
291  void SetLabel(StringType const & str,
292                typename internal::EnableIfString<StringType>::type = 1) {
293    this->SetLabel(str.c_str());
294  }
295
296  // Range arguments for this run. CHECKs if the argument has been set.
297  BENCHMARK_ALWAYS_INLINE
298  int range_x() const {
299    assert(has_range_x_);
300    ((void)has_range_x_); // Prevent unused warning.
301    return range_x_;
302  }
303
304  BENCHMARK_ALWAYS_INLINE
305  int range_y() const {
306    assert(has_range_y_);
307    ((void)has_range_y_); // Prevent unused warning.
308    return range_y_;
309  }
310
311  BENCHMARK_ALWAYS_INLINE
312  size_t iterations() const { return total_iterations_; }
313
314private:
315  bool started_;
316  size_t total_iterations_;
317
318  bool has_range_x_;
319  int range_x_;
320
321  bool has_range_y_;
322  int range_y_;
323
324  size_t bytes_processed_;
325  size_t items_processed_;
326
327public:
328  const int thread_index;
329  const size_t max_iterations;
330
331private:
332  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
333};
334
335namespace internal {
336
// Signature of a benchmark function: it receives the State of the current
// run by reference and returns nothing.
typedef void(Function)(State&);
338
// ------------------------------------------------------
// Benchmark registration object.  The BENCHMARK() macro expands
// into an internal::Benchmark* object.  Various methods can
// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
class Benchmark {
 public:
  // name: the name given to the benchmark; f: the function to run.
  Benchmark(const char* name, Function* f);

  ~Benchmark();

  // Note: the following methods all return "this" so that multiple
  // method calls can be chained together in one expression.

  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The benchmark function reads the argument from its State;
  // the usage example at the top of this file does so via state.range_x().
  Benchmark* Arg(int x);

  // Run this benchmark once for a number of values picked from the
  // range [start..limit].  (start and limit are always picked.)
  // REQUIRES: as for Arg().
  Benchmark* Range(int start, int limit);

  // Run this benchmark once for every value in the range [start..limit]
  // REQUIRES: as for Arg().
  Benchmark* DenseRange(int start, int limit);

  // Run this benchmark once with "x,y" as the extra arguments passed
  // to the function.
  // REQUIRES: The benchmark function reads both arguments from its State
  // (presumably via state.range_x() and state.range_y() -- confirm).
  Benchmark* ArgPair(int x, int y);

  // Pick a set of values A from the range [lo1..hi1] and a set
  // of values B from the range [lo2..hi2].  Run the benchmark for
  // every pair of values in the cartesian product of A and B
  // (i.e., for all combinations of the values in A and B).
  // REQUIRES: as for ArgPair().
  Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);

  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, ArgPair,
  // Threads, etc.  Note that func returns void.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));

  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads.  This may be useful when measuring the scaling
  // of some piece of code.

  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);

  // Pick a set of values T from [min_threads,max_threads].
  // min_threads and max_threads are always included in T.  Run this
  // benchmark once for each value in T.  The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently.  For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);

  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

  // Used inside the benchmark implementation
  struct Instance;

 private:
   // Pointer to the implementation object; BenchmarkImp is only
   // forward-declared in this header.
   BenchmarkImp* imp_;
   BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark);
};
416
417}  // end namespace internal
418}  // end namespace benchmark
419
420
421// ------------------------------------------------------
422// Macro to register benchmarks
423
// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
// With a working counter the left expansion yields k and the right one k + 1,
// so the comparison (k + 1 == k + 1 + 0) holds. In that case every expansion
// of BENCHMARK_PRIVATE_UNIQUE_ID produces a fresh counter value; otherwise the
// current source line number is used instead.
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif
432
// Helpers for generating unique variable names.
// BENCHMARK_PRIVATE_NAME(tag) expands to the single identifier
//   _benchmark_<BENCHMARK_PRIVATE_UNIQUE_ID><tag>
// The paste is routed through two macro layers so that
// BENCHMARK_PRIVATE_UNIQUE_ID is replaced by its value before ## is applied.
#define BENCHMARK_PRIVATE_NAME(tag) \
  BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, tag)
#define BENCHMARK_PRIVATE_CONCAT(head, mid, tail) \
  BENCHMARK_PRIVATE_CONCAT2(head, mid, tail)
#define BENCHMARK_PRIVATE_CONCAT2(head, mid, tail) head##mid##tail
438
// Emits the declaration of a file-local Benchmark pointer whose name is
// generated by BENCHMARK_PRIVATE_NAME. No initializer is included: the
// registration macros append "= <expression>" themselves.
#define BENCHMARK_PRIVATE_DECLARE(func)    \
  static ::benchmark::internal::Benchmark* \
      BENCHMARK_PRIVATE_NAME(func) BENCHMARK_UNUSED

// Creates a Benchmark for function `func`, named after the spelling of
// `func`. The expansion ends in a Benchmark* expression, so configuration
// calls can be chained on it, e.g. BENCHMARK(BM_Foo)->Arg(8);
#define BENCHMARK(func)             \
  BENCHMARK_PRIVATE_DECLARE(func) = \
      (new ::benchmark::internal::Benchmark(#func, func))
445
// Old-style macros: each one is BENCHMARK(func) followed by a single
// argument-configuration call.
#define BENCHMARK_WITH_ARG(func, arg) BENCHMARK(func)->Arg((arg))
#define BENCHMARK_WITH_ARG2(func, arg1, arg2) \
  BENCHMARK(func)->ArgPair((arg1), (arg2))
#define BENCHMARK_RANGE(func, low, high) BENCHMARK(func)->Range((low), (high))
#define BENCHMARK_RANGE2(func, low1, high1, low2, high2) \
  BENCHMARK(func)->RangePair((low1), (high1), (low2), (high2))
452
// Registers an instantiation of a function template as a benchmark.
// For example, given
//
//   template <int arg>
//   void BM_Foo(benchmark::State& state);
//
// the line
//
//   BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// registers BM_Foo<1> under the name "BM_Foo<1>".

// One template argument.
#define BENCHMARK_TEMPLATE1(func, targ) \
  BENCHMARK_PRIVATE_DECLARE(func) =     \
      (new ::benchmark::internal::Benchmark(#func "<" #targ ">", func<targ>))

// Two template arguments.
#define BENCHMARK_TEMPLATE2(func, targ1, targ2)       \
  BENCHMARK_PRIVATE_DECLARE(func) =                   \
      (new ::benchmark::internal::Benchmark(          \
        #func "<" #targ1 "," #targ2 ">", func<targ1, targ2>))

// Before C++11 there are no variadic macros, so BENCHMARK_TEMPLATE accepts
// exactly one template argument; from C++11 on it accepts any number.
#if __cplusplus < 201103L
#define BENCHMARK_TEMPLATE(func, targ) BENCHMARK_TEMPLATE1(func, targ)
#else
#define BENCHMARK_TEMPLATE(func, ...)        \
  BENCHMARK_PRIVATE_DECLARE(func) =          \
      (new ::benchmark::internal::Benchmark( \
        #func "<" #__VA_ARGS__ ">", func<__VA_ARGS__>))
#endif
477
// Helper macro that defines main() for a benchmark binary: it initializes
// the library from the command line, runs the specified benchmarks and
// returns 0.
#define BENCHMARK_MAIN()                   \
  int main(int argc, const char** argv) {  \
    ::benchmark::Initialize(&argc, argv);  \
    ::benchmark::RunSpecifiedBenchmarks(); \
    return 0;                              \
  }
484
485#endif  // BENCHMARK_BENCHMARK_API_H_
486