benchmark_api.h revision 75712367c115500bf24b13396c36926df1ed1556
1// Support for registering benchmarks for functions. 2 3/* Example usage: 4// Define a function that executes the code to be measured a 5// specified number of times: 6static void BM_StringCreation(benchmark::State& state) { 7 while (state.KeepRunning()) 8 std::string empty_string; 9} 10 11// Register the function as a benchmark 12BENCHMARK(BM_StringCreation); 13 14// Define another benchmark 15static void BM_StringCopy(benchmark::State& state) { 16 std::string x = "hello"; 17 while (state.KeepRunning()) 18 std::string copy(x); 19} 20BENCHMARK(BM_StringCopy); 21 22// Augment the main() program to invoke benchmarks if specified 23// via the --benchmarks command line flag. E.g., 24// my_unittest --benchmark_filter=all 25// my_unittest --benchmark_filter=BM_StringCreation 26// my_unittest --benchmark_filter=String 27// my_unittest --benchmark_filter='Copy|Creation' 28int main(int argc, char** argv) { 29 benchmark::Initialize(&argc, argv); 30 benchmark::RunSpecifiedBenchmarks(); 31 return 0; 32} 33 34// Sometimes a family of microbenchmarks can be implemented with 35// just one routine that takes an extra argument to specify which 36// one of the family of benchmarks to run. For example, the following 37// code defines a family of microbenchmarks for measuring the speed 38// of memcpy() calls of different lengths: 39 40static void BM_memcpy(benchmark::State& state) { 41 char* src = new char[state.range_x()]; char* dst = new char[state.range_x()]; 42 memset(src, 'x', state.range_x()); 43 while (state.KeepRunning()) 44 memcpy(dst, src, state.range_x()); 45 state.SetBytesProcessed(int64_t_t(state.iterations) * int64(state.range_x())); 46 delete[] src; delete[] dst; 47} 48BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); 49 50// The preceding code is quite repetitive, and can be replaced with the 51// following short-hand. The following invocation will pick a few 52// appropriate arguments in the specified range and will generate a 53// microbenchmark for each such argument. 54BENCHMARK(BM_memcpy)->Range(8, 8<<10); 55 56// You might have a microbenchmark that depends on two inputs. For 57// example, the following code defines a family of microbenchmarks for 58// measuring the speed of set insertion. 59static void BM_SetInsert(benchmark::State& state) { 60 while (state.KeepRunning()) { 61 state.PauseTiming(); 62 set<int> data = ConstructRandomSet(state.range_x()); 63 state.ResumeTiming(); 64 for (int j = 0; j < state.rangeY; ++j) 65 data.insert(RandomNumber()); 66 } 67} 68BENCHMARK(BM_SetInsert) 69 ->ArgPair(1<<10, 1) 70 ->ArgPair(1<<10, 8) 71 ->ArgPair(1<<10, 64) 72 ->ArgPair(1<<10, 512) 73 ->ArgPair(8<<10, 1) 74 ->ArgPair(8<<10, 8) 75 ->ArgPair(8<<10, 64) 76 ->ArgPair(8<<10, 512); 77 78// The preceding code is quite repetitive, and can be replaced with 79// the following short-hand. The following macro will pick a few 80// appropriate arguments in the product of the two specified ranges 81// and will generate a microbenchmark for each such pair. 82BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512); 83 84// For more complex patterns of inputs, passing a custom function 85// to Apply allows programmatic specification of an 86// arbitrary set of arguments to run the microbenchmark on. 87// The following example enumerates a dense range on 88// one parameter, and a sparse range on the second. 89static benchmark::internal::Benchmark* CustomArguments( 90 benchmark::internal::Benchmark* b) { 91 for (int i = 0; i <= 10; ++i) 92 for (int j = 32; j <= 1024*1024; j *= 8) 93 b = b->ArgPair(i, j); 94 return b; 95} 96BENCHMARK(BM_SetInsert)->Apply(CustomArguments); 97 98// Templated microbenchmarks work the same way: 99// Produce then consume 'size' messages 'iters' times 100// Measures throughput in the absence of multiprogramming. 101template <class Q> int BM_Sequential(benchmark::State& state) { 102 Q q; 103 typename Q::value_type v; 104 while (state.KeepRunning()) { 105 for (int i = state.range_x(); i--; ) 106 q.push(v); 107 for (int e = state.range_x(); e--; ) 108 q.Wait(&v); 109 } 110 // actually messages, not bytes: 111 state.SetBytesProcessed( 112 static_cast<int64_t>(state.iterations())*state.range_x()); 113} 114BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10); 115 116In a multithreaded test, it is guaranteed that none of the threads will start 117until all have called KeepRunning, and all will have finished before KeepRunning 118returns false. As such, any global setup or teardown you want to do can be 119wrapped in a check against the thread index: 120 121static void BM_MultiThreaded(benchmark::State& state) { 122 if (state.thread_index == 0) { 123 // Setup code here. 124 } 125 while (state.KeepRunning()) { 126 // Run the test as normal. 127 } 128 if (state.thread_index == 0) { 129 // Teardown code here. 130 } 131} 132BENCHMARK(BM_MultiThreaded)->Threads(4); 133*/ 134 135#ifndef BENCHMARK_BENCHMARK_API_H_ 136#define BENCHMARK_BENCHMARK_API_H_ 137 138#include <assert.h> 139#include <stddef.h> 140#include <stdint.h> 141 142#include "macros.h" 143 144namespace benchmark { 145class BenchmarkReporter; 146 147void Initialize(int* argc, const char** argv); 148 149// Otherwise, run all benchmarks specified by the --benchmark_filter flag, 150// and exit after running the benchmarks. 151void RunSpecifiedBenchmarks(); 152void RunSpecifiedBenchmarks(BenchmarkReporter* reporter); 153 154// If this routine is called, peak memory allocation past this point in the 155// benchmark is reported at the end of the benchmark report line. (It is 156// computed by running the benchmark once with a single iteration and a memory 157// tracer.) 158// TODO(dominic) 159// void MemoryUsage(); 160 161namespace internal { 162class Benchmark; 163class BenchmarkImp; 164 165template <class T> struct Voider { 166 typedef void type; 167}; 168 169template <class T, class = void> 170struct EnableIfString {}; 171 172template <class T> 173struct EnableIfString<T, typename Voider<typename T::basic_string>::type> { 174 typedef int type; 175}; 176 177} // end namespace internal 178 179// State is passed to a running Benchmark and contains state for the 180// benchmark to use. 181class State { 182public: 183 State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i); 184 185 // Returns true iff the benchmark should continue through another iteration. 186 // NOTE: A benchmark may not return from the test until KeepRunning() has 187 // returned false. 188 bool KeepRunning() { 189 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { 190 ResumeTiming(); 191 started_ = true; 192 } 193 bool const res = total_iterations_++ < max_iterations; 194 if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { 195 assert(started_); 196 PauseTiming(); 197 // Total iterations now is one greater than max iterations. Fix this. 198 total_iterations_ = max_iterations; 199 } 200 return res; 201 } 202 203 // REQUIRES: timer is running 204 // Stop the benchmark timer. If not called, the timer will be 205 // automatically stopped after KeepRunning() returns false for the first time. 206 // 207 // For threaded benchmarks the PauseTiming() function acts 208 // like a barrier. I.e., the ith call by a particular thread to this 209 // function will block until all threads have made their ith call. 210 // The timer will stop when the last thread has called this function. 211 // 212 // NOTE: PauseTiming()/ResumeTiming() are relatively 213 // heavyweight, and so their use should generally be avoided 214 // within each benchmark iteration, if possible. 215 void PauseTiming(); 216 217 // REQUIRES: timer is not running 218 // Start the benchmark timer. The timer is NOT running on entrance to the 219 // benchmark function. It begins running after the first call to KeepRunning() 220 // 221 // For threaded benchmarks the ResumeTiming() function acts 222 // like a barrier. I.e., the ith call by a particular thread to this 223 // function will block until all threads have made their ith call. 224 // The timer will start when the last thread has called this function. 225 // 226 // NOTE: PauseTiming()/ResumeTiming() are relatively 227 // heavyweight, and so their use should generally be avoided 228 // within each benchmark iteration, if possible. 229 void ResumeTiming(); 230 231 // If a particular benchmark is I/O bound, or if for some reason CPU 232 // timings are not representative, call this method from within the 233 // benchmark routine. If called, the elapsed time will be used to 234 // control how many iterations are run, and in the printing of 235 // items/second or MB/seconds values. If not called, the cpu time 236 // used by the benchmark will be used. 237 void UseRealTime(); 238 239 // Set the number of bytes processed by the current benchmark 240 // execution. This routine is typically called once at the end of a 241 // throughput oriented benchmark. If this routine is called with a 242 // value > 0, the report is printed in MB/sec instead of nanoseconds 243 // per iteration. 244 // 245 // REQUIRES: a benchmark has exited its KeepRunning loop. 246 BENCHMARK_ALWAYS_INLINE 247 void SetBytesProcessed(size_t bytes) { 248 bytes_processed_ = bytes; 249 } 250 251 BENCHMARK_ALWAYS_INLINE 252 size_t bytes_processed() const { 253 return bytes_processed_; 254 } 255 256 // If this routine is called with items > 0, then an items/s 257 // label is printed on the benchmark report line for the currently 258 // executing benchmark. It is typically called at the end of a processing 259 // benchmark where a processing items/second output is desired. 260 // 261 // REQUIRES: a benchmark has exited its KeepRunning loop. 262 BENCHMARK_ALWAYS_INLINE 263 void SetItemsProcessed(size_t items) { 264 items_processed_ = items; 265 } 266 267 BENCHMARK_ALWAYS_INLINE 268 size_t items_processed() const { 269 return items_processed_; 270 } 271 272 // If this routine is called, the specified label is printed at the 273 // end of the benchmark report line for the currently executing 274 // benchmark. Example: 275 // static void BM_Compress(int iters) { 276 // ... 277 // double compress = input_size / output_size; 278 // benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression)); 279 // } 280 // Produces output that looks like: 281 // BM_Compress 50 50 14115038 compress:27.3% 282 // 283 // REQUIRES: a benchmark has exited its KeepRunning loop. 284 void SetLabel(const char* label); 285 286 // Allow the use of std::string without actually including <string>. 287 // This function does not participate in overload resolution unless StringType 288 // has the nested typename `basic_string`. This typename should be provided 289 // as an injected class name in the case of std::string. 290 template <class StringType> 291 void SetLabel(StringType const & str, 292 typename internal::EnableIfString<StringType>::type = 1) { 293 this->SetLabel(str.c_str()); 294 } 295 296 // Range arguments for this run. CHECKs if the argument has been set. 297 BENCHMARK_ALWAYS_INLINE 298 int range_x() const { 299 assert(has_range_x_); 300 ((void)has_range_x_); // Prevent unused warning. 301 return range_x_; 302 } 303 304 BENCHMARK_ALWAYS_INLINE 305 int range_y() const { 306 assert(has_range_y_); 307 ((void)has_range_y_); // Prevent unused warning. 308 return range_y_; 309 } 310 311 BENCHMARK_ALWAYS_INLINE 312 size_t iterations() const { return total_iterations_; } 313 314private: 315 bool started_; 316 size_t total_iterations_; 317 318 bool has_range_x_; 319 int range_x_; 320 321 bool has_range_y_; 322 int range_y_; 323 324 size_t bytes_processed_; 325 size_t items_processed_; 326 327public: 328 const int thread_index; 329 const size_t max_iterations; 330 331private: 332 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State); 333}; 334 335namespace internal { 336 337typedef void(Function)(State&); 338 339// ------------------------------------------------------ 340// Benchmark registration object. The BENCHMARK() macro expands 341// into an internal::Benchmark* object. Various methods can 342// be called on this object to change the properties of the benchmark. 343// Each method returns "this" so that multiple method calls can 344// chained into one expression. 345class Benchmark { 346 public: 347 Benchmark(const char* name, Function* f); 348 349 ~Benchmark(); 350 351 // Note: the following methods all return "this" so that multiple 352 // method calls can be chained together in one expression. 353 354 // Run this benchmark once with "x" as the extra argument passed 355 // to the function. 356 // REQUIRES: The function passed to the constructor must accept an arg1. 357 Benchmark* Arg(int x); 358 359 // Run this benchmark once for a number of values picked from the 360 // range [start..limit]. (start and limit are always picked.) 361 // REQUIRES: The function passed to the constructor must accept an arg1. 362 Benchmark* Range(int start, int limit); 363 364 // Run this benchmark once for every value in the range [start..limit] 365 // REQUIRES: The function passed to the constructor must accept an arg1. 366 Benchmark* DenseRange(int start, int limit); 367 368 // Run this benchmark once with "x,y" as the extra arguments passed 369 // to the function. 370 // REQUIRES: The function passed to the constructor must accept arg1,arg2. 371 Benchmark* ArgPair(int x, int y); 372 373 // Pick a set of values A from the range [lo1..hi1] and a set 374 // of values B from the range [lo2..hi2]. Run the benchmark for 375 // every pair of values in the cartesian product of A and B 376 // (i.e., for all combinations of the values in A and B). 377 // REQUIRES: The function passed to the constructor must accept arg1,arg2. 378 Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2); 379 380 // Pass this benchmark object to *func, which can customize 381 // the benchmark by calling various methods like Arg, ArgPair, 382 // Threads, etc. 383 Benchmark* Apply(void (*func)(Benchmark* benchmark)); 384 385 // Support for running multiple copies of the same benchmark concurrently 386 // in multiple threads. This may be useful when measuring the scaling 387 // of some piece of code. 388 389 // Run one instance of this benchmark concurrently in t threads. 390 Benchmark* Threads(int t); 391 392 // Pick a set of values T from [min_threads,max_threads]. 393 // min_threads and max_threads are always included in T. Run this 394 // benchmark once for each value in T. The benchmark run for a 395 // particular value t consists of t threads running the benchmark 396 // function concurrently. For example, consider: 397 // BENCHMARK(Foo)->ThreadRange(1,16); 398 // This will run the following benchmarks: 399 // Foo in 1 thread 400 // Foo in 2 threads 401 // Foo in 4 threads 402 // Foo in 8 threads 403 // Foo in 16 threads 404 Benchmark* ThreadRange(int min_threads, int max_threads); 405 406 // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) 407 Benchmark* ThreadPerCpu(); 408 409 // Used inside the benchmark implementation 410 struct Instance; 411 412 private: 413 BenchmarkImp* imp_; 414 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark); 415}; 416 417} // end namespace internal 418} // end namespace benchmark 419 420 421// ------------------------------------------------------ 422// Macro to register benchmarks 423 424// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1 425// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be 426// empty. If X is empty the expression becomes (+1 == +0). 427#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) 428#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ 429#else 430#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ 431#endif 432 433// Helpers for generating unique variable names 434#define BENCHMARK_PRIVATE_NAME(n) \ 435 BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) 436#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) 437#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c 438 439#define BENCHMARK_PRIVATE_DECLARE(n) \ 440 static ::benchmark::internal::Benchmark* \ 441 BENCHMARK_PRIVATE_NAME(n) BENCHMARK_UNUSED 442 443#define BENCHMARK(n) \ 444 BENCHMARK_PRIVATE_DECLARE(n) = (new ::benchmark::internal::Benchmark(#n, n)) 445 446// Old-style macros 447#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) 448#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2)) 449#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) 450#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ 451 BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2)) 452 453// This will register a benchmark for a templatized function. For example: 454// 455// template<int arg> 456// void BM_Foo(int iters); 457// 458// BENCHMARK_TEMPLATE(BM_Foo, 1); 459// 460// will register BM_Foo<1> as a benchmark. 461#define BENCHMARK_TEMPLATE1(n, a) \ 462 BENCHMARK_PRIVATE_DECLARE(n) = \ 463 (new ::benchmark::internal::Benchmark(#n "<" #a ">", n<a>)) 464 465#define BENCHMARK_TEMPLATE2(n, a, b) \ 466 BENCHMARK_PRIVATE_DECLARE(n) = \ 467 (new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>)) 468 469#if __cplusplus >= 201103L 470#define BENCHMARK_TEMPLATE(n, ...) \ 471 BENCHMARK_PRIVATE_DECLARE(n) = \ 472 (new ::benchmark::internal::Benchmark( \ 473 #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)) 474#else 475#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) 476#endif 477 478// Helper macro to create a main routine in a test that runs the benchmarks 479#define BENCHMARK_MAIN() \ 480 int main(int argc, const char** argv) { \ 481 ::benchmark::Initialize(&argc, argv); \ 482 ::benchmark::RunSpecifiedBenchmarks(); \ 483 } 484 485#endif // BENCHMARK_BENCHMARK_API_H_ 486