1// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Source project : https://github.com/ismaelJimenez/cpp.leastsq
16// Adapted to be used with google benchmark
17
18#include "benchmark/benchmark_api.h"
19
20#include <algorithm>
21#include <cmath>
22#include "check.h"
23#include "complexity.h"
24#include "stat.h"
25
26namespace benchmark {
27
28// Internal function to calculate the different scalability forms
29BigOFunc* FittingCurve(BigO complexity) {
30  switch (complexity) {
31    case oN:
32      return [](int n) -> double { return n; };
33    case oNSquared:
34      return [](int n) -> double { return std::pow(n, 2); };
35    case oNCubed:
36      return [](int n) -> double { return std::pow(n, 3); };
37    case oLogN:
38      return [](int n) { return std::log2(n); };
39    case oNLogN:
40      return [](int n) { return n * std::log2(n); };
41    case o1:
42    default:
43      return [](int) { return 1.0; };
44  }
45}
46
47// Function to return an string for the calculated complexity
48std::string GetBigOString(BigO complexity) {
49  switch (complexity) {
50    case oN:
51      return "N";
52    case oNSquared:
53      return "N^2";
54    case oNCubed:
55      return "N^3";
56    case oLogN:
57      return "lgN";
58    case oNLogN:
59      return "NlgN";
60    case o1:
61      return "(1)";
62    default:
63      return "f(N)";
64  }
65}
66
67// Find the coefficient for the high-order term in the running time, by
68// minimizing the sum of squares of relative error, for the fitting curve
69// given by the lambda expresion.
70//   - n             : Vector containing the size of the benchmark tests.
71//   - time          : Vector containing the times for the benchmark tests.
72//   - fitting_curve : lambda expresion (e.g. [](int n) {return n; };).
73
74// For a deeper explanation on the algorithm logic, look the README file at
75// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
76
77LeastSq MinimalLeastSq(const std::vector<int>& n,
78                       const std::vector<double>& time,
79                       BigOFunc* fitting_curve) {
80  double sigma_gn = 0.0;
81  double sigma_gn_squared = 0.0;
82  double sigma_time = 0.0;
83  double sigma_time_gn = 0.0;
84
85  // Calculate least square fitting parameter
86  for (size_t i = 0; i < n.size(); ++i) {
87    double gn_i = fitting_curve(n[i]);
88    sigma_gn += gn_i;
89    sigma_gn_squared += gn_i * gn_i;
90    sigma_time += time[i];
91    sigma_time_gn += time[i] * gn_i;
92  }
93
94  LeastSq result;
95  result.complexity = oLambda;
96
97  // Calculate complexity.
98  result.coef = sigma_time_gn / sigma_gn_squared;
99
100  // Calculate RMS
101  double rms = 0.0;
102  for (size_t i = 0; i < n.size(); ++i) {
103    double fit = result.coef * fitting_curve(n[i]);
104    rms += pow((time[i] - fit), 2);
105  }
106
107  // Normalized RMS by the mean of the observed values
108  double mean = sigma_time / n.size();
109  result.rms = sqrt(rms / n.size()) / mean;
110
111  return result;
112}
113
114// Find the coefficient for the high-order term in the running time, by
115// minimizing the sum of squares of relative error.
116//   - n          : Vector containing the size of the benchmark tests.
117//   - time       : Vector containing the times for the benchmark tests.
118//   - complexity : If different than oAuto, the fitting curve will stick to
119//                  this one. If it is oAuto, it will be calculated the best
120//                  fitting curve.
121LeastSq MinimalLeastSq(const std::vector<int>& n,
122                       const std::vector<double>& time, const BigO complexity) {
123  CHECK_EQ(n.size(), time.size());
124  CHECK_GE(n.size(), 2);  // Do not compute fitting curve is less than two
125                          // benchmark runs are given
126  CHECK_NE(complexity, oNone);
127
128  LeastSq best_fit;
129
130  if (complexity == oAuto) {
131    std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
132
133    // Take o1 as default best fitting curve
134    best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
135    best_fit.complexity = o1;
136
137    // Compute all possible fitting curves and stick to the best one
138    for (const auto& fit : fit_curves) {
139      LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
140      if (current_fit.rms < best_fit.rms) {
141        best_fit = current_fit;
142        best_fit.complexity = fit;
143      }
144    }
145  } else {
146    best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
147    best_fit.complexity = complexity;
148  }
149
150  return best_fit;
151}
152
// Aggregate repeated runs of one benchmark into two synthetic reports:
// "<name>_mean" and "<name>_stddev". Runs with error_occurred set are
// excluded from the statistics; if fewer than two usable runs remain, no
// aggregate reports are produced and an empty vector is returned.
std::vector<BenchmarkReporter::Run> ComputeStats(
    const std::vector<BenchmarkReporter::Run>& reports) {
  typedef BenchmarkReporter::Run Run;
  std::vector<Run> results;

  auto error_count =
      std::count_if(reports.begin(), reports.end(),
                    [](Run const& run) { return run.error_occurred; });

  if (reports.size() - error_count < 2) {
    // We don't report aggregated data if there was a single run.
    return results;
  }
  // Accumulators.
  Stat1_d real_accumulated_time_stat;
  Stat1_d cpu_accumulated_time_stat;
  Stat1_d bytes_per_second_stat;
  Stat1_d items_per_second_stat;
  // All repetitions should be run with the same number of iterations so we
  // can take this information from the first benchmark.
  int64_t const run_iterations = reports.front().iterations;

  // Populate the accumulators. Times are converted to per-iteration values
  // before accumulation; each sample is weighted by its iteration count.
  for (Run const& run : reports) {
    CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
    CHECK_EQ(run_iterations, run.iterations);
    if (run.error_occurred) continue;  // errored runs contribute nothing
    real_accumulated_time_stat +=
        Stat1_d(run.real_accumulated_time / run.iterations, run.iterations);
    cpu_accumulated_time_stat +=
        Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
    items_per_second_stat += Stat1_d(run.items_per_second, run.iterations);
    bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations);
  }

  // Get the data from the accumulator to BenchmarkReporter::Run's.
  // The mean per-iteration time is scaled back up by run_iterations so the
  // synthetic report has the same units as a real run's accumulated time.
  Run mean_data;
  mean_data.benchmark_name = reports[0].benchmark_name + "_mean";
  mean_data.iterations = run_iterations;
  mean_data.real_accumulated_time =
      real_accumulated_time_stat.Mean() * run_iterations;
  mean_data.cpu_accumulated_time =
      cpu_accumulated_time_stat.Mean() * run_iterations;
  mean_data.bytes_per_second = bytes_per_second_stat.Mean();
  mean_data.items_per_second = items_per_second_stat.Mean();
  mean_data.time_unit = reports[0].time_unit;

  // Only add label to mean/stddev if it is same for all runs
  mean_data.report_label = reports[0].report_label;
  for (std::size_t i = 1; i < reports.size(); i++) {
    if (reports[i].report_label != reports[0].report_label) {
      mean_data.report_label = "";
      break;
    }
  }

  // Note: stddev times are left per-iteration (not rescaled like the mean),
  // and iterations is zeroed so reporters don't divide by it.
  Run stddev_data;
  stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev";
  stddev_data.report_label = mean_data.report_label;
  stddev_data.iterations = 0;
  stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev();
  stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev();
  stddev_data.bytes_per_second = bytes_per_second_stat.StdDev();
  stddev_data.items_per_second = items_per_second_stat.StdDev();
  stddev_data.time_unit = reports[0].time_unit;

  results.push_back(mean_data);
  results.push_back(stddev_data);
  return results;
}
223
// Fit the family of runs (one per complexity_n value) to a Big-O curve and
// produce two synthetic reports: "<name>_BigO" (the fitted coefficients) and
// "<name>_RMS" (the normalized goodness-of-fit). Returns an empty vector if
// fewer than two runs are given.
std::vector<BenchmarkReporter::Run> ComputeBigO(
    const std::vector<BenchmarkReporter::Run>& reports) {
  typedef BenchmarkReporter::Run Run;
  std::vector<Run> results;

  if (reports.size() < 2) return results;

  // Accumulators: problem sizes and per-iteration times for each run.
  std::vector<int> n;
  std::vector<double> real_time;
  std::vector<double> cpu_time;

  // Populate the accumulators.
  for (const Run& run : reports) {
    CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
    n.push_back(run.complexity_n);
    real_time.push_back(run.real_accumulated_time / run.iterations);
    cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
  }

  LeastSq result_cpu;
  LeastSq result_real;

  if (reports[0].complexity == oLambda) {
    result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
    result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
  } else {
    result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
    // Real time is deliberately fitted against the complexity chosen for CPU
    // time, so both reported lines share a single Big-O class even under
    // oAuto.
    result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
  }
  // Strip the "/<arg>" suffix so the aggregate reports carry the base name.
  std::string benchmark_name =
      reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/'));

  // Get the data from the accumulator to BenchmarkReporter::Run's.
  Run big_o;
  big_o.benchmark_name = benchmark_name + "_BigO";
  big_o.iterations = 0;  // zeroed so reporters don't divide by it
  big_o.real_accumulated_time = result_real.coef;
  big_o.cpu_accumulated_time = result_cpu.coef;
  big_o.report_big_o = true;
  big_o.complexity = result_cpu.complexity;

  double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);

  // Only add label to mean/stddev if it is same for all runs
  Run rms;
  big_o.report_label = reports[0].report_label;
  rms.benchmark_name = benchmark_name + "_RMS";
  rms.report_label = big_o.report_label;
  rms.iterations = 0;
  // RMS is dimensionless; dividing by the time-unit multiplier presumably
  // cancels the unit scaling reporters apply — confirm against the reporter
  // code.
  rms.real_accumulated_time = result_real.rms / multiplier;
  rms.cpu_accumulated_time = result_cpu.rms / multiplier;
  rms.report_rms = true;
  rms.complexity = result_cpu.complexity;

  results.push_back(big_o);
  results.push_back(rms);
  return results;
}
283
284}  // end namespace benchmark
285