complexity.cc revision f3b3dd99be97c330f9f4a7c0a2e5dbc3f767c558
1// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Source project : https://github.com/ismaelJimenez/cpp.leastsq 16// Adapted to be used with google benchmark 17 18#include "benchmark/benchmark_api.h" 19 20#include <algorithm> 21#include <cmath> 22#include "check.h" 23#include "complexity.h" 24#include "stat.h" 25 26namespace benchmark { 27 28// Internal function to calculate the different scalability forms 29BigOFunc* FittingCurve(BigO complexity) { 30 switch (complexity) { 31 case oN: 32 return [](int n) -> double { return n; }; 33 case oNSquared: 34 return [](int n) -> double { return std::pow(n, 2); }; 35 case oNCubed: 36 return [](int n) -> double { return std::pow(n, 3); }; 37 case oLogN: 38 return [](int n) { return log2(n); }; 39 case oNLogN: 40 return [](int n) { return n * log2(n); }; 41 case o1: 42 default: 43 return [](int) { return 1.0; }; 44 } 45} 46 47// Function to return an string for the calculated complexity 48std::string GetBigOString(BigO complexity) { 49 switch (complexity) { 50 case oN: 51 return "N"; 52 case oNSquared: 53 return "N^2"; 54 case oNCubed: 55 return "N^3"; 56 case oLogN: 57 return "lgN"; 58 case oNLogN: 59 return "NlgN"; 60 case o1: 61 return "(1)"; 62 default: 63 return "f(N)"; 64 } 65} 66 67// Find the coefficient for the high-order term in the running time, by 68// minimizing the sum of squares of relative error, for the fitting curve 69// given by the lambda expresion. 70// - n : Vector containing the size of the benchmark tests. 71// - time : Vector containing the times for the benchmark tests. 72// - fitting_curve : lambda expresion (e.g. [](int n) {return n; };). 73 74// For a deeper explanation on the algorithm logic, look the README file at 75// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit 76 77LeastSq MinimalLeastSq(const std::vector<int>& n, 78 const std::vector<double>& time, 79 BigOFunc* fitting_curve) { 80 double sigma_gn = 0.0; 81 double sigma_gn_squared = 0.0; 82 double sigma_time = 0.0; 83 double sigma_time_gn = 0.0; 84 85 // Calculate least square fitting parameter 86 for (size_t i = 0; i < n.size(); ++i) { 87 double gn_i = fitting_curve(n[i]); 88 sigma_gn += gn_i; 89 sigma_gn_squared += gn_i * gn_i; 90 sigma_time += time[i]; 91 sigma_time_gn += time[i] * gn_i; 92 } 93 94 LeastSq result; 95 result.complexity = oLambda; 96 97 // Calculate complexity. 98 result.coef = sigma_time_gn / sigma_gn_squared; 99 100 // Calculate RMS 101 double rms = 0.0; 102 for (size_t i = 0; i < n.size(); ++i) { 103 double fit = result.coef * fitting_curve(n[i]); 104 rms += pow((time[i] - fit), 2); 105 } 106 107 // Normalized RMS by the mean of the observed values 108 double mean = sigma_time / n.size(); 109 result.rms = sqrt(rms / n.size()) / mean; 110 111 return result; 112} 113 114// Find the coefficient for the high-order term in the running time, by 115// minimizing the sum of squares of relative error. 116// - n : Vector containing the size of the benchmark tests. 117// - time : Vector containing the times for the benchmark tests. 118// - complexity : If different than oAuto, the fitting curve will stick to 119// this one. If it is oAuto, it will be calculated the best 120// fitting curve. 121LeastSq MinimalLeastSq(const std::vector<int>& n, 122 const std::vector<double>& time, const BigO complexity) { 123 CHECK_EQ(n.size(), time.size()); 124 CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two 125 // benchmark runs are given 126 CHECK_NE(complexity, oNone); 127 128 LeastSq best_fit; 129 130 if (complexity == oAuto) { 131 std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed}; 132 133 // Take o1 as default best fitting curve 134 best_fit = MinimalLeastSq(n, time, FittingCurve(o1)); 135 best_fit.complexity = o1; 136 137 // Compute all possible fitting curves and stick to the best one 138 for (const auto& fit : fit_curves) { 139 LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit)); 140 if (current_fit.rms < best_fit.rms) { 141 best_fit = current_fit; 142 best_fit.complexity = fit; 143 } 144 } 145 } else { 146 best_fit = MinimalLeastSq(n, time, FittingCurve(complexity)); 147 best_fit.complexity = complexity; 148 } 149 150 return best_fit; 151} 152 153std::vector<BenchmarkReporter::Run> ComputeStats( 154 const std::vector<BenchmarkReporter::Run>& reports) { 155 typedef BenchmarkReporter::Run Run; 156 std::vector<Run> results; 157 158 auto error_count = 159 std::count_if(reports.begin(), reports.end(), 160 [](Run const& run) { return run.error_occurred; }); 161 162 if (reports.size() - error_count < 2) { 163 // We don't report aggregated data if there was a single run. 164 return results; 165 } 166 // Accumulators. 167 Stat1_d real_accumulated_time_stat; 168 Stat1_d cpu_accumulated_time_stat; 169 Stat1_d bytes_per_second_stat; 170 Stat1_d items_per_second_stat; 171 // All repetitions should be run with the same number of iterations so we 172 // can take this information from the first benchmark. 173 int64_t const run_iterations = reports.front().iterations; 174 // create stats for user counters 175 struct CounterStat { 176 Counter c; 177 Stat1_d s; 178 }; 179 std::map< std::string, CounterStat > counter_stats; 180 for(Run const& r : reports) { 181 for(auto const& cnt : r.counters) { 182 auto it = counter_stats.find(cnt.first); 183 if(it == counter_stats.end()) { 184 counter_stats.insert({cnt.first, {cnt.second, Stat1_d{}}}); 185 } else { 186 CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); 187 } 188 } 189 } 190 191 // Populate the accumulators. 192 for (Run const& run : reports) { 193 CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); 194 CHECK_EQ(run_iterations, run.iterations); 195 if (run.error_occurred) continue; 196 real_accumulated_time_stat += 197 Stat1_d(run.real_accumulated_time / run.iterations); 198 cpu_accumulated_time_stat += 199 Stat1_d(run.cpu_accumulated_time / run.iterations); 200 items_per_second_stat += Stat1_d(run.items_per_second); 201 bytes_per_second_stat += Stat1_d(run.bytes_per_second); 202 // user counters 203 for(auto const& cnt : run.counters) { 204 auto it = counter_stats.find(cnt.first); 205 CHECK_NE(it, counter_stats.end()); 206 it->second.s += Stat1_d(cnt.second); 207 } 208 } 209 210 // Get the data from the accumulator to BenchmarkReporter::Run's. 211 Run mean_data; 212 mean_data.benchmark_name = reports[0].benchmark_name + "_mean"; 213 mean_data.iterations = run_iterations; 214 mean_data.real_accumulated_time = 215 real_accumulated_time_stat.Mean() * run_iterations; 216 mean_data.cpu_accumulated_time = 217 cpu_accumulated_time_stat.Mean() * run_iterations; 218 mean_data.bytes_per_second = bytes_per_second_stat.Mean(); 219 mean_data.items_per_second = items_per_second_stat.Mean(); 220 mean_data.time_unit = reports[0].time_unit; 221 // user counters 222 for(auto const& kv : counter_stats) { 223 auto c = Counter(kv.second.s.Mean(), counter_stats[kv.first].c.flags); 224 mean_data.counters[kv.first] = c; 225 } 226 227 // Only add label to mean/stddev if it is same for all runs 228 mean_data.report_label = reports[0].report_label; 229 for (std::size_t i = 1; i < reports.size(); i++) { 230 if (reports[i].report_label != reports[0].report_label) { 231 mean_data.report_label = ""; 232 break; 233 } 234 } 235 236 Run stddev_data; 237 stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev"; 238 stddev_data.report_label = mean_data.report_label; 239 stddev_data.iterations = 0; 240 stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev(); 241 stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev(); 242 stddev_data.bytes_per_second = bytes_per_second_stat.StdDev(); 243 stddev_data.items_per_second = items_per_second_stat.StdDev(); 244 stddev_data.time_unit = reports[0].time_unit; 245 // user counters 246 for(auto const& kv : counter_stats) { 247 auto c = Counter(kv.second.s.StdDev(), counter_stats[kv.first].c.flags); 248 stddev_data.counters[kv.first] = c; 249 } 250 251 results.push_back(mean_data); 252 results.push_back(stddev_data); 253 return results; 254} 255 256std::vector<BenchmarkReporter::Run> ComputeBigO( 257 const std::vector<BenchmarkReporter::Run>& reports) { 258 typedef BenchmarkReporter::Run Run; 259 std::vector<Run> results; 260 261 if (reports.size() < 2) return results; 262 263 // Accumulators. 264 std::vector<int> n; 265 std::vector<double> real_time; 266 std::vector<double> cpu_time; 267 268 // Populate the accumulators. 269 for (const Run& run : reports) { 270 CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; 271 n.push_back(run.complexity_n); 272 real_time.push_back(run.real_accumulated_time / run.iterations); 273 cpu_time.push_back(run.cpu_accumulated_time / run.iterations); 274 } 275 276 LeastSq result_cpu; 277 LeastSq result_real; 278 279 if (reports[0].complexity == oLambda) { 280 result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); 281 result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); 282 } else { 283 result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity); 284 result_real = MinimalLeastSq(n, real_time, result_cpu.complexity); 285 } 286 std::string benchmark_name = 287 reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/')); 288 289 // Get the data from the accumulator to BenchmarkReporter::Run's. 290 Run big_o; 291 big_o.benchmark_name = benchmark_name + "_BigO"; 292 big_o.iterations = 0; 293 big_o.real_accumulated_time = result_real.coef; 294 big_o.cpu_accumulated_time = result_cpu.coef; 295 big_o.report_big_o = true; 296 big_o.complexity = result_cpu.complexity; 297 298 // All the time results are reported after being multiplied by the 299 // time unit multiplier. But since RMS is a relative quantity it 300 // should not be multiplied at all. So, here, we _divide_ it by the 301 // multiplier so that when it is multiplied later the result is the 302 // correct one. 303 double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); 304 305 // Only add label to mean/stddev if it is same for all runs 306 Run rms; 307 big_o.report_label = reports[0].report_label; 308 rms.benchmark_name = benchmark_name + "_RMS"; 309 rms.report_label = big_o.report_label; 310 rms.iterations = 0; 311 rms.real_accumulated_time = result_real.rms / multiplier; 312 rms.cpu_accumulated_time = result_cpu.rms / multiplier; 313 rms.report_rms = true; 314 rms.complexity = result_cpu.complexity; 315 // don't forget to keep the time unit, or we won't be able to 316 // recover the correct value. 317 rms.time_unit = reports[0].time_unit; 318 319 results.push_back(big_o); 320 results.push_back(rms); 321 return results; 322} 323 324} // end namespace benchmark 325