1/* 2 * Copyright 2016 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "GpuTimer.h" 9#include "GrContextFactory.h" 10#include "SkCanvas.h" 11#include "SkCommonFlagsPathRenderer.h" 12#include "SkOSFile.h" 13#include "SkOSPath.h" 14#include "SkPerlinNoiseShader.h" 15#include "SkPicture.h" 16#include "SkPictureRecorder.h" 17#include "SkStream.h" 18#include "SkSurface.h" 19#include "SkSurfaceProps.h" 20#include "picture_utils.h" 21#include "sk_tool_utils.h" 22#include "flags/SkCommandLineFlags.h" 23#include "flags/SkCommonFlagsConfig.h" 24#include <stdlib.h> 25#include <algorithm> 26#include <array> 27#include <chrono> 28#include <cmath> 29#include <vector> 30 31/** 32 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single 33 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly. 34 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable. 35 * 36 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched 37 * render target and syncs the GPU after each draw. 38 * 39 * Currently, only GPU configs are supported. 40 */ 41 42DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); 43DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); 44DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); 45DEFINE_bool(fps, false, "use fps instead of ms"); 46DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run"); 47DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"); 48DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); 49DEFINE_bool(suppressHeader, false, "don't print a header row before the results"); 50DEFINE_pathrenderer_flag; 51 52static const char* header = 53" accum median max min stddev samples sample_ms clock metric config bench"; 54 55static const char* resultFormat = 56"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; 57 58struct Sample { 59 using duration = std::chrono::nanoseconds; 60 61 Sample() : fFrames(0), fDuration(0) {} 62 double seconds() const { return std::chrono::duration<double>(fDuration).count(); } 63 double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); } 64 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; } 65 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } 66 67 int fFrames; 68 duration fDuration; 69}; 70 71class GpuSync { 72public: 73 GpuSync(const sk_gpu_test::FenceSync* fenceSync); 74 ~GpuSync(); 75 76 void syncToPreviousFrame(); 77 78private: 79 void updateFence(); 80 81 const sk_gpu_test::FenceSync* const fFenceSync; 82 sk_gpu_test::PlatformFence fFence; 83}; 84 85enum class ExitErr { 86 kOk = 0, 87 kUsage = 64, 88 kData = 65, 89 kUnavailable = 69, 90 kIO = 74, 91 kSoftware = 70 92}; 93 94static void draw_skp_and_flush(SkCanvas*, const SkPicture*); 95static sk_sp<SkPicture> create_warmup_skp(); 96static bool mkdir_p(const SkString& name); 97static SkString join(const SkCommandLineFlags::StringArray&); 98static void exitf(ExitErr, const char* format, ...); 99 100static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, 101 const SkPicture* skp, std::vector<Sample>* samples) { 102 using clock = std::chrono::high_resolution_clock; 103 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); 104 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); 105 106 draw_skp_and_flush(canvas, skp); 107 GpuSync gpuSync(fenceSync); 108 109 draw_skp_and_flush(canvas, skp); 110 gpuSync.syncToPreviousFrame(); 111 112 clock::time_point now = clock::now(); 113 const clock::time_point endTime = now + benchDuration; 114 115 do { 116 clock::time_point sampleStart = now; 117 samples->emplace_back(); 118 Sample& sample = samples->back(); 119 120 do { 121 draw_skp_and_flush(canvas, skp); 122 gpuSync.syncToPreviousFrame(); 123 124 now = clock::now(); 125 sample.fDuration = now - sampleStart; 126 ++sample.fFrames; 127 } while (sample.fDuration < sampleDuration); 128 } while (now < endTime || 0 == samples->size() % 2); 129} 130 131static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, 132 const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, 133 const SkPicture* skp, std::vector<Sample>* samples) { 134 using sk_gpu_test::PlatformTimerQuery; 135 using clock = std::chrono::steady_clock; 136 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); 137 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); 138 139 if (!gpuTimer->disjointSupport()) { 140 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " 141 "results may be unreliable\n"); 142 } 143 144 draw_skp_and_flush(canvas, skp); 145 GpuSync gpuSync(fenceSync); 146 147 gpuTimer->queueStart(); 148 draw_skp_and_flush(canvas, skp); 149 PlatformTimerQuery previousTime = gpuTimer->queueStop(); 150 gpuSync.syncToPreviousFrame(); 151 152 clock::time_point now = clock::now(); 153 const clock::time_point endTime = now + benchDuration; 154 155 do { 156 const clock::time_point sampleEndTime = now + sampleDuration; 157 samples->emplace_back(); 158 Sample& sample = samples->back(); 159 160 do { 161 gpuTimer->queueStart(); 162 draw_skp_and_flush(canvas, skp); 163 PlatformTimerQuery time = gpuTimer->queueStop(); 164 gpuSync.syncToPreviousFrame(); 165 166 switch (gpuTimer->checkQueryStatus(previousTime)) { 167 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus; 168 case QueryStatus::kInvalid: 169 exitf(ExitErr::kUnavailable, "GPU timer failed"); 170 case QueryStatus::kPending: 171 exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync"); 172 case QueryStatus::kDisjoint: 173 if (FLAGS_verbosity >= 4) { 174 fprintf(stderr, "discarding timer query due to disjoint operations.\n"); 175 } 176 break; 177 case QueryStatus::kAccurate: 178 sample.fDuration += gpuTimer->getTimeElapsed(previousTime); 179 ++sample.fFrames; 180 break; 181 } 182 gpuTimer->deleteQuery(previousTime); 183 previousTime = time; 184 now = clock::now(); 185 } while (now < sampleEndTime || 0 == sample.fFrames); 186 } while (now < endTime || 0 == samples->size() % 2); 187 188 gpuTimer->deleteQuery(previousTime); 189} 190 191void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) { 192 if (0 == (samples.size() % 2)) { 193 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples"); 194 } 195 196 Sample accum = Sample(); 197 std::vector<double> values; 198 values.reserve(samples.size()); 199 for (const Sample& sample : samples) { 200 accum.fFrames += sample.fFrames; 201 accum.fDuration += sample.fDuration; 202 values.push_back(sample.value()); 203 } 204 std::sort(values.begin(), values.end()); 205 206 const double accumValue = accum.value(); 207 double variance = 0; 208 for (double value : values) { 209 const double delta = value - accumValue; 210 variance += delta * delta; 211 } 212 variance /= values.size(); 213 // Technically, this is the relative standard deviation. 214 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; 215 216 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(), 217 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(), 218 config, bench); 219 printf("\n"); 220 fflush(stdout); 221} 222 223int main(int argc, char** argv) { 224 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " 225 "You usually don't want to use this program directly."); 226 SkCommandLineFlags::Parse(argc, argv); 227 228 if (!FLAGS_suppressHeader) { 229 printf("%s\n", header); 230 } 231 if (FLAGS_duration <= 0) { 232 exit(0); // This can be used to print the header and quit. 233 } 234 235 // Parse the config. 236 const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning. 237 SkCommandLineConfigArray configs; 238 ParseConfigs(FLAGS_config, &configs); 239 if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) { 240 exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config", 241 join(FLAGS_config).c_str()); 242 } 243 244 // Parse the skp. 245 if (FLAGS_skp.count() != 1) { 246 exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'", 247 join(FLAGS_skp).c_str()); 248 } 249 sk_sp<SkPicture> skp; 250 SkString skpname; 251 if (0 == strcmp(FLAGS_skp[0], "warmup")) { 252 skp = create_warmup_skp(); 253 skpname = "warmup"; 254 } else { 255 const char* skpfile = FLAGS_skp[0]; 256 std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile)); 257 if (!skpstream) { 258 exitf(ExitErr::kIO, "failed to open skp file %s", skpfile); 259 } 260 skp = SkPicture::MakeFromStream(skpstream.get()); 261 if (!skp) { 262 exitf(ExitErr::kData, "failed to parse skp file %s", skpfile); 263 } 264 skpname = SkOSPath::Basename(skpfile); 265 } 266 int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048), 267 height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048); 268 if (FLAGS_verbosity >= 3 && 269 (width != skp->cullRect().width() || height != skp->cullRect().height())) { 270 fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n", 271 skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()), 272 SkScalarCeilToInt(skp->cullRect().height()), width, height); 273 } 274 275 // Create a context. 276 GrContextOptions ctxOptions; 277 ctxOptions.fGpuPathRenderers = CollectGpuPathRenderersFromFlags(); 278 sk_gpu_test::GrContextFactory factory(ctxOptions); 279 sk_gpu_test::ContextInfo ctxInfo = 280 factory.getContextInfo(config->getContextType(), config->getContextOverrides()); 281 GrContext* ctx = ctxInfo.grContext(); 282 if (!ctx) { 283 exitf(ExitErr::kUnavailable, "failed to create context for config %s", 284 config->getTag().c_str()); 285 } 286 if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) { 287 exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)", 288 width, height, ctx->caps()->maxRenderTargetSize()); 289 } 290 if (ctx->caps()->maxSampleCount() < config->getSamples()) { 291 exitf(ExitErr::kUnavailable, "sample count %i not supported by platform (max: %i)", 292 config->getSamples(), ctx->caps()->maxSampleCount()); 293 } 294 sk_gpu_test::TestContext* testCtx = ctxInfo.testContext(); 295 if (!testCtx) { 296 exitf(ExitErr::kSoftware, "testContext is null"); 297 } 298 if (!testCtx->fenceSyncSupport()) { 299 exitf(ExitErr::kUnavailable, "GPU does not support fence sync"); 300 } 301 302 // Create a render target. 303 SkImageInfo info = SkImageInfo::Make(width, height, config->getColorType(), 304 kPremul_SkAlphaType, sk_ref_sp(config->getColorSpace())); 305 uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0; 306 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); 307 sk_sp<SkSurface> surface = 308 SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props); 309 if (!surface) { 310 exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s", 311 width, height, config->getTag().c_str()); 312 } 313 314 // Run the benchmark. 315 std::vector<Sample> samples; 316 if (FLAGS_sampleMs > 0) { 317 // +1 because we might take one more sample in order to have an odd number. 318 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs); 319 } else { 320 samples.reserve(2 * FLAGS_duration); 321 } 322 SkCanvas* canvas = surface->getCanvas(); 323 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); 324 if (!FLAGS_gpuClock) { 325 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); 326 } else { 327 if (!testCtx->gpuTimingSupport()) { 328 exitf(ExitErr::kUnavailable, "GPU does not support timing"); 329 } 330 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(), 331 &samples); 332 } 333 print_result(samples, config->getTag().c_str(), skpname.c_str()); 334 335 // Save a proof (if one was requested). 336 if (!FLAGS_png.isEmpty()) { 337 SkBitmap bmp; 338 bmp.setInfo(info); 339 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { 340 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png"); 341 } 342 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), 343 &basename = SkOSPath::Basename(FLAGS_png[0]); 344 if (!mkdir_p(dirname)) { 345 exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str()); 346 } 347 if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) { 348 exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]); 349 } 350 } 351 352 exit(0); 353} 354 355static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) { 356 canvas->drawPicture(skp); 357 canvas->flush(); 358} 359 360static sk_sp<SkPicture> create_warmup_skp() { 361 static constexpr SkRect bounds{0, 0, 500, 500}; 362 SkPictureRecorder recorder; 363 SkCanvas* recording = recorder.beginRecording(bounds); 364 365 recording->clear(SK_ColorWHITE); 366 367 SkPaint stroke; 368 stroke.setStyle(SkPaint::kStroke_Style); 369 stroke.setStrokeWidth(2); 370 371 // Use a big path to (theoretically) warmup the CPU. 372 SkPath bigPath; 373 sk_tool_utils::make_big_path(bigPath); 374 recording->drawPath(bigPath, stroke); 375 376 // Use a perlin shader to warmup the GPU. 377 SkPaint perlin; 378 perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr)); 379 recording->drawRect(bounds, perlin); 380 381 return recorder.finishRecordingAsPicture(); 382} 383 384bool mkdir_p(const SkString& dirname) { 385 if (dirname.isEmpty()) { 386 return true; 387 } 388 return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str()); 389} 390 391static SkString join(const SkCommandLineFlags::StringArray& stringArray) { 392 SkString joined; 393 for (int i = 0; i < stringArray.count(); ++i) { 394 joined.appendf(i ? " %s" : "%s", stringArray[i]); 395 } 396 return joined; 397} 398 399static void exitf(ExitErr err, const char* format, ...) { 400 fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: "); 401 va_list args; 402 va_start(args, format); 403 vfprintf(stderr, format, args); 404 va_end(args); 405 fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n"); 406 exit((int)err); 407} 408 409GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync) 410 : fFenceSync(fenceSync) { 411 this->updateFence(); 412} 413 414GpuSync::~GpuSync() { 415 fFenceSync->deleteFence(fFence); 416} 417 418void GpuSync::syncToPreviousFrame() { 419 if (sk_gpu_test::kInvalidFence == fFence) { 420 exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); 421 } 422 if (!fFenceSync->waitFence(fFence)) { 423 exitf(ExitErr::kUnavailable, "failed to wait for fence"); 424 } 425 fFenceSync->deleteFence(fFence); 426 this->updateFence(); 427} 428 429void GpuSync::updateFence() { 430 fFence = fFenceSync->insertFence(); 431 if (sk_gpu_test::kInvalidFence == fFence) { 432 exitf(ExitErr::kUnavailable, "failed to insert fence"); 433 } 434} 435