1/* 2 * Copyright 2016 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "GpuTimer.h" 9#include "GrContextFactory.h" 10#include "SkGr.h" 11 12#include "SkCanvas.h" 13#include "SkCommonFlags.h" 14#include "SkCommonFlagsGpu.h" 15#include "SkOSFile.h" 16#include "SkOSPath.h" 17#include "SkPerlinNoiseShader.h" 18#include "SkPicture.h" 19#include "SkPictureRecorder.h" 20#include "SkStream.h" 21#include "SkSurface.h" 22#include "SkSurfaceProps.h" 23#include "picture_utils.h" 24#include "sk_tool_utils.h" 25#include "flags/SkCommandLineFlags.h" 26#include "flags/SkCommonFlagsConfig.h" 27#include <stdlib.h> 28#include <algorithm> 29#include <array> 30#include <chrono> 31#include <cmath> 32#include <vector> 33 34/** 35 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single 36 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly. 37 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable. 38 * 39 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched 40 * render target and syncs the GPU after each draw. 41 * 42 * Currently, only GPU configs are supported. 43 */ 44 45DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); 46DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); 47DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); 48DEFINE_bool(fps, false, "use fps instead of ms"); 49DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run"); 50DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"); 51DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); 52DEFINE_bool(suppressHeader, false, "don't print a header row before the results"); 53 54static const char* header = 55" accum median max min stddev samples sample_ms clock metric config bench"; 56 57static const char* resultFormat = 58"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; 59 60struct Sample { 61 using duration = std::chrono::nanoseconds; 62 63 Sample() : fFrames(0), fDuration(0) {} 64 double seconds() const { return std::chrono::duration<double>(fDuration).count(); } 65 double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); } 66 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; } 67 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } 68 69 int fFrames; 70 duration fDuration; 71}; 72 73class GpuSync { 74public: 75 GpuSync(const sk_gpu_test::FenceSync* fenceSync); 76 ~GpuSync(); 77 78 void syncToPreviousFrame(); 79 80private: 81 void updateFence(); 82 83 const sk_gpu_test::FenceSync* const fFenceSync; 84 sk_gpu_test::PlatformFence fFence; 85}; 86 87enum class ExitErr { 88 kOk = 0, 89 kUsage = 64, 90 kData = 65, 91 kUnavailable = 69, 92 kIO = 74, 93 kSoftware = 70 94}; 95 96static void draw_skp_and_flush(SkCanvas*, const SkPicture*); 97static sk_sp<SkPicture> create_warmup_skp(); 98static bool mkdir_p(const SkString& name); 99static SkString join(const SkCommandLineFlags::StringArray&); 100static void exitf(ExitErr, const char* format, ...); 101 102static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, 103 const SkPicture* skp, std::vector<Sample>* samples) { 104 using clock = std::chrono::high_resolution_clock; 105 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); 106 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); 107 108 draw_skp_and_flush(canvas, skp); 109 GpuSync gpuSync(fenceSync); 110 111 draw_skp_and_flush(canvas, skp); 112 gpuSync.syncToPreviousFrame(); 113 114 clock::time_point now = clock::now(); 115 const clock::time_point endTime = now + benchDuration; 116 117 do { 118 clock::time_point sampleStart = now; 119 samples->emplace_back(); 120 Sample& sample = samples->back(); 121 122 do { 123 draw_skp_and_flush(canvas, skp); 124 gpuSync.syncToPreviousFrame(); 125 126 now = clock::now(); 127 sample.fDuration = now - sampleStart; 128 ++sample.fFrames; 129 } while (sample.fDuration < sampleDuration); 130 } while (now < endTime || 0 == samples->size() % 2); 131} 132 133static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, 134 const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, 135 const SkPicture* skp, std::vector<Sample>* samples) { 136 using sk_gpu_test::PlatformTimerQuery; 137 using clock = std::chrono::steady_clock; 138 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); 139 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); 140 141 if (!gpuTimer->disjointSupport()) { 142 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " 143 "results may be unreliable\n"); 144 } 145 146 draw_skp_and_flush(canvas, skp); 147 GpuSync gpuSync(fenceSync); 148 149 gpuTimer->queueStart(); 150 draw_skp_and_flush(canvas, skp); 151 PlatformTimerQuery previousTime = gpuTimer->queueStop(); 152 gpuSync.syncToPreviousFrame(); 153 154 clock::time_point now = clock::now(); 155 const clock::time_point endTime = now + benchDuration; 156 157 do { 158 const clock::time_point sampleEndTime = now + sampleDuration; 159 samples->emplace_back(); 160 Sample& sample = samples->back(); 161 162 do { 163 gpuTimer->queueStart(); 164 draw_skp_and_flush(canvas, skp); 165 PlatformTimerQuery time = gpuTimer->queueStop(); 166 gpuSync.syncToPreviousFrame(); 167 168 switch (gpuTimer->checkQueryStatus(previousTime)) { 169 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus; 170 case QueryStatus::kInvalid: 171 exitf(ExitErr::kUnavailable, "GPU timer failed"); 172 case QueryStatus::kPending: 173 exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync"); 174 case QueryStatus::kDisjoint: 175 if (FLAGS_verbosity >= 4) { 176 fprintf(stderr, "discarding timer query due to disjoint operations.\n"); 177 } 178 break; 179 case QueryStatus::kAccurate: 180 sample.fDuration += gpuTimer->getTimeElapsed(previousTime); 181 ++sample.fFrames; 182 break; 183 } 184 gpuTimer->deleteQuery(previousTime); 185 previousTime = time; 186 now = clock::now(); 187 } while (now < sampleEndTime || 0 == sample.fFrames); 188 } while (now < endTime || 0 == samples->size() % 2); 189 190 gpuTimer->deleteQuery(previousTime); 191} 192 193void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) { 194 if (0 == (samples.size() % 2)) { 195 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples"); 196 } 197 198 Sample accum = Sample(); 199 std::vector<double> values; 200 values.reserve(samples.size()); 201 for (const Sample& sample : samples) { 202 accum.fFrames += sample.fFrames; 203 accum.fDuration += sample.fDuration; 204 values.push_back(sample.value()); 205 } 206 std::sort(values.begin(), values.end()); 207 208 const double accumValue = accum.value(); 209 double variance = 0; 210 for (double value : values) { 211 const double delta = value - accumValue; 212 variance += delta * delta; 213 } 214 variance /= values.size(); 215 // Technically, this is the relative standard deviation. 216 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; 217 218 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(), 219 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(), 220 config, bench); 221 printf("\n"); 222 fflush(stdout); 223} 224 225int main(int argc, char** argv) { 226 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " 227 "You usually don't want to use this program directly."); 228 SkCommandLineFlags::Parse(argc, argv); 229 230 if (!FLAGS_suppressHeader) { 231 printf("%s\n", header); 232 } 233 if (FLAGS_duration <= 0) { 234 exit(0); // This can be used to print the header and quit. 235 } 236 237 // Parse the config. 238 const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning. 239 SkCommandLineConfigArray configs; 240 ParseConfigs(FLAGS_config, &configs); 241 if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) { 242 exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config", 243 join(FLAGS_config).c_str()); 244 } 245 246 // Parse the skp. 247 if (FLAGS_skp.count() != 1) { 248 exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'", 249 join(FLAGS_skp).c_str()); 250 } 251 sk_sp<SkPicture> skp; 252 SkString skpname; 253 if (0 == strcmp(FLAGS_skp[0], "warmup")) { 254 skp = create_warmup_skp(); 255 skpname = "warmup"; 256 } else { 257 const char* skpfile = FLAGS_skp[0]; 258 std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile)); 259 if (!skpstream) { 260 exitf(ExitErr::kIO, "failed to open skp file %s", skpfile); 261 } 262 skp = SkPicture::MakeFromStream(skpstream.get()); 263 if (!skp) { 264 exitf(ExitErr::kData, "failed to parse skp file %s", skpfile); 265 } 266 skpname = SkOSPath::Basename(skpfile); 267 } 268 int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048), 269 height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048); 270 if (FLAGS_verbosity >= 3 && 271 (width != skp->cullRect().width() || height != skp->cullRect().height())) { 272 fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n", 273 skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()), 274 SkScalarCeilToInt(skp->cullRect().height()), width, height); 275 } 276 277 // Create a context. 278 GrContextOptions ctxOptions; 279 SetCtxOptionsFromCommonFlags(&ctxOptions); 280 sk_gpu_test::GrContextFactory factory(ctxOptions); 281 sk_gpu_test::ContextInfo ctxInfo = 282 factory.getContextInfo(config->getContextType(), config->getContextOverrides()); 283 GrContext* ctx = ctxInfo.grContext(); 284 if (!ctx) { 285 exitf(ExitErr::kUnavailable, "failed to create context for config %s", 286 config->getTag().c_str()); 287 } 288 if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) { 289 exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)", 290 width, height, ctx->caps()->maxRenderTargetSize()); 291 } 292 GrPixelConfig grPixConfig = SkImageInfo2GrPixelConfig(config->getColorType(), 293 config->getColorSpace(), 294 *ctx->caps()); 295 if (kUnknown_GrPixelConfig == grPixConfig) { 296 exitf(ExitErr::kUnavailable, "failed to get GrPixelConfig from SkColorType: %d", 297 config->getColorType()); 298 } 299 int supportedSampleCount = 300 ctx->caps()->getRenderTargetSampleCount(config->getSamples(), grPixConfig); 301 if (supportedSampleCount != config->getSamples()) { 302 exitf(ExitErr::kUnavailable, "sample count %i not supported by platform", 303 config->getSamples()); 304 } 305 sk_gpu_test::TestContext* testCtx = ctxInfo.testContext(); 306 if (!testCtx) { 307 exitf(ExitErr::kSoftware, "testContext is null"); 308 } 309 if (!testCtx->fenceSyncSupport()) { 310 exitf(ExitErr::kUnavailable, "GPU does not support fence sync"); 311 } 312 313 // Create a render target. 314 SkImageInfo info = 315 SkImageInfo::Make(width, height, config->getColorType(), config->getAlphaType(), 316 sk_ref_sp(config->getColorSpace())); 317 uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0; 318 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); 319 sk_sp<SkSurface> surface = 320 SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props); 321 if (!surface) { 322 exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s", 323 width, height, config->getTag().c_str()); 324 } 325 326 // Run the benchmark. 327 std::vector<Sample> samples; 328 if (FLAGS_sampleMs > 0) { 329 // +1 because we might take one more sample in order to have an odd number. 330 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs); 331 } else { 332 samples.reserve(2 * FLAGS_duration); 333 } 334 SkCanvas* canvas = surface->getCanvas(); 335 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); 336 if (!FLAGS_gpuClock) { 337 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); 338 } else { 339 if (!testCtx->gpuTimingSupport()) { 340 exitf(ExitErr::kUnavailable, "GPU does not support timing"); 341 } 342 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(), 343 &samples); 344 } 345 print_result(samples, config->getTag().c_str(), skpname.c_str()); 346 347 // Save a proof (if one was requested). 348 if (!FLAGS_png.isEmpty()) { 349 SkBitmap bmp; 350 bmp.allocPixels(info); 351 if (!surface->getCanvas()->readPixels(bmp, 0, 0)) { 352 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png"); 353 } 354 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), 355 &basename = SkOSPath::Basename(FLAGS_png[0]); 356 if (!mkdir_p(dirname)) { 357 exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str()); 358 } 359 if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) { 360 exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]); 361 } 362 } 363 364 exit(0); 365} 366 367static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) { 368 canvas->drawPicture(skp); 369 canvas->flush(); 370} 371 372static sk_sp<SkPicture> create_warmup_skp() { 373 static constexpr SkRect bounds{0, 0, 500, 500}; 374 SkPictureRecorder recorder; 375 SkCanvas* recording = recorder.beginRecording(bounds); 376 377 recording->clear(SK_ColorWHITE); 378 379 SkPaint stroke; 380 stroke.setStyle(SkPaint::kStroke_Style); 381 stroke.setStrokeWidth(2); 382 383 // Use a big path to (theoretically) warmup the CPU. 384 SkPath bigPath; 385 sk_tool_utils::make_big_path(bigPath); 386 recording->drawPath(bigPath, stroke); 387 388 // Use a perlin shader to warmup the GPU. 389 SkPaint perlin; 390 perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr)); 391 recording->drawRect(bounds, perlin); 392 393 return recorder.finishRecordingAsPicture(); 394} 395 396bool mkdir_p(const SkString& dirname) { 397 if (dirname.isEmpty()) { 398 return true; 399 } 400 return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str()); 401} 402 403static SkString join(const SkCommandLineFlags::StringArray& stringArray) { 404 SkString joined; 405 for (int i = 0; i < stringArray.count(); ++i) { 406 joined.appendf(i ? " %s" : "%s", stringArray[i]); 407 } 408 return joined; 409} 410 411static void exitf(ExitErr err, const char* format, ...) { 412 fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: "); 413 va_list args; 414 va_start(args, format); 415 vfprintf(stderr, format, args); 416 va_end(args); 417 fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n"); 418 exit((int)err); 419} 420 421GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync) 422 : fFenceSync(fenceSync) { 423 this->updateFence(); 424} 425 426GpuSync::~GpuSync() { 427 fFenceSync->deleteFence(fFence); 428} 429 430void GpuSync::syncToPreviousFrame() { 431 if (sk_gpu_test::kInvalidFence == fFence) { 432 exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); 433 } 434 if (!fFenceSync->waitFence(fFence)) { 435 exitf(ExitErr::kUnavailable, "failed to wait for fence"); 436 } 437 fFenceSync->deleteFence(fFence); 438 this->updateFence(); 439} 440 441void GpuSync::updateFence() { 442 fFence = fFenceSync->insertFence(); 443 if (sk_gpu_test::kInvalidFence == fFence) { 444 exitf(ExitErr::kUnavailable, "failed to insert fence"); 445 } 446} 447