1/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "GpuTimer.h"
9#include "GrContextFactory.h"
10#include "SkGr.h"
11
12#include "SkCanvas.h"
13#include "SkCommonFlags.h"
14#include "SkCommonFlagsGpu.h"
15#include "SkOSFile.h"
16#include "SkOSPath.h"
17#include "SkPerlinNoiseShader.h"
18#include "SkPicture.h"
19#include "SkPictureRecorder.h"
20#include "SkStream.h"
21#include "SkSurface.h"
22#include "SkSurfaceProps.h"
23#include "picture_utils.h"
24#include "sk_tool_utils.h"
25#include "flags/SkCommandLineFlags.h"
26#include "flags/SkCommonFlagsConfig.h"
27#include <stdlib.h>
28#include <algorithm>
29#include <array>
30#include <chrono>
31#include <cmath>
32#include <vector>
33
34/**
35 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
36 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
37 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
38 *
39 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
40 * render target and syncs the GPU after each draw.
41 *
42 * Currently, only GPU configs are supported.
43 */
44
// Command-line flags specific to this tool.

// Total benchmark time. A value <= 0 makes main() print the header (unless suppressed) and exit.
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
// Each sample keeps drawing frames until at least this much time has elapsed.
DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
// Selects run_gpu_time_benchmark() (GPU timer queries) instead of run_benchmark() (CPU clock).
DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
// Switches Sample::value() from milliseconds-per-frame to frames-per-second.
DEFINE_bool(fps, false, "use fps instead of ms");
// Exactly one value is required; the literal "warmup" selects the builtin warmup picture.
DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run");
// When non-empty, the rendered surface is read back and written to this path after the run.
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
// Checked against thresholds (>= 3, >= 4) before emitting diagnostic messages to stderr.
DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
// Suppresses the column header row that main() otherwise prints first.
DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
53
// Column titles printed by main() before the results (unless --suppressHeader is set).
static const char* header =
"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";

// printf format for one result row; the arguments are supplied by print_result() in the same
// order as the columns of 'header' above.
static const char* resultFormat =
"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7li  %9i  %-5s  %-6s  %-9s %s";
59
60struct Sample {
61    using duration = std::chrono::nanoseconds;
62
63    Sample() : fFrames(0), fDuration(0) {}
64    double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
65    double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
66    double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
67    static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
68
69    int        fFrames;
70    duration   fDuration;
71};
72
73class GpuSync {
74public:
75    GpuSync(const sk_gpu_test::FenceSync* fenceSync);
76    ~GpuSync();
77
78    void syncToPreviousFrame();
79
80private:
81    void updateFence();
82
83    const sk_gpu_test::FenceSync* const   fFenceSync;
84    sk_gpu_test::PlatformFence            fFence;
85};
86
/**
 * Process exit codes passed to exitf(). The non-zero values match the conventional BSD
 * <sysexits.h> codes (EX_USAGE, EX_DATAERR, EX_UNAVAILABLE, EX_SOFTWARE, EX_IOERR).
 */
enum class ExitErr {
    kOk           = 0,   // Success.
    kUsage        = 64,  // Bad command line (config / skp arguments).
    kData         = 65,  // Input data could not be parsed.
    kUnavailable  = 69,  // A required GPU/platform capability or resource is unavailable.
    kSoftware     = 70,  // Internal error; exitf() reports these as "should never happen".
    kIO           = 74   // File system failure.
};
95
96static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
97static sk_sp<SkPicture> create_warmup_skp();
98static bool mkdir_p(const SkString& name);
99static SkString join(const SkCommandLineFlags::StringArray&);
100static void exitf(ExitErr, const char* format, ...);
101
102static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
103                          const SkPicture* skp, std::vector<Sample>* samples) {
104    using clock = std::chrono::high_resolution_clock;
105    const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
106    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
107
108    draw_skp_and_flush(canvas, skp);
109    GpuSync gpuSync(fenceSync);
110
111    draw_skp_and_flush(canvas, skp);
112    gpuSync.syncToPreviousFrame();
113
114    clock::time_point now = clock::now();
115    const clock::time_point endTime = now + benchDuration;
116
117    do {
118        clock::time_point sampleStart = now;
119        samples->emplace_back();
120        Sample& sample = samples->back();
121
122        do {
123            draw_skp_and_flush(canvas, skp);
124            gpuSync.syncToPreviousFrame();
125
126            now = clock::now();
127            sample.fDuration = now - sampleStart;
128            ++sample.fFrames;
129        } while (sample.fDuration < sampleDuration);
130    } while (now < endTime || 0 == samples->size() % 2);
131}
132
133static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
134                                   const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
135                                   const SkPicture* skp, std::vector<Sample>* samples) {
136    using sk_gpu_test::PlatformTimerQuery;
137    using clock = std::chrono::steady_clock;
138    const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
139    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
140
141    if (!gpuTimer->disjointSupport()) {
142        fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
143                        "results may be unreliable\n");
144    }
145
146    draw_skp_and_flush(canvas, skp);
147    GpuSync gpuSync(fenceSync);
148
149    gpuTimer->queueStart();
150    draw_skp_and_flush(canvas, skp);
151    PlatformTimerQuery previousTime = gpuTimer->queueStop();
152    gpuSync.syncToPreviousFrame();
153
154    clock::time_point now = clock::now();
155    const clock::time_point endTime = now + benchDuration;
156
157    do {
158        const clock::time_point sampleEndTime = now + sampleDuration;
159        samples->emplace_back();
160        Sample& sample = samples->back();
161
162        do {
163            gpuTimer->queueStart();
164            draw_skp_and_flush(canvas, skp);
165            PlatformTimerQuery time = gpuTimer->queueStop();
166            gpuSync.syncToPreviousFrame();
167
168            switch (gpuTimer->checkQueryStatus(previousTime)) {
169                using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
170                case QueryStatus::kInvalid:
171                    exitf(ExitErr::kUnavailable, "GPU timer failed");
172                case QueryStatus::kPending:
173                    exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
174                case QueryStatus::kDisjoint:
175                    if (FLAGS_verbosity >= 4) {
176                        fprintf(stderr, "discarding timer query due to disjoint operations.\n");
177                    }
178                    break;
179                case QueryStatus::kAccurate:
180                    sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
181                    ++sample.fFrames;
182                    break;
183            }
184            gpuTimer->deleteQuery(previousTime);
185            previousTime = time;
186            now = clock::now();
187        } while (now < sampleEndTime || 0 == sample.fFrames);
188    } while (now < endTime || 0 == samples->size() % 2);
189
190    gpuTimer->deleteQuery(previousTime);
191}
192
193void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
194    if (0 == (samples.size() % 2)) {
195        exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
196    }
197
198    Sample accum = Sample();
199    std::vector<double> values;
200    values.reserve(samples.size());
201    for (const Sample& sample : samples) {
202        accum.fFrames += sample.fFrames;
203        accum.fDuration += sample.fDuration;
204        values.push_back(sample.value());
205    }
206    std::sort(values.begin(), values.end());
207
208    const double accumValue = accum.value();
209    double variance = 0;
210    for (double value : values) {
211        const double delta = value - accumValue;
212        variance += delta * delta;
213    }
214    variance /= values.size();
215    // Technically, this is the relative standard deviation.
216    const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
217
218    printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
219           stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
220           config, bench);
221    printf("\n");
222    fflush(stdout);
223}
224
225int main(int argc, char** argv) {
226    SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
227                                 "You usually don't want to use this program directly.");
228    SkCommandLineFlags::Parse(argc, argv);
229
230    if (!FLAGS_suppressHeader) {
231        printf("%s\n", header);
232    }
233    if (FLAGS_duration <= 0) {
234        exit(0); // This can be used to print the header and quit.
235    }
236
237    // Parse the config.
238    const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
239    SkCommandLineConfigArray configs;
240    ParseConfigs(FLAGS_config, &configs);
241    if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
242        exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
243                               join(FLAGS_config).c_str());
244    }
245
246    // Parse the skp.
247    if (FLAGS_skp.count() != 1) {
248        exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'",
249                               join(FLAGS_skp).c_str());
250    }
251    sk_sp<SkPicture> skp;
252    SkString skpname;
253    if (0 == strcmp(FLAGS_skp[0], "warmup")) {
254        skp = create_warmup_skp();
255        skpname = "warmup";
256    } else {
257        const char* skpfile = FLAGS_skp[0];
258        std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile));
259        if (!skpstream) {
260            exitf(ExitErr::kIO, "failed to open skp file %s", skpfile);
261        }
262        skp = SkPicture::MakeFromStream(skpstream.get());
263        if (!skp) {
264            exitf(ExitErr::kData, "failed to parse skp file %s", skpfile);
265        }
266        skpname = SkOSPath::Basename(skpfile);
267    }
268    int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048),
269        height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048);
270    if (FLAGS_verbosity >= 3 &&
271        (width != skp->cullRect().width() || height != skp->cullRect().height())) {
272        fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
273                        skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
274                        SkScalarCeilToInt(skp->cullRect().height()), width, height);
275    }
276
277    // Create a context.
278    GrContextOptions ctxOptions;
279    SetCtxOptionsFromCommonFlags(&ctxOptions);
280    sk_gpu_test::GrContextFactory factory(ctxOptions);
281    sk_gpu_test::ContextInfo ctxInfo =
282        factory.getContextInfo(config->getContextType(), config->getContextOverrides());
283    GrContext* ctx = ctxInfo.grContext();
284    if (!ctx) {
285        exitf(ExitErr::kUnavailable, "failed to create context for config %s",
286                                     config->getTag().c_str());
287    }
288    if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) {
289        exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
290                                     width, height, ctx->caps()->maxRenderTargetSize());
291    }
292    GrPixelConfig grPixConfig = SkImageInfo2GrPixelConfig(config->getColorType(),
293                                                          config->getColorSpace(),
294                                                          *ctx->caps());
295    if (kUnknown_GrPixelConfig == grPixConfig) {
296        exitf(ExitErr::kUnavailable, "failed to get GrPixelConfig from SkColorType: %d",
297                                     config->getColorType());
298    }
299    int supportedSampleCount =
300            ctx->caps()->getRenderTargetSampleCount(config->getSamples(), grPixConfig);
301    if (supportedSampleCount != config->getSamples()) {
302        exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
303                                     config->getSamples());
304    }
305    sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
306    if (!testCtx) {
307        exitf(ExitErr::kSoftware, "testContext is null");
308    }
309    if (!testCtx->fenceSyncSupport()) {
310        exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
311    }
312
313    // Create a render target.
314    SkImageInfo info =
315            SkImageInfo::Make(width, height, config->getColorType(), config->getAlphaType(),
316                              sk_ref_sp(config->getColorSpace()));
317    uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0;
318    SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
319    sk_sp<SkSurface> surface =
320        SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
321    if (!surface) {
322        exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
323                                     width, height, config->getTag().c_str());
324    }
325
326    // Run the benchmark.
327    std::vector<Sample> samples;
328    if (FLAGS_sampleMs > 0) {
329        // +1 because we might take one more sample in order to have an odd number.
330        samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
331    } else {
332        samples.reserve(2 * FLAGS_duration);
333    }
334    SkCanvas* canvas = surface->getCanvas();
335    canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
336    if (!FLAGS_gpuClock) {
337        run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
338    } else {
339        if (!testCtx->gpuTimingSupport()) {
340            exitf(ExitErr::kUnavailable, "GPU does not support timing");
341        }
342        run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
343                               &samples);
344    }
345    print_result(samples, config->getTag().c_str(), skpname.c_str());
346
347    // Save a proof (if one was requested).
348    if (!FLAGS_png.isEmpty()) {
349        SkBitmap bmp;
350        bmp.allocPixels(info);
351        if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
352            exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
353        }
354        const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
355                       &basename = SkOSPath::Basename(FLAGS_png[0]);
356        if (!mkdir_p(dirname)) {
357            exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str());
358        }
359        if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) {
360            exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
361        }
362    }
363
364    exit(0);
365}
366
367static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) {
368    canvas->drawPicture(skp);
369    canvas->flush();
370}
371
372static sk_sp<SkPicture> create_warmup_skp() {
373    static constexpr SkRect bounds{0, 0, 500, 500};
374    SkPictureRecorder recorder;
375    SkCanvas* recording = recorder.beginRecording(bounds);
376
377    recording->clear(SK_ColorWHITE);
378
379    SkPaint stroke;
380    stroke.setStyle(SkPaint::kStroke_Style);
381    stroke.setStrokeWidth(2);
382
383    // Use a big path to (theoretically) warmup the CPU.
384    SkPath bigPath;
385    sk_tool_utils::make_big_path(bigPath);
386    recording->drawPath(bigPath, stroke);
387
388    // Use a perlin shader to warmup the GPU.
389    SkPaint perlin;
390    perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
391    recording->drawRect(bounds, perlin);
392
393    return recorder.finishRecordingAsPicture();
394}
395
396bool mkdir_p(const SkString& dirname) {
397    if (dirname.isEmpty()) {
398        return true;
399    }
400    return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
401}
402
403static SkString join(const SkCommandLineFlags::StringArray& stringArray) {
404    SkString joined;
405    for (int i = 0; i < stringArray.count(); ++i) {
406        joined.appendf(i ? " %s" : "%s", stringArray[i]);
407    }
408    return joined;
409}
410
411static void exitf(ExitErr err, const char* format, ...) {
412    fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
413    va_list args;
414    va_start(args, format);
415    vfprintf(stderr, format, args);
416    va_end(args);
417    fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
418    exit((int)err);
419}
420
421GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
422    : fFenceSync(fenceSync) {
423    this->updateFence();
424}
425
// Releases the fence that is still held when the GpuSync goes out of scope.
GpuSync::~GpuSync() {
    fFenceSync->deleteFence(fFence);
}
429
430void GpuSync::syncToPreviousFrame() {
431    if (sk_gpu_test::kInvalidFence == fFence) {
432        exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
433    }
434    if (!fFenceSync->waitFence(fFence)) {
435        exitf(ExitErr::kUnavailable, "failed to wait for fence");
436    }
437    fFenceSync->deleteFence(fFence);
438    this->updateFence();
439}
440
441void GpuSync::updateFence() {
442    fFence = fFenceSync->insertFence();
443    if (sk_gpu_test::kInvalidFence == fFence) {
444        exitf(ExitErr::kUnavailable, "failed to insert fence");
445    }
446}
447