/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "GpuTimer.h"
9#include "GrContextFactory.h"
10#include "SkCanvas.h"
11#include "SkCommonFlagsPathRenderer.h"
12#include "SkOSFile.h"
13#include "SkOSPath.h"
14#include "SkPerlinNoiseShader.h"
15#include "SkPicture.h"
16#include "SkPictureRecorder.h"
17#include "SkStream.h"
18#include "SkSurface.h"
19#include "SkSurfaceProps.h"
20#include "picture_utils.h"
21#include "sk_tool_utils.h"
22#include "flags/SkCommandLineFlags.h"
23#include "flags/SkCommonFlagsConfig.h"
24#include <stdlib.h>
25#include <algorithm>
26#include <array>
27#include <chrono>
28#include <cmath>
29#include <vector>
30
/**
 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
 *
 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
 * render target and syncs the GPU after each draw.
 *
 * Currently, only GPU configs are supported.
 */
41
42DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
43DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
44DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
45DEFINE_bool(fps, false, "use fps instead of ms");
46DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run");
47DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
48DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
49DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
50DEFINE_pathrenderer_flag;
51
// Column titles for the result row. Written one column per literal so that each title lines up
// with the matching conversion in resultFormat below; the concatenated text is what gets printed.
static const char* header =
    "   accum"
    "    median"
    "       max"
    "       min"
    "   stddev"
    "  samples"
    "  sample_ms"
    "  clock"
    "  metric"
    "  config"
    "    bench";

// printf format for one result row; one literal per column, in the same order as header.
static const char* resultFormat =
    "%8.4g"      // accum
    "  %8.4g"    // median
    "  %8.4g"    // max
    "  %8.4g"    // min
    "  %6.3g%%"  // stddev (printed as a percentage)
    "  %7li"     // samples
    "  %9i"      // sample_ms
    "  %-5s"     // clock
    "  %-6s"     // metric
    "  %-9s"     // config
    " %s";       // bench
57
58struct Sample {
59    using duration = std::chrono::nanoseconds;
60
61    Sample() : fFrames(0), fDuration(0) {}
62    double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
63    double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
64    double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
65    static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
66
67    int        fFrames;
68    duration   fDuration;
69};
70
71class GpuSync {
72public:
73    GpuSync(const sk_gpu_test::FenceSync* fenceSync);
74    ~GpuSync();
75
76    void syncToPreviousFrame();
77
78private:
79    void updateFence();
80
81    const sk_gpu_test::FenceSync* const   fFenceSync;
82    sk_gpu_test::PlatformFence            fFence;
83};
84
/**
 * Process exit codes passed to exitf(). Listed in numeric order.
 * NOTE(review): the non-zero values look like the BSD sysexits.h codes (EX_USAGE, EX_DATAERR,
 * EX_UNAVAILABLE, EX_SOFTWARE, EX_IOERR) -- confirm before relying on that.
 */
enum class ExitErr {
    kOk           = 0,   // success
    kUsage        = 64,  // bad command line
    kData         = 65,  // input could not be parsed
    kUnavailable  = 69,  // a required GPU/platform feature is missing or failed
    kSoftware     = 70,  // internal error
    kIO           = 74   // file could not be read or written
};
93
94static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
95static sk_sp<SkPicture> create_warmup_skp();
96static bool mkdir_p(const SkString& name);
97static SkString join(const SkCommandLineFlags::StringArray&);
98static void exitf(ExitErr, const char* format, ...);
99
100static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
101                          const SkPicture* skp, std::vector<Sample>* samples) {
102    using clock = std::chrono::high_resolution_clock;
103    const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
104    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
105
106    draw_skp_and_flush(canvas, skp);
107    GpuSync gpuSync(fenceSync);
108
109    draw_skp_and_flush(canvas, skp);
110    gpuSync.syncToPreviousFrame();
111
112    clock::time_point now = clock::now();
113    const clock::time_point endTime = now + benchDuration;
114
115    do {
116        clock::time_point sampleStart = now;
117        samples->emplace_back();
118        Sample& sample = samples->back();
119
120        do {
121            draw_skp_and_flush(canvas, skp);
122            gpuSync.syncToPreviousFrame();
123
124            now = clock::now();
125            sample.fDuration = now - sampleStart;
126            ++sample.fFrames;
127        } while (sample.fDuration < sampleDuration);
128    } while (now < endTime || 0 == samples->size() % 2);
129}
130
131static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
132                                   const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
133                                   const SkPicture* skp, std::vector<Sample>* samples) {
134    using sk_gpu_test::PlatformTimerQuery;
135    using clock = std::chrono::steady_clock;
136    const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
137    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
138
139    if (!gpuTimer->disjointSupport()) {
140        fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
141                        "results may be unreliable\n");
142    }
143
144    draw_skp_and_flush(canvas, skp);
145    GpuSync gpuSync(fenceSync);
146
147    gpuTimer->queueStart();
148    draw_skp_and_flush(canvas, skp);
149    PlatformTimerQuery previousTime = gpuTimer->queueStop();
150    gpuSync.syncToPreviousFrame();
151
152    clock::time_point now = clock::now();
153    const clock::time_point endTime = now + benchDuration;
154
155    do {
156        const clock::time_point sampleEndTime = now + sampleDuration;
157        samples->emplace_back();
158        Sample& sample = samples->back();
159
160        do {
161            gpuTimer->queueStart();
162            draw_skp_and_flush(canvas, skp);
163            PlatformTimerQuery time = gpuTimer->queueStop();
164            gpuSync.syncToPreviousFrame();
165
166            switch (gpuTimer->checkQueryStatus(previousTime)) {
167                using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
168                case QueryStatus::kInvalid:
169                    exitf(ExitErr::kUnavailable, "GPU timer failed");
170                case QueryStatus::kPending:
171                    exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
172                case QueryStatus::kDisjoint:
173                    if (FLAGS_verbosity >= 4) {
174                        fprintf(stderr, "discarding timer query due to disjoint operations.\n");
175                    }
176                    break;
177                case QueryStatus::kAccurate:
178                    sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
179                    ++sample.fFrames;
180                    break;
181            }
182            gpuTimer->deleteQuery(previousTime);
183            previousTime = time;
184            now = clock::now();
185        } while (now < sampleEndTime || 0 == sample.fFrames);
186    } while (now < endTime || 0 == samples->size() % 2);
187
188    gpuTimer->deleteQuery(previousTime);
189}
190
191void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
192    if (0 == (samples.size() % 2)) {
193        exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
194    }
195
196    Sample accum = Sample();
197    std::vector<double> values;
198    values.reserve(samples.size());
199    for (const Sample& sample : samples) {
200        accum.fFrames += sample.fFrames;
201        accum.fDuration += sample.fDuration;
202        values.push_back(sample.value());
203    }
204    std::sort(values.begin(), values.end());
205
206    const double accumValue = accum.value();
207    double variance = 0;
208    for (double value : values) {
209        const double delta = value - accumValue;
210        variance += delta * delta;
211    }
212    variance /= values.size();
213    // Technically, this is the relative standard deviation.
214    const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
215
216    printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
217           stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
218           config, bench);
219    printf("\n");
220    fflush(stdout);
221}
222
223int main(int argc, char** argv) {
224    SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
225                                 "You usually don't want to use this program directly.");
226    SkCommandLineFlags::Parse(argc, argv);
227
228    if (!FLAGS_suppressHeader) {
229        printf("%s\n", header);
230    }
231    if (FLAGS_duration <= 0) {
232        exit(0); // This can be used to print the header and quit.
233    }
234
235    // Parse the config.
236    const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
237    SkCommandLineConfigArray configs;
238    ParseConfigs(FLAGS_config, &configs);
239    if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
240        exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
241                               join(FLAGS_config).c_str());
242    }
243
244    // Parse the skp.
245    if (FLAGS_skp.count() != 1) {
246        exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'",
247                               join(FLAGS_skp).c_str());
248    }
249    sk_sp<SkPicture> skp;
250    SkString skpname;
251    if (0 == strcmp(FLAGS_skp[0], "warmup")) {
252        skp = create_warmup_skp();
253        skpname = "warmup";
254    } else {
255        const char* skpfile = FLAGS_skp[0];
256        std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile));
257        if (!skpstream) {
258            exitf(ExitErr::kIO, "failed to open skp file %s", skpfile);
259        }
260        skp = SkPicture::MakeFromStream(skpstream.get());
261        if (!skp) {
262            exitf(ExitErr::kData, "failed to parse skp file %s", skpfile);
263        }
264        skpname = SkOSPath::Basename(skpfile);
265    }
266    int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048),
267        height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048);
268    if (FLAGS_verbosity >= 3 &&
269        (width != skp->cullRect().width() || height != skp->cullRect().height())) {
270        fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
271                        skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
272                        SkScalarCeilToInt(skp->cullRect().height()), width, height);
273    }
274
275    // Create a context.
276    GrContextOptions ctxOptions;
277    ctxOptions.fGpuPathRenderers = CollectGpuPathRenderersFromFlags();
278    sk_gpu_test::GrContextFactory factory(ctxOptions);
279    sk_gpu_test::ContextInfo ctxInfo =
280        factory.getContextInfo(config->getContextType(), config->getContextOverrides());
281    GrContext* ctx = ctxInfo.grContext();
282    if (!ctx) {
283        exitf(ExitErr::kUnavailable, "failed to create context for config %s",
284                                     config->getTag().c_str());
285    }
286    if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) {
287        exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
288                                     width, height, ctx->caps()->maxRenderTargetSize());
289    }
290    if (ctx->caps()->maxSampleCount() < config->getSamples()) {
291        exitf(ExitErr::kUnavailable, "sample count %i not supported by platform (max: %i)",
292                                     config->getSamples(), ctx->caps()->maxSampleCount());
293    }
294    sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
295    if (!testCtx) {
296        exitf(ExitErr::kSoftware, "testContext is null");
297    }
298    if (!testCtx->fenceSyncSupport()) {
299        exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
300    }
301
302    // Create a render target.
303    SkImageInfo info = SkImageInfo::Make(width, height, config->getColorType(),
304                                         kPremul_SkAlphaType, sk_ref_sp(config->getColorSpace()));
305    uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0;
306    SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
307    sk_sp<SkSurface> surface =
308        SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
309    if (!surface) {
310        exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
311                                     width, height, config->getTag().c_str());
312    }
313
314    // Run the benchmark.
315    std::vector<Sample> samples;
316    if (FLAGS_sampleMs > 0) {
317        // +1 because we might take one more sample in order to have an odd number.
318        samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
319    } else {
320        samples.reserve(2 * FLAGS_duration);
321    }
322    SkCanvas* canvas = surface->getCanvas();
323    canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
324    if (!FLAGS_gpuClock) {
325        run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
326    } else {
327        if (!testCtx->gpuTimingSupport()) {
328            exitf(ExitErr::kUnavailable, "GPU does not support timing");
329        }
330        run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
331                               &samples);
332    }
333    print_result(samples, config->getTag().c_str(), skpname.c_str());
334
335    // Save a proof (if one was requested).
336    if (!FLAGS_png.isEmpty()) {
337        SkBitmap bmp;
338        bmp.setInfo(info);
339        if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) {
340            exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
341        }
342        const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
343                       &basename = SkOSPath::Basename(FLAGS_png[0]);
344        if (!mkdir_p(dirname)) {
345            exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str());
346        }
347        if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) {
348            exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
349        }
350    }
351
352    exit(0);
353}
354
355static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) {
356    canvas->drawPicture(skp);
357    canvas->flush();
358}
359
360static sk_sp<SkPicture> create_warmup_skp() {
361    static constexpr SkRect bounds{0, 0, 500, 500};
362    SkPictureRecorder recorder;
363    SkCanvas* recording = recorder.beginRecording(bounds);
364
365    recording->clear(SK_ColorWHITE);
366
367    SkPaint stroke;
368    stroke.setStyle(SkPaint::kStroke_Style);
369    stroke.setStrokeWidth(2);
370
371    // Use a big path to (theoretically) warmup the CPU.
372    SkPath bigPath;
373    sk_tool_utils::make_big_path(bigPath);
374    recording->drawPath(bigPath, stroke);
375
376    // Use a perlin shader to warmup the GPU.
377    SkPaint perlin;
378    perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
379    recording->drawRect(bounds, perlin);
380
381    return recorder.finishRecordingAsPicture();
382}
383
384bool mkdir_p(const SkString& dirname) {
385    if (dirname.isEmpty()) {
386        return true;
387    }
388    return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
389}
390
391static SkString join(const SkCommandLineFlags::StringArray& stringArray) {
392    SkString joined;
393    for (int i = 0; i < stringArray.count(); ++i) {
394        joined.appendf(i ? " %s" : "%s", stringArray[i]);
395    }
396    return joined;
397}
398
399static void exitf(ExitErr err, const char* format, ...) {
400    fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
401    va_list args;
402    va_start(args, format);
403    vfprintf(stderr, format, args);
404    va_end(args);
405    fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
406    exit((int)err);
407}
408
409GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
410    : fFenceSync(fenceSync) {
411    this->updateFence();
412}
413
414GpuSync::~GpuSync() {
415    fFenceSync->deleteFence(fFence);
416}
417
418void GpuSync::syncToPreviousFrame() {
419    if (sk_gpu_test::kInvalidFence == fFence) {
420        exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
421    }
422    if (!fFenceSync->waitFence(fFence)) {
423        exitf(ExitErr::kUnavailable, "failed to wait for fence");
424    }
425    fFenceSync->deleteFence(fFence);
426    this->updateFence();
427}
428
429void GpuSync::updateFence() {
430    fFence = fFenceSync->insertFence();
431    if (sk_gpu_test::kInvalidFence == fFence) {
432        exitf(ExitErr::kUnavailable, "failed to insert fence");
433    }
434}
435