/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "Benchmark.h"
#include "SkPMFloat.h"

// Used to prevent the compiler from optimizing away the whole loop:
// benchmarks fold their results into this volatile sink once they are done.
volatile uint32_t blackhole = 0;

// Not a great random number generator, but it's very fast.
// The code we're measuring is quite fast, so low overhead is essential.
//
// Advances *seed by one linear-congruential step,
//     seed = seed * 1664525 + 1013904223   (arithmetic wraps modulo 2^32),
// and returns the updated value.  The caller owns the state: the same seed
// always yields the same sequence.
static uint32_t lcg_rand(uint32_t* seed) {
    *seed *= 1664525u;
    *seed += 1013904223u;
    return *seed;
}

22548bf38b28986fab6770350b72247d7114d98184mtklein// I'm having better luck getting these to constant-propagate away as template parameters.
23548bf38b28986fab6770350b72247d7114d98184mtkleintemplate <bool kClamp, bool kWide>
2402fd592c8d190058652bb715fb34feb7a72992e5mtkleinstruct PMFloatGetSetBench : public Benchmark {
2502fd592c8d190058652bb715fb34feb7a72992e5mtklein    PMFloatGetSetBench() {}
26a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein
2736352bf5e38f45a70ee4f4fc132a38048d38206dmtklein    const char* onGetName() override {
28548bf38b28986fab6770350b72247d7114d98184mtklein        switch (kClamp << 1 | kWide) {
29548bf38b28986fab6770350b72247d7114d98184mtklein            case 0: return "SkPMFloat_get_1x";
30548bf38b28986fab6770350b72247d7114d98184mtklein            case 1: return "SkPMFloat_get_4x";
31548bf38b28986fab6770350b72247d7114d98184mtklein            case 2: return "SkPMFloat_clamp_1x";
32548bf38b28986fab6770350b72247d7114d98184mtklein            case 3: return "SkPMFloat_clamp_4x";
33548bf38b28986fab6770350b72247d7114d98184mtklein        }
34548bf38b28986fab6770350b72247d7114d98184mtklein        SkFAIL("unreachable");
35548bf38b28986fab6770350b72247d7114d98184mtklein        return "oh bother";
36548bf38b28986fab6770350b72247d7114d98184mtklein    }
3736352bf5e38f45a70ee4f4fc132a38048d38206dmtklein    bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
38a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein
3936352bf5e38f45a70ee4f4fc132a38048d38206dmtklein    void onDraw(const int loops, SkCanvas* canvas) override {
4060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        // Unlike blackhole, junk can and probably will be a register.
4160ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        uint32_t junk = 0;
4260ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        uint32_t seed = 0;
43a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein        for (int i = 0; i < loops; i++) {
44548bf38b28986fab6770350b72247d7114d98184mtklein            SkPMColor colors[4];
4560ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        #ifdef SK_DEBUG
46548bf38b28986fab6770350b72247d7114d98184mtklein            for (int i = 0; i < 4; i++) {
47548bf38b28986fab6770350b72247d7114d98184mtklein                // Our SkASSERTs will remind us that it's technically required that we premultiply.
48548bf38b28986fab6770350b72247d7114d98184mtklein                colors[i] = SkPreMultiplyColor(lcg_rand(&seed));
49548bf38b28986fab6770350b72247d7114d98184mtklein            }
5060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        #else
5160ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein            // But it's a lot faster not to, and this code won't really mind the non-PM colors.
52548bf38b28986fab6770350b72247d7114d98184mtklein            (void)lcg_rand(&seed);
53548bf38b28986fab6770350b72247d7114d98184mtklein            colors[0] = seed + 0;
54548bf38b28986fab6770350b72247d7114d98184mtklein            colors[1] = seed + 1;
55548bf38b28986fab6770350b72247d7114d98184mtklein            colors[2] = seed + 2;
56548bf38b28986fab6770350b72247d7114d98184mtklein            colors[3] = seed + 3;
5760ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        #endif
58548bf38b28986fab6770350b72247d7114d98184mtklein
5915391ee4acaa092f52742f64968ad8046b74ca81mtklein            SkPMFloat fa,fb,fc,fd;
60548bf38b28986fab6770350b72247d7114d98184mtklein            if (kWide) {
6115391ee4acaa092f52742f64968ad8046b74ca81mtklein                SkPMFloat::From4PMColors(colors, &fa, &fb, &fc, &fd);
62548bf38b28986fab6770350b72247d7114d98184mtklein            } else {
6315391ee4acaa092f52742f64968ad8046b74ca81mtklein                fa = SkPMFloat::FromPMColor(colors[0]);
6415391ee4acaa092f52742f64968ad8046b74ca81mtklein                fb = SkPMFloat::FromPMColor(colors[1]);
6515391ee4acaa092f52742f64968ad8046b74ca81mtklein                fc = SkPMFloat::FromPMColor(colors[2]);
6615391ee4acaa092f52742f64968ad8046b74ca81mtklein                fd = SkPMFloat::FromPMColor(colors[3]);
67548bf38b28986fab6770350b72247d7114d98184mtklein            }
68548bf38b28986fab6770350b72247d7114d98184mtklein
69548bf38b28986fab6770350b72247d7114d98184mtklein            SkPMColor back[4];
70548bf38b28986fab6770350b72247d7114d98184mtklein            switch (kClamp << 1 | kWide) {
7115391ee4acaa092f52742f64968ad8046b74ca81mtklein                case 0: {
723d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[0] = fa.round();
733d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[1] = fb.round();
743d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[2] = fc.round();
753d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[3] = fd.round();
7615391ee4acaa092f52742f64968ad8046b74ca81mtklein                } break;
773d626834b4b5ee2d6dda34da365dfe40520253aamtklein                case 1: SkPMFloat::RoundTo4PMColors(fa, fb, fc, fd, back); break;
7815391ee4acaa092f52742f64968ad8046b74ca81mtklein                case 2: {
793d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[0] = fa.roundClamp();
803d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[1] = fb.roundClamp();
813d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[2] = fc.roundClamp();
823d626834b4b5ee2d6dda34da365dfe40520253aamtklein                    back[3] = fd.roundClamp();
8315391ee4acaa092f52742f64968ad8046b74ca81mtklein                } break;
843d626834b4b5ee2d6dda34da365dfe40520253aamtklein                case 3: SkPMFloat::RoundClampTo4PMColors(fa, fb, fc, fd, back); break;
85548bf38b28986fab6770350b72247d7114d98184mtklein            }
86548bf38b28986fab6770350b72247d7114d98184mtklein            for (int i = 0; i < 4; i++) {
87548bf38b28986fab6770350b72247d7114d98184mtklein                junk ^= back[i];
88548bf38b28986fab6770350b72247d7114d98184mtklein            }
89a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein        }
9060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein        blackhole ^= junk;
91a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein    }
92a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein};
93548bf38b28986fab6770350b72247d7114d98184mtklein
94548bf38b28986fab6770350b72247d7114d98184mtklein// Extra () help DEF_BENCH not get confused by the comma inside the <>.
9502fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench< true,  true>);)
9602fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench<false,  true>);)
9702fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench< true, false>);)
9802fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench<false, false>);)
9902fd592c8d190058652bb715fb34feb7a72992e5mtklein
10002fd592c8d190058652bb715fb34feb7a72992e5mtkleinstruct PMFloatGradientBench : public Benchmark {
10102fd592c8d190058652bb715fb34feb7a72992e5mtklein    const char* onGetName() override { return "PMFloat_gradient"; }
10202fd592c8d190058652bb715fb34feb7a72992e5mtklein    bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
10302fd592c8d190058652bb715fb34feb7a72992e5mtklein
10402fd592c8d190058652bb715fb34feb7a72992e5mtklein    SkPMColor fDevice[100];
10502fd592c8d190058652bb715fb34feb7a72992e5mtklein    void onDraw(const int loops, SkCanvas*) override {
1060340df5b3698aff1c9540fcdbc3dafd9d5ddb0b0mtklein        Sk4f c0 = SkPMFloat::FromARGB(255, 255, 0, 0),
10702fd592c8d190058652bb715fb34feb7a72992e5mtklein             c1 = SkPMFloat::FromARGB(255, 0, 0, 255),
10802fd592c8d190058652bb715fb34feb7a72992e5mtklein             dc = c1 - c0,
10902fd592c8d190058652bb715fb34feb7a72992e5mtklein             fx(0.1f),
11002fd592c8d190058652bb715fb34feb7a72992e5mtklein             dx(0.002f),
11102fd592c8d190058652bb715fb34feb7a72992e5mtklein             dcdx(dc*dx),
11202fd592c8d190058652bb715fb34feb7a72992e5mtklein             dcdx4(dcdx+dcdx+dcdx+dcdx);
11302fd592c8d190058652bb715fb34feb7a72992e5mtklein
11402fd592c8d190058652bb715fb34feb7a72992e5mtklein        for (int n = 0; n < loops; n++) {
1150340df5b3698aff1c9540fcdbc3dafd9d5ddb0b0mtklein            Sk4f a = c0 + dc*fx + Sk4f(0.5f),  // The +0.5f lets us call trunc() instead of get().
11602fd592c8d190058652bb715fb34feb7a72992e5mtklein                 b = a + dcdx,
11702fd592c8d190058652bb715fb34feb7a72992e5mtklein                 c = b + dcdx,
11802fd592c8d190058652bb715fb34feb7a72992e5mtklein                 d = c + dcdx;
11902fd592c8d190058652bb715fb34feb7a72992e5mtklein            for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
1203d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein                fDevice[i+0] = SkPMFloat(a).trunc();
1213d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein                fDevice[i+1] = SkPMFloat(b).trunc();
1223d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein                fDevice[i+2] = SkPMFloat(c).trunc();
1233d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein                fDevice[i+3] = SkPMFloat(d).trunc();
12402fd592c8d190058652bb715fb34feb7a72992e5mtklein                a += dcdx4;
12502fd592c8d190058652bb715fb34feb7a72992e5mtklein                b += dcdx4;
12602fd592c8d190058652bb715fb34feb7a72992e5mtklein                c += dcdx4;
12702fd592c8d190058652bb715fb34feb7a72992e5mtklein                d += dcdx4;
12802fd592c8d190058652bb715fb34feb7a72992e5mtklein            }
12902fd592c8d190058652bb715fb34feb7a72992e5mtklein        }
13002fd592c8d190058652bb715fb34feb7a72992e5mtklein    }
13102fd592c8d190058652bb715fb34feb7a72992e5mtklein};
13202fd592c8d190058652bb715fb34feb7a72992e5mtklein
13302fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return new PMFloatGradientBench;)
134