115391ee4acaa092f52742f64968ad8046b74ca81mtklein/* 215391ee4acaa092f52742f64968ad8046b74ca81mtklein * Copyright 2015 Google Inc. 315391ee4acaa092f52742f64968ad8046b74ca81mtklein * 415391ee4acaa092f52742f64968ad8046b74ca81mtklein * Use of this source code is governed by a BSD-style license that can be 515391ee4acaa092f52742f64968ad8046b74ca81mtklein * found in the LICENSE file. 615391ee4acaa092f52742f64968ad8046b74ca81mtklein */ 715391ee4acaa092f52742f64968ad8046b74ca81mtklein 8a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein#include "Benchmark.h" 9a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein#include "SkPMFloat.h" 1060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein 1160ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein// Used to prevent the compiler from optimizing away the whole loop. 1260ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtkleinvolatile uint32_t blackhole = 0; 1360ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein 1460ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein// Not a great random number generator, but it's very fast. 1560ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein// The code we're measuring is quite fast, so low overhead is essential. 1660ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtkleinstatic uint32_t lcg_rand(uint32_t* seed) { 1760ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein *seed *= 1664525; 1860ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein *seed += 1013904223; 1960ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein return *seed; 2060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein} 21a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein 22548bf38b28986fab6770350b72247d7114d98184mtklein// I'm having better luck getting these to constant-propagate away as template parameters. 23548bf38b28986fab6770350b72247d7114d98184mtkleintemplate <bool kClamp, bool kWide> 2402fd592c8d190058652bb715fb34feb7a72992e5mtkleinstruct PMFloatGetSetBench : public Benchmark { 2502fd592c8d190058652bb715fb34feb7a72992e5mtklein PMFloatGetSetBench() {} 26a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein 2736352bf5e38f45a70ee4f4fc132a38048d38206dmtklein const char* onGetName() override { 28548bf38b28986fab6770350b72247d7114d98184mtklein switch (kClamp << 1 | kWide) { 29548bf38b28986fab6770350b72247d7114d98184mtklein case 0: return "SkPMFloat_get_1x"; 30548bf38b28986fab6770350b72247d7114d98184mtklein case 1: return "SkPMFloat_get_4x"; 31548bf38b28986fab6770350b72247d7114d98184mtklein case 2: return "SkPMFloat_clamp_1x"; 32548bf38b28986fab6770350b72247d7114d98184mtklein case 3: return "SkPMFloat_clamp_4x"; 33548bf38b28986fab6770350b72247d7114d98184mtklein } 34548bf38b28986fab6770350b72247d7114d98184mtklein SkFAIL("unreachable"); 35548bf38b28986fab6770350b72247d7114d98184mtklein return "oh bother"; 36548bf38b28986fab6770350b72247d7114d98184mtklein } 3736352bf5e38f45a70ee4f4fc132a38048d38206dmtklein bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; } 38a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein 3936352bf5e38f45a70ee4f4fc132a38048d38206dmtklein void onDraw(const int loops, SkCanvas* canvas) override { 4060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein // Unlike blackhole, junk can and probably will be a register. 4160ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein uint32_t junk = 0; 4260ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein uint32_t seed = 0; 43a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein for (int i = 0; i < loops; i++) { 44548bf38b28986fab6770350b72247d7114d98184mtklein SkPMColor colors[4]; 4560ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein #ifdef SK_DEBUG 46548bf38b28986fab6770350b72247d7114d98184mtklein for (int i = 0; i < 4; i++) { 47548bf38b28986fab6770350b72247d7114d98184mtklein // Our SkASSERTs will remind us that it's technically required that we premultiply. 48548bf38b28986fab6770350b72247d7114d98184mtklein colors[i] = SkPreMultiplyColor(lcg_rand(&seed)); 49548bf38b28986fab6770350b72247d7114d98184mtklein } 5060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein #else 5160ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein // But it's a lot faster not to, and this code won't really mind the non-PM colors. 52548bf38b28986fab6770350b72247d7114d98184mtklein (void)lcg_rand(&seed); 53548bf38b28986fab6770350b72247d7114d98184mtklein colors[0] = seed + 0; 54548bf38b28986fab6770350b72247d7114d98184mtklein colors[1] = seed + 1; 55548bf38b28986fab6770350b72247d7114d98184mtklein colors[2] = seed + 2; 56548bf38b28986fab6770350b72247d7114d98184mtklein colors[3] = seed + 3; 5760ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein #endif 58548bf38b28986fab6770350b72247d7114d98184mtklein 5915391ee4acaa092f52742f64968ad8046b74ca81mtklein SkPMFloat fa,fb,fc,fd; 60548bf38b28986fab6770350b72247d7114d98184mtklein if (kWide) { 6115391ee4acaa092f52742f64968ad8046b74ca81mtklein SkPMFloat::From4PMColors(colors, &fa, &fb, &fc, &fd); 62548bf38b28986fab6770350b72247d7114d98184mtklein } else { 6315391ee4acaa092f52742f64968ad8046b74ca81mtklein fa = SkPMFloat::FromPMColor(colors[0]); 6415391ee4acaa092f52742f64968ad8046b74ca81mtklein fb = SkPMFloat::FromPMColor(colors[1]); 6515391ee4acaa092f52742f64968ad8046b74ca81mtklein fc = SkPMFloat::FromPMColor(colors[2]); 6615391ee4acaa092f52742f64968ad8046b74ca81mtklein fd = SkPMFloat::FromPMColor(colors[3]); 67548bf38b28986fab6770350b72247d7114d98184mtklein } 68548bf38b28986fab6770350b72247d7114d98184mtklein 69548bf38b28986fab6770350b72247d7114d98184mtklein SkPMColor back[4]; 70548bf38b28986fab6770350b72247d7114d98184mtklein switch (kClamp << 1 | kWide) { 7115391ee4acaa092f52742f64968ad8046b74ca81mtklein case 0: { 723d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[0] = fa.round(); 733d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[1] = fb.round(); 743d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[2] = fc.round(); 753d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[3] = fd.round(); 7615391ee4acaa092f52742f64968ad8046b74ca81mtklein } break; 773d626834b4b5ee2d6dda34da365dfe40520253aamtklein case 1: SkPMFloat::RoundTo4PMColors(fa, fb, fc, fd, back); break; 7815391ee4acaa092f52742f64968ad8046b74ca81mtklein case 2: { 793d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[0] = fa.roundClamp(); 803d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[1] = fb.roundClamp(); 813d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[2] = fc.roundClamp(); 823d626834b4b5ee2d6dda34da365dfe40520253aamtklein back[3] = fd.roundClamp(); 8315391ee4acaa092f52742f64968ad8046b74ca81mtklein } break; 843d626834b4b5ee2d6dda34da365dfe40520253aamtklein case 3: SkPMFloat::RoundClampTo4PMColors(fa, fb, fc, fd, back); break; 85548bf38b28986fab6770350b72247d7114d98184mtklein } 86548bf38b28986fab6770350b72247d7114d98184mtklein for (int i = 0; i < 4; i++) { 87548bf38b28986fab6770350b72247d7114d98184mtklein junk ^= back[i]; 88548bf38b28986fab6770350b72247d7114d98184mtklein } 89a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein } 9060ff4582ae76aa131aeb20a3e16c89fa3a40a41cmtklein blackhole ^= junk; 91a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein } 92a2f4be76a9d453f1fdfd55b0cec6a683f23ffe0fmtklein}; 93548bf38b28986fab6770350b72247d7114d98184mtklein 94548bf38b28986fab6770350b72247d7114d98184mtklein// Extra () help DEF_BENCH not get confused by the comma inside the <>. 9502fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench< true, true>);) 9602fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench<false, true>);) 9702fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench< true, false>);) 9802fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return (new PMFloatGetSetBench<false, false>);) 9902fd592c8d190058652bb715fb34feb7a72992e5mtklein 10002fd592c8d190058652bb715fb34feb7a72992e5mtkleinstruct PMFloatGradientBench : public Benchmark { 10102fd592c8d190058652bb715fb34feb7a72992e5mtklein const char* onGetName() override { return "PMFloat_gradient"; } 10202fd592c8d190058652bb715fb34feb7a72992e5mtklein bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; } 10302fd592c8d190058652bb715fb34feb7a72992e5mtklein 10402fd592c8d190058652bb715fb34feb7a72992e5mtklein SkPMColor fDevice[100]; 10502fd592c8d190058652bb715fb34feb7a72992e5mtklein void onDraw(const int loops, SkCanvas*) override { 1060340df5b3698aff1c9540fcdbc3dafd9d5ddb0b0mtklein Sk4f c0 = SkPMFloat::FromARGB(255, 255, 0, 0), 10702fd592c8d190058652bb715fb34feb7a72992e5mtklein c1 = SkPMFloat::FromARGB(255, 0, 0, 255), 10802fd592c8d190058652bb715fb34feb7a72992e5mtklein dc = c1 - c0, 10902fd592c8d190058652bb715fb34feb7a72992e5mtklein fx(0.1f), 11002fd592c8d190058652bb715fb34feb7a72992e5mtklein dx(0.002f), 11102fd592c8d190058652bb715fb34feb7a72992e5mtklein dcdx(dc*dx), 11202fd592c8d190058652bb715fb34feb7a72992e5mtklein dcdx4(dcdx+dcdx+dcdx+dcdx); 11302fd592c8d190058652bb715fb34feb7a72992e5mtklein 11402fd592c8d190058652bb715fb34feb7a72992e5mtklein for (int n = 0; n < loops; n++) { 1150340df5b3698aff1c9540fcdbc3dafd9d5ddb0b0mtklein Sk4f a = c0 + dc*fx + Sk4f(0.5f), // The +0.5f lets us call trunc() instead of get(). 11602fd592c8d190058652bb715fb34feb7a72992e5mtklein b = a + dcdx, 11702fd592c8d190058652bb715fb34feb7a72992e5mtklein c = b + dcdx, 11802fd592c8d190058652bb715fb34feb7a72992e5mtklein d = c + dcdx; 11902fd592c8d190058652bb715fb34feb7a72992e5mtklein for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) { 1203d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein fDevice[i+0] = SkPMFloat(a).trunc(); 1213d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein fDevice[i+1] = SkPMFloat(b).trunc(); 1223d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein fDevice[i+2] = SkPMFloat(c).trunc(); 1233d4c4a5a9feff961c6ba70443fa40ea1ca0a503emtklein fDevice[i+3] = SkPMFloat(d).trunc(); 12402fd592c8d190058652bb715fb34feb7a72992e5mtklein a += dcdx4; 12502fd592c8d190058652bb715fb34feb7a72992e5mtklein b += dcdx4; 12602fd592c8d190058652bb715fb34feb7a72992e5mtklein c += dcdx4; 12702fd592c8d190058652bb715fb34feb7a72992e5mtklein d += dcdx4; 12802fd592c8d190058652bb715fb34feb7a72992e5mtklein } 12902fd592c8d190058652bb715fb34feb7a72992e5mtklein } 13002fd592c8d190058652bb715fb34feb7a72992e5mtklein } 13102fd592c8d190058652bb715fb34feb7a72992e5mtklein}; 13202fd592c8d190058652bb715fb34feb7a72992e5mtklein 13302fd592c8d190058652bb715fb34feb7a72992e5mtkleinDEF_BENCH(return new PMFloatGradientBench;) 134