SkBlurMask.cpp revision 336b4da6b0d20f27f9980b03415354a2f0698e18
1ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com
2ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com/*
3ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Copyright 2006 The Android Open Source Project
4ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com *
5ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Use of this source code is governed by a BSD-style license that can be
6ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * found in the LICENSE file.
7ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com */
8ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com
98a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkBlurMask.h"
11889bd8bd7f604acae0a6303365bc82c06da1e6f3tomhudson@google.com#include "SkMath.h"
128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkTemplates.h"
1301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#include "SkEndian.h"
1401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
15908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org/**
16908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * This function performs a box blur in X, of the given radius.  If the
17884e60be30e20f38b3466a4697081187d2f1f814skia.committer@gmail.com * "transpose" parameter is true, it will transpose the pixels on write,
18908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * such that X and Y are swapped. Reads are always performed from contiguous
19908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * memory in X, for speed. The destination buffer (dst) must be at least
20908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * (width + radius * 2) * height bytes in size.
21908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org */
22908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.orgstatic int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
23c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                   int leftRadius, int rightRadius, int width, int height,
24c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                   bool transpose)
2571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
26c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    int kernelSize = leftRadius + rightRadius + 1;
27c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    int border = SkMin32(width, leftRadius + rightRadius);
2871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    uint32_t scale = (1 << 24) / kernelSize;
29c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
30908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int dst_x_stride = transpose ? height : 1;
31908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int dst_y_stride = transpose ? 1 : new_width;
3271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    for (int y = 0; y < height; ++y) {
3371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        int sum = 0;
34908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        uint8_t* dptr = dst + y * dst_y_stride;
35908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        const uint8_t* right = src + y * src_y_stride;
36908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        const uint8_t* left = right;
37336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org        for (int x = 0; x < rightRadius - leftRadius; x++) {
38336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            *dptr = 0;
39336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            dptr += dst_x_stride;
40c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        }
4171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        for (int x = 0; x < border; ++x) {
42908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum += *right++;
4371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
44908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
4571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
46c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        for (int x = width; x < leftRadius + rightRadius; ++x) {
4771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
48908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
4971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
50c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        for (int x = leftRadius + rightRadius; x < width; ++x) {
51908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum += *right++;
5271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
53908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum -= *left++;
54908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
5571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
56908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        for (int x = 0; x < border; ++x) {
5771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
58908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum -= *left++;
59908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
6071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
61336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org        for (int x = 0; x < leftRadius - rightRadius; x++) {
62336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            *dptr = 0;
63336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            dptr += dst_x_stride;
64c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        }
6571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        SkASSERT(sum == 0);
6671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    }
67908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    return new_width;
6871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
6971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
70c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.orgstatic void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
71c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org{
72c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    *loRadius = *hiRadius = SkScalarCeil(passRadius);
73c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
74c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        *loRadius = *hiRadius - 1;
75c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    }
76c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org}
77c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
82054ff1efa4f9187ce7fd20aaf3aed7cecf14e12btomhudson@google.com#if defined(SK_BUILD_FOR_WIN32)
8301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#define UNROLL_KERNEL_LOOP 1
8401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
864560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
874560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    src values at their position, plus all values above and to the left.
884560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    When we sample into this buffer, we need an initial row and column of 0s,
894560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    so we have an index correspondence as follows:
90fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
914560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    src[i, j] == sum[i+1, j+1]
924560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    sum[0, j] == sum[i, 0] == 0
93fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
944560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    We assume that the sum buffer's stride == its width
954560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com */
9603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comstatic void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
9703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                             const uint8_t src[], int srcRB) {
984560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumW = srcW + 1;
994560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com
1004560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    SkASSERT(srcRB >= srcW);
1018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // mod srcRB so we can apply it after each row
1024560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    srcRB -= srcW;
1038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
1048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int x, y;
1058a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
1064560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    // zero out the top row and column
1074560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    memset(sum, 0, sumW * sizeof(sum[0]));
1084560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    sum += sumW;
1094560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com
1108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // special case first row
1118a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t X = 0;
1124560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    *sum++ = 0; // initialze the first column to 0
11303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    for (x = srcW - 1; x >= 0; --x) {
1148a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        X = *src++ + X;
1154560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        *sum++ = X;
1168a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
1178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    src += srcRB;
1188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
1198a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // now do the rest of the rows
12003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    for (y = srcH - 1; y > 0; --y) {
1218a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        uint32_t L = 0;
1228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        uint32_t C = 0;
1234560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        *sum++ = 0; // initialze the first column to 0
12401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
12501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
12601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t T = sum[-sumW];
12701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
12801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
12901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
13001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
13101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
13201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
13301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x >= 4; x-=4) {
13401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t T = sum[-sumW];
13501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
13601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
13701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
13801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
13901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
14001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
14101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
14201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
14301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
14401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
14501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
14601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
14701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
14801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
14901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
15001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
15101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
15201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
15301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
15401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
15501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
15601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x >= 0; --x) {
1574560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com            uint32_t T = sum[-sumW];
1588a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            X = *src++ + L + T - C;
1594560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com            *sum++ = X;
1608a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            L = X;
1618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            C = T;
1628a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
1638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        src += srcRB;
1648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
1658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
1668a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
16703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/**
1688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel() to be taken when the kernel
1698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image.
1708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */
1718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
1728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           int sw, int sh) {
1738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx > sw);
1748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
1768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int sumStride = sw + 1;
1788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dw = sw + 2*rx;
1808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dh = sh + 2*ry;
1818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int prev_y = -2*ry;
1838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int next_y = 1;
1848caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1858caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    for (int y = 0; y < dh; y++) {
1868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int py = SkClampPos(prev_y) * sumStride;
1878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ny = SkFastMin32(next_y, sh) * sumStride;
1888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int prev_x = -2*rx;
1908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int next_x = 1;
1918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1928caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
1938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
1948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
1958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
1978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
1988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
2008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
2018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
2028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        prev_y += 1;
2048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        next_y += 1;
2058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
2068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com}
2078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/**
20803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  sw and sh are the width and height of the src. Since the sum buffer
20903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  matches that, but has an extra row and col at the beginning (with zeros),
21003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  we can just use sw and sh as our "max" values for pinning coordinates
21103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  when sampling into sum[][]
2128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *
2138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  The inner loop is conceptually simple; we break it into several sections
2148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  to improve performance. Here's the original version:
2158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
2168caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
2178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
2188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
2208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
2238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
2248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
22501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  The sections are:
22601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     left-hand section, where prev_x is clamped to 0
22701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     center section, where neither prev_x nor next_x is clamped
22801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     right-hand section, where next_x is clamped to sw
22901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  On some operating systems, the center section is unrolled for additional
23001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  speedup.
2318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/
2324560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
2334560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                         int sw, int sh) {
2348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    if (2*rx > sw) {
2358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        kernel_clamped(dst, rx, ry, sum, sw, sh);
2368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        return;
2378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
2388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2398a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
2408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2414560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumStride = sw + 1;
2428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dw = sw + 2*rx;
2448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dh = sh + 2*ry;
2458a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2464560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int prev_y = -2*ry;
2474560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int next_y = 1;
2488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx <= dw - 2*rx);
2508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2514560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    for (int y = 0; y < dh; y++) {
2524560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int py = SkClampPos(prev_y) * sumStride;
2534560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ny = SkFastMin32(next_y, sh) * sumStride;
2548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2554560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int prev_x = -2*rx;
2564560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int next_x = 1;
2578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int x = 0;
2588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < 2*rx; x++) {
2608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x <= 0);
2618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
2628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = 0;
2648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = next_x;
2658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
2678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
2708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
2718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
2728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
27301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i0 = prev_x + py;
27401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i1 = next_x + ny;
27501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i2 = next_x + py;
27601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i3 = prev_x + ny;
27701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
27801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP
27901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx - 4; x += 4) {
2808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
2818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
2828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
28301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
28401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
28501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
28601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
28701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
28801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
28901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
29001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
29201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            prev_x += 4;
29301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            next_x += 4;
29401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
29501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
29601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
29701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx; x++) {
29801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(prev_x >= 0);
29901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(next_x <= sw);
30001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
30101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
3028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
3038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
3058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
3068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
3078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < dw; x++) {
3098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
3108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x > sw);
3118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = prev_x;
3138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = sw;
3148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
3168caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
3178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
3198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
3208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
3218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        prev_y += 1;
3238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        next_y += 1;
3248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
3258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com}
3268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/**
3288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel_interp() to be taken when the kernel
3298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image.
3308caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */
3318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
3328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
3338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx > sw);
3348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int inner_weight = 255 - outer_weight;
3368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    // round these guys up if they're bigger than 127
3388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    outer_weight += outer_weight >> 7;
3398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    inner_weight += inner_weight >> 7;
3408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
3428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
3438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int sumStride = sw + 1;
3458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3468caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dw = sw + 2*rx;
3478caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dh = sh + 2*ry;
3488caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int prev_y = -2*ry;
3508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int next_y = 1;
3518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    for (int y = 0; y < dh; y++) {
3538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int py = SkClampPos(prev_y) * sumStride;
3548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ny = SkFastMin32(next_y, sh) * sumStride;
3558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ipy = SkClampPos(prev_y + 1) * sumStride;
3578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int iny = SkClampMax(next_y - 1, sh) * sumStride;
3588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int prev_x = -2*rx;
3608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int next_x = 1;
3618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3624560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        for (int x = 0; x < dw; x++) {
3638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            int px = SkClampPos(prev_x);
3648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            int nx = SkFastMin32(next_x, sw);
3658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = SkClampPos(prev_x + 1);
3678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = SkClampMax(next_x - 1, sw);
3688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
3708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
3718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
3728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
3738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
3748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
3758a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            prev_x += 1;
3778a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            next_x += 1;
3788a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
3798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        prev_y += 1;
3808a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        next_y += 1;
3818a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
3828a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
3838a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
38403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/**
38503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  sw and sh are the width and height of the src. Since the sum buffer
38603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  matches that, but has an extra row and col at the beginning (with zeros),
38703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  we can just use sw and sh as our "max" values for pinning coordinates
38803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  when sampling into sum[][]
3898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *
3908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  The inner loop is conceptually simple; we break it into several variants
3918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  to improve performance. Here's the original version:
3928caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
3938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
3948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
3958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = SkClampPos(prev_x + 1);
3978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = SkClampMax(next_x - 1, sw);
3988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
4008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
4018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
4028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
4038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
4048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
4058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
4078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
4088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
40901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  The sections are:
41001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     left-hand section, where prev_x is clamped to 0
41101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     center section, where neither prev_x nor next_x is clamped
41201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     right-hand section, where next_x is clamped to sw
41301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  On some operating systems, the center section is unrolled for additional
41401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  speedup.
4158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/
4164560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel_interp(uint8_t dst[], int rx, int ry,
4174560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
4188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(rx > 0 && ry > 0);
4198a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(outer_weight <= 255);
4208a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    if (2*rx > sw) {
4228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
4238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        return;
4248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
4258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4268a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int inner_weight = 255 - outer_weight;
4278a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4288a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // round these guys up if they're bigger than 127
4298a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    outer_weight += outer_weight >> 7;
4308a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    inner_weight += inner_weight >> 7;
4318a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4328a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
4338a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
4348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4354560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumStride = sw + 1;
4368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4378a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dw = sw + 2*rx;
4388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dh = sh + 2*ry;
4398a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4404560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int prev_y = -2*ry;
4414560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int next_y = 1;
4428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx <= dw - 2*rx);
4448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4454560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    for (int y = 0; y < dh; y++) {
4464560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int py = SkClampPos(prev_y) * sumStride;
4474560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ny = SkFastMin32(next_y, sh) * sumStride;
4488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4494560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ipy = SkClampPos(prev_y + 1) * sumStride;
4504560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int iny = SkClampMax(next_y - 1, sh) * sumStride;
4518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4524560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int prev_x = -2*rx;
4534560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int next_x = 1;
4548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int x = 0;
4558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < 2*rx; x++) {
4578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x < 0);
4588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
4598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = 0;
4618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = next_x;
4628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = 0;
4648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = next_x - 1;
4658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
4678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
4688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
4698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
4708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
4718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
4728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
4748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
4758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
4768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
47701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i0 = prev_x + py;
47801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i1 = next_x + ny;
47901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i2 = next_x + py;
48001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i3 = prev_x + ny;
48101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i4 = prev_x + 1 + ipy;
48201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i5 = next_x - 1 + iny;
48301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i6 = next_x - 1 + ipy;
48401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i7 = prev_x + 1 + iny;
48501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
48601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP
48701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx - 4; x += 4) {
4888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
4898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
4908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
49101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
49201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
49301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
49401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
49501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
49601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
49701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
49801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
49901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
50001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
50101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
50201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
50301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
50401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
50501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
50601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
5078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
50801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            prev_x += 4;
50901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            next_x += 4;
51001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
51101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
5128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
51301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx; x++) {
51401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(prev_x >= 0);
51501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(next_x <= sw);
51601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
51701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
51801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
5198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
5208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
5218a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            prev_x += 1;
5238a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            next_x += 1;
5248a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
5258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < dw; x++) {
5278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
5288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x > sw);
5298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5308caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = prev_x;
5318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = sw;
5328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = prev_x + 1;
5348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = sw;
5358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
5378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
5388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
5398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
5408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
5418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
5428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
5448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
5458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
5468caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        prev_y += 1;
5488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        next_y += 1;
5498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
5508a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
5518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5528a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkColorPriv.h"
5538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5540e3c664250f561ec9f7107b92136517a72d03afdreed@android.comstatic void merge_src_with_blur(uint8_t dst[], int dstRB,
5550e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                const uint8_t src[], int srcRB,
5560e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                const uint8_t blur[], int blurRB,
5570e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                int sw, int sh) {
5580e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    dstRB -= sw;
5590e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    srcRB -= sw;
5600e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    blurRB -= sw;
5610e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    while (--sh >= 0) {
5620e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        for (int x = sw - 1; x >= 0; --x) {
5638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
5648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            dst += 1;
5658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            src += 1;
5668a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            blur += 1;
5678a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
5680e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        dst += dstRB;
5690e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        src += srcRB;
5700e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        blur += blurRB;
5718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
5728a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
5738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5748a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comstatic void clamp_with_orig(uint8_t dst[], int dstRowBytes,
5750e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                            const uint8_t src[], int srcRowBytes,
5760e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                            int sw, int sh,
5774560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                            SkBlurMask::Style style) {
5788a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int x;
5790e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    while (--sh >= 0) {
5808a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        switch (style) {
5818a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        case SkBlurMask::kSolid_Style:
5820e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            for (x = sw - 1; x >= 0; --x) {
5830e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                int s = *src;
5840e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                int d = *dst;
5850e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
5868a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                dst += 1;
5878a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                src += 1;
5888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            }
5898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
5908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        case SkBlurMask::kOuter_Style:
5910e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            for (x = sw - 1; x >= 0; --x) {
5920e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                if (*src) {
5938a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
5940e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                }
5958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                dst += 1;
5968a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                src += 1;
5978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            }
5988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
5998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        default:
6000c00f21fee3f5cfa3aa7e5d46ff94cb8cf340451tomhudson@google.com            SkDEBUGFAIL("Unexpected blur style here");
6018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
6028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
6038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst += dstRowBytes - sw;
6040e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        src += srcRowBytes - sw;
6058a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
6068a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
6078a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
60803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com///////////////////////////////////////////////////////////////////////////////
6098a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// we use a local funciton to wrap the class static method to work around
6118a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// a bug in gcc98
6128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comvoid SkMask_FreeImage(uint8_t* image);
61303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comvoid SkMask_FreeImage(uint8_t* image) {
6148a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkMask::FreeImage(image);
6158a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
6168a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.combool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
6185af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com                      SkScalar radius, Style style, Quality quality,
61971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                      SkIPoint* margin, bool separable)
6205af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com{
62103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    if (src.fFormat != SkMask::kA8_Format) {
6228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        return false;
62303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    }
6248a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6254868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    // Force high quality off for small radii (performance)
626c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality;
6274868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
6284868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
6294868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int passCount = (quality == kHigh_Quality) ? 3 : 1;
6304868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
6314868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
6324868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int rx = SkScalarCeil(passRadius);
6334868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
6348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6358a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(rx >= 0);
6368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT((unsigned)outer_weight <= 255);
6370e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (rx <= 0) {
6388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        return false;
6390e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    }
6408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6418a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int ry = rx;    // only do square blur for now
6428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6434868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int padx = passCount * rx;
6444868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int pady = passCount * ry;
6455af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com    if (margin) {
6465af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com        margin->set(padx, pady);
6475af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com    }
6484868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
6494868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org        src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
65049f0ff25a046d6001dc2d095b6fa3c30f0f46b6areed@android.com    dst->fRowBytes = dst->fBounds.width();
6518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    dst->fFormat = SkMask::kA8_Format;
6528a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    dst->fImage = NULL;
6538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6540e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (src.fImage) {
655543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        size_t dstSize = dst->computeImageSize();
656543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        if (0 == dstSize) {
657543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            return false;   // too big to allocate, abort
658543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        }
659543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com
6608a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        int             sw = src.fBounds.width();
6618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        int             sh = src.fBounds.height();
6628a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        const uint8_t*  sp = src.fImage;
663543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        uint8_t*        dp = SkMask::AllocImage(dstSize);
6648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
6668a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6678a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // build the blurry destination
66871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        if (separable) {
66971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
67071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            uint8_t*                tp = tmpBuffer.get();
67171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            int w = sw, h = sh;
67271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
67371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            if (quality == kHigh_Quality) {
674c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                int loRadius, hiRadius;
675c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                get_adjusted_radii(passRadius, &loRadius, &hiRadius);
676908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                // Do three X blurs, with a transpose on the final one.
677c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
678c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
679c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
680908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                // Do three Y blurs, with a transpose on the final one.
681c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
682c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
683c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
684908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            } else {
685c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
686c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
68771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            }
68871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        } else {
68903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
69003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
69103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
6928a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            uint32_t*               sumBuffer = storage.get();
6938a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6944868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org            //pass1: sp is source, dp is destination
6958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
69603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            if (outer_weight == 255) {
6978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
69803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            } else {
6998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
70003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            }
7014868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
70203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            if (quality == kHigh_Quality) {
7034868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                //pass2: dp is source, tmpBuffer is destination
7044868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                int tmp_sw = sw + 2 * rx;
7054868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                int tmp_sh = sh + 2 * ry;
7064868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
7074868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
7084868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                if (outer_weight == 255)
7094868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
7104868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                else
71103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
71203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                        tmp_sw, tmp_sh, outer_weight);
7134868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
7144868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                //pass3: tmpBuffer is source, dp is destination
7154868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                tmp_sw += 2 * rx;
7164868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                tmp_sh += 2 * ry;
7174868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
7184868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                if (outer_weight == 255)
7194868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
7204868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                else
72103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
72203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                        outer_weight);
7234868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org            }
7248a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
7258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7268a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst->fImage = dp;
7278a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // if need be, alloc the "real" dst (same size as src) and copy/merge
7288a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // the blur into it (applying the src)
7290e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        if (style == kInner_Style) {
7300e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            // now we allocate the "real" dst, mirror the size of src
731543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            size_t srcSize = src.computeImageSize();
732543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            if (0 == srcSize) {
733543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com                return false;   // too big to allocate, abort
734543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            }
735543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            dst->fImage = SkMask::AllocImage(srcSize);
7360e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            merge_src_with_blur(dst->fImage, src.fRowBytes,
7370e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                sp, src.fRowBytes,
73803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                dp + passCount * (rx + ry * dst->fRowBytes),
73903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                dst->fRowBytes, sw, sh);
7408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            SkMask::FreeImage(dp);
7410e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        } else if (style != kNormal_Style) {
74203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
74303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
7448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
7458a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        (void)autoCall.detach();
7468a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
7478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7480e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (style == kInner_Style) {
7498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst->fBounds = src.fBounds; // restore trimmed bounds
7500e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        dst->fRowBytes = src.fRowBytes;
7518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
7528a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    return true;
7548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
7558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
/**
 * Convenience entry point that forwards to the seven-argument
 * SkBlurMask::Blur overload, passing dst, src, radius, style, quality and
 * margin through unchanged and supplying true as the final argument.
 *
 * NOTE(review): judging by this function's name, the trailing true presumably
 * selects the separable blur implementation -- confirm against the
 * seven-argument overload's parameter list.
 *
 * Returns whatever the seven-argument overload returns.
 */
75671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
75771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                               SkScalar radius, Style style, Quality quality,
75871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                               SkIPoint* margin)
75971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
76071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
76171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
76271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
/**
 * Six-argument overload of Blur. Forwards to the seven-argument
 * SkBlurMask::Blur overload, passing dst, src, radius, style, quality and
 * margin through unchanged and supplying false as the final argument.
 *
 * NOTE(review): the meaning of the trailing boolean is defined by the
 * seven-argument overload, whose signature is not shown here; presumably
 * false selects the non-separable implementation -- confirm.
 *
 * Returns whatever the seven-argument overload returns.
 */
76371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
76471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                     SkScalar radius, Style style, Quality quality,
76571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                     SkIPoint* margin)
76671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
76771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
76871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
769