SkBlurMask.cpp revision 908276b3969cf8f8eec28026363897134c0e54e0
1ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com
2ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com/*
3ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Copyright 2006 The Android Open Source Project
4ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com *
5ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Use of this source code is governed by a BSD-style license that can be
6ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * found in the LICENSE file.
7ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com */
8ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com
98a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkBlurMask.h"
11889bd8bd7f604acae0a6303365bc82c06da1e6f3tomhudson@google.com#include "SkMath.h"
128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkTemplates.h"
1301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#include "SkEndian.h"
1401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
15908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org/**
16908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * This function performs a box blur in X, of the given radius.  If the
17908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * "transpose" parameter is true, it will transpose the pixels on write,
18908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * such that X and Y are swapped. Reads are always performed from contiguous
19908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * memory in X, for speed. The destination buffer (dst) must be at least
20908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * (width + radius * 2) * height bytes in size.
21908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org */
22908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.orgstatic int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
23908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                   int radius, int width, int height, bool transpose)
2471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
2571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    int kernelSize = radius * 2 + 1;
2671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    int border = SkMin32(width, radius * 2);
2771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    uint32_t scale = (1 << 24) / kernelSize;
28908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int new_width = width + radius * 2;
29908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int dst_x_stride = transpose ? height : 1;
30908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int dst_y_stride = transpose ? 1 : new_width;
3171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    for (int y = 0; y < height; ++y) {
3271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        int sum = 0;
33908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        uint8_t* dptr = dst + y * dst_y_stride;
34908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        const uint8_t* right = src + y * src_y_stride;
35908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        const uint8_t* left = right;
3671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        for (int x = 0; x < border; ++x) {
37908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum += *right++;
3871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
39908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
4071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
41908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        for (int x = width; x < radius * 2; ++x) {
4271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
43908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
4471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
45908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        for (int x = radius * 2; x < width; ++x) {
46908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum += *right++;
4771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
48908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum -= *left++;
49908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
5071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
51908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        for (int x = 0; x < border; ++x) {
5271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            *dptr = (sum * scale) >> 24;
53908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            sum -= *left++;
54908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
5571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
5671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        SkASSERT(sum == 0);
5771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    }
58908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    return new_width;
5971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
6071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
6101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
6201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// breakeven on Mac, and ~15% slowdown on Linux.
6301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// Reading a word at a time when building the sum buffer seems to give
6401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
65054ff1efa4f9187ce7fd20aaf3aed7cecf14e12btomhudson@google.com#if defined(SK_BUILD_FOR_WIN32)
6601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#define UNROLL_KERNEL_LOOP 1
6701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
688a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
694560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
704560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    src values at their position, plus all values above and to the left.
714560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    When we sample into this buffer, we need an initial row and column of 0s,
724560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    so we have an index correspondence as follows:
73fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
744560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    src[i, j] == sum[i+1, j+1]
754560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    sum[0, j] == sum[i, 0] == 0
76fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
774560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    We assume that the sum buffer's stride == its width
784560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com */
7903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comstatic void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
8003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                             const uint8_t src[], int srcRB) {
814560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumW = srcW + 1;
824560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com
834560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    SkASSERT(srcRB >= srcW);
848a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // mod srcRB so we can apply it after each row
854560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    srcRB -= srcW;
868a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
878a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int x, y;
888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
894560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    // zero out the top row and column
904560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    memset(sum, 0, sumW * sizeof(sum[0]));
914560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    sum += sumW;
924560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com
938a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // special case first row
948a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t X = 0;
954560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    *sum++ = 0; // initialze the first column to 0
9603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    for (x = srcW - 1; x >= 0; --x) {
978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        X = *src++ + X;
984560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        *sum++ = X;
998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
1008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    src += srcRB;
1018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
1028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // now do the rest of the rows
10303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    for (y = srcH - 1; y > 0; --y) {
1048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        uint32_t L = 0;
1058a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        uint32_t C = 0;
1064560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        *sum++ = 0; // initialze the first column to 0
10701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
10801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
10901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t T = sum[-sumW];
11001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
11101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
11201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
11301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
11401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
11501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
11601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x >= 4; x-=4) {
11701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t T = sum[-sumW];
11801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
11901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
12001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
12101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
12201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
12301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
12401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
12501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
12601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
12701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
12801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
12901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
13001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
13101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
13201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
13301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
13401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
13501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
13601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
13701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
13801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
13901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x >= 0; --x) {
1404560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com            uint32_t T = sum[-sumW];
1418a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            X = *src++ + L + T - C;
1424560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com            *sum++ = X;
1438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            L = X;
1448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            C = T;
1458a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
1468a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        src += srcRB;
1478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
1488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
1498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
15003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/**
1518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel() to be taken when the kernel
1528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image.
1538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */
1548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
1558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           int sw, int sh) {
1568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx > sw);
1578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
1598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int sumStride = sw + 1;
1618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dw = sw + 2*rx;
1638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dh = sh + 2*ry;
1648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int prev_y = -2*ry;
1668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int next_y = 1;
1678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    for (int y = 0; y < dh; y++) {
1698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int py = SkClampPos(prev_y) * sumStride;
1708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ny = SkFastMin32(next_y, sh) * sumStride;
1718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int prev_x = -2*rx;
1738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int next_x = 1;
1748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
1768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
1778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
1788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
1808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
1818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
1838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
1848caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
1858caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
1868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        prev_y += 1;
1878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        next_y += 1;
1888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
1898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com}
1908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/**
19103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  sw and sh are the width and height of the src. Since the sum buffer
19203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  matches that, but has an extra row and col at the beginning (with zeros),
19303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  we can just use sw and sh as our "max" values for pinning coordinates
19403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  when sampling into sum[][]
1958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *
1968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  The inner loop is conceptually simple; we break it into several sections
1978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  to improve performance. Here's the original version:
1988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
1998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
2008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
2018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
2038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
2068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
2078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
20801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  The sections are:
20901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     left-hand section, where prev_x is clamped to 0
21001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     center section, where neither prev_x nor next_x is clamped
21101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     right-hand section, where next_x is clamped to sw
21201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  On some operating systems, the center section is unrolled for additional
21301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  speedup.
2148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/
2154560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
2164560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                         int sw, int sh) {
2178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    if (2*rx > sw) {
2188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        kernel_clamped(dst, rx, ry, sum, sw, sh);
2198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        return;
2208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
2218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
2238a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2244560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumStride = sw + 1;
2258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2268a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dw = sw + 2*rx;
2278a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dh = sh + 2*ry;
2288a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2294560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int prev_y = -2*ry;
2304560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int next_y = 1;
2318a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx <= dw - 2*rx);
2338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2344560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    for (int y = 0; y < dh; y++) {
2354560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int py = SkClampPos(prev_y) * sumStride;
2364560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ny = SkFastMin32(next_y, sh) * sumStride;
2378a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
2384560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int prev_x = -2*rx;
2394560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int next_x = 1;
2408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int x = 0;
2418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < 2*rx; x++) {
2438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x <= 0);
2448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
2458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2468caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = 0;
2478caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = next_x;
2488caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
2508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
2538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
2548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
2558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
25601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i0 = prev_x + py;
25701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i1 = next_x + ny;
25801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i2 = next_x + py;
25901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i3 = prev_x + ny;
26001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
26101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP
26201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx - 4; x += 4) {
2638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
2648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
2658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
26601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
26701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
26801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
26901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
27001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
27101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
27201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
27301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
27501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            prev_x += 4;
27601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            next_x += 4;
27701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
27801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
27901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
28001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx; x++) {
28101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(prev_x >= 0);
28201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(next_x <= sw);
28301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
28401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
2858caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
2868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
2888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
2898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
2908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < dw; x++) {
2928caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
2938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x > sw);
2948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = prev_x;
2968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = sw;
2978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
2988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
2998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
3008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
3028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
3038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
3048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        prev_y += 1;
3068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        next_y += 1;
3078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
3088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com}
3098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/**
3118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel_interp() to be taken when the kernel
3128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image.
3138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */
3148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
3158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
3168caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx > sw);
3178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int inner_weight = 255 - outer_weight;
3198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    // round these guys up if they're bigger than 127
3218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    outer_weight += outer_weight >> 7;
3228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    inner_weight += inner_weight >> 7;
3238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
3258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
3268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int sumStride = sw + 1;
3288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dw = sw + 2*rx;
3308caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dh = sh + 2*ry;
3318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int prev_y = -2*ry;
3338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int next_y = 1;
3348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    for (int y = 0; y < dh; y++) {
3368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int py = SkClampPos(prev_y) * sumStride;
3378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ny = SkFastMin32(next_y, sh) * sumStride;
3388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ipy = SkClampPos(prev_y + 1) * sumStride;
3408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int iny = SkClampMax(next_y - 1, sh) * sumStride;
3418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int prev_x = -2*rx;
3438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int next_x = 1;
3448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3454560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        for (int x = 0; x < dw; x++) {
3468a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            int px = SkClampPos(prev_x);
3478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            int nx = SkFastMin32(next_x, sw);
3488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = SkClampPos(prev_x + 1);
3508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = SkClampMax(next_x - 1, sw);
3518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
3538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
3548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
3558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
3568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
3578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
3588a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            prev_x += 1;
3608a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            next_x += 1;
3618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
3628a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        prev_y += 1;
3638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        next_y += 1;
3648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
3658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
3668a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
36703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/**
36803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  sw and sh are the width and height of the src. Since the sum buffer
36903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  matches that, but has an extra row and col at the beginning (with zeros),
37003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  we can just use sw and sh as our "max" values for pinning coordinates
37103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  when sampling into sum[][]
3728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *
3738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  The inner loop is conceptually simple; we break it into several variants
3748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  to improve performance. Here's the original version:
3758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
3768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
3778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
3788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = SkClampPos(prev_x + 1);
3808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = SkClampMax(next_x - 1, sw);
3818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
3838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
3848caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
3858caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
3868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
3878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
3888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
3898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
3908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
3918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
39201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  The sections are:
39301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     left-hand section, where prev_x is clamped to 0
39401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     center section, where neither prev_x nor next_x is clamped
39501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     right-hand section, where next_x is clamped to sw
39601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  On some operating systems, the center section is unrolled for additional
39701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  speedup.
3988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/
3994560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel_interp(uint8_t dst[], int rx, int ry,
4004560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
4018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(rx > 0 && ry > 0);
4028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(outer_weight <= 255);
4038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    if (2*rx > sw) {
4058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
4068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        return;
4078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
4088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4098a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int inner_weight = 255 - outer_weight;
4108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4118a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // round these guys up if they're bigger than 127
4128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    outer_weight += outer_weight >> 7;
4138a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    inner_weight += inner_weight >> 7;
4148a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4158a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
4168a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
4178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4184560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumStride = sw + 1;
4198a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4208a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dw = sw + 2*rx;
4218a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dh = sh + 2*ry;
4228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4234560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int prev_y = -2*ry;
4244560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int next_y = 1;
4258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx <= dw - 2*rx);
4278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4284560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    for (int y = 0; y < dh; y++) {
4294560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int py = SkClampPos(prev_y) * sumStride;
4304560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ny = SkFastMin32(next_y, sh) * sumStride;
4318a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4324560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ipy = SkClampPos(prev_y + 1) * sumStride;
4334560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int iny = SkClampMax(next_y - 1, sh) * sumStride;
4348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4354560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int prev_x = -2*rx;
4364560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int next_x = 1;
4378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int x = 0;
4388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < 2*rx; x++) {
4408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x < 0);
4418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
4428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = 0;
4448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = next_x;
4458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4468caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = 0;
4478caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = next_x - 1;
4488caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
4508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
4518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
4528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
4538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
4548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
4558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
4578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
4588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
4598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
46001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i0 = prev_x + py;
46101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i1 = next_x + ny;
46201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i2 = next_x + py;
46301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i3 = prev_x + ny;
46401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i4 = prev_x + 1 + ipy;
46501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i5 = next_x - 1 + iny;
46601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i6 = next_x - 1 + ipy;
46701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i7 = prev_x + 1 + iny;
46801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
46901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP
47001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx - 4; x += 4) {
4718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
4728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
4738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
47401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
47501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
47601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
47701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
47801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
47901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
48001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
48101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
48201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
48301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
48401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
48501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
48601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
48701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
48801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
48901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
4908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
49101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            prev_x += 4;
49201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            next_x += 4;
49301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
49401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
4958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
49601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx; x++) {
49701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(prev_x >= 0);
49801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(next_x <= sw);
49901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
50001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
50101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
5028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
5038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
5048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5058a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            prev_x += 1;
5068a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            next_x += 1;
5078a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
5088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < dw; x++) {
5108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
5118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x > sw);
5128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = prev_x;
5148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = sw;
5158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5168caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = prev_x + 1;
5178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = sw;
5188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
5208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
5218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
5228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
5238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
5248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
5258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
5278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
5288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
5298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5308a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        prev_y += 1;
5318a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        next_y += 1;
5328a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
5338a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
5348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5358a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkColorPriv.h"
5368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5370e3c664250f561ec9f7107b92136517a72d03afdreed@android.comstatic void merge_src_with_blur(uint8_t dst[], int dstRB,
5380e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                const uint8_t src[], int srcRB,
5390e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                const uint8_t blur[], int blurRB,
5400e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                int sw, int sh) {
5410e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    dstRB -= sw;
5420e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    srcRB -= sw;
5430e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    blurRB -= sw;
5440e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    while (--sh >= 0) {
5450e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        for (int x = sw - 1; x >= 0; --x) {
5468a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
5478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            dst += 1;
5488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            src += 1;
5498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            blur += 1;
5508a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
5510e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        dst += dstRB;
5520e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        src += srcRB;
5530e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        blur += blurRB;
5548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
5558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
5568a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5578a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comstatic void clamp_with_orig(uint8_t dst[], int dstRowBytes,
5580e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                            const uint8_t src[], int srcRowBytes,
5590e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                            int sw, int sh,
5604560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                            SkBlurMask::Style style) {
5618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int x;
5620e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    while (--sh >= 0) {
5638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        switch (style) {
5648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        case SkBlurMask::kSolid_Style:
5650e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            for (x = sw - 1; x >= 0; --x) {
5660e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                int s = *src;
5670e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                int d = *dst;
5680e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
5698a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                dst += 1;
5708a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                src += 1;
5718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            }
5728a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
5738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        case SkBlurMask::kOuter_Style:
5740e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            for (x = sw - 1; x >= 0; --x) {
5750e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                if (*src) {
5768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
5770e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                }
5788a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                dst += 1;
5798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                src += 1;
5808a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            }
5818a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
5828a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        default:
5830c00f21fee3f5cfa3aa7e5d46ff94cb8cf340451tomhudson@google.com            SkDEBUGFAIL("Unexpected blur style here");
5848a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
5858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
5868a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst += dstRowBytes - sw;
5870e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        src += srcRowBytes - sw;
5888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
5898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
5908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
59103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com///////////////////////////////////////////////////////////////////////////////
5928a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5938a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// we use a local funciton to wrap the class static method to work around
5948a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// a bug in gcc98
5958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comvoid SkMask_FreeImage(uint8_t* image);
59603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comvoid SkMask_FreeImage(uint8_t* image) {
5978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkMask::FreeImage(image);
5988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
5998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.combool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
6015af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com                      SkScalar radius, Style style, Quality quality,
60271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                      SkIPoint* margin, bool separable)
6035af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com{
60403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    if (src.fFormat != SkMask::kA8_Format) {
6058a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        return false;
60603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    }
6078a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6084868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    // Force high quality off for small radii (performance)
6094868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    if (radius < SkIntToScalar(3)) quality = kLow_Quality;
6104868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
6114868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
6124868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int passCount = (quality == kHigh_Quality) ? 3 : 1;
6134868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
6144868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
6154868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int rx = SkScalarCeil(passRadius);
6164868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
6178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(rx >= 0);
6198a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT((unsigned)outer_weight <= 255);
6200e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (rx <= 0) {
6218a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        return false;
6220e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    }
6238a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6248a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int ry = rx;    // only do square blur for now
6258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6264868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int padx = passCount * rx;
6274868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int pady = passCount * ry;
6285af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com    if (margin) {
6295af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com        margin->set(padx, pady);
6305af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com    }
6314868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
6324868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org        src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
63349f0ff25a046d6001dc2d095b6fa3c30f0f46b6areed@android.com    dst->fRowBytes = dst->fBounds.width();
6348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    dst->fFormat = SkMask::kA8_Format;
6358a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    dst->fImage = NULL;
6368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6370e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (src.fImage) {
638543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        size_t dstSize = dst->computeImageSize();
639543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        if (0 == dstSize) {
640543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            return false;   // too big to allocate, abort
641543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        }
642543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com
6438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        int             sw = src.fBounds.width();
6448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        int             sh = src.fBounds.height();
6458a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        const uint8_t*  sp = src.fImage;
646543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        uint8_t*        dp = SkMask::AllocImage(dstSize);
6478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
6498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6508a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // build the blurry destination
65171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        if (separable) {
65271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
65371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            uint8_t*                tp = tmpBuffer.get();
65471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            int w = sw, h = sh;
65571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
65671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            if (quality == kHigh_Quality) {
657908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                // Do three X blurs, with a transpose on the final one.
658908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                w = boxBlur(sp, src.fRowBytes, tp, rx, w, h, false);
659908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                w = boxBlur(tp, w,             dp, rx, w, h, false);
660908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                w = boxBlur(dp, w,             tp, rx, w, h, true);
661908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                // Do three Y blurs, with a transpose on the final one.
662908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                h = boxBlur(tp, h,             dp, ry, h, w, false);
663908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                h = boxBlur(dp, h,             tp, ry, h, w, false);
664908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                h = boxBlur(tp, h,             dp, ry, h, w, true);
665908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            } else {
666908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                w = boxBlur(sp, src.fRowBytes, tp, rx, w, h, true);
667908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                h = boxBlur(tp, h,             dp, ry, h, w, true);
66871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            }
66971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        } else {
67003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
67103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
67203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
6738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            uint32_t*               sumBuffer = storage.get();
6748a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6754868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org            //pass1: sp is source, dp is destination
6768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
67703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            if (outer_weight == 255) {
6788a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
67903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            } else {
6808a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
68103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            }
6824868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
68303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            if (quality == kHigh_Quality) {
6844868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                //pass2: dp is source, tmpBuffer is destination
6854868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                int tmp_sw = sw + 2 * rx;
6864868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                int tmp_sh = sh + 2 * ry;
6874868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
6884868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
6894868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                if (outer_weight == 255)
6904868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
6914868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                else
69203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
69303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                        tmp_sw, tmp_sh, outer_weight);
6944868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
6954868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                //pass3: tmpBuffer is source, dp is destination
6964868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                tmp_sw += 2 * rx;
6974868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                tmp_sh += 2 * ry;
6984868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
6994868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                if (outer_weight == 255)
7004868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
7014868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                else
70203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
70303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                        outer_weight);
7044868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org            }
7058a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
7068a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7078a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst->fImage = dp;
7088a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // if need be, alloc the "real" dst (same size as src) and copy/merge
7098a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // the blur into it (applying the src)
7100e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        if (style == kInner_Style) {
7110e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            // now we allocate the "real" dst, mirror the size of src
712543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            size_t srcSize = src.computeImageSize();
713543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            if (0 == srcSize) {
714543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com                return false;   // too big to allocate, abort
715543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            }
716543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            dst->fImage = SkMask::AllocImage(srcSize);
7170e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            merge_src_with_blur(dst->fImage, src.fRowBytes,
7180e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                sp, src.fRowBytes,
71903016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                dp + passCount * (rx + ry * dst->fRowBytes),
72003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                dst->fRowBytes, sw, sh);
7218a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            SkMask::FreeImage(dp);
7220e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        } else if (style != kNormal_Style) {
72303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
72403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
7258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
7268a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        (void)autoCall.detach();
7278a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
7288a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7290e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (style == kInner_Style) {
7308a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst->fBounds = src.fBounds; // restore trimmed bounds
7310e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        dst->fRowBytes = src.fRowBytes;
7328a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
7338a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    return true;
7358a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
7368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
73771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
73871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                               SkScalar radius, Style style, Quality quality,
73971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                               SkIPoint* margin)
74071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
74171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
74271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
74371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
74471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
74571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                     SkScalar radius, Style style, Quality quality,
74671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                     SkIPoint* margin)
74771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
74871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
74971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
750