SkBlurMask.cpp revision 9b0d4d79f023ce91b53d9eaa47508b6722c246e6
1ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com
2ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com/*
3ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Copyright 2006 The Android Open Source Project
4ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com *
5ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Use of this source code is governed by a BSD-style license that can be
6ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * found in the LICENSE file.
7ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com */
8ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com
98a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkBlurMask.h"
11889bd8bd7f604acae0a6303365bc82c06da1e6f3tomhudson@google.com#include "SkMath.h"
128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkTemplates.h"
1301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#include "SkEndian.h"
1401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
159b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define UNROLL_SEPARABLE_LOOPS
169b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
17908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org/**
18908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * This function performs a box blur in X, of the given radius.  If the
19884e60be30e20f38b3466a4697081187d2f1f814skia.committer@gmail.com * "transpose" parameter is true, it will transpose the pixels on write,
20908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * such that X and Y are swapped. Reads are always performed from contiguous
21908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * memory in X, for speed. The destination buffer (dst) must be at least
229b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * (width + leftRadius + rightRadius) * height bytes in size.
23908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org */
24908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.orgstatic int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
25c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                   int leftRadius, int rightRadius, int width, int height,
26c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                   bool transpose)
2771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int diameter = leftRadius + rightRadius;
299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int kernelSize = diameter + 1;
309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int border = SkMin32(width, diameter);
3171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    uint32_t scale = (1 << 24) / kernelSize;
32c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
33908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int dst_x_stride = transpose ? height : 1;
34908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    int dst_y_stride = transpose ? 1 : new_width;
3571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    for (int y = 0; y < height; ++y) {
3671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        int sum = 0;
37908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        uint8_t* dptr = dst + y * dst_y_stride;
38908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        const uint8_t* right = src + y * src_y_stride;
39908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org        const uint8_t* left = right;
40336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org        for (int x = 0; x < rightRadius - leftRadius; x++) {
41336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            *dptr = 0;
42336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            dptr += dst_x_stride;
43c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        }
449b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define LEFT_BORDER_ITER \
459b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            sum += *right++; \
469b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (sum * scale) >> 24; \
47908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
489b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
499b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        int x = 0;
509b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
519b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < border - 16; x += 16) {
529b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
539b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
549b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
559b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
569b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
579b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
589b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
599b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
609b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
619b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
629b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
639b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
649b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
659b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
669b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
679b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
6871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
699b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
709b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < border; ++x) {
719b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
729b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
739b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef LEFT_BORDER_ITER
749b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define TRIVIAL_ITER \
759b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (sum * scale) >> 24; \
76908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
779b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        x = width;
789b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
799b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < diameter - 16; x += 16) {
809b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
819b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
829b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
839b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
849b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
859b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
869b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
879b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
889b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
899b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
909b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
919b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
929b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
939b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
949b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
959b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
969b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
979b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
989b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < diameter; ++x) {
999b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            TRIVIAL_ITER
10071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
1019b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef TRIVIAL_ITER
1029b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define CENTER_ITER \
1039b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            sum += *right++; \
1049b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (sum * scale) >> 24; \
1059b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            sum -= *left++; \
106908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
1079b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
1089b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        x = diameter;
1099b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
1109b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < width - 16; x += 16) {
1119b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1129b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1139b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1149b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1159b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1169b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1179b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1189b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1199b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1209b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1219b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1229b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1239b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1249b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1259b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1269b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
1279b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
1289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
1299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < width; ++x) {
1309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
13171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
1329b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef CENTER_ITER
1339b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define RIGHT_BORDER_ITER \
1349b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (sum * scale) >> 24; \
1359b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            sum -= *left++; \
136908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            dptr += dst_x_stride;
1379b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
1389b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        x = 0;
1399b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
1409b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < border - 16; x += 16) {
1419b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1429b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1439b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1449b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1459b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1469b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1479b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1489b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1499b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1509b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1519b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1529b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1539b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1549b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1559b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1569b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
1579b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
1589b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
1599b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < border; ++x) {
1609b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
16171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        }
1629b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef RIGHT_BORDER_ITER
163336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org        for (int x = 0; x < leftRadius - rightRadius; x++) {
164336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            *dptr = 0;
165336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org            dptr += dst_x_stride;
166c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        }
16771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        SkASSERT(sum == 0);
16871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    }
169908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org    return new_width;
17071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
17171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
1729b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org/**
1739b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * This variant of the box blur handles blurring of non-integer radii.  It
1749b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * keeps two running sums: an outer sum for the rounded-up kernel radius, and
1759b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
1769b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * interpolates between them.  In float this would be:
1779b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *  outer_weight * outer_sum / kernelSize +
1789b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
1799b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org */
1809b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.orgstatic int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
1819b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                         int radius, int width, int height,
1829b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                         bool transpose, uint8_t outer_weight)
1839b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org{
1849b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int diameter = radius * 2;
1859b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int kernelSize = diameter + 1;
1869b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int border = SkMin32(width, diameter);
1879b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int inner_weight = 255 - outer_weight;
1889b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    outer_weight += outer_weight >> 7;
1899b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    inner_weight += inner_weight >> 7;
1909b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    uint32_t outer_scale = (outer_weight << 16) / kernelSize;
1919b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
1929b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int new_width = width + diameter;
1939b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int dst_x_stride = transpose ? height : 1;
1949b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int dst_y_stride = transpose ? 1 : new_width;
1959b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    for (int y = 0; y < height; ++y) {
1969b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        int outer_sum = 0, inner_sum = 0;
1979b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        uint8_t* dptr = dst + y * dst_y_stride;
1989b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        const uint8_t* right = src + y * src_y_stride;
1999b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        const uint8_t* left = right;
2009b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        int x = 0;
2019b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2029b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define LEFT_BORDER_ITER \
2039b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            inner_sum = outer_sum; \
2049b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            outer_sum += *right++; \
2059b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
2069b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            dptr += dst_x_stride;
2079b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2089b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
2099b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (;x < border - 16; x += 16) {
2109b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2119b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2129b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2139b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2149b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2159b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2169b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2179b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2189b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2199b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2209b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2219b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2229b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2239b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2249b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2259b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2269b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
2279b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
2289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (;x < border; x++) {
2309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            LEFT_BORDER_ITER
2319b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
2329b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef LEFT_BORDER_ITER
2339b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (int x = width; x < diameter; ++x) {
2349b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24;
2359b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            dptr += dst_x_stride;
2369b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
2379b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        x = diameter;
2389b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2399b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define CENTER_ITER \
2409b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            inner_sum = outer_sum - *left; \
2419b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            outer_sum += *right++; \
2429b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
2439b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            dptr += dst_x_stride; \
2449b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            outer_sum -= *left++;
2459b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2469b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
2479b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < width - 16; x += 16) {
2489b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2499b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2509b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2519b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2529b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2539b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2549b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2559b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2569b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2579b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2589b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2599b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2609b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2619b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2629b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2639b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2649b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
2659b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
2669b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < width; ++x) {
2679b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            CENTER_ITER
2689b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
2699b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef CENTER_ITER
2709b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2719b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        #define RIGHT_BORDER_ITER \
2729b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            inner_sum = outer_sum - *left++; \
2739b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
2749b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            dptr += dst_x_stride; \
2759b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            outer_sum = inner_sum;
2769b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
2779b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        x = 0;
2789b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS
2799b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < border - 16; x += 16) {
2809b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2819b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2829b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2839b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2849b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2859b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2869b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2879b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2889b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2899b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2909b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2919b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2929b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2939b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2949b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2959b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
2969b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
2979b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif
2989b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        for (; x < border; x++) {
2999b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            RIGHT_BORDER_ITER
3009b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        }
3019b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef RIGHT_BORDER_ITER
3029b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org        SkASSERT(outer_sum == 0 && inner_sum == 0);
3039b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    }
3049b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    return new_width;
3059b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org}
3069b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org
307c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.orgstatic void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
308c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org{
309c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    *loRadius = *hiRadius = SkScalarCeil(passRadius);
310c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
311c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org        *loRadius = *hiRadius - 1;
312c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    }
313c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org}
314c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org
31501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
31601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// breakeven on Mac, and ~15% slowdown on Linux.
31701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// Reading a word at a time when building the sum buffer seems to give
31801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
319054ff1efa4f9187ce7fd20aaf3aed7cecf14e12btomhudson@google.com#if defined(SK_BUILD_FOR_WIN32)
32001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#define UNROLL_KERNEL_LOOP 1
32101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
3228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3234560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
3244560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    src values at their position, plus all values above and to the left.
3254560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    When we sample into this buffer, we need an initial row and column of 0s,
3264560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    so we have an index correspondence as follows:
327fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
3284560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    src[i, j] == sum[i+1, j+1]
3294560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    sum[0, j] == sum[i, 0] == 0
330fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
3314560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    We assume that the sum buffer's stride == its width
3324560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com */
33303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comstatic void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
33403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                             const uint8_t src[], int srcRB) {
3354560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumW = srcW + 1;
3364560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com
3374560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    SkASSERT(srcRB >= srcW);
3388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // mod srcRB so we can apply it after each row
3394560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    srcRB -= srcW;
3408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3418a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int x, y;
3428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3434560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    // zero out the top row and column
3444560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    memset(sum, 0, sumW * sizeof(sum[0]));
3454560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    sum += sumW;
3464560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com
3478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // special case first row
3488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t X = 0;
3494560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    *sum++ = 0; // initialze the first column to 0
35003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    for (x = srcW - 1; x >= 0; --x) {
3518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        X = *src++ + X;
3524560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        *sum++ = X;
3538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
3548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    src += srcRB;
3558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
3568a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // now do the rest of the rows
35703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    for (y = srcH - 1; y > 0; --y) {
3588a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        uint32_t L = 0;
3598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        uint32_t C = 0;
3604560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        *sum++ = 0; // initialze the first column to 0
36101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
36201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
36301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t T = sum[-sumW];
36401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
36501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
36601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
36701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
36801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
36901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
37001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x >= 4; x-=4) {
37101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t T = sum[-sumW];
37201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
37301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
37401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
37501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
37601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
37701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
37801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
37901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
38001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
38101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
38201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
38301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
38401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
38501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
38601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            T = sum[-sumW];
38701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            X = *src++ + L + T - C;
38801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *sum++ = X;
38901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            L = X;
39001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            C = T;
39101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
39201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
39301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x >= 0; --x) {
3944560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com            uint32_t T = sum[-sumW];
3958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            X = *src++ + L + T - C;
3964560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com            *sum++ = X;
3978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            L = X;
3988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            C = T;
3998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
4008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        src += srcRB;
4018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
4028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
4038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
40403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/**
4058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel() to be taken when the kernel
4068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image.
4078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */
4088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
4098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           int sw, int sh) {
4108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx > sw);
4118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
4138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int sumStride = sw + 1;
4158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4168caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dw = sw + 2*rx;
4178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dh = sh + 2*ry;
4188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int prev_y = -2*ry;
4208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int next_y = 1;
4218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    for (int y = 0; y < dh; y++) {
4238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int py = SkClampPos(prev_y) * sumStride;
4248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ny = SkFastMin32(next_y, sh) * sumStride;
4258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int prev_x = -2*rx;
4278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int next_x = 1;
4288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
4308caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
4318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
4328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
4348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
4358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
4378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
4388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
4398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        prev_y += 1;
4418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        next_y += 1;
4428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
4438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com}
4448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/**
44503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  sw and sh are the width and height of the src. Since the sum buffer
44603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  matches that, but has an extra row and col at the beginning (with zeros),
44703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  we can just use sw and sh as our "max" values for pinning coordinates
44803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  when sampling into sum[][]
4498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *
4508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  The inner loop is conceptually simple; we break it into several sections
4518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  to improve performance. Here's the original version:
4528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
4538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
4548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
4558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
4578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
4588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
4608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
4618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
46201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  The sections are:
46301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     left-hand section, where prev_x is clamped to 0
46401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     center section, where neither prev_x nor next_x is clamped
46501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     right-hand section, where next_x is clamped to sw
46601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  On some operating systems, the center section is unrolled for additional
46701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  speedup.
4688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/
4694560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
4704560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                         int sw, int sh) {
4718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    if (2*rx > sw) {
4728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        kernel_clamped(dst, rx, ry, sum, sw, sh);
4738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        return;
4748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
4758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
4778a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4784560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumStride = sw + 1;
4798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4808a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dw = sw + 2*rx;
4818a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dh = sh + 2*ry;
4828a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4834560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int prev_y = -2*ry;
4844560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int next_y = 1;
4858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx <= dw - 2*rx);
4878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4884560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    for (int y = 0; y < dh; y++) {
4894560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int py = SkClampPos(prev_y) * sumStride;
4904560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ny = SkFastMin32(next_y, sh) * sumStride;
4918a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
4924560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int prev_x = -2*rx;
4934560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int next_x = 1;
4948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int x = 0;
4958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
4968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < 2*rx; x++) {
4978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x <= 0);
4988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
4998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = 0;
5018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = next_x;
5028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
5048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
5058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
5078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
5088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
5098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
51001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i0 = prev_x + py;
51101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i1 = next_x + ny;
51201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i2 = next_x + py;
51301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i3 = prev_x + ny;
51401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
51501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP
51601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx - 4; x += 4) {
5178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
5188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
5198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
52001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
52101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
52201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
52301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
52401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
52501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
52601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
52701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
5288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
52901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            prev_x += 4;
53001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            next_x += 4;
53101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
53201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
53301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
53401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx; x++) {
53501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(prev_x >= 0);
53601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(next_x <= sw);
53701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
53801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
5398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
5408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
5428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
5438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
5448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < dw; x++) {
5468caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
5478caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x > sw);
5488caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = prev_x;
5508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = sw;
5518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
5538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8(tmp * scale >> 24);
5548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
5568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
5578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
5588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        prev_y += 1;
5608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        next_y += 1;
5618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
5628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com}
5638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/**
5658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel_interp() to be taken when the kernel
5668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image.
5678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */
5688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
5698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
5708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx > sw);
5718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int inner_weight = 255 - outer_weight;
5738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    // round these guys up if they're bigger than 127
5758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    outer_weight += outer_weight >> 7;
5768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    inner_weight += inner_weight >> 7;
5778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
5798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
5808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int sumStride = sw + 1;
5828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dw = sw + 2*rx;
5848caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int dh = sh + 2*ry;
5858caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int prev_y = -2*ry;
5878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    int next_y = 1;
5888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    for (int y = 0; y < dh; y++) {
5908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int py = SkClampPos(prev_y) * sumStride;
5918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ny = SkFastMin32(next_y, sh) * sumStride;
5928caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int ipy = SkClampPos(prev_y + 1) * sumStride;
5948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int iny = SkClampMax(next_y - 1, sh) * sumStride;
5958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
5968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int prev_x = -2*rx;
5978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int next_x = 1;
5988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
5994560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        for (int x = 0; x < dw; x++) {
6008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            int px = SkClampPos(prev_x);
6018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            int nx = SkFastMin32(next_x, sw);
6028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = SkClampPos(prev_x + 1);
6048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = SkClampMax(next_x - 1, sw);
6058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
6068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
6078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
6088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
6098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
6108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
6118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
6128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6138a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            prev_x += 1;
6148a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            next_x += 1;
6158a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
6168a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        prev_y += 1;
6178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        next_y += 1;
6188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
6198a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
6208a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
62103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/**
62203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  sw and sh are the width and height of the src. Since the sum buffer
62303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  matches that, but has an extra row and col at the beginning (with zeros),
62403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  we can just use sw and sh as our "max" values for pinning coordinates
62503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com *  when sampling into sum[][]
6268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *
6278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  The inner loop is conceptually simple; we break it into several variants
6288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *  to improve performance. Here's the original version:
6298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (int x = 0; x < dw; x++) {
6308caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = SkClampPos(prev_x);
6318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = SkFastMin32(next_x, sw);
6328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
6338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = SkClampPos(prev_x + 1);
6348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = SkClampMax(next_x - 1, sw);
6358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
6368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
6378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
6388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
6398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
6408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
6418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
6428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
6438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
6448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
6458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
64601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  The sections are:
64701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     left-hand section, where prev_x is clamped to 0
64801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     center section, where neither prev_x nor next_x is clamped
64901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *     right-hand section, where next_x is clamped to sw
65001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  On some operating systems, the center section is unrolled for additional
65101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *  speedup.
6528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/
6534560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel_interp(uint8_t dst[], int rx, int ry,
6544560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
6558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(rx > 0 && ry > 0);
6568a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(outer_weight <= 255);
6578a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    if (2*rx > sw) {
6598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
6608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        return;
6618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    }
6628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
6638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int inner_weight = 255 - outer_weight;
6648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    // round these guys up if they're bigger than 127
6668a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    outer_weight += outer_weight >> 7;
6678a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    inner_weight += inner_weight >> 7;
6688a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6698a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
6708a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
6718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6724560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int sumStride = sw + 1;
6738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6748a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dw = sw + 2*rx;
6758a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int dh = sh + 2*ry;
6768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6774560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int prev_y = -2*ry;
6784560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    int next_y = 1;
6798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com    SkASSERT(2*rx <= dw - 2*rx);
6818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
6824560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com    for (int y = 0; y < dh; y++) {
6834560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int py = SkClampPos(prev_y) * sumStride;
6844560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ny = SkFastMin32(next_y, sh) * sumStride;
6858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6864560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int ipy = SkClampPos(prev_y + 1) * sumStride;
6874560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int iny = SkClampMax(next_y - 1, sh) * sumStride;
6888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6894560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int prev_x = -2*rx;
6904560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com        int next_x = 1;
6918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        int x = 0;
6928a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < 2*rx; x++) {
6948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x < 0);
6958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
6968a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
6978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = 0;
6988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = next_x;
6998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = 0;
7018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = next_x - 1;
7028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
7048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
7058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
7068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
7078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
7088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
7098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
7118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
7128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
7138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
71401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i0 = prev_x + py;
71501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i1 = next_x + ny;
71601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i2 = next_x + py;
71701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i3 = prev_x + ny;
71801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i4 = prev_x + 1 + ipy;
71901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i5 = next_x - 1 + iny;
72001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i6 = next_x - 1 + ipy;
72101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        int i7 = prev_x + 1 + iny;
72201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
72301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP
72401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx - 4; x += 4) {
7258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
7268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x <= sw);
7278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
72801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
72901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
73001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
73101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
73201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
73301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
73401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
73501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
73601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
73701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
73801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
73901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
74001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
74101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
74201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
74301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
7448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
74501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            prev_x += 4;
74601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            next_x += 4;
74701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        }
74801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif
7498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
75001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com        for (; x < dw - 2*rx; x++) {
75101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(prev_x >= 0);
75201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            SkASSERT(next_x <= sw);
75301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com
75401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
75501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
7568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
7578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
7588a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            prev_x += 1;
7608a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            next_x += 1;
7618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
7628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        for (; x < dw; x++) {
7648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(prev_x >= 0);
7658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            SkASSERT(next_x > sw);
7668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int px = prev_x;
7688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int nx = sw;
7698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int ipx = prev_x + 1;
7718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            int inx = sw;
7728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
7748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[nx+py] - sum[px+ny];
7758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
7768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                               - sum[inx+ipy] - sum[ipx+iny];
7778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            *dst++ = SkToU8((outer_sum * outer_scale
7788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com                           + inner_sum * inner_scale) >> 24);
7798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            prev_x += 1;
7818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com            next_x += 1;
7828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com        }
7838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com
7848a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        prev_y += 1;
7858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        next_y += 1;
7868a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
7878a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
7888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkColorPriv.h"
7908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
7910e3c664250f561ec9f7107b92136517a72d03afdreed@android.comstatic void merge_src_with_blur(uint8_t dst[], int dstRB,
7920e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                const uint8_t src[], int srcRB,
7930e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                const uint8_t blur[], int blurRB,
7940e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                int sw, int sh) {
7950e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    dstRB -= sw;
7960e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    srcRB -= sw;
7970e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    blurRB -= sw;
7980e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    while (--sh >= 0) {
7990e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        for (int x = sw - 1; x >= 0; --x) {
8008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
8018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            dst += 1;
8028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            src += 1;
8038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            blur += 1;
8048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
8050e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        dst += dstRB;
8060e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        src += srcRB;
8070e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        blur += blurRB;
8088a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
8098a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
8108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8118a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comstatic void clamp_with_orig(uint8_t dst[], int dstRowBytes,
8120e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                            const uint8_t src[], int srcRowBytes,
8130e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                            int sw, int sh,
8144560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com                            SkBlurMask::Style style) {
8158a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int x;
8160e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    while (--sh >= 0) {
8178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        switch (style) {
8188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        case SkBlurMask::kSolid_Style:
8190e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            for (x = sw - 1; x >= 0; --x) {
8200e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                int s = *src;
8210e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                int d = *dst;
8220e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
8238a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                dst += 1;
8248a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                src += 1;
8258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            }
8268a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
8278a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        case SkBlurMask::kOuter_Style:
8280e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            for (x = sw - 1; x >= 0; --x) {
8290e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                if (*src) {
8308a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
8310e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                }
8328a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                dst += 1;
8338a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                src += 1;
8348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            }
8358a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
8368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        default:
8370c00f21fee3f5cfa3aa7e5d46ff94cb8cf340451tomhudson@google.com            SkDEBUGFAIL("Unexpected blur style here");
8388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            break;
8398a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
8408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst += dstRowBytes - sw;
8410e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        src += srcRowBytes - sw;
8428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
8438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
8448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
84503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com///////////////////////////////////////////////////////////////////////////////
8468a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// we use a local funciton to wrap the class static method to work around
8488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// a bug in gcc98
8498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comvoid SkMask_FreeImage(uint8_t* image);
85003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comvoid SkMask_FreeImage(uint8_t* image) {
8518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkMask::FreeImage(image);
8528a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
8538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.combool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
8555af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com                      SkScalar radius, Style style, Quality quality,
85671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                      SkIPoint* margin, bool separable)
8575af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com{
85803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    if (src.fFormat != SkMask::kA8_Format) {
8598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        return false;
86003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com    }
8618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8624868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    // Force high quality off for small radii (performance)
863c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org    if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality;
8644868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
8654868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
8669b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org    int passCount = (quality == kHigh_Quality || separable) ? 3 : 1;
8674868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
8684868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
8694868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int rx = SkScalarCeil(passRadius);
8704868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
8718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8728a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT(rx >= 0);
8738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    SkASSERT((unsigned)outer_weight <= 255);
8740e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (rx <= 0) {
8758a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        return false;
8760e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    }
8778a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8788a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    int ry = rx;    // only do square blur for now
8798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8804868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int padx = passCount * rx;
8814868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    int pady = passCount * ry;
8825af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com    if (margin) {
8835af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com        margin->set(padx, pady);
8845af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com    }
8854868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
8864868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org        src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
88749f0ff25a046d6001dc2d095b6fa3c30f0f46b6areed@android.com    dst->fRowBytes = dst->fBounds.width();
8888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    dst->fFormat = SkMask::kA8_Format;
8898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    dst->fImage = NULL;
8908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
8910e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (src.fImage) {
892543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        size_t dstSize = dst->computeImageSize();
893543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        if (0 == dstSize) {
894543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            return false;   // too big to allocate, abort
895543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        }
896543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com
8978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        int             sw = src.fBounds.width();
8988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        int             sh = src.fBounds.height();
8998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        const uint8_t*  sp = src.fImage;
900543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com        uint8_t*        dp = SkMask::AllocImage(dstSize);
9018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
9028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
9038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
9048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // build the blurry destination
90571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        if (separable) {
90671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
90771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            uint8_t*                tp = tmpBuffer.get();
90871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            int w = sw, h = sh;
90971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
9109b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org            if (outer_weight == 255 || quality == kLow_Quality) {
9119b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                // For separable blurs, low quality means no interpolation.
912c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                int loRadius, hiRadius;
913c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                get_adjusted_radii(passRadius, &loRadius, &hiRadius);
914908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                // Do three X blurs, with a transpose on the final one.
915c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
916c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
917c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
918908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org                // Do three Y blurs, with a transpose on the final one.
919c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
920c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
921c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org                h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
922908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org            } else {
9239b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                // Do three X blurs, with a transpose on the final one.
9249b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);
9259b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outer_weight);
9269b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outer_weight);
9279b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                // Do three Y blurs, with a transpose on the final one.
9289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outer_weight);
9299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outer_weight);
9309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org                h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outer_weight);
93171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org            }
93271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org        } else {
93303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
93403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
93503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
9368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            uint32_t*               sumBuffer = storage.get();
9378a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
9384868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org            //pass1: sp is source, dp is destination
9398a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
94003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            if (outer_weight == 255) {
9418a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
94203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            } else {
9438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
94403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            }
9454868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
94603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            if (quality == kHigh_Quality) {
9474868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                //pass2: dp is source, tmpBuffer is destination
9484868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                int tmp_sw = sw + 2 * rx;
9494868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                int tmp_sh = sh + 2 * ry;
9504868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
9514868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
9524868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                if (outer_weight == 255)
9534868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
9544868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                else
95503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
95603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                        tmp_sw, tmp_sh, outer_weight);
9574868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org
9584868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                //pass3: tmpBuffer is source, dp is destination
9594868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                tmp_sw += 2 * rx;
9604868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                tmp_sh += 2 * ry;
9614868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
9624868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                if (outer_weight == 255)
9634868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
9644868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org                else
96503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
96603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                        outer_weight);
9674868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org            }
9688a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
9698a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
9708a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst->fImage = dp;
9718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // if need be, alloc the "real" dst (same size as src) and copy/merge
9728a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        // the blur into it (applying the src)
9730e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        if (style == kInner_Style) {
9740e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            // now we allocate the "real" dst, mirror the size of src
975543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            size_t srcSize = src.computeImageSize();
976543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            if (0 == srcSize) {
977543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com                return false;   // too big to allocate, abort
978543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            }
979543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com            dst->fImage = SkMask::AllocImage(srcSize);
9800e3c664250f561ec9f7107b92136517a72d03afdreed@android.com            merge_src_with_blur(dst->fImage, src.fRowBytes,
9810e3c664250f561ec9f7107b92136517a72d03afdreed@android.com                                sp, src.fRowBytes,
98203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                dp + passCount * (rx + ry * dst->fRowBytes),
98303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                                dst->fRowBytes, sw, sh);
9848a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com            SkMask::FreeImage(dp);
9850e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        } else if (style != kNormal_Style) {
98603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
98703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
9888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        }
9898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        (void)autoCall.detach();
9908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
9918a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
9920e3c664250f561ec9f7107b92136517a72d03afdreed@android.com    if (style == kInner_Style) {
9938a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com        dst->fBounds = src.fBounds; // restore trimmed bounds
9940e3c664250f561ec9f7107b92136517a72d03afdreed@android.com        dst->fRowBytes = src.fRowBytes;
9958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    }
9968a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
9978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com    return true;
9988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com}
9998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com
100071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
100171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                               SkScalar radius, Style style, Quality quality,
100271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                               SkIPoint* margin)
100371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
100471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
100571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
100671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org
100771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
100871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                     SkScalar radius, Style style, Quality quality,
100971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org                     SkIPoint* margin)
101071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{
101171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
101271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org}
1013