15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Copyright 2006 The Android Open Source Project
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Use of this source code is governed by a BSD-style license that can be
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * found in the LICENSE file.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "SkBlurMask.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "SkMath.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "SkTemplates.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "SkEndian.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// This constant approximates the scaling done in the software path's
// "high quality" mode, in SkBlurMask::Blur() (1 / sqrt(3)).
// IMHO, it actually should be 1:  we blur "less" than we should do
// according to the CSS and canvas specs, simply because Safari does the same.
// Firefox used to do the same too, until 4.0 where they fixed it.  So at some
// point we should probably get rid of these scaling constants and rebaseline
// all the blur tests.
//
// It is the slope applied to the radius in SkBlurMask::ConvertRadiusToSigma()
// and divided back out in SkBlurMask::ConvertSigmaToRadius().
static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f;
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return radius > 0 ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f;
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f;
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// When defined, the per-row loops in boxBlur() and boxBlurInterp() are
// compiled with an extra, manually 16x-unrolled pass that runs ahead of the
// one-pixel-at-a-time loop.
#define UNROLL_SEPARABLE_LOOPS
34f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
35f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)/**
36f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * This function performs a box blur in X, of the given radius.  If the
37f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * "transpose" parameter is true, it will transpose the pixels on write,
38f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * such that X and Y are swapped. Reads are always performed from contiguous
39f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * memory in X, for speed. The destination buffer (dst) must be at least
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * (width + leftRadius + rightRadius) * height bytes in size.
412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is what the inner loop looks like before unrolling, and with the two
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * cases broken out separately (width < diameter, width >= diameter):
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *      if (width < diameter) {
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          for (int x = 0; x < width; ++x) {
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              sum += *right++;
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              *dptr = (sum * scale + half) >> 24;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              dptr += dst_x_stride;
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          }
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          for (int x = width; x < diameter; ++x) {
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              *dptr = (sum * scale + half) >> 24;
53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              dptr += dst_x_stride;
54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          }
55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          for (int x = 0; x < width; ++x) {
56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              *dptr = (sum * scale + half) >> 24;
57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              sum -= *left++;
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              dptr += dst_x_stride;
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          }
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *      } else {
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          for (int x = 0; x < diameter; ++x) {
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              sum += *right++;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              *dptr = (sum * scale + half) >> 24;
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              dptr += dst_x_stride;
652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *          }
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          for (int x = diameter; x < width; ++x) {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              sum += *right++;
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              *dptr = (sum * scale + half) >> 24;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              sum -= *left++;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              dptr += dst_x_stride;
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          }
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          for (int x = 0; x < diameter; ++x) {
73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              *dptr = (sum * scale + half) >> 24;
74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              sum -= *left++;
75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              dptr += dst_x_stride;
76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          }
77f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *      }
78f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) */
79f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
80f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   int leftRadius, int rightRadius, int width, int height,
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   bool transpose)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int diameter = leftRadius + rightRadius;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int kernelSize = diameter + 1;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int border = SkMin32(width, diameter);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32_t scale = (1 << 24) / kernelSize;
87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int dst_x_stride = transpose ? height : 1;
89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int dst_y_stride = transpose ? 1 : new_width;
90f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    uint32_t half = 1 << 23;
91f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    for (int y = 0; y < height; ++y) {
92f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        uint32_t sum = 0;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        uint8_t* dptr = dst + y * dst_y_stride;
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const uint8_t* right = src + y * src_y_stride;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const uint8_t* left = right;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (int x = 0; x < rightRadius - leftRadius; x++) {
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *dptr = 0;
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            dptr += dst_x_stride;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define LEFT_BORDER_ITER \
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            sum += *right++; \
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *dptr = (sum * scale + half) >> 24; \
103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride;
104f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int x = 0;
106f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
107f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        for (; x < border - 16; x += 16) {
108f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
110f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
111f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
112f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
113f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
114f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        for (; x < border; ++x) {
127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#undef LEFT_BORDER_ITER
130f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define TRIVIAL_ITER \
131f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            *dptr = (sum * scale + half) >> 24; \
132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride;
133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        x = width;
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        for (; x < diameter - 16; x += 16) {
1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            TRIVIAL_ITER
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            TRIVIAL_ITER
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            TRIVIAL_ITER
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            TRIVIAL_ITER
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            TRIVIAL_ITER
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            TRIVIAL_ITER
142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            TRIVIAL_ITER
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (; x < diameter; ++x) {
1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            TRIVIAL_ITER
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef TRIVIAL_ITER
158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define CENTER_ITER \
159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            sum += *right++; \
160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            *dptr = (sum * scale + half) >> 24; \
161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            sum -= *left++; \
162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride;
163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        x = diameter;
165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (; x < width - 16; x += 16) {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
172f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
173f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
174f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        for (; x < width; ++x) {
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
188f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#undef CENTER_ITER
189f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define RIGHT_BORDER_ITER \
190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            *dptr = (sum * scale + half) >> 24; \
191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            sum -= *left++; \
192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride;
193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
194f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        x = 0;
195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
196f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        for (; x < border - 16; x += 16) {
197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
198f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
206f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
207f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
208f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
212f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
213f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
214f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (; x < border; ++x) {
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            RIGHT_BORDER_ITER
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef RIGHT_BORDER_ITER
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (int x = 0; x < leftRadius - rightRadius; ++x) {
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *dptr = 0;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            dptr += dst_x_stride;
222f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
223f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        SkASSERT(sum == 0);
224f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
225f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return new_width;
226f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
227f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
228f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)/**
229f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * This variant of the box blur handles blurring of non-integer radii.  It
230f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * keeps two running sums: an outer sum for the rounded-up kernel radius, and
231f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * interpolates between them.  In float this would be:
2332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *  outer_weight * outer_sum / kernelSize +
2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
2352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *
2362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * This is what the inner loop looks like before unrolling, and with the two
2372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * cases broken out separately (width < diameter, width >= diameter):
2382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *
2392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *      if (width < diameter) {
240f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          for (int x = 0; x < width; x++) {
241f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              inner_sum = outer_sum;
242f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              outer_sum += *right++;
243f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
244f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              dptr += dst_x_stride;
245f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          }
246f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          for (int x = width; x < diameter; ++x) {
247f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
248f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              dptr += dst_x_stride;
2492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *          }
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          for (int x = 0; x < width; x++) {
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              inner_sum = outer_sum - *left++;
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              dptr += dst_x_stride;
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *              outer_sum = inner_sum;
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *          }
256f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *      } else {
257f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          for (int x = 0; x < diameter; x++) {
258f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              inner_sum = outer_sum;
259f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              outer_sum += *right++;
260f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
261f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              dptr += dst_x_stride;
262f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          }
263f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          for (int x = diameter; x < width; ++x) {
264f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              inner_sum = outer_sum - *left;
265f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              outer_sum += *right++;
2662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
2672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *              dptr += dst_x_stride;
2682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *              outer_sum -= *left++;
2692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *          }
2702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *          for (int x = 0; x < diameter; x++) {
2712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *              inner_sum = outer_sum - *left++;
2722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
2732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *              dptr += dst_x_stride;
274f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *              outer_sum = inner_sum;
275f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *          }
276f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *      }
277f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *  }
278f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *  return new_width;
279f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) */
280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
281f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
282f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                         int radius, int width, int height,
283f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                         bool transpose, uint8_t outer_weight)
284f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles){
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int diameter = radius * 2;
2862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int kernelSize = diameter + 1;
2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int border = SkMin32(width, diameter);
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int inner_weight = 255 - outer_weight;
2892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    outer_weight += outer_weight >> 7;
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inner_weight += inner_weight >> 7;
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32_t outer_scale = (outer_weight << 16) / kernelSize;
292f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
293f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    uint32_t half = 1 << 23;
294f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int new_width = width + diameter;
295f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int dst_x_stride = transpose ? height : 1;
296f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int dst_y_stride = transpose ? 1 : new_width;
297f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    for (int y = 0; y < height; ++y) {
298f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        uint32_t outer_sum = 0, inner_sum = 0;
299f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        uint8_t* dptr = dst + y * dst_y_stride;
300f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        const uint8_t* right = src + y * src_y_stride;
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const uint8_t* left = right;
3022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        int x = 0;
3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define LEFT_BORDER_ITER \
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            inner_sum = outer_sum; \
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            outer_sum += *right++; \
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
308f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride;
309f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
310f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
311f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        for (;x < border - 16; x += 16) {
312f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
313f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
314f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
315f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
316f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
3182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
3192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            LEFT_BORDER_ITER
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LEFT_BORDER_ITER
324f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
325f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
326f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
327f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
328f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
329f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
330f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
331f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        for (;x < border; ++x) {
332f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            LEFT_BORDER_ITER
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#undef LEFT_BORDER_ITER
3352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        for (int x = width; x < diameter; ++x) {
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            dptr += dst_x_stride;
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        x = diameter;
340f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
341f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define CENTER_ITER \
342f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            inner_sum = outer_sum - *left; \
343f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            outer_sum += *right++; \
344f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
345f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride; \
346f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            outer_sum -= *left++;
347f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
348f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (; x < width - 16; x += 16) {
3502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            CENTER_ITER
3512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            CENTER_ITER
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
356f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
357f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
358f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
359f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
360f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
361f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
362f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
363f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
364f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            CENTER_ITER
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
3662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
3672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (; x < width; ++x) {
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            CENTER_ITER
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef CENTER_ITER
372f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
373f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        #define RIGHT_BORDER_ITER \
374f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            inner_sum = outer_sum - *left++; \
375f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
376f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            dptr += dst_x_stride; \
377f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            outer_sum = inner_sum;
378f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
379f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        x = 0;
3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
3812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        for (; x < border - 16; x += 16) {
3822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
3832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
3842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
3852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
386f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
387f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
388f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
389f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
390f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
391f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
392f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
393f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
394f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
395f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            RIGHT_BORDER_ITER
3962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
3972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
3982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
3992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif
4002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        for (; x < border; ++x) {
4012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            RIGHT_BORDER_ITER
402f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
403f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#undef RIGHT_BORDER_ITER
404f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        SkASSERT(outer_sum == 0 && inner_sum == 0);
405f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
406f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return new_width;
407f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
408f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
409f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
410f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles){
411f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    *loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
4132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        *loRadius = *hiRadius - 1;
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "SkColorPriv.h"
418f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
419f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static void merge_src_with_blur(uint8_t dst[], int dstRB,
420f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                const uint8_t src[], int srcRB,
421f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                const uint8_t blur[], int blurRB,
422f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                int sw, int sh) {
423f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    dstRB -= sw;
424f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    srcRB -= sw;
425f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    blurRB -= sw;
426f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    while (--sh >= 0) {
427f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        for (int x = sw - 1; x >= 0; --x) {
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
4292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            dst += 1;
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            src += 1;
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            blur += 1;
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        dst += dstRB;
434f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        src += srcRB;
435f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        blur += blurRB;
436f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
437f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
438f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
439f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
440f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            const uint8_t src[], int srcRowBytes,
441f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            int sw, int sh,
442f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            SkBlurStyle style) {
443f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    int x;
444f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    while (--sh >= 0) {
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        switch (style) {
4462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        case kSolid_SkBlurStyle:
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for (x = sw - 1; x >= 0; --x) {
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                int s = *src;
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                int d = *dst;
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
4512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                dst += 1;
452f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                src += 1;
453f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            }
454f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            break;
455f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        case kOuter_SkBlurStyle:
456f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            for (x = sw - 1; x >= 0; --x) {
457f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                if (*src) {
458f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
459f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                }
460f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                dst += 1;
461f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                src += 1;
462f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            }
463f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            break;
464f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        default:
4652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            SkDEBUGFAIL("Unexpected blur style here");
4662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            break;
4672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
4682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        dst += dstRowBytes - sw;
4692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        src += srcRowBytes - sw;
4702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
4712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
472
473///////////////////////////////////////////////////////////////////////////////
474
475// we use a local function to wrap the class static method to work around
476// a bug in gcc98
477void SkMask_FreeImage(uint8_t* image);
478void SkMask_FreeImage(uint8_t* image) {
479    SkMask::FreeImage(image);
480}
481
482bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
483                         SkScalar sigma, SkBlurStyle style, SkBlurQuality quality,
484                         SkIPoint* margin, bool force_quality) {
485
486    if (src.fFormat != SkMask::kA8_Format) {
487        return false;
488    }
489
490    // Force high quality off for small radii (performance)
491    if (!force_quality && sigma <= SkIntToScalar(2)) {
492        quality = kLow_SkBlurQuality;
493    }
494
495    SkScalar passRadius;
496    if (kHigh_SkBlurQuality == quality) {
497        // For the high quality path the 3 pass box blur kernel width is
498        // 6*rad+1 while the full Gaussian width is 6*sigma.
499        passRadius = sigma - (1/6.0f);
500    } else {
501        // For the low quality path we only attempt to cover 3*sigma of the
502        // Gaussian blur area (1.5*sigma on each side). The single pass box
503        // blur's kernel size is 2*rad+1.
504        passRadius = 1.5f*sigma - 0.5f;
505    }
506
507    // highQuality: use three box blur passes as a cheap way
508    // to approximate a Gaussian blur
509    int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1;
510
511    int rx = SkScalarCeilToInt(passRadius);
512    int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255);
513
514    SkASSERT(rx >= 0);
515    SkASSERT((unsigned)outerWeight <= 255);
516    if (rx <= 0) {
517        return false;
518    }
519
520    int ry = rx;    // only do square blur for now
521
522    int padx = passCount * rx;
523    int pady = passCount * ry;
524
525    if (margin) {
526        margin->set(padx, pady);
527    }
528    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
529                     src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
530
531    dst->fRowBytes = dst->fBounds.width();
532    dst->fFormat = SkMask::kA8_Format;
533    dst->fImage = NULL;
534
535    if (src.fImage) {
536        size_t dstSize = dst->computeImageSize();
537        if (0 == dstSize) {
538            return false;   // too big to allocate, abort
539        }
540
541        int             sw = src.fBounds.width();
542        int             sh = src.fBounds.height();
543        const uint8_t*  sp = src.fImage;
544        uint8_t*        dp = SkMask::AllocImage(dstSize);
545        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
546
547        // build the blurry destination
548        SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
549        uint8_t*                tp = tmpBuffer.get();
550        int w = sw, h = sh;
551
552        if (outerWeight == 255) {
553            int loRadius, hiRadius;
554            get_adjusted_radii(passRadius, &loRadius, &hiRadius);
555            if (kHigh_SkBlurQuality == quality) {
556                // Do three X blurs, with a transpose on the final one.
557                w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
558                w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
559                w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
560                // Do three Y blurs, with a transpose on the final one.
561                h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
562                h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
563                h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
564            } else {
565                w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
566                h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
567            }
568        } else {
569            if (kHigh_SkBlurQuality == quality) {
570                // Do three X blurs, with a transpose on the final one.
571                w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
572                w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outerWeight);
573                w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outerWeight);
574                // Do three Y blurs, with a transpose on the final one.
575                h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outerWeight);
576                h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outerWeight);
577                h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
578            } else {
579                w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
580                h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
581            }
582        }
583
584        dst->fImage = dp;
585        // if need be, alloc the "real" dst (same size as src) and copy/merge
586        // the blur into it (applying the src)
587        if (style == kInner_SkBlurStyle) {
588            // now we allocate the "real" dst, mirror the size of src
589            size_t srcSize = src.computeImageSize();
590            if (0 == srcSize) {
591                return false;   // too big to allocate, abort
592            }
593            dst->fImage = SkMask::AllocImage(srcSize);
594            merge_src_with_blur(dst->fImage, src.fRowBytes,
595                                sp, src.fRowBytes,
596                                dp + passCount * (rx + ry * dst->fRowBytes),
597                                dst->fRowBytes, sw, sh);
598            SkMask::FreeImage(dp);
599        } else if (style != kNormal_SkBlurStyle) {
600            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
601                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
602        }
603        (void)autoCall.detach();
604    }
605
606    if (style == kInner_SkBlurStyle) {
607        dst->fBounds = src.fBounds; // restore trimmed bounds
608        dst->fRowBytes = src.fRowBytes;
609    }
610
611    return true;
612}
613
614/* Convolving a box with itself three times results in a piecewise
615   quadratic function:
616
617   0                              x <= -1.5
618   9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
619   3/4 - x^2                -.5 < x <= .5
620   9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
621   0                        1.5 < x
622
623   Mathematica:
624
625   g[x_] := Piecewise [ {
626     {9/8 + 3/2 x + 1/2 x^2 ,  -1.5 < x <= -.5},
627     {3/4 - x^2             ,   -.5 < x <= .5},
628     {9/8 - 3/2 x + 1/2 x^2 ,   0.5 < x <= 1.5}
629   }, 0]
630
631   To get the profile curve of the blurred step function at the rectangle
632   edge, we evaluate the indefinite integral, which is piecewise cubic:
633
634   0                                        x <= -1.5
635   9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
636   1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
637   7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
638   1                                  1.5 < x
639
640   in Mathematica code:
641
642   gi[x_] := Piecewise[ {
643     { 0 , x <= -1.5 },
644     { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
645     { 1/2 + 3/4 x - 1/3 x^3          ,  -.5 < x <= .5},
646     { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3,   .5 < x <= 1.5}
647   },1]
648*/
649
// Evaluates the complement (1 - gi(x)) of the piecewise-cubic integral gi
// described in the comment above: the result falls from 1 (x < -1.5) to
// 0 (x > 1.5) and is 0.5 at x == 0.
static float gaussianIntegral(float x) {
    // outside the kernel's support the profile is saturated
    if (x > 1.5f) return 0.0f;
    if (x < -1.5f) return 1.0f;

    const float sq = x*x;
    const float cube = sq*x;

    if (x > 0.5f) {
        // upper cubic segment
        return 0.5625f - (cube / 6.0f - 3.0f * sq * 0.25f + 1.125f * x);
    } else if (x > -0.5f) {
        // central segment
        return 0.5f - (0.75f * x - cube / 3.0f);
    } else {
        // lower cubic segment
        return 0.4375f + (-cube / 6.0f - 3.0f * sq * 0.25f - 1.125f * x);
    }
}
669
670/*  ComputeBlurProfile allocates and fills in an array of floating
671    point values between 0 and 255 for the profile signature of
672    a blurred half-plane with the given blur radius.  Since we're
673    going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
674    all the time, we actually fill in the profile pre-inverted
675    (already done 255-x).
676
677    It's the responsibility of the caller to delete the
678    memory returned in profile_out.
679*/
680
681void SkBlurMask::ComputeBlurProfile(SkScalar sigma, uint8_t **profile_out) {
682    int size = SkScalarCeilToInt(6*sigma);
683
684    int center = size >> 1;
685    uint8_t *profile = SkNEW_ARRAY(uint8_t, size);
686
687    float invr = 1.f/(2*sigma);
688
689    profile[0] = 255;
690    for (int x = 1 ; x < size ; ++x) {
691        float scaled_x = (center - x - .5f) * invr;
692        float gi = gaussianIntegral(scaled_x);
693        profile[x] = 255 - (uint8_t) (255.f * gi);
694    }
695
696    *profile_out = profile;
697}
698
699// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
700// commonly used radii.  Consider baking some of the most common blur radii
701// directly in as static data?
702
703// Implementation adapted from Michael Herf's approach:
704// http://stereopsis.com/shadowrect/
705
706uint8_t SkBlurMask::ProfileLookup(const uint8_t *profile, int loc, int blurred_width, int sharp_width) {
707    int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
708    int ox = dx >> 1;
709    if (ox < 0) {
710        ox = 0;
711    }
712
713    return profile[ox];
714}
715
716void SkBlurMask::ComputeBlurredScanline(uint8_t *pixels, const uint8_t *profile,
717                                        unsigned int width, SkScalar sigma) {
718
719    unsigned int profile_size = SkScalarCeilToInt(6*sigma);
720    SkAutoTMalloc<uint8_t> horizontalScanline(width);
721
722    unsigned int sw = width - profile_size;
723    // nearest odd number less than the profile size represents the center
724    // of the (2x scaled) profile
725    int center = ( profile_size & ~1 ) - 1;
726
727    int w = sw - center;
728
729    for (unsigned int x = 0 ; x < width ; ++x) {
730       if (profile_size <= sw) {
731           pixels[x] = ProfileLookup(profile, x, width, w);
732       } else {
733           float span = float(sw)/(2*sigma);
734           float giX = 1.5f - (x+.5f)/(2*sigma);
735           pixels[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
736       }
737    }
738}
739
740bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst,
741                          const SkRect &src, SkBlurStyle style,
742                          SkIPoint *margin, SkMask::CreateMode createMode) {
743    int profile_size = SkScalarCeilToInt(6*sigma);
744
745    int pad = profile_size/2;
746    if (margin) {
747        margin->set( pad, pad );
748    }
749
750    dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
751                     SkScalarRoundToInt(src.fTop - pad),
752                     SkScalarRoundToInt(src.fRight + pad),
753                     SkScalarRoundToInt(src.fBottom + pad));
754
755    dst->fRowBytes = dst->fBounds.width();
756    dst->fFormat = SkMask::kA8_Format;
757    dst->fImage = NULL;
758
759    int             sw = SkScalarFloorToInt(src.width());
760    int             sh = SkScalarFloorToInt(src.height());
761
762    if (createMode == SkMask::kJustComputeBounds_CreateMode) {
763        if (style == kInner_SkBlurStyle) {
764            dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
765                             SkScalarRoundToInt(src.fTop),
766                             SkScalarRoundToInt(src.fRight),
767                             SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
768            dst->fRowBytes = sw;
769        }
770        return true;
771    }
772    uint8_t *profile = NULL;
773
774    ComputeBlurProfile(sigma, &profile);
775    SkAutoTDeleteArray<uint8_t> ada(profile);
776
777    size_t dstSize = dst->computeImageSize();
778    if (0 == dstSize) {
779        return false;   // too big to allocate, abort
780    }
781
782    uint8_t*        dp = SkMask::AllocImage(dstSize);
783
784    dst->fImage = dp;
785
786    int dstHeight = dst->fBounds.height();
787    int dstWidth = dst->fBounds.width();
788
789    uint8_t *outptr = dp;
790
791    SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
792    SkAutoTMalloc<uint8_t> verticalScanline(dstHeight);
793
794    ComputeBlurredScanline(horizontalScanline, profile, dstWidth, sigma);
795    ComputeBlurredScanline(verticalScanline, profile, dstHeight, sigma);
796
797    for (int y = 0 ; y < dstHeight ; ++y) {
798        for (int x = 0 ; x < dstWidth ; x++) {
799            unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], verticalScanline[y]);
800            *(outptr++) = maskval;
801        }
802    }
803
804    if (style == kInner_SkBlurStyle) {
805        // now we allocate the "real" dst, mirror the size of src
806        size_t srcSize = (size_t)(src.width() * src.height());
807        if (0 == srcSize) {
808            return false;   // too big to allocate, abort
809        }
810        dst->fImage = SkMask::AllocImage(srcSize);
811        for (int y = 0 ; y < sh ; y++) {
812            uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
813            uint8_t *inner_scanline = dst->fImage + y*sw;
814            memcpy(inner_scanline, blur_scanline, sw);
815        }
816        SkMask::FreeImage(dp);
817
818        dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
819                         SkScalarRoundToInt(src.fTop),
820                         SkScalarRoundToInt(src.fRight),
821                         SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
822        dst->fRowBytes = sw;
823
824    } else if (style == kOuter_SkBlurStyle) {
825        for (int y = pad ; y < dstHeight-pad ; y++) {
826            uint8_t *dst_scanline = dp + y*dstWidth + pad;
827            memset(dst_scanline, 0, sw);
828        }
829    } else if (style == kSolid_SkBlurStyle) {
830        for (int y = pad ; y < dstHeight-pad ; y++) {
831            uint8_t *dst_scanline = dp + y*dstWidth + pad;
832            memset(dst_scanline, 0xff, sw);
833        }
834    }
835    // normal and solid styles are the same for analytic rect blurs, so don't
836    // need to handle solid specially.
837
838    return true;
839}
840
841bool SkBlurMask::BlurRRect(SkScalar sigma, SkMask *dst,
842                           const SkRRect &src, SkBlurStyle style,
843                           SkIPoint *margin, SkMask::CreateMode createMode) {
844    // Temporary for now -- always fail, should cause caller to fall back
845    // to old path.  Plumbing just to land API and parallelize effort.
846
847    return false;
848}
849
850// The "simple" blur is a direct implementation of separable convolution with a discrete
851// gaussian kernel.  It's "ground truth" in a sense; too slow to be used, but very
852// useful for correctness comparisons.
853
854bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src,
855                                 SkBlurStyle style, SkIPoint* margin) {
856
857    if (src.fFormat != SkMask::kA8_Format) {
858        return false;
859    }
860
861    float variance = sigma * sigma;
862
863    int windowSize = SkScalarCeilToInt(sigma*6);
864    // round window size up to nearest odd number
865    windowSize |= 1;
866
867    SkAutoTMalloc<float> gaussWindow(windowSize);
868
869    int halfWindow = windowSize >> 1;
870
871    gaussWindow[halfWindow] = 1;
872
873    float windowSum = 1;
874    for (int x = 1 ; x <= halfWindow ; ++x) {
875        float gaussian = expf(-x*x / (2*variance));
876        gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
877        windowSum += 2*gaussian;
878    }
879
880    // leave the filter un-normalized for now; we will divide by the normalization
881    // sum later;
882
883    int pad = halfWindow;
884    if (margin) {
885        margin->set( pad, pad );
886    }
887
888    dst->fBounds = src.fBounds;
889    dst->fBounds.outset(pad, pad);
890
891    dst->fRowBytes = dst->fBounds.width();
892    dst->fFormat = SkMask::kA8_Format;
893    dst->fImage = NULL;
894
895    if (src.fImage) {
896
897        size_t dstSize = dst->computeImageSize();
898        if (0 == dstSize) {
899            return false;   // too big to allocate, abort
900        }
901
902        int             srcWidth = src.fBounds.width();
903        int             srcHeight = src.fBounds.height();
904        int             dstWidth = dst->fBounds.width();
905
906        const uint8_t*  srcPixels = src.fImage;
907        uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
908        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
909
910        // do the actual blur.  First, make a padded copy of the source.
911        // use double pad so we never have to check if we're outside anything
912
913        int padWidth = srcWidth + 4*pad;
914        int padHeight = srcHeight;
915        int padSize = padWidth * padHeight;
916
917        SkAutoTMalloc<uint8_t> padPixels(padSize);
918        memset(padPixels, 0, padSize);
919
920        for (int y = 0 ; y < srcHeight; ++y) {
921            uint8_t* padptr = padPixels + y * padWidth + 2*pad;
922            const uint8_t* srcptr = srcPixels + y * srcWidth;
923            memcpy(padptr, srcptr, srcWidth);
924        }
925
926        // blur in X, transposing the result into a temporary floating point buffer.
927        // also double-pad the intermediate result so that the second blur doesn't
928        // have to do extra conditionals.
929
930        int tmpWidth = padHeight + 4*pad;
931        int tmpHeight = padWidth - 2*pad;
932        int tmpSize = tmpWidth * tmpHeight;
933
934        SkAutoTMalloc<float> tmpImage(tmpSize);
935        memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
936
937        for (int y = 0 ; y < padHeight ; ++y) {
938            uint8_t *srcScanline = padPixels + y*padWidth;
939            for (int x = pad ; x < padWidth - pad ; ++x) {
940                float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
941                uint8_t *windowCenter = srcScanline + x;
942                for (int i = -pad ; i <= pad ; ++i) {
943                    *outPixel += gaussWindow[pad+i]*windowCenter[i];
944                }
945                *outPixel /= windowSum;
946            }
947        }
948
949        // blur in Y; now filling in the actual desired destination.  We have to do
950        // the transpose again; these transposes guarantee that we read memory in
951        // linear order.
952
953        for (int y = 0 ; y < tmpHeight ; ++y) {
954            float *srcScanline = tmpImage + y*tmpWidth;
955            for (int x = pad ; x < tmpWidth - pad ; ++x) {
956                float *windowCenter = srcScanline + x;
957                float finalValue = 0;
958                for (int i = -pad ; i <= pad ; ++i) {
959                    finalValue += gaussWindow[pad+i]*windowCenter[i];
960                }
961                finalValue /= windowSum;
962                uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
963                int integerPixel = int(finalValue + 0.5f);
964                *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
965            }
966        }
967
968        dst->fImage = dstPixels;
969        // if need be, alloc the "real" dst (same size as src) and copy/merge
970        // the blur into it (applying the src)
971        if (style == kInner_SkBlurStyle) {
972            // now we allocate the "real" dst, mirror the size of src
973            size_t srcSize = src.computeImageSize();
974            if (0 == srcSize) {
975                return false;   // too big to allocate, abort
976            }
977            dst->fImage = SkMask::AllocImage(srcSize);
978            merge_src_with_blur(dst->fImage, src.fRowBytes,
979                srcPixels, src.fRowBytes,
980                dstPixels + pad*dst->fRowBytes + pad,
981                dst->fRowBytes, srcWidth, srcHeight);
982            SkMask::FreeImage(dstPixels);
983        } else if (style != kNormal_SkBlurStyle) {
984            clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
985                dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
986        }
987        (void)autoCall.detach();
988    }
989
990    if (style == kInner_SkBlurStyle) {
991        dst->fBounds = src.fBounds; // restore trimmed bounds
992        dst->fRowBytes = src.fRowBytes;
993    }
994
995    return true;
996}
997