SkBlurMask.cpp revision 91f489a65d436d36c7fe580af2775cd0cd13c8d2
1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "SkBlurMask.h"
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "SkMath.h"
120de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)#include "SkTemplates.h"
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "SkEndian.h"
14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// When defined, the separable box-blur routines in this file (boxBlur and
// boxBlurInterp) compile an additional 16x hand-unrolled copy of each inner
// loop ahead of the scalar loop that handles the remaining pixels.
#define UNROLL_SEPARABLE_LOOPS
16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)/**
18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * This function performs a box blur in X, of the given radius.  If the
19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * "transpose" parameter is true, it will transpose the pixels on write,
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * such that X and Y are swapped. Reads are always performed from contiguous
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * memory in X, for speed. The destination buffer (dst) must be at least
22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * (width + leftRadius + rightRadius) * height bytes in size.
23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) */
24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                   int leftRadius, int rightRadius, int width, int height,
26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                   bool transpose)
27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles){
28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    int diameter = leftRadius + rightRadius;
29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    int kernelSize = diameter + 1;
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    int border = SkMin32(width, diameter);
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    uint32_t scale = (1 << 24) / kernelSize;
32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    int dst_x_stride = transpose ? height : 1;
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    int dst_y_stride = transpose ? 1 : new_width;
35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    for (int y = 0; y < height; ++y) {
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        int sum = 0;
37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        uint8_t* dptr = dst + y * dst_y_stride;
38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        const uint8_t* right = src + y * src_y_stride;
39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        const uint8_t* left = right;
40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        for (int x = 0; x < rightRadius - leftRadius; x++) {
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            *dptr = 0;
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            dptr += dst_x_stride;
430de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)        }
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#define LEFT_BORDER_ITER \
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            sum += *right++; \
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            *dptr = (sum * scale) >> 24; \
47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            dptr += dst_x_stride;
480de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)
490de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)        int x = 0;
500de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
510de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)        for (; x < border - 16; x += 16) {
520de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
530de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
540de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
550de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
560de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
570de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
580de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
590de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
600de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
610de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            LEFT_BORDER_ITER
62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        }
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#endif
70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        for (; x < border; ++x) {
71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            LEFT_BORDER_ITER
72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        }
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#undef LEFT_BORDER_ITER
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#define TRIVIAL_ITER \
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            *dptr = (sum * scale) >> 24; \
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            dptr += dst_x_stride;
77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        x = width;
78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        for (; x < diameter - 16; x += 16) {
80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            TRIVIAL_ITER
96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        }
970de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)#endif
980de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)        for (; x < diameter; ++x) {
990de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            TRIVIAL_ITER
1000de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)        }
1010de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)#undef TRIVIAL_ITER
1020de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)#define CENTER_ITER \
1030de6073388f4e2780db8536178b129cd8f6ab386Torne (Richard Coles)            sum += *right++; \
104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            *dptr = (sum * scale) >> 24; \
105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            sum -= *left++; \
106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            dptr += dst_x_stride;
107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        x = diameter;
109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#ifdef UNROLL_SEPARABLE_LOOPS
110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        for (; x < width - 16; x += 16) {
111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            CENTER_ITER
112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            CENTER_ITER
113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            CENTER_ITER
114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            CENTER_ITER
115            CENTER_ITER
116            CENTER_ITER
117            CENTER_ITER
118            CENTER_ITER
119            CENTER_ITER
120            CENTER_ITER
121            CENTER_ITER
122            CENTER_ITER
123            CENTER_ITER
124            CENTER_ITER
125            CENTER_ITER
126            CENTER_ITER
127        }
128#endif
129        for (; x < width; ++x) {
130            CENTER_ITER
131        }
132#undef CENTER_ITER
133#define RIGHT_BORDER_ITER \
134            *dptr = (sum * scale) >> 24; \
135            sum -= *left++; \
136            dptr += dst_x_stride;
137
138        x = 0;
139#ifdef UNROLL_SEPARABLE_LOOPS
140        for (; x < border - 16; x += 16) {
141            RIGHT_BORDER_ITER
142            RIGHT_BORDER_ITER
143            RIGHT_BORDER_ITER
144            RIGHT_BORDER_ITER
145            RIGHT_BORDER_ITER
146            RIGHT_BORDER_ITER
147            RIGHT_BORDER_ITER
148            RIGHT_BORDER_ITER
149            RIGHT_BORDER_ITER
150            RIGHT_BORDER_ITER
151            RIGHT_BORDER_ITER
152            RIGHT_BORDER_ITER
153            RIGHT_BORDER_ITER
154            RIGHT_BORDER_ITER
155            RIGHT_BORDER_ITER
156            RIGHT_BORDER_ITER
157        }
158#endif
159        for (; x < border; ++x) {
160            RIGHT_BORDER_ITER
161        }
162#undef RIGHT_BORDER_ITER
163        for (int x = 0; x < leftRadius - rightRadius; x++) {
164            *dptr = 0;
165            dptr += dst_x_stride;
166        }
167        SkASSERT(sum == 0);
168    }
169    return new_width;
170}
171
172/**
173 * This variant of the box blur handles blurring of non-integer radii.  It
174 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
175 * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
176 * interpolates between them.  In float this would be:
177 *  outer_weight * outer_sum / kernelSize +
178 *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
179 */
180static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
181                         int radius, int width, int height,
182                         bool transpose, uint8_t outer_weight)
183{
184    int diameter = radius * 2;
185    int kernelSize = diameter + 1;
186    int border = SkMin32(width, diameter);
187    int inner_weight = 255 - outer_weight;
188    outer_weight += outer_weight >> 7;
189    inner_weight += inner_weight >> 7;
190    uint32_t outer_scale = (outer_weight << 16) / kernelSize;
191    uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
192    int new_width = width + diameter;
193    int dst_x_stride = transpose ? height : 1;
194    int dst_y_stride = transpose ? 1 : new_width;
195    for (int y = 0; y < height; ++y) {
196        int outer_sum = 0, inner_sum = 0;
197        uint8_t* dptr = dst + y * dst_y_stride;
198        const uint8_t* right = src + y * src_y_stride;
199        const uint8_t* left = right;
200        int x = 0;
201
202#define LEFT_BORDER_ITER \
203            inner_sum = outer_sum; \
204            outer_sum += *right++; \
205            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
206            dptr += dst_x_stride;
207
208#ifdef UNROLL_SEPARABLE_LOOPS
209        for (;x < border - 16; x += 16) {
210            LEFT_BORDER_ITER
211            LEFT_BORDER_ITER
212            LEFT_BORDER_ITER
213            LEFT_BORDER_ITER
214            LEFT_BORDER_ITER
215            LEFT_BORDER_ITER
216            LEFT_BORDER_ITER
217            LEFT_BORDER_ITER
218            LEFT_BORDER_ITER
219            LEFT_BORDER_ITER
220            LEFT_BORDER_ITER
221            LEFT_BORDER_ITER
222            LEFT_BORDER_ITER
223            LEFT_BORDER_ITER
224            LEFT_BORDER_ITER
225            LEFT_BORDER_ITER
226        }
227#endif
228
229        for (;x < border; x++) {
230            LEFT_BORDER_ITER
231        }
232#undef LEFT_BORDER_ITER
233        for (int x = width; x < diameter; ++x) {
234            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24;
235            dptr += dst_x_stride;
236        }
237        x = diameter;
238
239#define CENTER_ITER \
240            inner_sum = outer_sum - *left; \
241            outer_sum += *right++; \
242            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
243            dptr += dst_x_stride; \
244            outer_sum -= *left++;
245
246#ifdef UNROLL_SEPARABLE_LOOPS
247        for (; x < width - 16; x += 16) {
248            CENTER_ITER
249            CENTER_ITER
250            CENTER_ITER
251            CENTER_ITER
252            CENTER_ITER
253            CENTER_ITER
254            CENTER_ITER
255            CENTER_ITER
256            CENTER_ITER
257            CENTER_ITER
258            CENTER_ITER
259            CENTER_ITER
260            CENTER_ITER
261            CENTER_ITER
262            CENTER_ITER
263            CENTER_ITER
264        }
265#endif
266        for (; x < width; ++x) {
267            CENTER_ITER
268        }
269#undef CENTER_ITER
270
271        #define RIGHT_BORDER_ITER \
272            inner_sum = outer_sum - *left++; \
273            *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
274            dptr += dst_x_stride; \
275            outer_sum = inner_sum;
276
277        x = 0;
278#ifdef UNROLL_SEPARABLE_LOOPS
279        for (; x < border - 16; x += 16) {
280            RIGHT_BORDER_ITER
281            RIGHT_BORDER_ITER
282            RIGHT_BORDER_ITER
283            RIGHT_BORDER_ITER
284            RIGHT_BORDER_ITER
285            RIGHT_BORDER_ITER
286            RIGHT_BORDER_ITER
287            RIGHT_BORDER_ITER
288            RIGHT_BORDER_ITER
289            RIGHT_BORDER_ITER
290            RIGHT_BORDER_ITER
291            RIGHT_BORDER_ITER
292            RIGHT_BORDER_ITER
293            RIGHT_BORDER_ITER
294            RIGHT_BORDER_ITER
295            RIGHT_BORDER_ITER
296        }
297#endif
298        for (; x < border; x++) {
299            RIGHT_BORDER_ITER
300        }
301#undef RIGHT_BORDER_ITER
302        SkASSERT(outer_sum == 0 && inner_sum == 0);
303    }
304    return new_width;
305}
306
307static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
308{
309    *loRadius = *hiRadius = SkScalarCeil(passRadius);
310    if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
311        *loRadius = *hiRadius - 1;
312    }
313}
314
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
//
// UNROLL_KERNEL_LOOP is therefore only defined for Win32 builds; elsewhere it
// is left undefined and the "#if UNROLL_KERNEL_LOOP" sections evaluate to 0.
#if defined(SK_BUILD_FOR_WIN32)
#define UNROLL_KERNEL_LOOP 1
#endif
322
323/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
324    src values at their position, plus all values above and to the left.
325    When we sample into this buffer, we need an initial row and column of 0s,
326    so we have an index correspondence as follows:
327
328    src[i, j] == sum[i+1, j+1]
329    sum[0, j] == sum[i, 0] == 0
330
331    We assume that the sum buffer's stride == its width
332 */
333static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
334                             const uint8_t src[], int srcRB) {
335    int sumW = srcW + 1;
336
337    SkASSERT(srcRB >= srcW);
338    // mod srcRB so we can apply it after each row
339    srcRB -= srcW;
340
341    int x, y;
342
343    // zero out the top row and column
344    memset(sum, 0, sumW * sizeof(sum[0]));
345    sum += sumW;
346
347    // special case first row
348    uint32_t X = 0;
349    *sum++ = 0; // initialze the first column to 0
350    for (x = srcW - 1; x >= 0; --x) {
351        X = *src++ + X;
352        *sum++ = X;
353    }
354    src += srcRB;
355
356    // now do the rest of the rows
357    for (y = srcH - 1; y > 0; --y) {
358        uint32_t L = 0;
359        uint32_t C = 0;
360        *sum++ = 0; // initialze the first column to 0
361
362        for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
363            uint32_t T = sum[-sumW];
364            X = *src++ + L + T - C;
365            *sum++ = X;
366            L = X;
367            C = T;
368        }
369
370        for (; x >= 4; x-=4) {
371            uint32_t T = sum[-sumW];
372            X = *src++ + L + T - C;
373            *sum++ = X;
374            L = X;
375            C = T;
376            T = sum[-sumW];
377            X = *src++ + L + T - C;
378            *sum++ = X;
379            L = X;
380            C = T;
381            T = sum[-sumW];
382            X = *src++ + L + T - C;
383            *sum++ = X;
384            L = X;
385            C = T;
386            T = sum[-sumW];
387            X = *src++ + L + T - C;
388            *sum++ = X;
389            L = X;
390            C = T;
391        }
392
393        for (; x >= 0; --x) {
394            uint32_t T = sum[-sumW];
395            X = *src++ + L + T - C;
396            *sum++ = X;
397            L = X;
398            C = T;
399        }
400        src += srcRB;
401    }
402}
403
404/**
405 * This is the path for apply_kernel() to be taken when the kernel
406 * is wider than the source image.
407 */
408static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
409                           int sw, int sh) {
410    SkASSERT(2*rx > sw);
411
412    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
413
414    int sumStride = sw + 1;
415
416    int dw = sw + 2*rx;
417    int dh = sh + 2*ry;
418
419    int prev_y = -2*ry;
420    int next_y = 1;
421
422    for (int y = 0; y < dh; y++) {
423        int py = SkClampPos(prev_y) * sumStride;
424        int ny = SkFastMin32(next_y, sh) * sumStride;
425
426        int prev_x = -2*rx;
427        int next_x = 1;
428
429        for (int x = 0; x < dw; x++) {
430            int px = SkClampPos(prev_x);
431            int nx = SkFastMin32(next_x, sw);
432
433            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
434            *dst++ = SkToU8(tmp * scale >> 24);
435
436            prev_x += 1;
437            next_x += 1;
438        }
439
440        prev_y += 1;
441        next_y += 1;
442    }
443}
444/**
445 *  sw and sh are the width and height of the src. Since the sum buffer
446 *  matches that, but has an extra row and col at the beginning (with zeros),
447 *  we can just use sw and sh as our "max" values for pinning coordinates
448 *  when sampling into sum[][]
449 *
450 *  The inner loop is conceptually simple; we break it into several sections
451 *  to improve performance. Here's the original version:
452        for (int x = 0; x < dw; x++) {
453            int px = SkClampPos(prev_x);
454            int nx = SkFastMin32(next_x, sw);
455
456            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
457            *dst++ = SkToU8(tmp * scale >> 24);
458
459            prev_x += 1;
460            next_x += 1;
461        }
462 *  The sections are:
463 *     left-hand section, where prev_x is clamped to 0
464 *     center section, where neither prev_x nor next_x is clamped
465 *     right-hand section, where next_x is clamped to sw
466 *  On some operating systems, the center section is unrolled for additional
467 *  speedup.
468*/
469static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
470                         int sw, int sh) {
471    if (2*rx > sw) {
472        kernel_clamped(dst, rx, ry, sum, sw, sh);
473        return;
474    }
475
476    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
477
478    int sumStride = sw + 1;
479
480    int dw = sw + 2*rx;
481    int dh = sh + 2*ry;
482
483    int prev_y = -2*ry;
484    int next_y = 1;
485
486    SkASSERT(2*rx <= dw - 2*rx);
487
488    for (int y = 0; y < dh; y++) {
489        int py = SkClampPos(prev_y) * sumStride;
490        int ny = SkFastMin32(next_y, sh) * sumStride;
491
492        int prev_x = -2*rx;
493        int next_x = 1;
494        int x = 0;
495
496        for (; x < 2*rx; x++) {
497            SkASSERT(prev_x <= 0);
498            SkASSERT(next_x <= sw);
499
500            int px = 0;
501            int nx = next_x;
502
503            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
504            *dst++ = SkToU8(tmp * scale >> 24);
505
506            prev_x += 1;
507            next_x += 1;
508        }
509
510        int i0 = prev_x + py;
511        int i1 = next_x + ny;
512        int i2 = next_x + py;
513        int i3 = prev_x + ny;
514
515#if UNROLL_KERNEL_LOOP
516        for (; x < dw - 2*rx - 4; x += 4) {
517            SkASSERT(prev_x >= 0);
518            SkASSERT(next_x <= sw);
519
520            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
521            *dst++ = SkToU8(tmp * scale >> 24);
522            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
523            *dst++ = SkToU8(tmp * scale >> 24);
524            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
525            *dst++ = SkToU8(tmp * scale >> 24);
526            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
527            *dst++ = SkToU8(tmp * scale >> 24);
528
529            prev_x += 4;
530            next_x += 4;
531        }
532#endif
533
534        for (; x < dw - 2*rx; x++) {
535            SkASSERT(prev_x >= 0);
536            SkASSERT(next_x <= sw);
537
538            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
539            *dst++ = SkToU8(tmp * scale >> 24);
540
541            prev_x += 1;
542            next_x += 1;
543        }
544
545        for (; x < dw; x++) {
546            SkASSERT(prev_x >= 0);
547            SkASSERT(next_x > sw);
548
549            int px = prev_x;
550            int nx = sw;
551
552            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
553            *dst++ = SkToU8(tmp * scale >> 24);
554
555            prev_x += 1;
556            next_x += 1;
557        }
558
559        prev_y += 1;
560        next_y += 1;
561    }
562}
563
564/**
565 * This is the path for apply_kernel_interp() to be taken when the kernel
566 * is wider than the source image.
567 */
568static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
569                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
570    SkASSERT(2*rx > sw);
571
572    int inner_weight = 255 - outer_weight;
573
574    // round these guys up if they're bigger than 127
575    outer_weight += outer_weight >> 7;
576    inner_weight += inner_weight >> 7;
577
578    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
579    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
580
581    int sumStride = sw + 1;
582
583    int dw = sw + 2*rx;
584    int dh = sh + 2*ry;
585
586    int prev_y = -2*ry;
587    int next_y = 1;
588
589    for (int y = 0; y < dh; y++) {
590        int py = SkClampPos(prev_y) * sumStride;
591        int ny = SkFastMin32(next_y, sh) * sumStride;
592
593        int ipy = SkClampPos(prev_y + 1) * sumStride;
594        int iny = SkClampMax(next_y - 1, sh) * sumStride;
595
596        int prev_x = -2*rx;
597        int next_x = 1;
598
599        for (int x = 0; x < dw; x++) {
600            int px = SkClampPos(prev_x);
601            int nx = SkFastMin32(next_x, sw);
602
603            int ipx = SkClampPos(prev_x + 1);
604            int inx = SkClampMax(next_x - 1, sw);
605
606            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
607                               - sum[nx+py] - sum[px+ny];
608            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
609                               - sum[inx+ipy] - sum[ipx+iny];
610            *dst++ = SkToU8((outer_sum * outer_scale
611                           + inner_sum * inner_scale) >> 24);
612
613            prev_x += 1;
614            next_x += 1;
615        }
616        prev_y += 1;
617        next_y += 1;
618    }
619}
620
621/**
622 *  sw and sh are the width and height of the src. Since the sum buffer
623 *  matches that, but has an extra row and col at the beginning (with zeros),
624 *  we can just use sw and sh as our "max" values for pinning coordinates
625 *  when sampling into sum[][]
626 *
627 *  The inner loop is conceptually simple; we break it into several variants
628 *  to improve performance. Here's the original version:
629        for (int x = 0; x < dw; x++) {
630            int px = SkClampPos(prev_x);
631            int nx = SkFastMin32(next_x, sw);
632
633            int ipx = SkClampPos(prev_x + 1);
634            int inx = SkClampMax(next_x - 1, sw);
635
636            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
637                               - sum[nx+py] - sum[px+ny];
638            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
639                               - sum[inx+ipy] - sum[ipx+iny];
640            *dst++ = SkToU8((outer_sum * outer_scale
641                           + inner_sum * inner_scale) >> 24);
642
643            prev_x += 1;
644            next_x += 1;
645        }
646 *  The sections are:
647 *     left-hand section, where prev_x is clamped to 0
648 *     center section, where neither prev_x nor next_x is clamped
649 *     right-hand section, where next_x is clamped to sw
650 *  On some operating systems, the center section is unrolled for additional
651 *  speedup.
652*/
653static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
654                const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
655    SkASSERT(rx > 0 && ry > 0);
656    SkASSERT(outer_weight <= 255);
657
658    if (2*rx > sw) {
659        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
660        return;
661    }
662
663    int inner_weight = 255 - outer_weight;
664
665    // round these guys up if they're bigger than 127
666    outer_weight += outer_weight >> 7;
667    inner_weight += inner_weight >> 7;
668
669    uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
670    uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
671
672    int sumStride = sw + 1;
673
674    int dw = sw + 2*rx;
675    int dh = sh + 2*ry;
676
677    int prev_y = -2*ry;
678    int next_y = 1;
679
680    SkASSERT(2*rx <= dw - 2*rx);
681
682    for (int y = 0; y < dh; y++) {
683        int py = SkClampPos(prev_y) * sumStride;
684        int ny = SkFastMin32(next_y, sh) * sumStride;
685
686        int ipy = SkClampPos(prev_y + 1) * sumStride;
687        int iny = SkClampMax(next_y - 1, sh) * sumStride;
688
689        int prev_x = -2*rx;
690        int next_x = 1;
691        int x = 0;
692
693        for (; x < 2*rx; x++) {
694            SkASSERT(prev_x < 0);
695            SkASSERT(next_x <= sw);
696
697            int px = 0;
698            int nx = next_x;
699
700            int ipx = 0;
701            int inx = next_x - 1;
702
703            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
704                               - sum[nx+py] - sum[px+ny];
705            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
706                               - sum[inx+ipy] - sum[ipx+iny];
707            *dst++ = SkToU8((outer_sum * outer_scale
708                           + inner_sum * inner_scale) >> 24);
709
710            prev_x += 1;
711            next_x += 1;
712        }
713
714        int i0 = prev_x + py;
715        int i1 = next_x + ny;
716        int i2 = next_x + py;
717        int i3 = prev_x + ny;
718        int i4 = prev_x + 1 + ipy;
719        int i5 = next_x - 1 + iny;
720        int i6 = next_x - 1 + ipy;
721        int i7 = prev_x + 1 + iny;
722
723#if UNROLL_KERNEL_LOOP
724        for (; x < dw - 2*rx - 4; x += 4) {
725            SkASSERT(prev_x >= 0);
726            SkASSERT(next_x <= sw);
727
728            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
729            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
730            *dst++ = SkToU8((outer_sum * outer_scale
731                           + inner_sum * inner_scale) >> 24);
732            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
733            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
734            *dst++ = SkToU8((outer_sum * outer_scale
735                           + inner_sum * inner_scale) >> 24);
736            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
737            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
738            *dst++ = SkToU8((outer_sum * outer_scale
739                           + inner_sum * inner_scale) >> 24);
740            outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
741            inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
742            *dst++ = SkToU8((outer_sum * outer_scale
743                           + inner_sum * inner_scale) >> 24);
744
745            prev_x += 4;
746            next_x += 4;
747        }
748#endif
749
750        for (; x < dw - 2*rx; x++) {
751            SkASSERT(prev_x >= 0);
752            SkASSERT(next_x <= sw);
753
754            uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
755            uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
756            *dst++ = SkToU8((outer_sum * outer_scale
757                           + inner_sum * inner_scale) >> 24);
758
759            prev_x += 1;
760            next_x += 1;
761        }
762
763        for (; x < dw; x++) {
764            SkASSERT(prev_x >= 0);
765            SkASSERT(next_x > sw);
766
767            int px = prev_x;
768            int nx = sw;
769
770            int ipx = prev_x + 1;
771            int inx = sw;
772
773            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
774                               - sum[nx+py] - sum[px+ny];
775            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
776                               - sum[inx+ipy] - sum[ipx+iny];
777            *dst++ = SkToU8((outer_sum * outer_scale
778                           + inner_sum * inner_scale) >> 24);
779
780            prev_x += 1;
781            next_x += 1;
782        }
783
784        prev_y += 1;
785        next_y += 1;
786    }
787}
788
789#include "SkColorPriv.h"
790
791static void merge_src_with_blur(uint8_t dst[], int dstRB,
792                                const uint8_t src[], int srcRB,
793                                const uint8_t blur[], int blurRB,
794                                int sw, int sh) {
795    dstRB -= sw;
796    srcRB -= sw;
797    blurRB -= sw;
798    while (--sh >= 0) {
799        for (int x = sw - 1; x >= 0; --x) {
800            *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
801            dst += 1;
802            src += 1;
803            blur += 1;
804        }
805        dst += dstRB;
806        src += srcRB;
807        blur += blurRB;
808    }
809}
810
811static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
812                            const uint8_t src[], int srcRowBytes,
813                            int sw, int sh,
814                            SkBlurMask::Style style) {
815    int x;
816    while (--sh >= 0) {
817        switch (style) {
818        case SkBlurMask::kSolid_Style:
819            for (x = sw - 1; x >= 0; --x) {
820                int s = *src;
821                int d = *dst;
822                *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
823                dst += 1;
824                src += 1;
825            }
826            break;
827        case SkBlurMask::kOuter_Style:
828            for (x = sw - 1; x >= 0; --x) {
829                if (*src) {
830                    *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
831                }
832                dst += 1;
833                src += 1;
834            }
835            break;
836        default:
837            SkDEBUGFAIL("Unexpected blur style here");
838            break;
839        }
840        dst += dstRowBytes - sw;
841        src += srcRowBytes - sw;
842    }
843}
844
845///////////////////////////////////////////////////////////////////////////////
846
847// we use a local funciton to wrap the class static method to work around
848// a bug in gcc98
849void SkMask_FreeImage(uint8_t* image);
850void SkMask_FreeImage(uint8_t* image) {
851    SkMask::FreeImage(image);
852}
853
854bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
855                      SkScalar radius, Style style, Quality quality,
856                      SkIPoint* margin, bool separable)
857{
858    if (src.fFormat != SkMask::kA8_Format) {
859        return false;
860    }
861
862    // Force high quality off for small radii (performance)
863    if (radius < SkIntToScalar(3)) {
864        quality = kLow_Quality;
865    }
866
867    // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
868    int passCount = (kHigh_Quality == quality) ? 3 : 1;
869    SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
870
871    int rx = SkScalarCeil(passRadius);
872    int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
873
874    SkASSERT(rx >= 0);
875    SkASSERT((unsigned)outer_weight <= 255);
876    if (rx <= 0) {
877        return false;
878    }
879
880    int ry = rx;    // only do square blur for now
881
882    int padx = passCount * rx;
883    int pady = passCount * ry;
884    if (margin) {
885        margin->set(padx, pady);
886    }
887    dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
888        src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
889    dst->fRowBytes = dst->fBounds.width();
890    dst->fFormat = SkMask::kA8_Format;
891    dst->fImage = NULL;
892
893    if (src.fImage) {
894        size_t dstSize = dst->computeImageSize();
895        if (0 == dstSize) {
896            return false;   // too big to allocate, abort
897        }
898
899        int             sw = src.fBounds.width();
900        int             sh = src.fBounds.height();
901        const uint8_t*  sp = src.fImage;
902        uint8_t*        dp = SkMask::AllocImage(dstSize);
903
904        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
905
906        // build the blurry destination
907        if (separable) {
908            SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
909            uint8_t*                tp = tmpBuffer.get();
910            int w = sw, h = sh;
911
912            if (outer_weight == 255) {
913                int loRadius, hiRadius;
914                get_adjusted_radii(passRadius, &loRadius, &hiRadius);
915                if (kHigh_Quality == quality) {
916                    // Do three X blurs, with a transpose on the final one.
917                    w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
918                    w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
919                    w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
920                    // Do three Y blurs, with a transpose on the final one.
921                    h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
922                    h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
923                    h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
924                } else {
925                    w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
926                    h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
927                }
928            } else {
929                if (kHigh_Quality == quality) {
930                    // Do three X blurs, with a transpose on the final one.
931                    w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);
932                    w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outer_weight);
933                    w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outer_weight);
934                    // Do three Y blurs, with a transpose on the final one.
935                    h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outer_weight);
936                    h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outer_weight);
937                    h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outer_weight);
938                } else {
939                    w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outer_weight);
940                    h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outer_weight);
941                }
942            }
943        } else {
944            const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
945            const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
946            SkAutoTMalloc<uint32_t> storage(storageW * storageH);
947            uint32_t*               sumBuffer = storage.get();
948
949            //pass1: sp is source, dp is destination
950            build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
951            if (outer_weight == 255) {
952                apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
953            } else {
954                apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
955            }
956
957            if (kHigh_Quality == quality) {
958                //pass2: dp is source, tmpBuffer is destination
959                int tmp_sw = sw + 2 * rx;
960                int tmp_sh = sh + 2 * ry;
961                SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
962                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
963                if (outer_weight == 255)
964                    apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
965                else
966                    apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
967                                        tmp_sw, tmp_sh, outer_weight);
968
969                //pass3: tmpBuffer is source, dp is destination
970                tmp_sw += 2 * rx;
971                tmp_sh += 2 * ry;
972                build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
973                if (outer_weight == 255)
974                    apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
975                else
976                    apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
977                                        outer_weight);
978            }
979        }
980
981        dst->fImage = dp;
982        // if need be, alloc the "real" dst (same size as src) and copy/merge
983        // the blur into it (applying the src)
984        if (style == kInner_Style) {
985            // now we allocate the "real" dst, mirror the size of src
986            size_t srcSize = src.computeImageSize();
987            if (0 == srcSize) {
988                return false;   // too big to allocate, abort
989            }
990            dst->fImage = SkMask::AllocImage(srcSize);
991            merge_src_with_blur(dst->fImage, src.fRowBytes,
992                                sp, src.fRowBytes,
993                                dp + passCount * (rx + ry * dst->fRowBytes),
994                                dst->fRowBytes, sw, sh);
995            SkMask::FreeImage(dp);
996        } else if (style != kNormal_Style) {
997            clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
998                            dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
999        }
1000        (void)autoCall.detach();
1001    }
1002
1003    if (style == kInner_Style) {
1004        dst->fBounds = src.fBounds; // restore trimmed bounds
1005        dst->fRowBytes = src.fRowBytes;
1006    }
1007
1008    return true;
1009}
1010
1011bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1012                               SkScalar radius, Style style, Quality quality,
1013                               SkIPoint* margin)
1014{
1015    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1016}
1017
1018bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1019                     SkScalar radius, Style style, Quality quality,
1020                     SkIPoint* margin)
1021{
1022    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1023}
1024