SkBlurImageFilter.cpp revision f8e2502819499894dff40c4f2f46e46edda15507
1/*
2 * Copyright 2011 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkBlurImageFilter.h"
9
10#include <algorithm>
11
12#include "SkArenaAlloc.h"
13#include "SkAutoPixmapStorage.h"
14#include "SkBitmap.h"
15#include "SkColorData.h"
16#include "SkColorSpaceXformer.h"
17#include "SkImageFilterPriv.h"
18#include "SkTFitsIn.h"
19#include "SkGpuBlurUtils.h"
20#include "SkNx.h"
21#include "SkOpts.h"
22#include "SkReadBuffer.h"
23#include "SkSpecialImage.h"
24#include "SkWriteBuffer.h"
25
26#if SK_SUPPORT_GPU
27#include "GrContext.h"
28#include "GrTextureProxy.h"
29#include "SkGr.h"
30#endif
31
// The sigma at which the three pass window calculation switches from a zero window to a
// window of one, i.e. the solution of sigma * 3 * sqrt(2 * pi) / 4 == 1/2:
// N[Solve[sigma*3*Sqrt[2 Pi]/4 == 1/2, sigma], 16]
// The raster path in onFilterImage() skips blurring when both sigmas are below this value.
static constexpr double kZeroWindow = 0.26596152026762;
// Pi in double precision; used by the window formula in calculate_window().
static constexpr double kPi = 3.14159265358979323846264338327950288;
36
37class SkBlurImageFilterImpl final : public SkImageFilter {
38public:
39    SkBlurImageFilterImpl(SkScalar sigmaX,
40                          SkScalar sigmaY,
41                          sk_sp<SkImageFilter> input,
42                          const CropRect* cropRect,
43                          SkBlurImageFilter::TileMode tileMode);
44
45    SkRect computeFastBounds(const SkRect&) const override;
46
47    SK_TO_STRING_OVERRIDE()
48    SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkBlurImageFilterImpl)
49
50protected:
51    void flatten(SkWriteBuffer&) const override;
52    sk_sp<SkSpecialImage> onFilterImage(SkSpecialImage* source, const Context&,
53                                        SkIPoint* offset) const override;
54    sk_sp<SkImageFilter> onMakeColorSpace(SkColorSpaceXformer*) const override;
55    SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix&, MapDirection) const override;
56
57private:
58    typedef SkImageFilter INHERITED;
59    friend class SkImageFilter;
60
61    #if SK_SUPPORT_GPU
62    sk_sp<SkSpecialImage> gpuFilter(
63            SkSpecialImage *source,
64            SkVector sigma, const sk_sp<SkSpecialImage> &input,
65            SkIRect inputBounds, SkIRect dstBounds, const OutputProperties& outProps) const;
66    #endif
67
68    sk_sp<SkSpecialImage> cpuFilter(
69            SkSpecialImage *source,
70            SkVector sigma, const sk_sp<SkSpecialImage> &input,
71            SkIRect inputBounds, SkIRect dstBounds) const;
72
73    SkSize                      fSigma;
74    SkBlurImageFilter::TileMode fTileMode;
75};
76
// Flattenable registrar group for SkImageFilter listing SkBlurImageFilterImpl.
// NOTE(review): presumably this is what makes SkBlurImageFilterImpl::CreateProc discoverable
// during deserialization; the exact mechanics live in the SK_DEFINE_FLATTENABLE_REGISTRAR_*
// macros, which are defined outside this file.
SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkImageFilter)
    SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkBlurImageFilterImpl)
SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END
80
81///////////////////////////////////////////////////////////////////////////////
82
83sk_sp<SkImageFilter> SkBlurImageFilter::Make(SkScalar sigmaX, SkScalar sigmaY,
84                                             sk_sp<SkImageFilter> input,
85                                             const SkImageFilter::CropRect* cropRect,
86                                             TileMode tileMode) {
87    if (0 == sigmaX && 0 == sigmaY && !cropRect) {
88        return input;
89    }
90    return sk_sp<SkImageFilter>(
91          new SkBlurImageFilterImpl(sigmaX, sigmaY, input, cropRect, tileMode));
92}
93
// This rather arbitrary-looking value results in a maximum box blur kernel size
// of 1000 pixels on the raster path (532 * 3 * sqrt(2 * pi) / 4 is roughly 1000, see
// get_box3_params()), which matches the WebKit and Firefox
// implementations. Since the GPU path does not compute a box blur, putting
// the limit on sigma ensures consistent behaviour between the GPU and
// raster paths.
#define MAX_SIGMA SkIntToScalar(532)
100
101static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) {
102    SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height());
103    ctm.mapVectors(&sigma, 1);
104    sigma.fX = SkMinScalar(SkScalarAbs(sigma.fX), MAX_SIGMA);
105    sigma.fY = SkMinScalar(SkScalarAbs(sigma.fY), MAX_SIGMA);
106    return sigma;
107}
108
109SkBlurImageFilterImpl::SkBlurImageFilterImpl(SkScalar sigmaX,
110                                             SkScalar sigmaY,
111                                             sk_sp<SkImageFilter> input,
112                                             const CropRect* cropRect,
113                                             SkBlurImageFilter::TileMode tileMode)
114        : INHERITED(&input, 1, cropRect), fSigma{sigmaX, sigmaY}, fTileMode(tileMode) {}
115
116sk_sp<SkFlattenable> SkBlurImageFilterImpl::CreateProc(SkReadBuffer& buffer) {
117    SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1);
118    SkScalar sigmaX = buffer.readScalar();
119    SkScalar sigmaY = buffer.readScalar();
120    SkBlurImageFilter::TileMode tileMode;
121    if (buffer.isVersionLT(SkReadBuffer::kTileModeInBlurImageFilter_Version)) {
122        tileMode = SkBlurImageFilter::kClampToBlack_TileMode;
123    } else {
124        tileMode = static_cast<SkBlurImageFilter::TileMode>(buffer.readInt());
125    }
126
127    static_assert(SkBlurImageFilter::kMax_TileMode == 2, "CreateProc");
128    SkASSERT(tileMode <= SkBlurImageFilter::kMax_TileMode);
129
130    return SkBlurImageFilter::Make(
131          sigmaX, sigmaY, common.getInput(0), &common.cropRect(), tileMode);
132}
133
134void SkBlurImageFilterImpl::flatten(SkWriteBuffer& buffer) const {
135    this->INHERITED::flatten(buffer);
136    buffer.writeScalar(fSigma.fWidth);
137    buffer.writeScalar(fSigma.fHeight);
138
139    static_assert(SkBlurImageFilter::kMax_TileMode == 2, "flatten");
140    SkASSERT(fTileMode <= SkBlurImageFilter::kMax_TileMode);
141
142    buffer.writeInt(static_cast<int>(fTileMode));
143}
144
145#if SK_SUPPORT_GPU
146static GrTextureDomain::Mode to_texture_domain_mode(SkBlurImageFilter::TileMode tileMode) {
147    switch (tileMode) {
148      case SkBlurImageFilter::TileMode::kClamp_TileMode:
149        return GrTextureDomain::kClamp_Mode;
150      case SkBlurImageFilter::TileMode::kClampToBlack_TileMode:
151        return GrTextureDomain::kDecal_Mode;
152      case SkBlurImageFilter::TileMode::kRepeat_TileMode:
153        return GrTextureDomain::kRepeat_Mode;
154      default:
155        SK_ABORT("Unsupported tile mode.");
156        return GrTextureDomain::kDecal_Mode;
157    }
158}
159#endif
160
161static void get_box3_params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset,
162                            int *highOffset) {
163    float pi = SkScalarToFloat(SK_ScalarPI);
164    int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi) / 4.0f + 0.5f));
165    *kernelSize = d;
166    if (d % 2 == 1) {
167        *lowOffset = *highOffset = (d - 1) / 2;
168        *kernelSize3 = d;
169    } else {
170        *highOffset = d / 2;
171        *lowOffset = *highOffset - 1;
172        *kernelSize3 = d + 1;
173    }
174}
175
176#if !defined(SK_SUPPORT_LEGACY_BLUR_IMAGE)
177
178// This is defined by the SVG spec:
179// https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement
180static int calculate_window(double sigma) {
181    // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow
182    // using the Gauss filter. It also limits the size of buffers used hold intermediate values.
183    // Explanation of maximums:
184    //   sum0 = window * 255
185    //   sum1 = window * sum0 -> window * window * 255
186    //   sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255
187    //
188    //   The value window^3 * 255 must fit in a uint32_t. So,
189    //      window^3 < 2^32. window = 255.
190    //
191    //   window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)
192    //   For window <= 255, the largest value for sigma is 136.
193    sigma = SkTPin(sigma, 0.0, 136.0);
194    auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5));
195    return std::max(1, possibleWindow);
196}
197
198// Calculating the border is tricky. The border is the distance in pixels between the first dst
199// pixel and the first src pixel (or the last src pixel and the last dst pixel).
200// I will go through the odd case which is simpler, and then through the even case. Given a
201// stack of filters seven wide for the odd case of three passes.
202//
203//        S
204//     aaaAaaa
205//     bbbBbbb
206//     cccCccc
207//        D
208//
209// The furthest changed pixel is when the filters are in the following configuration.
210//
211//                 S
212//           aaaAaaa
213//        bbbBbbb
214//     cccCccc
215//        D
216//
217//  The A pixel is calculated using the value S, the B uses A, and the C uses B, and
218// finally D is C. So, with a window size of seven the border is nine. In the odd case, the
219// border is 3*((window - 1)/2).
220//
221// For even cases the filter stack is more complicated. The spec specifies two passes
222// of even filters and a final pass of odd filters. A stack for a width of six looks like
223// this.
224//
225//       S
226//    aaaAaa
227//     bbBbbb
228//    cccCccc
229//       D
230//
231// The furthest pixel looks like this.
232//
233//               S
234//          aaaAaa
235//        bbBbbb
236//    cccCccc
237//       D
238//
239// For a window of six, the border value is eight. In the even case the border is 3 *
240// (window/2) - 1.
// Distance in pixels between the first dst pixel and the first src pixel for a three pass
// blur of the given window: 3 * ((window - 1) / 2) for odd windows, 3 * (window / 2) - 1 for
// even windows.
static int calculate_border(int window) {
    const bool evenWindow = (window & 1) == 0;
    if (evenWindow) {
        return 3 * (window / 2) - 1;
    }
    return 3 * ((window - 1) / 2);
}
244
// Number of Sk4u entries needed for the three circular buffers of blur_one_direction():
// three buffers of (window - 1) entries, plus one extra entry when the window is even.
static int calculate_buffer(int window) {
    const int threePasses = 3 * (window - 1);
    if ((window & 1) == 0) {
        return threePasses + 1;
    }
    return threePasses;
}
249
250// blur_one_direction implements the common three pass box filter approximation of Gaussian blur,
251// but combines all three passes into a single pass. This approach is facilitated by three circular
252// buffers the width of the window which track values for trailing edges of each of the three
253// passes. This allows the algorithm to use more precision in the calculation because the values
254// are not rounded each pass. And this implementation also avoids a trap that's easy to fall
255// into resulting in blending in too many zeroes near the edge.
256//
257//  In general, a window sum has the form:
258//     sum_n+1 = sum_n + leading_edge - trailing_edge.
259//  If instead we do the subtraction at the end of the previous iteration, we can just
260// calculate the sums instead of having to do the subtractions too.
261//
262//      In previous iteration:
263//      sum_n+1 = sum_n - trailing_edge.
264//
265//      In this iteration:
266//      sum_n+1 = sum_n + leading_edge.
267//
268//  Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the
269// actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the
270// three passes at the same time has the form:
271//
272//    sum0_n+1 = sum0_n + leading edge
273//    sum1_n+1 = sum1_n + sum0_n+1
274//    sum2_n+1 = sum2_n + sum1_n+1
275//
276//    sum2_n+1 / window^3 is the new value of the destination pixel.
277//
278//    Reduce the sums by the trailing edges which were stored in the circular buffers,
// for the next go around. This is the case for odd sized windows; for even windows the third
// circular buffer is one larger than the first two circular buffers.
281//
282//    sum2_n+2 = sum2_n+1 - buffer2[i];
283//    buffer2[i] = sum1;
284//    sum1_n+2 = sum1_n+1 - buffer1[i];
285//    buffer1[i] = sum0;
286//    sum0_n+2 = sum0_n+1 - buffer0[i];
287//    buffer0[i] = leading edge
288//
289//   This is all encapsulated in the processValue function below.
290//
// One slot of the interleaved circular buffer shared by pass 0 and pass 1:
// element [0] holds a pass 0 trailing value, element [1] a pass 1 trailing value.
using Pass0And1 = Sk4u[2];

// Blurs srcH rows of 32 bit pixels along one direction with all three box passes combined.
//
//  buffer      scratch space for the circular buffers; the code uses (window - 1) entries for
//              each of pass 0 and pass 1, plus (window - 1) entries for pass 2 when the window
//              is odd or window entries when it is even (see calculate_buffer()).
//  window      box width. Callers in this file only invoke this with window > 1.
//  srcLeft, srcRight
//              extent of the source pixels along the blur direction, relative to the
//              destination start.
//  dstRight    number of destination pixels produced per row.
//  src, dst    first pixel of the first source / destination row.
//  srcXStride, dstXStride
//              step, in uint32_t pixels, between neighbouring pixels along the blur direction.
//  srcYStride, dstYStride
//              step, in uint32_t pixels, between successive rows.
//  srcH        number of rows to process.
//
// Choosing the strides lets the caller transpose while blurring.
// The would be dLeft parameter is assumed to be 0.
static void blur_one_direction(Sk4u* buffer, int window,
                               int srcLeft, int srcRight, int dstRight,
                               const uint32_t* src, int srcXStride, int srcYStride, int srcH,
                                     uint32_t* dst, int dstXStride, int dstYStride) {

    // The circular buffers are one less than the window.
    auto pass0Count = window - 1,
         pass1Count = window - 1,
         pass2Count = (window & 1) == 1 ? window - 1 : window;

    // Pass 0 and pass 1 share the front of the buffer, interleaved as Pass0And1 pairs;
    // pass 2 uses the entries that follow.
    Pass0And1* buffer01Start = (Pass0And1*)buffer;
    Sk4u*      buffer2Start  = buffer + pass0Count + pass1Count;
    Pass0And1* buffer01End   = (Pass0And1*)buffer2Start;
    Sk4u*      buffer2End    = buffer2Start + pass2Count;

    // If the window is odd then the divisor is just window ^ 3 otherwise,
    // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
    auto window2 = window * window;
    auto window3 = window2 * window;
    auto divisor = (window & 1) == 1 ? window3 : window3 + window2;

    // NB the sums in the blur code use the following technique to avoid
    // adding 1/2 to round the divide.
    //
    //   Sum/d + 1/2 == (Sum + h) / d
    //   Sum + d(1/2) ==  Sum + h
    //     h == (1/2)d
    //
    // But the d/2 itself should be rounded.
    //    h == d/2 + 1/2 == (d + 1) / 2
    //
    // weight = 1 / d * 2 ^ 32
    auto weight = static_cast<uint32_t>(round(1.0 / divisor * (1ull << 32)));
    auto half = static_cast<uint32_t>((divisor + 1) / 2);

    auto border = calculate_border(window);

    // Calculate the start and end of the source pixels with respect to the destination start.
    auto srcStart = srcLeft - border,
         srcEnd   = srcRight - border,
         dstEnd   = dstRight;

    for (auto y = 0; y < srcH; y++) {
        auto buffer01Cursor = buffer01Start;
        auto buffer2Cursor  = buffer2Start;

        // sum2 starts at half so that the final multiply by weight rounds instead of truncating.
        Sk4u sum0{0u};
        Sk4u sum1{0u};
        Sk4u sum2{half};

        // Every row starts with empty circular buffers.
        sk_bzero(buffer01Start, (buffer2End - (Sk4u *) (buffer01Start)) * sizeof(*buffer2Start));

        // Given an expanded input pixel, move the window ahead using the leadingEdge value.
        auto processValue = [&](const Sk4u& leadingEdge) -> Sk4u {
            sum0 += leadingEdge;
            sum1 += sum0;
            sum2 += sum1;

            // High 32 bits of sum2 * weight, i.e. sum2 / divisor.
            Sk4u value = sum2.mulHi(weight);

            // Remove the trailing edges for the next step and remember the new values.
            sum2 -= *buffer2Cursor;
            *buffer2Cursor = sum1;
            buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : buffer2Start;

            sum1 -= (*buffer01Cursor)[1];
            (*buffer01Cursor)[1] = sum0;
            sum0 -= (*buffer01Cursor)[0];
            (*buffer01Cursor)[0] = leadingEdge;
            buffer01Cursor =
                    (buffer01Cursor + 1) < buffer01End ? buffer01Cursor + 1 : buffer01Start;

            return value;
        };

        auto srcIdx = srcStart;
        auto dstIdx = 0;
        const uint32_t* srcCursor = src;
              uint32_t* dstCursor = dst;

        // The destination pixels are not affected by the src pixels,
        // change to zero as per the spec.
        // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
        while (dstIdx < srcIdx) {
            *dstCursor = 0;
            dstCursor += dstXStride;
            SK_PREFETCH(dstCursor);
            dstIdx++;
        }

        // The edge of the source is before the edge of the destination. Calculate the sums for
        // the pixels before the start of the destination.
        while (dstIdx > srcIdx) {
            Sk4u leadingEdge = srcIdx < srcEnd ? SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)) : 0;
            (void) processValue(leadingEdge);
            srcCursor += srcXStride;
            srcIdx++;
        }

        // The dstIdx and srcIdx are in sync now; the code just uses the dstIdx for both now.
        // Consume the source generating pixels to dst.
        auto loopEnd = std::min(dstEnd, srcEnd);
        while (dstIdx < loopEnd) {
            Sk4u leadingEdge = SkNx_cast<uint32_t>(Sk4b::Load(srcCursor));
            SkNx_cast<uint8_t>(processValue(leadingEdge)).store(dstCursor);
            srcCursor += srcXStride;
            dstCursor += dstXStride;
            SK_PREFETCH(dstCursor);
            dstIdx++;
        }

        // The leading edge is beyond the end of the source. Assume that the pixels
        // are now 0x0000 until the end of the destination.
        loopEnd = dstEnd;
        while (dstIdx < loopEnd) {
            SkNx_cast<uint8_t>(processValue(0u)).store(dstCursor);
            dstCursor += dstXStride;
            SK_PREFETCH(dstCursor);
            dstIdx++;
        }

        src += srcYStride;
        dst += dstYStride;
    }
}
417
418static sk_sp<SkSpecialImage> combined_pass_blur(
419        SkVector sigma,
420        SkSpecialImage* source, const sk_sp<SkSpecialImage>& input,
421        SkIRect srcBounds, SkIRect dstBounds) {
422    SkBitmap inputBM;
423
424    if (!input->getROPixels(&inputBM)) {
425        return nullptr;
426    }
427
428    if (inputBM.colorType() != kN32_SkColorType) {
429        return nullptr;
430    }
431
432    auto windowW = calculate_window(sigma.x()),
433         windowH = calculate_window(sigma.y());
434
435    SkBitmap src;
436    inputBM.extractSubset(&src, srcBounds);
437
438    // Make everything relative to the destination bounds.
439    srcBounds.offset(-dstBounds.x(), -dstBounds.y());
440    dstBounds.offset(-dstBounds.x(), -dstBounds.y());
441
442    auto srcW = srcBounds.width(),
443         srcH = srcBounds.height(),
444         dstW = dstBounds.width(),
445         dstH = dstBounds.height();
446
447    SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType());
448
449    SkBitmap dst;
450    if (!dst.tryAllocPixels(dstInfo)) {
451        return nullptr;
452    }
453
454    auto bufferSizeW = calculate_buffer(windowW),
455         bufferSizeH = calculate_buffer(windowH);
456
457    // The amount 1024 is enough for buffers up to 10 sigma. The tmp bitmap will be
458    // allocated on the heap.
459    SkSTArenaAlloc<1024> alloc;
460    Sk4u* buffer = alloc.makeArrayDefault<Sk4u>(std::max(bufferSizeW, bufferSizeH));
461
462    if (windowW > 1 && windowH > 1) {
463        // Blur both directions.
464
465        auto tmpW = srcH,
466             tmpH = dstW;
467
468        auto tmp = alloc.makeArrayDefault<uint32_t>(tmpW * tmpH);
469
470        // Blur horizontally, and transpose.
471        blur_one_direction(
472                buffer, windowW,
473                srcBounds.left(), srcBounds.right(), dstBounds.right(),
474                static_cast<uint32_t*>(src.getPixels()), 1, src.rowBytesAsPixels(), srcH,
475                tmp, tmpW, 1);
476
477        // Blur vertically (scan in memory order because of the transposition),
478        // and transpose back to the original orientation.
479        blur_one_direction(
480                buffer, windowH,
481                srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
482                tmp, 1, tmpW, tmpH,
483                static_cast<uint32_t*>(dst.getPixels()), dst.rowBytesAsPixels(), 1);
484    } else if (windowW > 1) {
485        // Blur only horizontally.
486
487        blur_one_direction(
488                buffer, windowW,
489                srcBounds.left(), srcBounds.right(), dstBounds.right(),
490                static_cast<uint32_t*>(src.getPixels()), 1, src.rowBytesAsPixels(), srcH,
491                static_cast<uint32_t*>(dst.getPixels()), 1, dst.rowBytesAsPixels());
492    } else if (windowH > 1) {
493        // Blur only vertically.
494
495        blur_one_direction(
496                buffer, windowH,
497                srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(),
498                static_cast<uint32_t*>(src.getPixels()), src.rowBytesAsPixels(), 1, srcW,
499                static_cast<uint32_t*>(dst.getPixels()), dst.rowBytesAsPixels(), 1);
500    } else {
501        // There is no blurring to do, but we still need to copy the source while accounting for the
502        // dstBounds. Remember that the src was intersected with the dst.
503        int y = 0;
504        size_t dstWBytes = dstW * sizeof(uint32_t);
505        for (;y < srcBounds.top(); y++) {
506            sk_bzero(dst.getAddr32(0, y), dstWBytes);
507        }
508        for (;y < srcBounds.bottom(); y++) {
509            int x = 0;
510            uint32_t* dstPtr = dst.getAddr32(0, y);
511            for (;x < srcBounds.left(); x++) {
512                *dstPtr++ = 0;
513            }
514
515            memcpy(dstPtr,
516                   src.getAddr32(x - srcBounds.left(), y - srcBounds.top()),
517                   srcW * sizeof(uint32_t));
518
519            dstPtr += srcW;
520            x += srcW;
521
522            for (;x < dstBounds.right(); x++) {
523                *dstPtr++ = 0;
524            }
525        }
526        for (;y < dstBounds.bottom(); y++) {
527            sk_bzero(dst.getAddr32(0, y), dstWBytes);
528        }
529    }
530
531    return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(),
532                                                          dstBounds.height()),
533                                          dst, &source->props());
534}
535#endif
536
537sk_sp<SkSpecialImage> SkBlurImageFilterImpl::onFilterImage(SkSpecialImage* source,
538                                                           const Context& ctx,
539                                                           SkIPoint* offset) const {
540    SkIPoint inputOffset = SkIPoint::Make(0, 0);
541
542    sk_sp<SkSpecialImage> input(this->filterInput(0, source, ctx, &inputOffset));
543    if (!input) {
544        return nullptr;
545    }
546
547    SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY,
548                                            input->width(), input->height());
549
550    // Calculate the destination bounds.
551    SkIRect dstBounds;
552    if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) {
553        return nullptr;
554    }
555    if (!inputBounds.intersect(dstBounds)) {
556        return nullptr;
557    }
558
559    // Save the offset in preparation to make all rectangles relative to the inputOffset.
560    SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop);
561
562    // Make all bounds relative to the inputOffset.
563    inputBounds.offset(-inputOffset);
564    dstBounds.offset(-inputOffset);
565
566    const SkVector sigma = map_sigma(fSigma, ctx.ctm());
567    if (sigma.x() < 0 || sigma.y() < 0) {
568        return nullptr;
569    }
570
571    sk_sp<SkSpecialImage> result;
572#if SK_SUPPORT_GPU
573    if (source->isTextureBacked()) {
574        // Ensure the input is in the destination's gamut. This saves us from having to do the
575        // xform during the filter itself.
576        input = ImageToColorSpace(input.get(), ctx.outputProperties());
577
578        result = this->gpuFilter(source, sigma, input, inputBounds, dstBounds,
579                                 ctx.outputProperties());
580    } else
581#endif
582    {
583        // If both sigmas will result in a zero width window, there is nothing to do.
584        if (sigma.x() < kZeroWindow && sigma.y() < kZeroWindow) {
585            result = input->makeSubset(inputBounds);
586        } else {
587            #if defined(SK_SUPPORT_LEGACY_BLUR_IMAGE)
588                result = this->cpuFilter(source, sigma, input, inputBounds, dstBounds);
589            #else
590                result = combined_pass_blur(sigma, source, input, inputBounds, dstBounds);
591            #endif
592        }
593    }
594
595    // Return the resultOffset if the blur succeeded.
596    if (result != nullptr) {
597        *offset = resultOffset;
598    }
599    return result;
600}
601
602#if SK_SUPPORT_GPU
603sk_sp<SkSpecialImage> SkBlurImageFilterImpl::gpuFilter(
604        SkSpecialImage *source,
605        SkVector sigma, const sk_sp<SkSpecialImage> &input,
606        SkIRect inputBounds, SkIRect dstBounds, const OutputProperties& outProps) const
607{
608    // If both sigmas produce arms of the cross that are less than 1/2048, then they
609    // do not contribute to the sum of the filter in a way to change a gamma corrected result.
610    // Let s = 1/(2*sigma^2)
611    // The normalizing value   n = 1 + 4*E^(-s) + 4*E^(-2s)
612    // The raw cross arm value c = E^-s
613    // The normalized cross arm value = c/n
614    // N[Solve[{c/n == 1/2048, sigma > 0}, sigma], 16]
615    static constexpr double kZeroWindowGPU = 0.2561130112451658;
616    if (sigma.x() < kZeroWindowGPU && sigma.y() < kZeroWindowGPU) {
617        return input->makeSubset(inputBounds);
618    }
619
620    GrContext* context = source->getContext();
621
622    sk_sp<GrTextureProxy> inputTexture(input->asTextureProxyRef(context));
623    if (!inputTexture) {
624        return nullptr;
625    }
626
627    // Typically, we would create the RTC with the output's color space (from ctx), but we
628    // always blur in the PixelConfig of the *input*. Those might not be compatible (if they
629    // have different transfer functions). We've already guaranteed that those color spaces
630    // have the same gamut, so in this case, we do everything in the input's color space.
631    // ...
632    // Unless the output is legacy. In that case, the input could be almost anything (if we're
633    // using SkColorSpaceXformCanvas), but we can't make a corresponding RTC. We don't care to,
634    // either, we want to do our blending (and blurring) without any color correction, so pass
635    // nullptr here, causing us to operate entirely in the input's color space, with no decoding.
636    // Then, when we create the output image later, we tag it with the input's color space, so
637    // it will be tagged correctly, regardless of how we created the intermediate RTCs.
638    sk_sp<GrRenderTargetContext> renderTargetContext(SkGpuBlurUtils::GaussianBlur(
639        context,
640        std::move(inputTexture),
641        outProps.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr,
642        dstBounds,
643        inputBounds,
644        sigma.x(),
645        sigma.y(),
646        to_texture_domain_mode(fTileMode)));
647    if (!renderTargetContext) {
648        return nullptr;
649    }
650
651    return SkSpecialImage::MakeDeferredFromGpu(
652            context,
653            SkIRect::MakeWH(dstBounds.width(), dstBounds.height()),
654            kNeedNewImageUniqueID_SpecialImage,
655            renderTargetContext->asTextureProxyRef(),
656            sk_ref_sp(input->getColorSpace()),
657            &source->props());
658}
659#endif
660
661// TODO: Implement CPU backend for different fTileMode.
662sk_sp<SkSpecialImage> SkBlurImageFilterImpl::cpuFilter(
663        SkSpecialImage *source,
664        SkVector sigma, const sk_sp<SkSpecialImage> &input,
665        SkIRect inputBounds, SkIRect dstBounds) const
666{
667    int kernelSizeX, kernelSizeX3, lowOffsetX, highOffsetX;
668    int kernelSizeY, kernelSizeY3, lowOffsetY, highOffsetY;
669    get_box3_params(sigma.x(), &kernelSizeX, &kernelSizeX3, &lowOffsetX, &highOffsetX);
670    get_box3_params(sigma.y(), &kernelSizeY, &kernelSizeY3, &lowOffsetY, &highOffsetY);
671
672    SkBitmap inputBM;
673
674    if (!input->getROPixels(&inputBM) && inputBM.colorType() != kN32_SkColorType) {
675        return nullptr;
676    }
677
678    SkImageInfo info = SkImageInfo::Make(dstBounds.width(), dstBounds.height(),
679                                         inputBM.colorType(), inputBM.alphaType());
680
681    SkBitmap tmp, dst;
682    if (!tmp.tryAllocPixels(info) || !dst.tryAllocPixels(info)) {
683        return nullptr;
684    }
685
686    // Get ready to blur.
687    const SkPMColor* s = inputBM.getAddr32(inputBounds.x(), inputBounds.y());
688          SkPMColor* t = tmp.getAddr32(0, 0);
689          SkPMColor* d = dst.getAddr32(0, 0);
690
691    // Shift everything from being relative to the orignal input bounds to the destination bounds.
692    inputBounds.offset(-dstBounds.x(), -dstBounds.y());
693    dstBounds.offset(-dstBounds.x(), -dstBounds.y());
694
695    int w  = dstBounds.width(),
696        h  = dstBounds.height(),
697        sw = inputBM.rowBytesAsPixels();
698
699    SkIRect inputBoundsT = SkIRect::MakeLTRB(inputBounds.top(), inputBounds.left(),
700                                             inputBounds.bottom(), inputBounds.right());
701    SkIRect dstBoundsT = SkIRect::MakeWH(dstBounds.height(), dstBounds.width());
702
703    /**
704     *
705     * In order to make memory accesses cache-friendly, we reorder the passes to
706     * use contiguous memory reads wherever possible.
707     *
708     * For example, the 6 passes of the X-and-Y blur case are rewritten as
709     * follows. Instead of 3 passes in X and 3 passes in Y, we perform
710     * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X,
711     * then 1 pass in X transposed to Y on write.
712     *
713     * +----+       +----+       +----+        +---+       +---+       +---+        +----+
714     * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB |
715     * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+
716     *                                         +---+       +---+       +---+
717     *
718     * In this way, two of the y-blurs become x-blurs applied to transposed
719     * images, and all memory reads are contiguous.
720     */
721    if (kernelSizeX > 0 && kernelSizeY > 0) {
722        SkOpts::box_blur_xx(s, sw,  inputBounds,  t, kernelSizeX,  lowOffsetX,  highOffsetX, w, h);
723        SkOpts::box_blur_xx(t,  w,  dstBounds,    d, kernelSizeX,  highOffsetX, lowOffsetX,  w, h);
724        SkOpts::box_blur_xy(d,  w,  dstBounds,    t, kernelSizeX3, highOffsetX, highOffsetX, w, h);
725        SkOpts::box_blur_xx(t,  h,  dstBoundsT,   d, kernelSizeY,  lowOffsetY,  highOffsetY, h, w);
726        SkOpts::box_blur_xx(d,  h,  dstBoundsT,   t, kernelSizeY,  highOffsetY, lowOffsetY,  h, w);
727        SkOpts::box_blur_xy(t,  h,  dstBoundsT,   d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
728    } else if (kernelSizeX > 0) {
729        SkOpts::box_blur_xx(s, sw,  inputBounds,  d, kernelSizeX,  lowOffsetX,  highOffsetX, w, h);
730        SkOpts::box_blur_xx(d,  w,  dstBounds,    t, kernelSizeX,  highOffsetX, lowOffsetX,  w, h);
731        SkOpts::box_blur_xx(t,  w,  dstBounds,    d, kernelSizeX3, highOffsetX, highOffsetX, w, h);
732    } else if (kernelSizeY > 0) {
733        SkOpts::box_blur_yx(s, sw,  inputBoundsT, d, kernelSizeY,  lowOffsetY,  highOffsetY, h, w);
734        SkOpts::box_blur_xx(d,  h,  dstBoundsT,   t, kernelSizeY,  highOffsetY, lowOffsetY,  h, w);
735        SkOpts::box_blur_xy(t,  h,  dstBoundsT,   d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
736    }
737
738    return SkSpecialImage::MakeFromRaster(SkIRect::MakeSize(dstBounds.size()),
739                                          dst, &source->props());
740}
741
742sk_sp<SkImageFilter> SkBlurImageFilterImpl::onMakeColorSpace(SkColorSpaceXformer* xformer)
743const {
744    SkASSERT(1 == this->countInputs());
745
746    auto input = xformer->apply(this->getInput(0));
747    if (this->getInput(0) != input.get()) {
748        return SkBlurImageFilter::Make(fSigma.width(), fSigma.height(), std::move(input),
749                                       this->getCropRectIfSet(), fTileMode);
750    }
751    return this->refMe();
752}
753
754SkRect SkBlurImageFilterImpl::computeFastBounds(const SkRect& src) const {
755    SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src;
756    bounds.outset(fSigma.width() * 3, fSigma.height() * 3);
757    return bounds;
758}
759
760SkIRect SkBlurImageFilterImpl::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm,
761                                              MapDirection) const {
762    SkVector sigma = map_sigma(fSigma, ctm);
763    return src.makeOutset(SkScalarCeilToInt(sigma.x() * 3), SkScalarCeilToInt(sigma.y() * 3));
764}
765
766#ifndef SK_IGNORE_TO_STRING
767void SkBlurImageFilterImpl::toString(SkString* str) const {
768    str->appendf("SkBlurImageFilterImpl: (");
769    str->appendf("sigma: (%f, %f) tileMode: %d input (", fSigma.fWidth, fSigma.fHeight,
770                 static_cast<int>(fTileMode));
771
772    if (this->getInput(0)) {
773        this->getInput(0)->toString(str);
774    }
775
776    str->append("))");
777}
778#endif
779