// SkBlurImageFilter.cpp revision f8e2502819499894dff40c4f2f46e46edda15507
1/* 2 * Copyright 2011 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkBlurImageFilter.h" 9 10#include <algorithm> 11 12#include "SkArenaAlloc.h" 13#include "SkAutoPixmapStorage.h" 14#include "SkBitmap.h" 15#include "SkColorData.h" 16#include "SkColorSpaceXformer.h" 17#include "SkImageFilterPriv.h" 18#include "SkTFitsIn.h" 19#include "SkGpuBlurUtils.h" 20#include "SkNx.h" 21#include "SkOpts.h" 22#include "SkReadBuffer.h" 23#include "SkSpecialImage.h" 24#include "SkWriteBuffer.h" 25 26#if SK_SUPPORT_GPU 27#include "GrContext.h" 28#include "GrTextureProxy.h" 29#include "SkGr.h" 30#endif 31 32// The value where the three pass window calculation results in a zero window. 33// N[Solve[sigma*3*Sqrt[2 Pi]/4 == 1/2, sigma], 16] 34static constexpr double kZeroWindow = 0.26596152026762; 35static constexpr double kPi = 3.14159265358979323846264338327950288; 36 37class SkBlurImageFilterImpl final : public SkImageFilter { 38public: 39 SkBlurImageFilterImpl(SkScalar sigmaX, 40 SkScalar sigmaY, 41 sk_sp<SkImageFilter> input, 42 const CropRect* cropRect, 43 SkBlurImageFilter::TileMode tileMode); 44 45 SkRect computeFastBounds(const SkRect&) const override; 46 47 SK_TO_STRING_OVERRIDE() 48 SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkBlurImageFilterImpl) 49 50protected: 51 void flatten(SkWriteBuffer&) const override; 52 sk_sp<SkSpecialImage> onFilterImage(SkSpecialImage* source, const Context&, 53 SkIPoint* offset) const override; 54 sk_sp<SkImageFilter> onMakeColorSpace(SkColorSpaceXformer*) const override; 55 SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix&, MapDirection) const override; 56 57private: 58 typedef SkImageFilter INHERITED; 59 friend class SkImageFilter; 60 61 #if SK_SUPPORT_GPU 62 sk_sp<SkSpecialImage> gpuFilter( 63 SkSpecialImage *source, 64 SkVector sigma, const sk_sp<SkSpecialImage> &input, 65 SkIRect inputBounds, SkIRect 
dstBounds, const OutputProperties& outProps) const; 66 #endif 67 68 sk_sp<SkSpecialImage> cpuFilter( 69 SkSpecialImage *source, 70 SkVector sigma, const sk_sp<SkSpecialImage> &input, 71 SkIRect inputBounds, SkIRect dstBounds) const; 72 73 SkSize fSigma; 74 SkBlurImageFilter::TileMode fTileMode; 75}; 76 77SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkImageFilter) 78 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkBlurImageFilterImpl) 79SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END 80 81/////////////////////////////////////////////////////////////////////////////// 82 83sk_sp<SkImageFilter> SkBlurImageFilter::Make(SkScalar sigmaX, SkScalar sigmaY, 84 sk_sp<SkImageFilter> input, 85 const SkImageFilter::CropRect* cropRect, 86 TileMode tileMode) { 87 if (0 == sigmaX && 0 == sigmaY && !cropRect) { 88 return input; 89 } 90 return sk_sp<SkImageFilter>( 91 new SkBlurImageFilterImpl(sigmaX, sigmaY, input, cropRect, tileMode)); 92} 93 94// This rather arbitrary-looking value results in a maximum box blur kernel size 95// of 1000 pixels on the raster path, which matches the WebKit and Firefox 96// implementations. Since the GPU path does not compute a box blur, putting 97// the limit on sigma ensures consistent behaviour between the GPU and 98// raster paths. 
99#define MAX_SIGMA SkIntToScalar(532) 100 101static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) { 102 SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height()); 103 ctm.mapVectors(&sigma, 1); 104 sigma.fX = SkMinScalar(SkScalarAbs(sigma.fX), MAX_SIGMA); 105 sigma.fY = SkMinScalar(SkScalarAbs(sigma.fY), MAX_SIGMA); 106 return sigma; 107} 108 109SkBlurImageFilterImpl::SkBlurImageFilterImpl(SkScalar sigmaX, 110 SkScalar sigmaY, 111 sk_sp<SkImageFilter> input, 112 const CropRect* cropRect, 113 SkBlurImageFilter::TileMode tileMode) 114 : INHERITED(&input, 1, cropRect), fSigma{sigmaX, sigmaY}, fTileMode(tileMode) {} 115 116sk_sp<SkFlattenable> SkBlurImageFilterImpl::CreateProc(SkReadBuffer& buffer) { 117 SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1); 118 SkScalar sigmaX = buffer.readScalar(); 119 SkScalar sigmaY = buffer.readScalar(); 120 SkBlurImageFilter::TileMode tileMode; 121 if (buffer.isVersionLT(SkReadBuffer::kTileModeInBlurImageFilter_Version)) { 122 tileMode = SkBlurImageFilter::kClampToBlack_TileMode; 123 } else { 124 tileMode = static_cast<SkBlurImageFilter::TileMode>(buffer.readInt()); 125 } 126 127 static_assert(SkBlurImageFilter::kMax_TileMode == 2, "CreateProc"); 128 SkASSERT(tileMode <= SkBlurImageFilter::kMax_TileMode); 129 130 return SkBlurImageFilter::Make( 131 sigmaX, sigmaY, common.getInput(0), &common.cropRect(), tileMode); 132} 133 134void SkBlurImageFilterImpl::flatten(SkWriteBuffer& buffer) const { 135 this->INHERITED::flatten(buffer); 136 buffer.writeScalar(fSigma.fWidth); 137 buffer.writeScalar(fSigma.fHeight); 138 139 static_assert(SkBlurImageFilter::kMax_TileMode == 2, "flatten"); 140 SkASSERT(fTileMode <= SkBlurImageFilter::kMax_TileMode); 141 142 buffer.writeInt(static_cast<int>(fTileMode)); 143} 144 145#if SK_SUPPORT_GPU 146static GrTextureDomain::Mode to_texture_domain_mode(SkBlurImageFilter::TileMode tileMode) { 147 switch (tileMode) { 148 case SkBlurImageFilter::TileMode::kClamp_TileMode: 149 return 
GrTextureDomain::kClamp_Mode; 150 case SkBlurImageFilter::TileMode::kClampToBlack_TileMode: 151 return GrTextureDomain::kDecal_Mode; 152 case SkBlurImageFilter::TileMode::kRepeat_TileMode: 153 return GrTextureDomain::kRepeat_Mode; 154 default: 155 SK_ABORT("Unsupported tile mode."); 156 return GrTextureDomain::kDecal_Mode; 157 } 158} 159#endif 160 161static void get_box3_params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset, 162 int *highOffset) { 163 float pi = SkScalarToFloat(SK_ScalarPI); 164 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi) / 4.0f + 0.5f)); 165 *kernelSize = d; 166 if (d % 2 == 1) { 167 *lowOffset = *highOffset = (d - 1) / 2; 168 *kernelSize3 = d; 169 } else { 170 *highOffset = d / 2; 171 *lowOffset = *highOffset - 1; 172 *kernelSize3 = d + 1; 173 } 174} 175 176#if !defined(SK_SUPPORT_LEGACY_BLUR_IMAGE) 177 178// This is defined by the SVG spec: 179// https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement 180static int calculate_window(double sigma) { 181 // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow 182 // using the Gauss filter. It also limits the size of buffers used hold intermediate values. 183 // Explanation of maximums: 184 // sum0 = window * 255 185 // sum1 = window * sum0 -> window * window * 255 186 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255 187 // 188 // The value window^3 * 255 must fit in a uint32_t. So, 189 // window^3 < 2^32. window = 255. 190 // 191 // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5) 192 // For window <= 255, the largest value for sigma is 136. 193 sigma = SkTPin(sigma, 0.0, 136.0); 194 auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)); 195 return std::max(1, possibleWindow); 196} 197 198// Calculating the border is tricky. 
The border is the distance in pixels between the first dst 199// pixel and the first src pixel (or the last src pixel and the last dst pixel). 200// I will go through the odd case which is simpler, and then through the even case. Given a 201// stack of filters seven wide for the odd case of three passes. 202// 203// S 204// aaaAaaa 205// bbbBbbb 206// cccCccc 207// D 208// 209// The furthest changed pixel is when the filters are in the following configuration. 210// 211// S 212// aaaAaaa 213// bbbBbbb 214// cccCccc 215// D 216// 217// The A pixel is calculated using the value S, the B uses A, and the C uses B, and 218// finally D is C. So, with a window size of seven the border is nine. In the odd case, the 219// border is 3*((window - 1)/2). 220// 221// For even cases the filter stack is more complicated. The spec specifies two passes 222// of even filters and a final pass of odd filters. A stack for a width of six looks like 223// this. 224// 225// S 226// aaaAaa 227// bbBbbb 228// cccCccc 229// D 230// 231// The furthest pixel looks like this. 232// 233// S 234// aaaAaa 235// bbBbbb 236// cccCccc 237// D 238// 239// For a window of six, the border value is eight. In the even case the border is 3 * 240// (window/2) - 1. 241static int calculate_border(int window) { 242 return (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1; 243} 244 245static int calculate_buffer(int window) { 246 int bufferSize = window - 1; 247 return (window & 1) == 1 ? 3 * bufferSize : 3 * bufferSize + 1; 248} 249 250// blur_one_direction implements the common three pass box filter approximation of Gaussian blur, 251// but combines all three passes into a single pass. This approach is facilitated by three circular 252// buffers the width of the window which track values for trailing edges of each of the three 253// passes. This allows the algorithm to use more precision in the calculation because the values 254// are not rounded each pass. 
And this implementation also avoids a trap that's easy to fall 255// into resulting in blending in too many zeroes near the edge. 256// 257// In general, a window sum has the form: 258// sum_n+1 = sum_n + leading_edge - trailing_edge. 259// If instead we do the subtraction at the end of the previous iteration, we can just 260// calculate the sums instead of having to do the subtractions too. 261// 262// In previous iteration: 263// sum_n+1 = sum_n - trailing_edge. 264// 265// In this iteration: 266// sum_n+1 = sum_n + leading_edge. 267// 268// Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the 269// actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the 270// three passes at the same time has the form: 271// 272// sum0_n+1 = sum0_n + leading edge 273// sum1_n+1 = sum1_n + sum0_n+1 274// sum2_n+1 = sum2_n + sum1_n+1 275// 276// sum2_n+1 / window^3 is the new value of the destination pixel. 277// 278// Reduce the sums by the trailing edges which were stored in the circular buffers, 279// for the next go around. This is the case for odd sized windows, even windows the the third 280// circular buffer is one larger then the first two circular buffers. 281// 282// sum2_n+2 = sum2_n+1 - buffer2[i]; 283// buffer2[i] = sum1; 284// sum1_n+2 = sum1_n+1 - buffer1[i]; 285// buffer1[i] = sum0; 286// sum0_n+2 = sum0_n+1 - buffer0[i]; 287// buffer0[i] = leading edge 288// 289// This is all encapsulated in the processValue function below. 290// 291using Pass0And1 = Sk4u[2]; 292// The would be dLeft parameter is assumed to be 0. 293static void blur_one_direction(Sk4u* buffer, int window, 294 int srcLeft, int srcRight, int dstRight, 295 const uint32_t* src, int srcXStride, int srcYStride, int srcH, 296 uint32_t* dst, int dstXStride, int dstYStride) { 297 298 // The circular buffers are one less than the window. 
299 auto pass0Count = window - 1, 300 pass1Count = window - 1, 301 pass2Count = (window & 1) == 1 ? window - 1 : window; 302 303 Pass0And1* buffer01Start = (Pass0And1*)buffer; 304 Sk4u* buffer2Start = buffer + pass0Count + pass1Count; 305 Pass0And1* buffer01End = (Pass0And1*)buffer2Start; 306 Sk4u* buffer2End = buffer2Start + pass2Count; 307 308 // If the window is odd then the divisor is just window ^ 3 otherwise, 309 // it is window * window * (window + 1) = window ^ 3 + window ^ 2; 310 auto window2 = window * window; 311 auto window3 = window2 * window; 312 auto divisor = (window & 1) == 1 ? window3 : window3 + window2; 313 314 // NB the sums in the blur code use the following technique to avoid 315 // adding 1/2 to round the divide. 316 // 317 // Sum/d + 1/2 == (Sum + h) / d 318 // Sum + d(1/2) == Sum + h 319 // h == (1/2)d 320 // 321 // But the d/2 it self should be rounded. 322 // h == d/2 + 1/2 == (d + 1) / 2 323 // 324 // weight = 1 / d * 2 ^ 32 325 auto weight = static_cast<uint32_t>(round(1.0 / divisor * (1ull << 32))); 326 auto half = static_cast<uint32_t>((divisor + 1) / 2); 327 328 auto border = calculate_border(window); 329 330 // Calculate the start and end of the source pixels with respect to the destination start. 331 auto srcStart = srcLeft - border, 332 srcEnd = srcRight - border, 333 dstEnd = dstRight; 334 335 for (auto y = 0; y < srcH; y++) { 336 auto buffer01Cursor = buffer01Start; 337 auto buffer2Cursor = buffer2Start; 338 339 Sk4u sum0{0u}; 340 Sk4u sum1{0u}; 341 Sk4u sum2{half}; 342 343 sk_bzero(buffer01Start, (buffer2End - (Sk4u *) (buffer01Start)) * sizeof(*buffer2Start)); 344 345 // Given an expanded input pixel, move the window ahead using the leadingEdge value. 346 auto processValue = [&](const Sk4u& leadingEdge) -> Sk4u { 347 sum0 += leadingEdge; 348 sum1 += sum0; 349 sum2 += sum1; 350 351 Sk4u value = sum2.mulHi(weight); 352 353 sum2 -= *buffer2Cursor; 354 *buffer2Cursor = sum1; 355 buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? 
buffer2Cursor + 1 : buffer2Start; 356 357 sum1 -= (*buffer01Cursor)[1]; 358 (*buffer01Cursor)[1] = sum0; 359 sum0 -= (*buffer01Cursor)[0]; 360 (*buffer01Cursor)[0] = leadingEdge; 361 buffer01Cursor = 362 (buffer01Cursor + 1) < buffer01End ? buffer01Cursor + 1 : buffer01Start; 363 364 return value; 365 }; 366 367 auto srcIdx = srcStart; 368 auto dstIdx = 0; 369 const uint32_t* srcCursor = src; 370 uint32_t* dstCursor = dst; 371 372 // The destination pixels are not effected by the src pixels, 373 // change to zero as per the spec. 374 // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro 375 while (dstIdx < srcIdx) { 376 *dstCursor = 0; 377 dstCursor += dstXStride; 378 SK_PREFETCH(dstCursor); 379 dstIdx++; 380 } 381 382 // The edge of the source is before the edge of the destination. Calculate the sums for 383 // the pixels before the start of the destination. 384 while (dstIdx > srcIdx) { 385 Sk4u leadingEdge = srcIdx < srcEnd ? SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)) : 0; 386 (void) processValue(leadingEdge); 387 srcCursor += srcXStride; 388 srcIdx++; 389 } 390 391 // The dstIdx and srcIdx are in sync now; the code just uses the dstIdx for both now. 392 // Consume the source generating pixels to dst. 393 auto loopEnd = std::min(dstEnd, srcEnd); 394 while (dstIdx < loopEnd) { 395 Sk4u leadingEdge = SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)); 396 SkNx_cast<uint8_t>(processValue(leadingEdge)).store(dstCursor); 397 srcCursor += srcXStride; 398 dstCursor += dstXStride; 399 SK_PREFETCH(dstCursor); 400 dstIdx++; 401 } 402 403 // The leading edge is beyond the end of the source. Assume that the pixels 404 // are now 0x0000 until the end of the destination. 
405 loopEnd = dstEnd; 406 while (dstIdx < loopEnd) { 407 SkNx_cast<uint8_t>(processValue(0u)).store(dstCursor); 408 dstCursor += dstXStride; 409 SK_PREFETCH(dstCursor); 410 dstIdx++; 411 } 412 413 src += srcYStride; 414 dst += dstYStride; 415 } 416} 417 418static sk_sp<SkSpecialImage> combined_pass_blur( 419 SkVector sigma, 420 SkSpecialImage* source, const sk_sp<SkSpecialImage>& input, 421 SkIRect srcBounds, SkIRect dstBounds) { 422 SkBitmap inputBM; 423 424 if (!input->getROPixels(&inputBM)) { 425 return nullptr; 426 } 427 428 if (inputBM.colorType() != kN32_SkColorType) { 429 return nullptr; 430 } 431 432 auto windowW = calculate_window(sigma.x()), 433 windowH = calculate_window(sigma.y()); 434 435 SkBitmap src; 436 inputBM.extractSubset(&src, srcBounds); 437 438 // Make everything relative to the destination bounds. 439 srcBounds.offset(-dstBounds.x(), -dstBounds.y()); 440 dstBounds.offset(-dstBounds.x(), -dstBounds.y()); 441 442 auto srcW = srcBounds.width(), 443 srcH = srcBounds.height(), 444 dstW = dstBounds.width(), 445 dstH = dstBounds.height(); 446 447 SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType()); 448 449 SkBitmap dst; 450 if (!dst.tryAllocPixels(dstInfo)) { 451 return nullptr; 452 } 453 454 auto bufferSizeW = calculate_buffer(windowW), 455 bufferSizeH = calculate_buffer(windowH); 456 457 // The amount 1024 is enough for buffers up to 10 sigma. The tmp bitmap will be 458 // allocated on the heap. 459 SkSTArenaAlloc<1024> alloc; 460 Sk4u* buffer = alloc.makeArrayDefault<Sk4u>(std::max(bufferSizeW, bufferSizeH)); 461 462 if (windowW > 1 && windowH > 1) { 463 // Blur both directions. 464 465 auto tmpW = srcH, 466 tmpH = dstW; 467 468 auto tmp = alloc.makeArrayDefault<uint32_t>(tmpW * tmpH); 469 470 // Blur horizontally, and transpose. 
471 blur_one_direction( 472 buffer, windowW, 473 srcBounds.left(), srcBounds.right(), dstBounds.right(), 474 static_cast<uint32_t*>(src.getPixels()), 1, src.rowBytesAsPixels(), srcH, 475 tmp, tmpW, 1); 476 477 // Blur vertically (scan in memory order because of the transposition), 478 // and transpose back to the original orientation. 479 blur_one_direction( 480 buffer, windowH, 481 srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(), 482 tmp, 1, tmpW, tmpH, 483 static_cast<uint32_t*>(dst.getPixels()), dst.rowBytesAsPixels(), 1); 484 } else if (windowW > 1) { 485 // Blur only horizontally. 486 487 blur_one_direction( 488 buffer, windowW, 489 srcBounds.left(), srcBounds.right(), dstBounds.right(), 490 static_cast<uint32_t*>(src.getPixels()), 1, src.rowBytesAsPixels(), srcH, 491 static_cast<uint32_t*>(dst.getPixels()), 1, dst.rowBytesAsPixels()); 492 } else if (windowH > 1) { 493 // Blur only vertically. 494 495 blur_one_direction( 496 buffer, windowH, 497 srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(), 498 static_cast<uint32_t*>(src.getPixels()), src.rowBytesAsPixels(), 1, srcW, 499 static_cast<uint32_t*>(dst.getPixels()), dst.rowBytesAsPixels(), 1); 500 } else { 501 // There is no blurring to do, but we still need to copy the source while accounting for the 502 // dstBounds. Remember that the src was intersected with the dst. 
503 int y = 0; 504 size_t dstWBytes = dstW * sizeof(uint32_t); 505 for (;y < srcBounds.top(); y++) { 506 sk_bzero(dst.getAddr32(0, y), dstWBytes); 507 } 508 for (;y < srcBounds.bottom(); y++) { 509 int x = 0; 510 uint32_t* dstPtr = dst.getAddr32(0, y); 511 for (;x < srcBounds.left(); x++) { 512 *dstPtr++ = 0; 513 } 514 515 memcpy(dstPtr, 516 src.getAddr32(x - srcBounds.left(), y - srcBounds.top()), 517 srcW * sizeof(uint32_t)); 518 519 dstPtr += srcW; 520 x += srcW; 521 522 for (;x < dstBounds.right(); x++) { 523 *dstPtr++ = 0; 524 } 525 } 526 for (;y < dstBounds.bottom(); y++) { 527 sk_bzero(dst.getAddr32(0, y), dstWBytes); 528 } 529 } 530 531 return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), 532 dstBounds.height()), 533 dst, &source->props()); 534} 535#endif 536 537sk_sp<SkSpecialImage> SkBlurImageFilterImpl::onFilterImage(SkSpecialImage* source, 538 const Context& ctx, 539 SkIPoint* offset) const { 540 SkIPoint inputOffset = SkIPoint::Make(0, 0); 541 542 sk_sp<SkSpecialImage> input(this->filterInput(0, source, ctx, &inputOffset)); 543 if (!input) { 544 return nullptr; 545 } 546 547 SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY, 548 input->width(), input->height()); 549 550 // Calculate the destination bounds. 551 SkIRect dstBounds; 552 if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) { 553 return nullptr; 554 } 555 if (!inputBounds.intersect(dstBounds)) { 556 return nullptr; 557 } 558 559 // Save the offset in preparation to make all rectangles relative to the inputOffset. 560 SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop); 561 562 // Make all bounds relative to the inputOffset. 
563 inputBounds.offset(-inputOffset); 564 dstBounds.offset(-inputOffset); 565 566 const SkVector sigma = map_sigma(fSigma, ctx.ctm()); 567 if (sigma.x() < 0 || sigma.y() < 0) { 568 return nullptr; 569 } 570 571 sk_sp<SkSpecialImage> result; 572#if SK_SUPPORT_GPU 573 if (source->isTextureBacked()) { 574 // Ensure the input is in the destination's gamut. This saves us from having to do the 575 // xform during the filter itself. 576 input = ImageToColorSpace(input.get(), ctx.outputProperties()); 577 578 result = this->gpuFilter(source, sigma, input, inputBounds, dstBounds, 579 ctx.outputProperties()); 580 } else 581#endif 582 { 583 // If both sigmas will result in a zero width window, there is nothing to do. 584 if (sigma.x() < kZeroWindow && sigma.y() < kZeroWindow) { 585 result = input->makeSubset(inputBounds); 586 } else { 587 #if defined(SK_SUPPORT_LEGACY_BLUR_IMAGE) 588 result = this->cpuFilter(source, sigma, input, inputBounds, dstBounds); 589 #else 590 result = combined_pass_blur(sigma, source, input, inputBounds, dstBounds); 591 #endif 592 } 593 } 594 595 // Return the resultOffset if the blur succeeded. 596 if (result != nullptr) { 597 *offset = resultOffset; 598 } 599 return result; 600} 601 602#if SK_SUPPORT_GPU 603sk_sp<SkSpecialImage> SkBlurImageFilterImpl::gpuFilter( 604 SkSpecialImage *source, 605 SkVector sigma, const sk_sp<SkSpecialImage> &input, 606 SkIRect inputBounds, SkIRect dstBounds, const OutputProperties& outProps) const 607{ 608 // If both sigmas produce arms of the cross that are less than 1/2048, then they 609 // do not contribute to the sum of the filter in a way to change a gamma corrected result. 
610 // Let s = 1/(2*sigma^2) 611 // The normalizing value n = 1 + 4*E^(-s) + 4*E^(-2s) 612 // The raw cross arm value c = E^-s 613 // The normalized cross arm value = c/n 614 // N[Solve[{c/n == 1/2048, sigma > 0}, sigma], 16] 615 static constexpr double kZeroWindowGPU = 0.2561130112451658; 616 if (sigma.x() < kZeroWindowGPU && sigma.y() < kZeroWindowGPU) { 617 return input->makeSubset(inputBounds); 618 } 619 620 GrContext* context = source->getContext(); 621 622 sk_sp<GrTextureProxy> inputTexture(input->asTextureProxyRef(context)); 623 if (!inputTexture) { 624 return nullptr; 625 } 626 627 // Typically, we would create the RTC with the output's color space (from ctx), but we 628 // always blur in the PixelConfig of the *input*. Those might not be compatible (if they 629 // have different transfer functions). We've already guaranteed that those color spaces 630 // have the same gamut, so in this case, we do everything in the input's color space. 631 // ... 632 // Unless the output is legacy. In that case, the input could be almost anything (if we're 633 // using SkColorSpaceXformCanvas), but we can't make a corresponding RTC. We don't care to, 634 // either, we want to do our blending (and blurring) without any color correction, so pass 635 // nullptr here, causing us to operate entirely in the input's color space, with no decoding. 636 // Then, when we create the output image later, we tag it with the input's color space, so 637 // it will be tagged correctly, regardless of how we created the intermediate RTCs. 638 sk_sp<GrRenderTargetContext> renderTargetContext(SkGpuBlurUtils::GaussianBlur( 639 context, 640 std::move(inputTexture), 641 outProps.colorSpace() ? 
sk_ref_sp(input->getColorSpace()) : nullptr, 642 dstBounds, 643 inputBounds, 644 sigma.x(), 645 sigma.y(), 646 to_texture_domain_mode(fTileMode))); 647 if (!renderTargetContext) { 648 return nullptr; 649 } 650 651 return SkSpecialImage::MakeDeferredFromGpu( 652 context, 653 SkIRect::MakeWH(dstBounds.width(), dstBounds.height()), 654 kNeedNewImageUniqueID_SpecialImage, 655 renderTargetContext->asTextureProxyRef(), 656 sk_ref_sp(input->getColorSpace()), 657 &source->props()); 658} 659#endif 660 661// TODO: Implement CPU backend for different fTileMode. 662sk_sp<SkSpecialImage> SkBlurImageFilterImpl::cpuFilter( 663 SkSpecialImage *source, 664 SkVector sigma, const sk_sp<SkSpecialImage> &input, 665 SkIRect inputBounds, SkIRect dstBounds) const 666{ 667 int kernelSizeX, kernelSizeX3, lowOffsetX, highOffsetX; 668 int kernelSizeY, kernelSizeY3, lowOffsetY, highOffsetY; 669 get_box3_params(sigma.x(), &kernelSizeX, &kernelSizeX3, &lowOffsetX, &highOffsetX); 670 get_box3_params(sigma.y(), &kernelSizeY, &kernelSizeY3, &lowOffsetY, &highOffsetY); 671 672 SkBitmap inputBM; 673 674 if (!input->getROPixels(&inputBM) && inputBM.colorType() != kN32_SkColorType) { 675 return nullptr; 676 } 677 678 SkImageInfo info = SkImageInfo::Make(dstBounds.width(), dstBounds.height(), 679 inputBM.colorType(), inputBM.alphaType()); 680 681 SkBitmap tmp, dst; 682 if (!tmp.tryAllocPixels(info) || !dst.tryAllocPixels(info)) { 683 return nullptr; 684 } 685 686 // Get ready to blur. 687 const SkPMColor* s = inputBM.getAddr32(inputBounds.x(), inputBounds.y()); 688 SkPMColor* t = tmp.getAddr32(0, 0); 689 SkPMColor* d = dst.getAddr32(0, 0); 690 691 // Shift everything from being relative to the orignal input bounds to the destination bounds. 
692 inputBounds.offset(-dstBounds.x(), -dstBounds.y()); 693 dstBounds.offset(-dstBounds.x(), -dstBounds.y()); 694 695 int w = dstBounds.width(), 696 h = dstBounds.height(), 697 sw = inputBM.rowBytesAsPixels(); 698 699 SkIRect inputBoundsT = SkIRect::MakeLTRB(inputBounds.top(), inputBounds.left(), 700 inputBounds.bottom(), inputBounds.right()); 701 SkIRect dstBoundsT = SkIRect::MakeWH(dstBounds.height(), dstBounds.width()); 702 703 /** 704 * 705 * In order to make memory accesses cache-friendly, we reorder the passes to 706 * use contiguous memory reads wherever possible. 707 * 708 * For example, the 6 passes of the X-and-Y blur case are rewritten as 709 * follows. Instead of 3 passes in X and 3 passes in Y, we perform 710 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, 711 * then 1 pass in X transposed to Y on write. 712 * 713 * +----+ +----+ +----+ +---+ +---+ +---+ +----+ 714 * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | 715 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ 716 * +---+ +---+ +---+ 717 * 718 * In this way, two of the y-blurs become x-blurs applied to transposed 719 * images, and all memory reads are contiguous. 
720 */ 721 if (kernelSizeX > 0 && kernelSizeY > 0) { 722 SkOpts::box_blur_xx(s, sw, inputBounds, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); 723 SkOpts::box_blur_xx(t, w, dstBounds, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); 724 SkOpts::box_blur_xy(d, w, dstBounds, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); 725 SkOpts::box_blur_xx(t, h, dstBoundsT, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); 726 SkOpts::box_blur_xx(d, h, dstBoundsT, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); 727 SkOpts::box_blur_xy(t, h, dstBoundsT, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); 728 } else if (kernelSizeX > 0) { 729 SkOpts::box_blur_xx(s, sw, inputBounds, d, kernelSizeX, lowOffsetX, highOffsetX, w, h); 730 SkOpts::box_blur_xx(d, w, dstBounds, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); 731 SkOpts::box_blur_xx(t, w, dstBounds, d, kernelSizeX3, highOffsetX, highOffsetX, w, h); 732 } else if (kernelSizeY > 0) { 733 SkOpts::box_blur_yx(s, sw, inputBoundsT, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); 734 SkOpts::box_blur_xx(d, h, dstBoundsT, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); 735 SkOpts::box_blur_xy(t, h, dstBoundsT, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); 736 } 737 738 return SkSpecialImage::MakeFromRaster(SkIRect::MakeSize(dstBounds.size()), 739 dst, &source->props()); 740} 741 742sk_sp<SkImageFilter> SkBlurImageFilterImpl::onMakeColorSpace(SkColorSpaceXformer* xformer) 743const { 744 SkASSERT(1 == this->countInputs()); 745 746 auto input = xformer->apply(this->getInput(0)); 747 if (this->getInput(0) != input.get()) { 748 return SkBlurImageFilter::Make(fSigma.width(), fSigma.height(), std::move(input), 749 this->getCropRectIfSet(), fTileMode); 750 } 751 return this->refMe(); 752} 753 754SkRect SkBlurImageFilterImpl::computeFastBounds(const SkRect& src) const { 755 SkRect bounds = this->getInput(0) ? 
this->getInput(0)->computeFastBounds(src) : src; 756 bounds.outset(fSigma.width() * 3, fSigma.height() * 3); 757 return bounds; 758} 759 760SkIRect SkBlurImageFilterImpl::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, 761 MapDirection) const { 762 SkVector sigma = map_sigma(fSigma, ctm); 763 return src.makeOutset(SkScalarCeilToInt(sigma.x() * 3), SkScalarCeilToInt(sigma.y() * 3)); 764} 765 766#ifndef SK_IGNORE_TO_STRING 767void SkBlurImageFilterImpl::toString(SkString* str) const { 768 str->appendf("SkBlurImageFilterImpl: ("); 769 str->appendf("sigma: (%f, %f) tileMode: %d input (", fSigma.fWidth, fSigma.fHeight, 770 static_cast<int>(fTileMode)); 771 772 if (this->getInput(0)) { 773 this->getInput(0)->toString(str); 774 } 775 776 str->append("))"); 777} 778#endif 779