SkBlurMask.cpp revision 9b0d4d79f023ce91b53d9eaa47508b6722c246e6
1ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com 2ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com/* 3ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Copyright 2006 The Android Open Source Project 4ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * 5ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Use of this source code is governed by a BSD-style license that can be 6ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * found in the LICENSE file. 7ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com */ 8ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com 98a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkBlurMask.h" 11889bd8bd7f604acae0a6303365bc82c06da1e6f3tomhudson@google.com#include "SkMath.h" 128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkTemplates.h" 1301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#include "SkEndian.h" 1401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 159b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define UNROLL_SEPARABLE_LOOPS 169b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 17908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org/** 18908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * This function performs a box blur in X, of the given radius. If the 19884e60be30e20f38b3466a4697081187d2f1f814skia.committer@gmail.com * "transpose" parameter is true, it will transpose the pixels on write, 20908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * such that X and Y are swapped. Reads are always performed from contiguous 21908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org * memory in X, for speed. 
The destination buffer (dst) must be at least 229b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * (width + leftRadius + rightRadius) * height bytes in size. 23908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org */ 24908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.orgstatic int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst, 25c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org int leftRadius, int rightRadius, int width, int height, 26c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org bool transpose) 2771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{ 289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int diameter = leftRadius + rightRadius; 299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int kernelSize = diameter + 1; 309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int border = SkMin32(width, diameter); 3171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org uint32_t scale = (1 << 24) / kernelSize; 32c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org int new_width = width + SkMax32(leftRadius, rightRadius) * 2; 33908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org int dst_x_stride = transpose ? height : 1; 34908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org int dst_y_stride = transpose ? 
1 : new_width; 3571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org for (int y = 0; y < height; ++y) { 3671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org int sum = 0; 37908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org uint8_t* dptr = dst + y * dst_y_stride; 38908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org const uint8_t* right = src + y * src_y_stride; 39908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org const uint8_t* left = right; 40336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org for (int x = 0; x < rightRadius - leftRadius; x++) { 41336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org *dptr = 0; 42336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org dptr += dst_x_stride; 43c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org } 449b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define LEFT_BORDER_ITER \ 459b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org sum += *right++; \ 469b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (sum * scale) >> 24; \ 47908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org dptr += dst_x_stride; 489b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 499b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int x = 0; 509b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 519b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < border - 16; x += 16) { 529b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 539b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 549b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 559b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 569b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 
579b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 589b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 599b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 609b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 619b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 629b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 639b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 649b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 659b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 669b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 679b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 6871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } 699b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 709b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < border; ++x) { 719b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 729b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 739b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef LEFT_BORDER_ITER 749b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define TRIVIAL_ITER \ 759b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (sum * scale) >> 24; \ 76908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org dptr += dst_x_stride; 779b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org x = width; 789b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 799b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < diameter - 16; x += 16) { 
809b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 819b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 829b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 839b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 849b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 859b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 869b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 879b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 889b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 899b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 909b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 919b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 929b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 939b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 949b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 959b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 969b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 979b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 989b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < diameter; ++x) { 999b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org TRIVIAL_ITER 10071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } 1019b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef TRIVIAL_ITER 1029b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define CENTER_ITER \ 1039b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org sum += *right++; \ 1049b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 
*dptr = (sum * scale) >> 24; \ 1059b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org sum -= *left++; \ 106908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org dptr += dst_x_stride; 1079b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 1089b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org x = diameter; 1099b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 1109b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < width - 16; x += 16) { 1119b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1129b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1139b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1149b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1159b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1169b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1179b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1189b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1199b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1209b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1219b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1229b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1239b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1249b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1259b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1269b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 1279b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 1289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 
1299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < width; ++x) { 1309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 13171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } 1329b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef CENTER_ITER 1339b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define RIGHT_BORDER_ITER \ 1349b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (sum * scale) >> 24; \ 1359b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org sum -= *left++; \ 136908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org dptr += dst_x_stride; 1379b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 1389b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org x = 0; 1399b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 1409b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < border - 16; x += 16) { 1419b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1429b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1439b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1449b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1459b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1469b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1479b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1489b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1499b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1509b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1519b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 
1529b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1539b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1549b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1559b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1569b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 1579b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 1589b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 1599b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < border; ++x) { 1609b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 16171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } 1629b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef RIGHT_BORDER_ITER 163336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org for (int x = 0; x < leftRadius - rightRadius; x++) { 164336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org *dptr = 0; 165336b4da6b0d20f27f9980b03415354a2f0698e18senorblanco@chromium.org dptr += dst_x_stride; 166c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org } 16771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkASSERT(sum == 0); 16871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } 169908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org return new_width; 17071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org} 17171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org 1729b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org/** 1739b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * This variant of the box blur handles blurring of non-integer radii. 
It 1749b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * keeps two running sums: an outer sum for the rounded-up kernel radius, and 1759b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * an inner sum for the rounded-down kernel radius. For each pixel, it linearly 1769b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * interpolates between them. In float this would be: 1779b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * outer_weight * outer_sum / kernelSize + 1789b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org * (1.0 - outer_weight) * innerSum / (kernelSize - 2) 1799b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org */ 1809b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.orgstatic int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst, 1819b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int radius, int width, int height, 1829b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org bool transpose, uint8_t outer_weight) 1839b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org{ 1849b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int diameter = radius * 2; 1859b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int kernelSize = diameter + 1; 1869b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int border = SkMin32(width, diameter); 1879b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int inner_weight = 255 - outer_weight; 1889b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org outer_weight += outer_weight >> 7; 1899b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org inner_weight += inner_weight >> 7; 1909b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org uint32_t outer_scale = (outer_weight << 16) / kernelSize; 1919b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org uint32_t inner_scale = (inner_weight << 16) / 
(kernelSize - 2); 1929b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int new_width = width + diameter; 1939b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int dst_x_stride = transpose ? height : 1; 1949b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int dst_y_stride = transpose ? 1 : new_width; 1959b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (int y = 0; y < height; ++y) { 1969b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int outer_sum = 0, inner_sum = 0; 1979b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org uint8_t* dptr = dst + y * dst_y_stride; 1989b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org const uint8_t* right = src + y * src_y_stride; 1999b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org const uint8_t* left = right; 2009b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int x = 0; 2019b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2029b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define LEFT_BORDER_ITER \ 2039b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org inner_sum = outer_sum; \ 2049b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org outer_sum += *right++; \ 2059b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \ 2069b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org dptr += dst_x_stride; 2079b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2089b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 2099b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (;x < border - 16; x += 16) { 2109b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2119b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 
2129b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2139b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2149b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2159b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2169b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2179b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2189b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2199b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2209b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2219b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2229b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2239b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2249b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2259b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2269b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 2279b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 2289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (;x < border; x++) { 2309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org LEFT_BORDER_ITER 2319b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 2329b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef LEFT_BORDER_ITER 2339b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (int x = width; x < diameter; ++x) { 2349b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; 
2359b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org dptr += dst_x_stride; 2369b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 2379b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org x = diameter; 2389b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2399b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#define CENTER_ITER \ 2409b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org inner_sum = outer_sum - *left; \ 2419b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org outer_sum += *right++; \ 2429b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \ 2439b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org dptr += dst_x_stride; \ 2449b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org outer_sum -= *left++; 2459b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2469b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 2479b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < width - 16; x += 16) { 2489b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2499b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2509b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2519b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2529b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2539b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2549b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2559b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2569b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2579b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 
2589b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2599b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2609b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2619b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2629b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2639b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2649b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 2659b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 2669b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < width; ++x) { 2679b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org CENTER_ITER 2689b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 2699b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef CENTER_ITER 2709b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2719b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org #define RIGHT_BORDER_ITER \ 2729b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org inner_sum = outer_sum - *left++; \ 2739b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \ 2749b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org dptr += dst_x_stride; \ 2759b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org outer_sum = inner_sum; 2769b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 2779b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org x = 0; 2789b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#ifdef UNROLL_SEPARABLE_LOOPS 2799b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < border - 16; x += 16) { 2809b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 
2819b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2829b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2839b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2849b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2859b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2869b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2879b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2889b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2899b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2909b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2919b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2929b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2939b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2949b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2959b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 2969b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 2979b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#endif 2989b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org for (; x < border; x++) { 2999b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org RIGHT_BORDER_ITER 3009b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 3019b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org#undef RIGHT_BORDER_ITER 3029b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org SkASSERT(outer_sum == 0 && inner_sum == 0); 3039b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org } 
3049b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org return new_width; 3059b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org} 3069b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org 307c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.orgstatic void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) 308c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org{ 309c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org *loRadius = *hiRadius = SkScalarCeil(passRadius); 310c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) { 311c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org *loRadius = *hiRadius - 1; 312c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org } 313c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org} 314c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org 31501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows, 31601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// breakeven on Mac, and ~15% slowdown on Linux. 31701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// Reading a word at a time when bulding the sum buffer seems to give 31801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux. 
319054ff1efa4f9187ce7fd20aaf3aed7cecf14e12btomhudson@google.com#if defined(SK_BUILD_FOR_WIN32) 32001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#define UNROLL_KERNEL_LOOP 1 32101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif 3228a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 3234560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com/** The sum buffer is an array of u32 to hold the accumulated sum of all of the 3244560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com src values at their position, plus all values above and to the left. 3254560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com When we sample into this buffer, we need an initial row and column of 0s, 3264560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com so we have an index correspondence as follows: 327fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 3284560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com src[i, j] == sum[i+1, j+1] 3294560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com sum[0, j] == sum[i, 0] == 0 330fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 3314560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com We assume that the sum buffer's stride == its width 3324560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com */ 33303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comstatic void build_sum_buffer(uint32_t sum[], int srcW, int srcH, 33403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com const uint8_t src[], int srcRB) { 3354560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int sumW = srcW + 1; 3364560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com 3374560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com SkASSERT(srcRB >= srcW); 3388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // mod srcRB so we can apply it after each row 3394560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com srcRB -= srcW; 3408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 
3418a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int x, y; 3428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 3434560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com // zero out the top row and column 3444560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com memset(sum, 0, sumW * sizeof(sum[0])); 3454560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com sum += sumW; 3464560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com 3478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // special case first row 3488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t X = 0; 3494560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com *sum++ = 0; // initialze the first column to 0 35003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com for (x = srcW - 1; x >= 0; --x) { 3518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com X = *src++ + X; 3524560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com *sum++ = X; 3538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 3548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com src += srcRB; 3558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 3568a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // now do the rest of the rows 35703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com for (y = srcH - 1; y > 0; --y) { 3588a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t L = 0; 3598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t C = 0; 3604560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com *sum++ = 0; // initialze the first column to 0 36101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 36201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) { 36301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t T = sum[-sumW]; 36401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com X = *src++ + L + T - C; 
36501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *sum++ = X; 36601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com L = X; 36701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com C = T; 36801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com } 36901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 37001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (; x >= 4; x-=4) { 37101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t T = sum[-sumW]; 37201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com X = *src++ + L + T - C; 37301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *sum++ = X; 37401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com L = X; 37501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com C = T; 37601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com T = sum[-sumW]; 37701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com X = *src++ + L + T - C; 37801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *sum++ = X; 37901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com L = X; 38001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com C = T; 38101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com T = sum[-sumW]; 38201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com X = *src++ + L + T - C; 38301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *sum++ = X; 38401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com L = X; 38501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com C = T; 38601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com T = sum[-sumW]; 38701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com X = *src++ + L + T - C; 38801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *sum++ = X; 38901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com L = X; 39001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com C = T; 
39101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com } 39201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 39301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (; x >= 0; --x) { 3944560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com uint32_t T = sum[-sumW]; 3958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com X = *src++ + L + T - C; 3964560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com *sum++ = X; 3978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com L = X; 3988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com C = T; 3998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 4008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com src += srcRB; 4018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 4028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 4038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 40403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/** 4058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel() to be taken when the kernel 4068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image. 
4078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */ 4088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[], 4098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int sw, int sh) { 4108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(2*rx > sw); 4118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1)); 4138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4148caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int sumStride = sw + 1; 4158caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4168caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int dw = sw + 2*rx; 4178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int dh = sh + 2*ry; 4188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int prev_y = -2*ry; 4208caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int next_y = 1; 4218caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4228caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (int y = 0; y < dh; y++) { 4238caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int py = SkClampPos(prev_y) * sumStride; 4248caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int ny = SkFastMin32(next_y, sh) * sumStride; 4258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int prev_x = -2*rx; 4278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int next_x = 1; 4288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4298caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (int x = 0; x < dw; x++) { 4308caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int 
px = SkClampPos(prev_x); 4318caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int nx = SkFastMin32(next_x, sw); 4328caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4338caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; 4348caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 4358caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4368caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_x += 1; 4378caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_x += 1; 4388caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 4398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_y += 1; 4418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_y += 1; 4428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 4438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com} 4448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/** 44503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * sw and sh are the width and height of the src. Since the sum buffer 44603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * matches that, but has an extra row and col at the beginning (with zeros), 44703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * we can just use sw and sh as our "max" values for pinning coordinates 44803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * when sampling into sum[][] 4498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * 4508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * The inner loop is conceptually simple; we break it into several sections 4518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * to improve performance. 
Here's the original version:
        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }
 * The sections are:
 *     left-hand section, where prev_x is clamped to 0
 *     center section, where neither prev_x nor next_x is clamped
 *     right-hand section, where next_x is clamped to sw
 * On some operating systems, the center section is unrolled for additional
 * speedup.
4688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/ 4694560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[], 4704560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int sw, int sh) { 4718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com if (2*rx > sw) { 4728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com kernel_clamped(dst, rx, ry, sum, sw, sh); 4738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com return; 4748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 4758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1)); 4778a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 4784560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int sumStride = sw + 1; 4798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 4808a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int dw = sw + 2*rx; 4818a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int dh = sh + 2*ry; 4828a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 4834560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int prev_y = -2*ry; 4844560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int next_y = 1; 4858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 4868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(2*rx <= dw - 2*rx); 4878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4884560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com for (int y = 0; y < dh; y++) { 4894560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int py = SkClampPos(prev_y) * sumStride; 4904560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int ny = SkFastMin32(next_y, sh) * sumStride; 4918a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 4924560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com 
int prev_x = -2*rx; 4934560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int next_x = 1; 4948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int x = 0; 4958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 4968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (; x < 2*rx; x++) { 4978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(prev_x <= 0); 4988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(next_x <= sw); 4998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int px = 0; 5018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int nx = next_x; 5028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; 5048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 5058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_x += 1; 5078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_x += 1; 5088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 5098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 51001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i0 = prev_x + py; 51101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i1 = next_x + ny; 51201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i2 = next_x + py; 51301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i3 = prev_x + ny; 51401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 51501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP 51601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (; x < dw - 2*rx - 4; x += 4) { 
5178caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(prev_x >= 0); 5188caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(next_x <= sw); 5198caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 52001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 52101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 52201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 52301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 52401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 52501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 52601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 52701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 5288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 52901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com prev_x += 4; 53001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com next_x += 4; 53101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com } 53201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif 53301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 53401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (; x < dw - 2*rx; x++) { 53501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com SkASSERT(prev_x >= 0); 53601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com SkASSERT(next_x <= sw); 53701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 53801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 
5398caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 5408caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5418caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_x += 1; 5428caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_x += 1; 5438caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 5448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5458caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (; x < dw; x++) { 5468caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(prev_x >= 0); 5478caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(next_x > sw); 5488caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5498caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int px = prev_x; 5508caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int nx = sw; 5518caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; 5538caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8(tmp * scale >> 24); 5548caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5558caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_x += 1; 5568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_x += 1; 5578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 5588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_y += 1; 5608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_y += 1; 5618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 5628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com} 5638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com/** 
5658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * This is the path for apply_kernel_interp() to be taken when the kernel 5668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * is wider than the source image. 5678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com */ 5688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.comstatic void kernel_interp_clamped(uint8_t dst[], int rx, int ry, 5698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com const uint32_t sum[], int sw, int sh, U8CPU outer_weight) { 5708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(2*rx > sw); 5718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int inner_weight = 255 - outer_weight; 5738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com // round these guys up if they're bigger than 127 5758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com outer_weight += outer_weight >> 7; 5768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com inner_weight += inner_weight >> 7; 5778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1)); 5798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1)); 5808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int sumStride = sw + 1; 5828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int dw = sw + 2*rx; 5848caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int dh = sh + 2*ry; 5858caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 
5868caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int prev_y = -2*ry; 5878caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int next_y = 1; 5888caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5898caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (int y = 0; y < dh; y++) { 5908caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int py = SkClampPos(prev_y) * sumStride; 5918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int ny = SkFastMin32(next_y, sh) * sumStride; 5928caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int ipy = SkClampPos(prev_y + 1) * sumStride; 5948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int iny = SkClampMax(next_y - 1, sh) * sumStride; 5958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 5968caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int prev_x = -2*rx; 5978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int next_x = 1; 5988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 5994560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com for (int x = 0; x < dw; x++) { 6008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int px = SkClampPos(prev_x); 6018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int nx = SkFastMin32(next_x, sw); 6028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int ipx = SkClampPos(prev_x + 1); 6048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int inx = SkClampMax(next_x - 1, sw); 6058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 6068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t outer_sum = sum[px+py] + sum[nx+ny] 6078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com - sum[nx+py] - sum[px+ny]; 6088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t inner_sum = 
sum[ipx+ipy] + sum[inx+iny] 6098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com - sum[inx+ipy] - sum[ipx+iny]; 6108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 6118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com + inner_sum * inner_scale) >> 24); 6128a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6138a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com prev_x += 1; 6148a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com next_x += 1; 6158a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 6168a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com prev_y += 1; 6178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com next_y += 1; 6188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 6198a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 6208a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 62103016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/** 62203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * sw and sh are the width and height of the src. Since the sum buffer 62303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * matches that, but has an extra row and col at the beginning (with zeros), 62403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * we can just use sw and sh as our "max" values for pinning coordinates 62503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com * when sampling into sum[][] 6268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * 6278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * The inner loop is conceptually simple; we break it into several variants 6288caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com * to improve performance. 
Here's the original version:
        for (int x = 0; x < dw; x++) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            uint32_t outer_sum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outer_sum * outer_scale
                           + inner_sum * inner_scale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
 * The sections are:
 *     left-hand section, where prev_x is clamped to 0
 *     center section, where neither prev_x nor next_x is clamped
 *     right-hand section, where next_x is clamped to sw
65001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com * On some operating systems, the center section is unrolled for additional 65101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com * speedup. 6528caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com*/ 6534560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.comstatic void apply_kernel_interp(uint8_t dst[], int rx, int ry, 6544560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com const uint32_t sum[], int sw, int sh, U8CPU outer_weight) { 6558a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com SkASSERT(rx > 0 && ry > 0); 6568a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com SkASSERT(outer_weight <= 255); 6578a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6588caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com if (2*rx > sw) { 6598caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight); 6608caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com return; 6618caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 6628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 6638a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int inner_weight = 255 - outer_weight; 6648a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6658a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // round these guys up if they're bigger than 127 6668a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com outer_weight += outer_weight >> 7; 6678a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com inner_weight += inner_weight >> 7; 6688a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6698a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1)); 6708a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1)); 
6718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6724560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int sumStride = sw + 1; 6738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6748a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int dw = sw + 2*rx; 6758a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int dh = sh + 2*ry; 6768a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6774560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int prev_y = -2*ry; 6784560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int next_y = 1; 6798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(2*rx <= dw - 2*rx); 6818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 6824560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com for (int y = 0; y < dh; y++) { 6834560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int py = SkClampPos(prev_y) * sumStride; 6844560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int ny = SkFastMin32(next_y, sh) * sumStride; 6858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6864560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int ipy = SkClampPos(prev_y + 1) * sumStride; 6874560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int iny = SkClampMax(next_y - 1, sh) * sumStride; 6888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6894560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int prev_x = -2*rx; 6904560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com int next_x = 1; 6918caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int x = 0; 6928a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6938caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (; x < 2*rx; x++) { 6948caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(prev_x < 0); 6958caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(next_x <= sw); 
6968a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 6978caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int px = 0; 6988caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int nx = next_x; 6998caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7008caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int ipx = 0; 7018caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int inx = next_x - 1; 7028caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7038caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t outer_sum = sum[px+py] + sum[nx+ny] 7048caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com - sum[nx+py] - sum[px+ny]; 7058caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny] 7068caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com - sum[inx+ipy] - sum[ipx+iny]; 7078caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 7088caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com + inner_sum * inner_scale) >> 24); 7098caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7108caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_x += 1; 7118caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_x += 1; 7128caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 7138caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 71401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i0 = prev_x + py; 71501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i1 = next_x + ny; 71601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i2 = next_x + py; 71701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i3 = prev_x + ny; 71801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i4 = prev_x + 1 + ipy; 71901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i5 = next_x - 1 + iny; 
72001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i6 = next_x - 1 + ipy; 72101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com int i7 = prev_x + 1 + iny; 72201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 72301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#if UNROLL_KERNEL_LOOP 72401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (; x < dw - 2*rx - 4; x += 4) { 7258caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(prev_x >= 0); 7268caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(next_x <= sw); 7278caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 72801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 72901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; 73001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 73101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com + inner_sum * inner_scale) >> 24); 73201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 73301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; 73401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 73501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com + inner_sum * inner_scale) >> 24); 73601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 73701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; 73801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 
73901224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com + inner_sum * inner_scale) >> 24); 74001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 74101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; 74201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 74301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com + inner_sum * inner_scale) >> 24); 7448caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 74501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com prev_x += 4; 74601224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com next_x += 4; 74701224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com } 74801224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com#endif 7498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 75001224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com for (; x < dw - 2*rx; x++) { 75101224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com SkASSERT(prev_x >= 0); 75201224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com SkASSERT(next_x <= sw); 75301224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com 75401224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; 75501224d5d0a3228fe47e63d8346e0e433a87563a8tomhudson@google.com uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; 7568caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 7578caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com + inner_sum * inner_scale) >> 24); 7588a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 7598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com prev_x += 1; 7608a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com next_x += 1; 
7618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 7628caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7638caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com for (; x < dw; x++) { 7648caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(prev_x >= 0); 7658caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com SkASSERT(next_x > sw); 7668caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7678caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int px = prev_x; 7688caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int nx = sw; 7698caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7708caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int ipx = prev_x + 1; 7718caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com int inx = sw; 7728caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7738caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t outer_sum = sum[px+py] + sum[nx+ny] 7748caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com - sum[nx+py] - sum[px+ny]; 7758caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny] 7768caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com - sum[inx+ipy] - sum[ipx+iny]; 7778caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com *dst++ = SkToU8((outer_sum * outer_scale 7788caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com + inner_sum * inner_scale) >> 24); 7798caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7808caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com prev_x += 1; 7818caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com next_x += 1; 7828caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com } 7838caac6447dd68655b57dfe876626a9733b191416tomhudson@google.com 7848a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com prev_y += 1; 7858a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com next_y 
+= 1; 7868a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 7878a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 7888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 7898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com#include "SkColorPriv.h" 7908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 7910e3c664250f561ec9f7107b92136517a72d03afdreed@android.comstatic void merge_src_with_blur(uint8_t dst[], int dstRB, 7920e3c664250f561ec9f7107b92136517a72d03afdreed@android.com const uint8_t src[], int srcRB, 7930e3c664250f561ec9f7107b92136517a72d03afdreed@android.com const uint8_t blur[], int blurRB, 7940e3c664250f561ec9f7107b92136517a72d03afdreed@android.com int sw, int sh) { 7950e3c664250f561ec9f7107b92136517a72d03afdreed@android.com dstRB -= sw; 7960e3c664250f561ec9f7107b92136517a72d03afdreed@android.com srcRB -= sw; 7970e3c664250f561ec9f7107b92136517a72d03afdreed@android.com blurRB -= sw; 7980e3c664250f561ec9f7107b92136517a72d03afdreed@android.com while (--sh >= 0) { 7990e3c664250f561ec9f7107b92136517a72d03afdreed@android.com for (int x = sw - 1; x >= 0; --x) { 8008a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src))); 8018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst += 1; 8028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com src += 1; 8038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com blur += 1; 8048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 8050e3c664250f561ec9f7107b92136517a72d03afdreed@android.com dst += dstRB; 8060e3c664250f561ec9f7107b92136517a72d03afdreed@android.com src += srcRB; 8070e3c664250f561ec9f7107b92136517a72d03afdreed@android.com blur += blurRB; 8088a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 8098a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 8108a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8118a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comstatic void 
clamp_with_orig(uint8_t dst[], int dstRowBytes, 8120e3c664250f561ec9f7107b92136517a72d03afdreed@android.com const uint8_t src[], int srcRowBytes, 8130e3c664250f561ec9f7107b92136517a72d03afdreed@android.com int sw, int sh, 8144560767bb0b3db530d48e2b0c1c11e28f3692984reed@android.com SkBlurMask::Style style) { 8158a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int x; 8160e3c664250f561ec9f7107b92136517a72d03afdreed@android.com while (--sh >= 0) { 8178a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com switch (style) { 8188a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com case SkBlurMask::kSolid_Style: 8190e3c664250f561ec9f7107b92136517a72d03afdreed@android.com for (x = sw - 1; x >= 0; --x) { 8200e3c664250f561ec9f7107b92136517a72d03afdreed@android.com int s = *src; 8210e3c664250f561ec9f7107b92136517a72d03afdreed@android.com int d = *dst; 8220e3c664250f561ec9f7107b92136517a72d03afdreed@android.com *dst = SkToU8(s + d - SkMulDiv255Round(s, d)); 8238a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst += 1; 8248a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com src += 1; 8258a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 8268a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com break; 8278a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com case SkBlurMask::kOuter_Style: 8280e3c664250f561ec9f7107b92136517a72d03afdreed@android.com for (x = sw - 1; x >= 0; --x) { 8290e3c664250f561ec9f7107b92136517a72d03afdreed@android.com if (*src) { 8308a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src))); 8310e3c664250f561ec9f7107b92136517a72d03afdreed@android.com } 8328a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst += 1; 8338a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com src += 1; 8348a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 8358a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com break; 
8368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com default: 8370c00f21fee3f5cfa3aa7e5d46ff94cb8cf340451tomhudson@google.com SkDEBUGFAIL("Unexpected blur style here"); 8388a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com break; 8398a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 8408a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst += dstRowBytes - sw; 8410e3c664250f561ec9f7107b92136517a72d03afdreed@android.com src += srcRowBytes - sw; 8428a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 8438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 8448a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 84503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com/////////////////////////////////////////////////////////////////////////////// 8468a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8478a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// we use a local funciton to wrap the class static method to work around 8488a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com// a bug in gcc98 8498a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.comvoid SkMask_FreeImage(uint8_t* image); 85003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.comvoid SkMask_FreeImage(uint8_t* image) { 8518a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com SkMask::FreeImage(image); 8528a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 8538a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8548a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.combool SkBlurMask::Blur(SkMask* dst, const SkMask& src, 8555af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com SkScalar radius, Style style, Quality quality, 85671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkIPoint* margin, bool separable) 8575af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com{ 85803016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com if (src.fFormat != SkMask::kA8_Format) { 
8598a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com return false; 86003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com } 8618a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8624868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org // Force high quality off for small radii (performance) 863c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality; 8644868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org 8654868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur 8669b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org int passCount = (quality == kHigh_Quality || separable) ? 3 : 1; 8674868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount))); 8684868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org 8694868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org int rx = SkScalarCeil(passRadius); 8704868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255); 8718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8728a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com SkASSERT(rx >= 0); 8738a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com SkASSERT((unsigned)outer_weight <= 255); 8740e3c664250f561ec9f7107b92136517a72d03afdreed@android.com if (rx <= 0) { 8758a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com return false; 8760e3c664250f561ec9f7107b92136517a72d03afdreed@android.com } 8778a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8788a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int ry = rx; // only do square blur for now 8798a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 
8804868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org int padx = passCount * rx; 8814868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org int pady = passCount * ry; 8825af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com if (margin) { 8835af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com margin->set(padx, pady); 8845af16f8d670b3ce1c7644a4737e02e2e2257614ebungeman@google.com } 8854868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady, 8864868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org src.fBounds.fRight + padx, src.fBounds.fBottom + pady); 88749f0ff25a046d6001dc2d095b6fa3c30f0f46b6areed@android.com dst->fRowBytes = dst->fBounds.width(); 8888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst->fFormat = SkMask::kA8_Format; 8898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst->fImage = NULL; 8908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 8910e3c664250f561ec9f7107b92136517a72d03afdreed@android.com if (src.fImage) { 892543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com size_t dstSize = dst->computeImageSize(); 893543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com if (0 == dstSize) { 894543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com return false; // too big to allocate, abort 895543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com } 896543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com 8978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int sw = src.fBounds.width(); 8988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com int sh = src.fBounds.height(); 8998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com const uint8_t* sp = src.fImage; 900543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com uint8_t* dp = SkMask::AllocImage(dstSize); 9018a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 9028a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 
SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); 9038a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 9048a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // build the blurry destination 90571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org if (separable) { 90671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); 90771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org uint8_t* tp = tmpBuffer.get(); 90871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org int w = sw, h = sh; 90971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org 9109b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org if (outer_weight == 255 || quality == kLow_Quality) { 9119b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org // For separable blurs, low quality means no interpolation. 912c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org int loRadius, hiRadius; 913c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org get_adjusted_radii(passRadius, &loRadius, &hiRadius); 914908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org // Do three X blurs, with a transpose on the final one. 915c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); 916c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); 917c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); 918908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org // Do three Y blurs, with a transpose on the final one. 
919c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); 920c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); 921c4381309649c5cf338dcf6a7fc8296451a686d6bsenorblanco@chromium.org h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); 922908276b3969cf8f8eec28026363897134c0e54e0senorblanco@chromium.org } else { 9239b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org // Do three X blurs, with a transpose on the final one. 9249b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight); 9259b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org w = boxBlurInterp(tp, w, dp, rx, w, h, false, outer_weight); 9269b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org w = boxBlurInterp(dp, w, tp, rx, w, h, true, outer_weight); 9279b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org // Do three Y blurs, with a transpose on the final one. 
9289b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org h = boxBlurInterp(tp, h, dp, ry, h, w, false, outer_weight); 9299b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org h = boxBlurInterp(dp, h, tp, ry, h, w, false, outer_weight); 9309b0d4d79f023ce91b53d9eaa47508b6722c246e6senorblanco@chromium.org h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight); 93171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } 93271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org } else { 93303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com const size_t storageW = sw + 2 * (passCount - 1) * rx + 1; 93403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com const size_t storageH = sh + 2 * (passCount - 1) * ry + 1; 93503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com SkAutoTMalloc<uint32_t> storage(storageW * storageH); 9368a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com uint32_t* sumBuffer = storage.get(); 9378a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 9384868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org //pass1: sp is source, dp is destination 9398a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes); 94003016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com if (outer_weight == 255) { 9418a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com apply_kernel(dp, rx, ry, sumBuffer, sw, sh); 94203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com } else { 9438a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight); 94403016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com } 9454868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org 94603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com if (quality == kHigh_Quality) { 9474868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org //pass2: dp is source, tmpBuffer is destination 
9484868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org int tmp_sw = sw + 2 * rx; 9494868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org int tmp_sh = sh + 2 * ry; 9504868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); 9514868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw); 9524868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org if (outer_weight == 255) 9534868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh); 9544868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org else 95503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer, 95603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com tmp_sw, tmp_sh, outer_weight); 9574868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org 9584868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org //pass3: tmpBuffer is source, dp is destination 9594868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org tmp_sw += 2 * rx; 9604868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org tmp_sh += 2 * ry; 9614868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw); 9624868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org if (outer_weight == 255) 9634868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh); 9644868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org else 96503016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh, 96603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com outer_weight); 9674868e6b221a4a98e40f977851af5fcf09631ea15senorblanco@chromium.org } 
9688a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 9698a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 9708a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst->fImage = dp; 9718a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // if need be, alloc the "real" dst (same size as src) and copy/merge 9728a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com // the blur into it (applying the src) 9730e3c664250f561ec9f7107b92136517a72d03afdreed@android.com if (style == kInner_Style) { 9740e3c664250f561ec9f7107b92136517a72d03afdreed@android.com // now we allocate the "real" dst, mirror the size of src 975543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com size_t srcSize = src.computeImageSize(); 976543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com if (0 == srcSize) { 977543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com return false; // too big to allocate, abort 978543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com } 979543ed9352c7dfd93071c08b14930cca2e82a08d4reed@android.com dst->fImage = SkMask::AllocImage(srcSize); 9800e3c664250f561ec9f7107b92136517a72d03afdreed@android.com merge_src_with_blur(dst->fImage, src.fRowBytes, 9810e3c664250f561ec9f7107b92136517a72d03afdreed@android.com sp, src.fRowBytes, 98203016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com dp + passCount * (rx + ry * dst->fRowBytes), 98303016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com dst->fRowBytes, sw, sh); 9848a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com SkMask::FreeImage(dp); 9850e3c664250f561ec9f7107b92136517a72d03afdreed@android.com } else if (style != kNormal_Style) { 98603016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes), 98703016a36206be42e91e8e0eb62fe8fb95da97b38reed@google.com dst->fRowBytes, sp, src.fRowBytes, sw, sh, style); 9888a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 9898a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 
(void)autoCall.detach(); 9908a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 9918a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 9920e3c664250f561ec9f7107b92136517a72d03afdreed@android.com if (style == kInner_Style) { 9938a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com dst->fBounds = src.fBounds; // restore trimmed bounds 9940e3c664250f561ec9f7107b92136517a72d03afdreed@android.com dst->fRowBytes = src.fRowBytes; 9958a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com } 9968a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 9978a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com return true; 9988a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com} 9998a1c16ff38322f0210116fa7293eb8817c7e477ereed@android.com 100071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src, 100171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkScalar radius, Style style, Quality quality, 100271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkIPoint* margin) 100371f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{ 100471f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true); 100571f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org} 100671f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org 100771f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.orgbool SkBlurMask::Blur(SkMask* dst, const SkMask& src, 100871f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkScalar radius, Style style, Quality quality, 100971f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org SkIPoint* margin) 101071f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org{ 101171f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false); 
101271f0f34f7d8e80fe760f318f29ba88ab58baff7dsenorblanco@chromium.org} 1013