SkConvolver.cpp revision 138ebc3e4061cf533ea2f7f3717239670fdc6e43
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "SkConvolver.h" 6#include "SkSize.h" 7#include "SkTypes.h" 8 9namespace { 10 11 // Converts the argument to an 8-bit unsigned value by clamping to the range 12 // 0-255. 13 inline unsigned char ClampTo8(int a) { 14 if (static_cast<unsigned>(a) < 256) { 15 return a; // Avoid the extra check in the common case. 16 } 17 if (a < 0) { 18 return 0; 19 } 20 return 255; 21 } 22 23 // Takes the value produced by accumulating element-wise product of image with 24 // a kernel and brings it back into range. 25 // All of the filter scaling factors are in fixed point with kShiftBits bits of 26 // fractional part. 27 inline unsigned char BringBackTo8(int a, bool takeAbsolute) { 28 a >>= SkConvolutionFilter1D::kShiftBits; 29 if (takeAbsolute) { 30 a = abs(a); 31 } 32 return ClampTo8(a); 33 } 34 35 // Stores a list of rows in a circular buffer. The usage is you write into it 36 // by calling AdvanceRow. It will keep track of which row in the buffer it 37 // should use next, and the total number of rows added. 38 class CircularRowBuffer { 39 public: 40 // The number of pixels in each row is given in |sourceRowPixelWidth|. 41 // The maximum number of rows needed in the buffer is |maxYFilterSize| 42 // (we only need to store enough rows for the biggest filter). 43 // 44 // We use the |firstInputRow| to compute the coordinates of all of the 45 // following rows returned by Advance(). 46 CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, 47 int firstInputRow) 48 : fRowByteWidth(destRowPixelWidth * 4), 49 fNumRows(maxYFilterSize), 50 fNextRow(0), 51 fNextRowCoordinate(firstInputRow) { 52 fBuffer.reset(fRowByteWidth * maxYFilterSize); 53 fRowAddresses.reset(fNumRows); 54 } 55 56 // Moves to the next row in the buffer, returning a pointer to the beginning 57 // of it. 58 unsigned char* advanceRow() { 59 unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; 60 fNextRowCoordinate++; 61 62 // Set the pointer to the next row to use, wrapping around if necessary. 63 fNextRow++; 64 if (fNextRow == fNumRows) { 65 fNextRow = 0; 66 } 67 return row; 68 } 69 70 // Returns a pointer to an "unrolled" array of rows. These rows will start 71 // at the y coordinate placed into |*firstRowIndex| and will continue in 72 // order for the maximum number of rows in this circular buffer. 73 // 74 // The |firstRowIndex_| may be negative. This means the circular buffer 75 // starts before the top of the image (it hasn't been filled yet). 76 unsigned char* const* GetRowAddresses(int* firstRowIndex) { 77 // Example for a 4-element circular buffer holding coords 6-9. 78 // Row 0 Coord 8 79 // Row 1 Coord 9 80 // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. 81 // Row 3 Coord 7 82 // 83 // The "next" row is also the first (lowest) coordinate. This computation 84 // may yield a negative value, but that's OK, the math will work out 85 // since the user of this buffer will compute the offset relative 86 // to the firstRowIndex and the negative rows will never be used. 87 *firstRowIndex = fNextRowCoordinate - fNumRows; 88 89 int curRow = fNextRow; 90 for (int i = 0; i < fNumRows; i++) { 91 fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; 92 93 // Advance to the next row, wrapping if necessary. 94 curRow++; 95 if (curRow == fNumRows) { 96 curRow = 0; 97 } 98 } 99 return &fRowAddresses[0]; 100 } 101 102 private: 103 // The buffer storing the rows. They are packed, each one fRowByteWidth. 104 SkTArray<unsigned char> fBuffer; 105 106 // Number of bytes per row in the |buffer|. 107 int fRowByteWidth; 108 109 // The number of rows available in the buffer. 110 int fNumRows; 111 112 // The next row index we should write into. This wraps around as the 113 // circular buffer is used. 114 int fNextRow; 115 116 // The y coordinate of the |fNextRow|. This is incremented each time a 117 // new row is appended and does not wrap. 118 int fNextRowCoordinate; 119 120 // Buffer used by GetRowAddresses(). 121 SkTArray<unsigned char*> fRowAddresses; 122 }; 123 124// Convolves horizontally along a single row. The row data is given in 125// |srcData| and continues for the numValues() of the filter. 126template<bool hasAlpha> 127 void ConvolveHorizontally(const unsigned char* srcData, 128 const SkConvolutionFilter1D& filter, 129 unsigned char* outRow) { 130 // Loop over each pixel on this row in the output image. 131 int numValues = filter.numValues(); 132 for (int outX = 0; outX < numValues; outX++) { 133 // Get the filter that determines the current output pixel. 134 int filterOffset, filterLength; 135 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 136 filter.FilterForValue(outX, &filterOffset, &filterLength); 137 138 // Compute the first pixel in this row that the filter affects. It will 139 // touch |filterLength| pixels (4 bytes each) after this. 140 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; 141 142 // Apply the filter to the row to get the destination pixel in |accum|. 143 int accum[4] = {0}; 144 for (int filterX = 0; filterX < filterLength; filterX++) { 145 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX]; 146 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; 147 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; 148 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; 149 if (hasAlpha) { 150 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; 151 } 152 } 153 154 // Bring this value back in range. All of the filter scaling factors 155 // are in fixed point with kShiftBits bits of fractional part. 156 accum[0] >>= SkConvolutionFilter1D::kShiftBits; 157 accum[1] >>= SkConvolutionFilter1D::kShiftBits; 158 accum[2] >>= SkConvolutionFilter1D::kShiftBits; 159 if (hasAlpha) { 160 accum[3] >>= SkConvolutionFilter1D::kShiftBits; 161 } 162 163 // Store the new pixel. 164 outRow[outX * 4 + 0] = ClampTo8(accum[0]); 165 outRow[outX * 4 + 1] = ClampTo8(accum[1]); 166 outRow[outX * 4 + 2] = ClampTo8(accum[2]); 167 if (hasAlpha) { 168 outRow[outX * 4 + 3] = ClampTo8(accum[3]); 169 } 170 } 171 } 172 173// Does vertical convolution to produce one output row. The filter values and 174// length are given in the first two parameters. These are applied to each 175// of the rows pointed to in the |sourceDataRows| array, with each row 176// being |pixelWidth| wide. 177// 178// The output must have room for |pixelWidth * 4| bytes. 179template<bool hasAlpha> 180 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 181 int filterLength, 182 unsigned char* const* sourceDataRows, 183 int pixelWidth, 184 unsigned char* outRow) { 185 // We go through each column in the output and do a vertical convolution, 186 // generating one output pixel each time. 187 for (int outX = 0; outX < pixelWidth; outX++) { 188 // Compute the number of bytes over in each row that the current column 189 // we're convolving starts at. The pixel will cover the next 4 bytes. 190 int byteOffset = outX * 4; 191 192 // Apply the filter to one column of pixels. 193 int accum[4] = {0}; 194 for (int filterY = 0; filterY < filterLength; filterY++) { 195 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY]; 196 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; 197 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; 198 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; 199 if (hasAlpha) { 200 accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3]; 201 } 202 } 203 204 // Bring this value back in range. All of the filter scaling factors 205 // are in fixed point with kShiftBits bits of precision. 206 accum[0] >>= SkConvolutionFilter1D::kShiftBits; 207 accum[1] >>= SkConvolutionFilter1D::kShiftBits; 208 accum[2] >>= SkConvolutionFilter1D::kShiftBits; 209 if (hasAlpha) { 210 accum[3] >>= SkConvolutionFilter1D::kShiftBits; 211 } 212 213 // Store the new pixel. 214 outRow[byteOffset + 0] = ClampTo8(accum[0]); 215 outRow[byteOffset + 1] = ClampTo8(accum[1]); 216 outRow[byteOffset + 2] = ClampTo8(accum[2]); 217 if (hasAlpha) { 218 unsigned char alpha = ClampTo8(accum[3]); 219 220 // Make sure the alpha channel doesn't come out smaller than any of the 221 // color channels. We use premultipled alpha channels, so this should 222 // never happen, but rounding errors will cause this from time to time. 223 // These "impossible" colors will cause overflows (and hence random pixel 224 // values) when the resulting bitmap is drawn to the screen. 225 // 226 // We only need to do this when generating the final output row (here). 227 int maxColorChannel = SkTMax(outRow[byteOffset + 0], 228 SkTMax(outRow[byteOffset + 1], 229 outRow[byteOffset + 2])); 230 if (alpha < maxColorChannel) { 231 outRow[byteOffset + 3] = maxColorChannel; 232 } else { 233 outRow[byteOffset + 3] = alpha; 234 } 235 } else { 236 // No alpha channel, the image is opaque. 237 outRow[byteOffset + 3] = 0xff; 238 } 239 } 240 } 241 242 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 243 int filterLength, 244 unsigned char* const* sourceDataRows, 245 int pixelWidth, 246 unsigned char* outRow, 247 bool sourceHasAlpha) { 248 if (sourceHasAlpha) { 249 ConvolveVertically<true>(filterValues, filterLength, 250 sourceDataRows, pixelWidth, 251 outRow); 252 } else { 253 ConvolveVertically<false>(filterValues, filterLength, 254 sourceDataRows, pixelWidth, 255 outRow); 256 } 257 } 258 259} // namespace 260 261// SkConvolutionFilter1D --------------------------------------------------------- 262 263SkConvolutionFilter1D::SkConvolutionFilter1D() 264: fMaxFilter(0) { 265} 266 267SkConvolutionFilter1D::~SkConvolutionFilter1D() { 268} 269 270void SkConvolutionFilter1D::AddFilter(int filterOffset, 271 const float* filterValues, 272 int filterLength) { 273 SkASSERT(filterLength > 0); 274 275 SkTArray<ConvolutionFixed> fixedValues; 276 fixedValues.reset(filterLength); 277 278 for (int i = 0; i < filterLength; ++i) { 279 fixedValues.push_back(FloatToFixed(filterValues[i])); 280 } 281 282 AddFilter(filterOffset, &fixedValues[0], filterLength); 283} 284 285void SkConvolutionFilter1D::AddFilter(int filterOffset, 286 const ConvolutionFixed* filterValues, 287 int filterLength) { 288 // It is common for leading/trailing filter values to be zeros. In such 289 // cases it is beneficial to only store the central factors. 290 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on 291 // a 1080p image this optimization gives a ~10% speed improvement. 292 int filterSize = filterLength; 293 int firstNonZero = 0; 294 while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { 295 firstNonZero++; 296 } 297 298 if (firstNonZero < filterLength) { 299 // Here we have at least one non-zero factor. 300 int lastNonZero = filterLength - 1; 301 while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { 302 lastNonZero--; 303 } 304 305 filterOffset += firstNonZero; 306 filterLength = lastNonZero + 1 - firstNonZero; 307 SkASSERT(filterLength > 0); 308 309 for (int i = firstNonZero; i <= lastNonZero; i++) { 310 fFilterValues.push_back(filterValues[i]); 311 } 312 } else { 313 // Here all the factors were zeroes. 314 filterLength = 0; 315 } 316 317 FilterInstance instance; 318 319 // We pushed filterLength elements onto fFilterValues 320 instance.fDataLocation = (static_cast<int>(fFilterValues.count()) - 321 filterLength); 322 instance.fOffset = filterOffset; 323 instance.fTrimmedLength = filterLength; 324 instance.fLength = filterSize; 325 fFilters.push_back(instance); 326 327 fMaxFilter = SkTMax(fMaxFilter, filterLength); 328} 329 330const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter( 331 int* specifiedFilterlength, 332 int* filterOffset, 333 int* filterLength) const { 334 const FilterInstance& filter = fFilters[0]; 335 *filterOffset = filter.fOffset; 336 *filterLength = filter.fTrimmedLength; 337 *specifiedFilterlength = filter.fLength; 338 if (filter.fTrimmedLength == 0) { 339 return NULL; 340 } 341 342 return &fFilterValues[filter.fDataLocation]; 343} 344 345void BGRAConvolve2D(const unsigned char* sourceData, 346 int sourceByteRowStride, 347 bool sourceHasAlpha, 348 const SkConvolutionFilter1D& filterX, 349 const SkConvolutionFilter1D& filterY, 350 int outputByteRowStride, 351 unsigned char* output, 352 SkConvolutionProcs* convolveProcs, 353 bool useSimdIfPossible) { 354 355 int maxYFilterSize = filterY.maxFilter(); 356 357 // The next row in the input that we will generate a horizontally 358 // convolved row for. If the filter doesn't start at the beginning of the 359 // image (this is the case when we are only resizing a subset), then we 360 // don't want to generate any output rows before that. Compute the starting 361 // row for convolution as the first pixel for the first vertical filter. 362 int filterOffset, filterLength; 363 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 364 filterY.FilterForValue(0, &filterOffset, &filterLength); 365 int nextXRow = filterOffset; 366 367 // We loop over each row in the input doing a horizontal convolution. This 368 // will result in a horizontally convolved image. We write the results into 369 // a circular buffer of convolved rows and do vertical convolution as rows 370 // are available. This prevents us from having to store the entire 371 // intermediate image and helps cache coherency. 372 // We will need four extra rows to allow horizontal convolution could be done 373 // simultaneously. We also pad each row in row buffer to be aligned-up to 374 // 16 bytes. 375 // TODO(jiesun): We do not use aligned load from row buffer in vertical 376 // convolution pass yet. Somehow Windows does not like it. 377 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; 378 int rowBufferHeight = maxYFilterSize + 379 (convolveProcs->fConvolve4RowsHorizontally ? 4 : 0); 380 CircularRowBuffer rowBuffer(rowBufferWidth, 381 rowBufferHeight, 382 filterOffset); 383 384 // Loop over every possible output row, processing just enough horizontal 385 // convolutions to run each subsequent vertical convolution. 386 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); 387 int numOutputRows = filterY.numValues(); 388 389 // We need to check which is the last line to convolve before we advance 4 390 // lines in one iteration. 391 int lastFilterOffset, lastFilterLength; 392 393 // SSE2 can access up to 3 extra pixels past the end of the 394 // buffer. At the bottom of the image, we have to be careful 395 // not to access data past the end of the buffer. Normally 396 // we fall back to the C++ implementation for the last row. 397 // If the last row is less than 3 pixels wide, we may have to fall 398 // back to the C++ version for more rows. Compute how many 399 // rows we need to avoid the SSE implementation for here. 400 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, 401 &lastFilterLength); 402 int avoidSimdRows = 1 + convolveProcs->fExtraHorizontalReads / 403 (lastFilterOffset + lastFilterLength); 404 405 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, 406 &lastFilterLength); 407 408 for (int outY = 0; outY < numOutputRows; outY++) { 409 filterValues = filterY.FilterForValue(outY, 410 &filterOffset, &filterLength); 411 412 // Generate output rows until we have enough to run the current filter. 413 while (nextXRow < filterOffset + filterLength) { 414 if (convolveProcs->fConvolve4RowsHorizontally && 415 nextXRow + 3 < lastFilterOffset + lastFilterLength - 416 avoidSimdRows) { 417 const unsigned char* src[4]; 418 unsigned char* outRow[4]; 419 for (int i = 0; i < 4; ++i) { 420 src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride]; 421 outRow[i] = rowBuffer.advanceRow(); 422 } 423 convolveProcs->fConvolve4RowsHorizontally(src, filterX, outRow); 424 nextXRow += 4; 425 } else { 426 // Check if we need to avoid SSE2 for this row. 427 if (convolveProcs->fConvolveHorizontally && 428 nextXRow < lastFilterOffset + lastFilterLength - 429 avoidSimdRows) { 430 convolveProcs->fConvolveHorizontally( 431 &sourceData[nextXRow * sourceByteRowStride], 432 filterX, rowBuffer.advanceRow(), sourceHasAlpha); 433 } else { 434 if (sourceHasAlpha) { 435 ConvolveHorizontally<true>( 436 &sourceData[nextXRow * sourceByteRowStride], 437 filterX, rowBuffer.advanceRow()); 438 } else { 439 ConvolveHorizontally<false>( 440 &sourceData[nextXRow * sourceByteRowStride], 441 filterX, rowBuffer.advanceRow()); 442 } 443 } 444 nextXRow++; 445 } 446 } 447 448 // Compute where in the output image this row of final data will go. 449 unsigned char* curOutputRow = &output[outY * outputByteRowStride]; 450 451 // Get the list of rows that the circular buffer has, in order. 452 int firstRowInCircularBuffer; 453 unsigned char* const* rowsToConvolve = 454 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); 455 456 // Now compute the start of the subset of those rows that the filter 457 // needs. 458 unsigned char* const* firstRowForFilter = 459 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; 460 461 if (convolveProcs->fConvolveVertically) { 462 convolveProcs->fConvolveVertically(filterValues, filterLength, 463 firstRowForFilter, 464 filterX.numValues(), curOutputRow, 465 sourceHasAlpha); 466 } else { 467 ConvolveVertically(filterValues, filterLength, 468 firstRowForFilter, 469 filterX.numValues(), curOutputRow, 470 sourceHasAlpha); 471 } 472 } 473} 474