1138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Use of this source code is governed by a BSD-style license that can be 3138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// found in the LICENSE file. 4138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 5138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com#include "SkConvolver.h" 6138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com#include "SkSize.h" 7138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com#include "SkTypes.h" 8138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 9138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comnamespace { 10138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 11138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Converts the argument to an 8-bit unsigned value by clamping to the range 12138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // 0-255. 13138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com inline unsigned char ClampTo8(int a) { 14138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (static_cast<unsigned>(a) < 256) { 15138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return a; // Avoid the extra check in the common case. 16138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 17138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (a < 0) { 18138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return 0; 19138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 20138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return 255; 21138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 22138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 23138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Stores a list of rows in a circular buffer. The usage is you write into it 24138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // by calling AdvanceRow. It will keep track of which row in the buffer it 25138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // should use next, and the total number of rows added. 26138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com class CircularRowBuffer { 27138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com public: 28138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The number of pixels in each row is given in |sourceRowPixelWidth|. 29138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The maximum number of rows needed in the buffer is |maxYFilterSize| 30138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // (we only need to store enough rows for the biggest filter). 31138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // 32138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We use the |firstInputRow| to compute the coordinates of all of the 33138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // following rows returned by Advance(). 34138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, 35138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int firstInputRow) 36138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com : fRowByteWidth(destRowPixelWidth * 4), 37138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fNumRows(maxYFilterSize), 38138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fNextRow(0), 39138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fNextRowCoordinate(firstInputRow) { 40138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fBuffer.reset(fRowByteWidth * maxYFilterSize); 41138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fRowAddresses.reset(fNumRows); 42138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 43138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 44138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Moves to the next row in the buffer, returning a pointer to the beginning 45138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // of it. 46138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* advanceRow() { 47138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; 48138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fNextRowCoordinate++; 49138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 50138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Set the pointer to the next row to use, wrapping around if necessary. 51138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fNextRow++; 52138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (fNextRow == fNumRows) { 53138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fNextRow = 0; 54138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 55138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return row; 56138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 57138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 58138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Returns a pointer to an "unrolled" array of rows. These rows will start 59138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // at the y coordinate placed into |*firstRowIndex| and will continue in 60138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // order for the maximum number of rows in this circular buffer. 61138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // 62138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The |firstRowIndex_| may be negative. This means the circular buffer 63138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // starts before the top of the image (it hasn't been filled yet). 64138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* const* GetRowAddresses(int* firstRowIndex) { 65138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Example for a 4-element circular buffer holding coords 6-9. 66138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Row 0 Coord 8 67138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Row 1 Coord 9 68138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. 69138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Row 3 Coord 7 70138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // 71138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The "next" row is also the first (lowest) coordinate. This computation 72138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // may yield a negative value, but that's OK, the math will work out 73138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // since the user of this buffer will compute the offset relative 74138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // to the firstRowIndex and the negative rows will never be used. 75138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com *firstRowIndex = fNextRowCoordinate - fNumRows; 76138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 77138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int curRow = fNextRow; 78138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int i = 0; i < fNumRows; i++) { 79138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; 80138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 81138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Advance to the next row, wrapping if necessary. 82138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com curRow++; 83138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (curRow == fNumRows) { 84138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com curRow = 0; 85138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 86138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 87138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return &fRowAddresses[0]; 88138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 89138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 90138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com private: 91138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The buffer storing the rows. They are packed, each one fRowByteWidth. 92138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkTArray<unsigned char> fBuffer; 93138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 94138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Number of bytes per row in the |buffer|. 95138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int fRowByteWidth; 96138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 97138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The number of rows available in the buffer. 98138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int fNumRows; 99138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 100138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The next row index we should write into. This wraps around as the 101138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // circular buffer is used. 102138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int fNextRow; 103138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 104138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The y coordinate of the |fNextRow|. This is incremented each time a 105138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // new row is appended and does not wrap. 106138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int fNextRowCoordinate; 107138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 108138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Buffer used by GetRowAddresses(). 109138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkTArray<unsigned char*> fRowAddresses; 110138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com }; 111138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 112138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Convolves horizontally along a single row. The row data is given in 113138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// |srcData| and continues for the numValues() of the filter. 114138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comtemplate<bool hasAlpha> 115138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com void ConvolveHorizontally(const unsigned char* srcData, 116138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const SkConvolutionFilter1D& filter, 117138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* outRow) { 118138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Loop over each pixel on this row in the output image. 119138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int numValues = filter.numValues(); 120138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int outX = 0; outX < numValues; outX++) { 121138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Get the filter that determines the current output pixel. 122138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterOffset, filterLength; 123138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 124138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filter.FilterForValue(outX, &filterOffset, &filterLength); 125138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 126138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Compute the first pixel in this row that the filter affects. It will 127138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // touch |filterLength| pixels (4 bytes each) after this. 128138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const unsigned char* rowToFilter = &srcData[filterOffset * 4]; 129138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 130138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Apply the filter to the row to get the destination pixel in |accum|. 131138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int accum[4] = {0}; 132138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int filterX = 0; filterX < filterLength; filterX++) { 133138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX]; 134138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; 135138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; 136138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; 137138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (hasAlpha) { 138138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; 139138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 140138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 141138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 142138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Bring this value back in range. All of the filter scaling factors 143138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // are in fixed point with kShiftBits bits of fractional part. 144138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[0] >>= SkConvolutionFilter1D::kShiftBits; 145138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[1] >>= SkConvolutionFilter1D::kShiftBits; 146138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[2] >>= SkConvolutionFilter1D::kShiftBits; 147138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (hasAlpha) { 148138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[3] >>= SkConvolutionFilter1D::kShiftBits; 149138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 150138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 151138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Store the new pixel. 152138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[outX * 4 + 0] = ClampTo8(accum[0]); 153138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[outX * 4 + 1] = ClampTo8(accum[1]); 154138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[outX * 4 + 2] = ClampTo8(accum[2]); 155138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (hasAlpha) { 156138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[outX * 4 + 3] = ClampTo8(accum[3]); 157138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 158138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 159138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 160138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 1614e4b935d526f5720b3f15c4742eef84d49ebb984humper // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize). We originally 1624e4b935d526f5720b3f15c4742eef84d49ebb984humper // thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles 1634e4b935d526f5720b3f15c4742eef84d49ebb984humper // suffer here too. 1644e4b935d526f5720b3f15c4742eef84d49ebb984humper // 165b726df472bb996aaab9ea0e62568208599385a1cmtklein // Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. http://skbug.com/2575 1664e4b935d526f5720b3f15c4742eef84d49ebb984humper #if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE) 167b726df472bb996aaab9ea0e62568208599385a1cmtklein #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline)) 1680cf27818e0259b08b155cae48a429ad681230348mtklein #else 1690cf27818e0259b08b155cae48a429ad681230348mtklein #define SK_MAYBE_DISABLE_VECTORIZATION 1700cf27818e0259b08b155cae48a429ad681230348mtklein #endif 1710cf27818e0259b08b155cae48a429ad681230348mtklein 1720cf27818e0259b08b155cae48a429ad681230348mtklein SK_MAYBE_DISABLE_VECTORIZATION 1730cf27818e0259b08b155cae48a429ad681230348mtklein static void ConvolveHorizontallyAlpha(const unsigned char* srcData, 1740cf27818e0259b08b155cae48a429ad681230348mtklein const SkConvolutionFilter1D& filter, 1750cf27818e0259b08b155cae48a429ad681230348mtklein unsigned char* outRow) { 1760cf27818e0259b08b155cae48a429ad681230348mtklein return ConvolveHorizontally<true>(srcData, filter, outRow); 1770cf27818e0259b08b155cae48a429ad681230348mtklein } 1780cf27818e0259b08b155cae48a429ad681230348mtklein 1790cf27818e0259b08b155cae48a429ad681230348mtklein SK_MAYBE_DISABLE_VECTORIZATION 1800cf27818e0259b08b155cae48a429ad681230348mtklein static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData, 1810cf27818e0259b08b155cae48a429ad681230348mtklein const SkConvolutionFilter1D& filter, 1820cf27818e0259b08b155cae48a429ad681230348mtklein unsigned char* outRow) { 1830cf27818e0259b08b155cae48a429ad681230348mtklein return ConvolveHorizontally<false>(srcData, filter, outRow); 1840cf27818e0259b08b155cae48a429ad681230348mtklein } 1850cf27818e0259b08b155cae48a429ad681230348mtklein 1860cf27818e0259b08b155cae48a429ad681230348mtklein #undef SK_MAYBE_DISABLE_VECTORIZATION 1870cf27818e0259b08b155cae48a429ad681230348mtklein 1880cf27818e0259b08b155cae48a429ad681230348mtklein 189138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Does vertical convolution to produce one output row. The filter values and 190138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// length are given in the first two parameters. These are applied to each 191138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// of the rows pointed to in the |sourceDataRows| array, with each row 192138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// being |pixelWidth| wide. 193138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// 194138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// The output must have room for |pixelWidth * 4| bytes. 195138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comtemplate<bool hasAlpha> 196138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 197138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterLength, 198138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* const* sourceDataRows, 199138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int pixelWidth, 200138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* outRow) { 201138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We go through each column in the output and do a vertical convolution, 202138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // generating one output pixel each time. 203138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int outX = 0; outX < pixelWidth; outX++) { 204138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Compute the number of bytes over in each row that the current column 205138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // we're convolving starts at. The pixel will cover the next 4 bytes. 206138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int byteOffset = outX * 4; 207138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 208138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Apply the filter to one column of pixels. 209138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int accum[4] = {0}; 210138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int filterY = 0; filterY < filterLength; filterY++) { 211138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY]; 212138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; 213138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; 214138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; 215138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (hasAlpha) { 216138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3]; 217138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 218138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 219138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 220138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Bring this value back in range. All of the filter scaling factors 221138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // are in fixed point with kShiftBits bits of precision. 222138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[0] >>= SkConvolutionFilter1D::kShiftBits; 223138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[1] >>= SkConvolutionFilter1D::kShiftBits; 224138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[2] >>= SkConvolutionFilter1D::kShiftBits; 225138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (hasAlpha) { 226138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com accum[3] >>= SkConvolutionFilter1D::kShiftBits; 227138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 228138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 229138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Store the new pixel. 230138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 0] = ClampTo8(accum[0]); 231138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 1] = ClampTo8(accum[1]); 232138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 2] = ClampTo8(accum[2]); 233138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (hasAlpha) { 234138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char alpha = ClampTo8(accum[3]); 235138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 236138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Make sure the alpha channel doesn't come out smaller than any of the 237138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // color channels. We use premultipled alpha channels, so this should 238138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // never happen, but rounding errors will cause this from time to time. 239138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // These "impossible" colors will cause overflows (and hence random pixel 240138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // values) when the resulting bitmap is drawn to the screen. 241138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // 242138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We only need to do this when generating the final output row (here). 243138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int maxColorChannel = SkTMax(outRow[byteOffset + 0], 2441f3c73825b8a1752abc6b74fbce978a430de6473skia.committer@gmail.com SkTMax(outRow[byteOffset + 1], 245138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 2])); 246138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (alpha < maxColorChannel) { 247138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 3] = maxColorChannel; 248138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 249138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 3] = alpha; 250138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 251138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 252138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // No alpha channel, the image is opaque. 253138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[byteOffset + 3] = 0xff; 254138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 255138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 256138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 257138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 258138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 259138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterLength, 260138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* const* sourceDataRows, 261138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int pixelWidth, 262138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* outRow, 263138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com bool sourceHasAlpha) { 264138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (sourceHasAlpha) { 265138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com ConvolveVertically<true>(filterValues, filterLength, 266138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com sourceDataRows, pixelWidth, 267138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow); 268138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 269138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com ConvolveVertically<false>(filterValues, filterLength, 270138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com sourceDataRows, pixelWidth, 271138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow); 272138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 273138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 274138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 275138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} // namespace 276138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 277138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// SkConvolutionFilter1D --------------------------------------------------------- 278138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 279138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comSkConvolutionFilter1D::SkConvolutionFilter1D() 280138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com: fMaxFilter(0) { 281138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} 282138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 283138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comSkConvolutionFilter1D::~SkConvolutionFilter1D() { 284138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} 285138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 286138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comvoid SkConvolutionFilter1D::AddFilter(int filterOffset, 287138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const float* filterValues, 288138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterLength) { 289138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkASSERT(filterLength > 0); 290138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 291138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkTArray<ConvolutionFixed> fixedValues; 292138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fixedValues.reset(filterLength); 293138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 294138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int i = 0; i < filterLength; ++i) { 295138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fixedValues.push_back(FloatToFixed(filterValues[i])); 296138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 297138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 298138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com AddFilter(filterOffset, &fixedValues[0], filterLength); 299138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} 300138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 301138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comvoid SkConvolutionFilter1D::AddFilter(int filterOffset, 302138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const ConvolutionFixed* filterValues, 303138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterLength) { 304138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // It is common for leading/trailing filter values to be zeros. In such 305138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // cases it is beneficial to only store the central factors. 306138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on 307138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // a 1080p image this optimization gives a ~10% speed improvement. 308138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterSize = filterLength; 309138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int firstNonZero = 0; 310138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { 311138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com firstNonZero++; 312138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 313138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 314138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (firstNonZero < filterLength) { 315138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Here we have at least one non-zero factor. 316138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int lastNonZero = filterLength - 1; 317138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { 318138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com lastNonZero--; 319138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 320138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 321138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterOffset += firstNonZero; 322138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterLength = lastNonZero + 1 - firstNonZero; 323138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkASSERT(filterLength > 0); 324138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 325138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int i = firstNonZero; i <= lastNonZero; i++) { 326d7a9fcc61f2c513064f67b125c0b2eb918768bb7rmistry@google.com fFilterValues.push_back(filterValues[i]); 327138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 328138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 329138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Here all the factors were zeroes. 330138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterLength = 0; 331138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 332138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 333138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com FilterInstance instance; 334138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 335138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We pushed filterLength elements onto fFilterValues 336138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com instance.fDataLocation = (static_cast<int>(fFilterValues.count()) - 337138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterLength); 338138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com instance.fOffset = filterOffset; 339138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com instance.fTrimmedLength = filterLength; 340138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com instance.fLength = filterSize; 341138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fFilters.push_back(instance); 342138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 343138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com fMaxFilter = SkTMax(fMaxFilter, filterLength); 344138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} 345138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 346138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comconst SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter( 347138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int* specifiedFilterlength, 348138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int* filterOffset, 349138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int* filterLength) const { 350138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const FilterInstance& filter = fFilters[0]; 351138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com *filterOffset = filter.fOffset; 352138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com *filterLength = filter.fTrimmedLength; 353138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com *specifiedFilterlength = filter.fLength; 354138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (filter.fTrimmedLength == 0) { 355138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return NULL; 356138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 357138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 358138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com return &fFilterValues[filter.fDataLocation]; 359138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} 360138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 361138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comvoid BGRAConvolve2D(const unsigned char* sourceData, 362138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int sourceByteRowStride, 363138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com bool sourceHasAlpha, 364138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const SkConvolutionFilter1D& filterX, 365138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const SkConvolutionFilter1D& filterY, 366138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int outputByteRowStride, 367138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* output, 368fed04b34315ed72dbb20e630908638d1c829c760reed@google.com const SkConvolutionProcs& convolveProcs, 369138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com bool useSimdIfPossible) { 370138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 371138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int maxYFilterSize = filterY.maxFilter(); 372138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 373138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // The next row in the input that we will generate a horizontally 374138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // convolved row for. If the filter doesn't start at the beginning of the 375138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // image (this is the case when we are only resizing a subset), then we 376138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // don't want to generate any output rows before that. Compute the starting 377138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // row for convolution as the first pixel for the first vertical filter. 378138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int filterOffset, filterLength; 379138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 380138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterY.FilterForValue(0, &filterOffset, &filterLength); 381138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int nextXRow = filterOffset; 382138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 383138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We loop over each row in the input doing a horizontal convolution. This 384138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // will result in a horizontally convolved image. We write the results into 385138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // a circular buffer of convolved rows and do vertical convolution as rows 386138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // are available. This prevents us from having to store the entire 387138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // intermediate image and helps cache coherency. 388138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We will need four extra rows to allow horizontal convolution could be done 389138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // simultaneously. We also pad each row in row buffer to be aligned-up to 390138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // 16 bytes. 391138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // TODO(jiesun): We do not use aligned load from row buffer in vertical 392138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // convolution pass yet. Somehow Windows does not like it. 393138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; 394138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int rowBufferHeight = maxYFilterSize + 395fed04b34315ed72dbb20e630908638d1c829c760reed@google.com (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); 396138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com CircularRowBuffer rowBuffer(rowBufferWidth, 397138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com rowBufferHeight, 398138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterOffset); 399138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 400138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Loop over every possible output row, processing just enough horizontal 401138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // convolutions to run each subsequent vertical convolution. 402138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com SkASSERT(outputByteRowStride >= filterX.numValues() * 4); 403138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int numOutputRows = filterY.numValues(); 404138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 405138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // We need to check which is the last line to convolve before we advance 4 406138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // lines in one iteration. 407138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int lastFilterOffset, lastFilterLength; 408138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 409138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // SSE2 can access up to 3 extra pixels past the end of the 410138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // buffer. At the bottom of the image, we have to be careful 411138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // not to access data past the end of the buffer. Normally 412138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // we fall back to the C++ implementation for the last row. 413138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // If the last row is less than 3 pixels wide, we may have to fall 414138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // back to the C++ version for more rows. Compute how many 415138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // rows we need to avoid the SSE implementation for here. 416138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, 417138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com &lastFilterLength); 418fed04b34315ed72dbb20e630908638d1c829c760reed@google.com int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / 419138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com (lastFilterOffset + lastFilterLength); 420138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 421138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, 422138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com &lastFilterLength); 423138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 424138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int outY = 0; outY < numOutputRows; outY++) { 425138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterValues = filterY.FilterForValue(outY, 426138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com &filterOffset, &filterLength); 427138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 428138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Generate output rows until we have enough to run the current filter. 429138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com while (nextXRow < filterOffset + filterLength) { 430fed04b34315ed72dbb20e630908638d1c829c760reed@google.com if (convolveProcs.fConvolve4RowsHorizontally && 431138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com nextXRow + 3 < lastFilterOffset + lastFilterLength - 432138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com avoidSimdRows) { 433138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com const unsigned char* src[4]; 434138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* outRow[4]; 435138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com for (int i = 0; i < 4; ++i) { 43635fcd15d0598bca6a265100ace5d10a0d992cf9csugoi src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride]; 437138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com outRow[i] = rowBuffer.advanceRow(); 438138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 439fed04b34315ed72dbb20e630908638d1c829c760reed@google.com convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow); 440138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com nextXRow += 4; 441138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 442138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Check if we need to avoid SSE2 for this row. 443fed04b34315ed72dbb20e630908638d1c829c760reed@google.com if (convolveProcs.fConvolveHorizontally && 444138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com nextXRow < lastFilterOffset + lastFilterLength - 445138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com avoidSimdRows) { 446fed04b34315ed72dbb20e630908638d1c829c760reed@google.com convolveProcs.fConvolveHorizontally( 44735fcd15d0598bca6a265100ace5d10a0d992cf9csugoi &sourceData[(uint64_t)nextXRow * sourceByteRowStride], 448138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterX, rowBuffer.advanceRow(), sourceHasAlpha); 449138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 450138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com if (sourceHasAlpha) { 4510cf27818e0259b08b155cae48a429ad681230348mtklein ConvolveHorizontallyAlpha( 45235fcd15d0598bca6a265100ace5d10a0d992cf9csugoi &sourceData[(uint64_t)nextXRow * sourceByteRowStride], 453138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterX, rowBuffer.advanceRow()); 454138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 4550cf27818e0259b08b155cae48a429ad681230348mtklein ConvolveHorizontallyNoAlpha( 45635fcd15d0598bca6a265100ace5d10a0d992cf9csugoi &sourceData[(uint64_t)nextXRow * sourceByteRowStride], 457138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterX, rowBuffer.advanceRow()); 458138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 459138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 460138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com nextXRow++; 461138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 462138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 463138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 464138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Compute where in the output image this row of final data will go. 465c197c8a47eb9cf09b389f4dd5d235510feec18cbsugoi unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride]; 466138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 467138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Get the list of rows that the circular buffer has, in order. 468138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com int firstRowInCircularBuffer; 469138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* const* rowsToConvolve = 470138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); 471138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 472138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // Now compute the start of the subset of those rows that the filter 473138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com // needs. 474138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com unsigned char* const* firstRowForFilter = 475138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; 476138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com 477fed04b34315ed72dbb20e630908638d1c829c760reed@google.com if (convolveProcs.fConvolveVertically) { 478fed04b34315ed72dbb20e630908638d1c829c760reed@google.com convolveProcs.fConvolveVertically(filterValues, filterLength, 479138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com firstRowForFilter, 480138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterX.numValues(), curOutputRow, 481138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com sourceHasAlpha); 482138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } else { 483138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com ConvolveVertically(filterValues, filterLength, 484138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com firstRowForFilter, 485138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com filterX.numValues(), curOutputRow, 486138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com sourceHasAlpha); 487138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 488138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com } 489138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com} 490