1138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Use of this source code is governed by a BSD-style license that can be
3138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// found in the LICENSE file.
4138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
5138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com#include "SkConvolver.h"
6138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com#include "SkSize.h"
7138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com#include "SkTypes.h"
8138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
9138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comnamespace {
10138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
11138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // Converts the argument to an 8-bit unsigned value by clamping to the range
12138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // 0-255.
13138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    inline unsigned char ClampTo8(int a) {
14138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        if (static_cast<unsigned>(a) < 256) {
15138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            return a;  // Avoid the extra check in the common case.
16138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
17138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        if (a < 0) {
18138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            return 0;
19138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
20138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        return 255;
21138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
22138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
23138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // Stores a list of rows in a circular buffer. The usage is you write into it
24138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // by calling AdvanceRow. It will keep track of which row in the buffer it
25138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // should use next, and the total number of rows added.
26138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    class CircularRowBuffer {
27138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    public:
28138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The number of pixels in each row is given in |sourceRowPixelWidth|.
29138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The maximum number of rows needed in the buffer is |maxYFilterSize|
30138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // (we only need to store enough rows for the biggest filter).
31138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        //
32138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // We use the |firstInputRow| to compute the coordinates of all of the
33138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // following rows returned by Advance().
34138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
35138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                          int firstInputRow)
36138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            : fRowByteWidth(destRowPixelWidth * 4),
37138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com              fNumRows(maxYFilterSize),
38138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com              fNextRow(0),
39138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com              fNextRowCoordinate(firstInputRow) {
40138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            fBuffer.reset(fRowByteWidth * maxYFilterSize);
41138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            fRowAddresses.reset(fNumRows);
42138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
43138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
44138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Moves to the next row in the buffer, returning a pointer to the beginning
45138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // of it.
46138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        unsigned char* advanceRow() {
47138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
48138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            fNextRowCoordinate++;
49138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
50138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Set the pointer to the next row to use, wrapping around if necessary.
51138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            fNextRow++;
52138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            if (fNextRow == fNumRows) {
53138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                fNextRow = 0;
54138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
55138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            return row;
56138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
57138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
58138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Returns a pointer to an "unrolled" array of rows. These rows will start
59138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // at the y coordinate placed into |*firstRowIndex| and will continue in
60138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // order for the maximum number of rows in this circular buffer.
61138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        //
62138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The |firstRowIndex_| may be negative. This means the circular buffer
63138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // starts before the top of the image (it hasn't been filled yet).
64138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        unsigned char* const* GetRowAddresses(int* firstRowIndex) {
65138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Example for a 4-element circular buffer holding coords 6-9.
66138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            //   Row 0   Coord 8
67138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            //   Row 1   Coord 9
68138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            //   Row 2   Coord 6  <- fNextRow = 2, fNextRowCoordinate = 10.
69138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            //   Row 3   Coord 7
70138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            //
71138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // The "next" row is also the first (lowest) coordinate. This computation
72138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // may yield a negative value, but that's OK, the math will work out
73138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // since the user of this buffer will compute the offset relative
74138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // to the firstRowIndex and the negative rows will never be used.
75138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            *firstRowIndex = fNextRowCoordinate - fNumRows;
76138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
77138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            int curRow = fNextRow;
78138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            for (int i = 0; i < fNumRows; i++) {
79138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
80138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
81138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // Advance to the next row, wrapping if necessary.
82138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                curRow++;
83138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                if (curRow == fNumRows) {
84138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    curRow = 0;
85138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                }
86138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
87138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            return &fRowAddresses[0];
88138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
89138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
90138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    private:
91138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The buffer storing the rows. They are packed, each one fRowByteWidth.
92138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        SkTArray<unsigned char> fBuffer;
93138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
94138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Number of bytes per row in the |buffer|.
95138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int fRowByteWidth;
96138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
97138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The number of rows available in the buffer.
98138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int fNumRows;
99138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
100138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The next row index we should write into. This wraps around as the
101138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // circular buffer is used.
102138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int fNextRow;
103138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
104138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // The y coordinate of the |fNextRow|. This is incremented each time a
105138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // new row is appended and does not wrap.
106138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int fNextRowCoordinate;
107138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
108138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Buffer used by GetRowAddresses().
109138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        SkTArray<unsigned char*> fRowAddresses;
110138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    };
111138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
112138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Convolves horizontally along a single row. The row data is given in
113138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// |srcData| and continues for the numValues() of the filter.
114138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comtemplate<bool hasAlpha>
115138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    void ConvolveHorizontally(const unsigned char* srcData,
116138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                              const SkConvolutionFilter1D& filter,
117138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                              unsigned char* outRow) {
118138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Loop over each pixel on this row in the output image.
119138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int numValues = filter.numValues();
120138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        for (int outX = 0; outX < numValues; outX++) {
121138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Get the filter that determines the current output pixel.
122138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            int filterOffset, filterLength;
123138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
124138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                filter.FilterForValue(outX, &filterOffset, &filterLength);
125138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
126138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Compute the first pixel in this row that the filter affects. It will
127138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // touch |filterLength| pixels (4 bytes each) after this.
128138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            const unsigned char* rowToFilter = &srcData[filterOffset * 4];
129138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
130138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Apply the filter to the row to get the destination pixel in |accum|.
131138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            int accum[4] = {0};
132138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            for (int filterX = 0; filterX < filterLength; filterX++) {
133138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
134138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
135138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
136138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
137138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                if (hasAlpha) {
138138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
139138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                }
140138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
141138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
142138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Bring this value back in range. All of the filter scaling factors
143138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // are in fixed point with kShiftBits bits of fractional part.
144138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            accum[0] >>= SkConvolutionFilter1D::kShiftBits;
145138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            accum[1] >>= SkConvolutionFilter1D::kShiftBits;
146138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            accum[2] >>= SkConvolutionFilter1D::kShiftBits;
147138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            if (hasAlpha) {
148138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[3] >>= SkConvolutionFilter1D::kShiftBits;
149138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
150138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
151138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Store the new pixel.
152138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            outRow[outX * 4 + 0] = ClampTo8(accum[0]);
153138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            outRow[outX * 4 + 1] = ClampTo8(accum[1]);
154138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            outRow[outX * 4 + 2] = ClampTo8(accum[2]);
155138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            if (hasAlpha) {
156138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                outRow[outX * 4 + 3] = ClampTo8(accum[3]);
157138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
158138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
159138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
160138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
1614e4b935d526f5720b3f15c4742eef84d49ebb984humper    // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize).  We originally
1624e4b935d526f5720b3f15c4742eef84d49ebb984humper    // thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles
1634e4b935d526f5720b3f15c4742eef84d49ebb984humper    // suffer here too.
1644e4b935d526f5720b3f15c4742eef84d49ebb984humper    //
165b726df472bb996aaab9ea0e62568208599385a1cmtklein    // Dropping to -O2 disables -ftree-vectorize.  GCC 4.6 needs noinline.  http://skbug.com/2575
1664e4b935d526f5720b3f15c4742eef84d49ebb984humper    #if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)
167b726df472bb996aaab9ea0e62568208599385a1cmtklein        #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline))
1680cf27818e0259b08b155cae48a429ad681230348mtklein    #else
1690cf27818e0259b08b155cae48a429ad681230348mtklein        #define SK_MAYBE_DISABLE_VECTORIZATION
1700cf27818e0259b08b155cae48a429ad681230348mtklein    #endif
1710cf27818e0259b08b155cae48a429ad681230348mtklein
1720cf27818e0259b08b155cae48a429ad681230348mtklein    SK_MAYBE_DISABLE_VECTORIZATION
1730cf27818e0259b08b155cae48a429ad681230348mtklein    static void ConvolveHorizontallyAlpha(const unsigned char* srcData,
1740cf27818e0259b08b155cae48a429ad681230348mtklein                                          const SkConvolutionFilter1D& filter,
1750cf27818e0259b08b155cae48a429ad681230348mtklein                                          unsigned char* outRow) {
1760cf27818e0259b08b155cae48a429ad681230348mtklein        return ConvolveHorizontally<true>(srcData, filter, outRow);
1770cf27818e0259b08b155cae48a429ad681230348mtklein    }
1780cf27818e0259b08b155cae48a429ad681230348mtklein
1790cf27818e0259b08b155cae48a429ad681230348mtklein    SK_MAYBE_DISABLE_VECTORIZATION
1800cf27818e0259b08b155cae48a429ad681230348mtklein    static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData,
1810cf27818e0259b08b155cae48a429ad681230348mtklein                                            const SkConvolutionFilter1D& filter,
1820cf27818e0259b08b155cae48a429ad681230348mtklein                                            unsigned char* outRow) {
1830cf27818e0259b08b155cae48a429ad681230348mtklein        return ConvolveHorizontally<false>(srcData, filter, outRow);
1840cf27818e0259b08b155cae48a429ad681230348mtklein    }
1850cf27818e0259b08b155cae48a429ad681230348mtklein
1860cf27818e0259b08b155cae48a429ad681230348mtklein    #undef SK_MAYBE_DISABLE_VECTORIZATION
1870cf27818e0259b08b155cae48a429ad681230348mtklein
1880cf27818e0259b08b155cae48a429ad681230348mtklein
189138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// Does vertical convolution to produce one output row. The filter values and
190138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// length are given in the first two parameters. These are applied to each
191138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// of the rows pointed to in the |sourceDataRows| array, with each row
192138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// being |pixelWidth| wide.
193138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com//
194138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// The output must have room for |pixelWidth * 4| bytes.
195138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comtemplate<bool hasAlpha>
196138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
197138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            int filterLength,
198138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            unsigned char* const* sourceDataRows,
199138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            int pixelWidth,
200138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            unsigned char* outRow) {
201138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // We go through each column in the output and do a vertical convolution,
202138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // generating one output pixel each time.
203138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        for (int outX = 0; outX < pixelWidth; outX++) {
204138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Compute the number of bytes over in each row that the current column
205138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // we're convolving starts at. The pixel will cover the next 4 bytes.
206138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            int byteOffset = outX * 4;
207138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
208138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Apply the filter to one column of pixels.
209138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            int accum[4] = {0};
210138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            for (int filterY = 0; filterY < filterLength; filterY++) {
211138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
212138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
213138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
214138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
215138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                if (hasAlpha) {
216138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
217138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                }
218138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
219138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
220138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Bring this value back in range. All of the filter scaling factors
221138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // are in fixed point with kShiftBits bits of precision.
222138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            accum[0] >>= SkConvolutionFilter1D::kShiftBits;
223138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            accum[1] >>= SkConvolutionFilter1D::kShiftBits;
224138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            accum[2] >>= SkConvolutionFilter1D::kShiftBits;
225138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            if (hasAlpha) {
226138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                accum[3] >>= SkConvolutionFilter1D::kShiftBits;
227138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
228138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
229138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            // Store the new pixel.
230138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            outRow[byteOffset + 0] = ClampTo8(accum[0]);
231138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            outRow[byteOffset + 1] = ClampTo8(accum[1]);
232138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            outRow[byteOffset + 2] = ClampTo8(accum[2]);
233138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            if (hasAlpha) {
234138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                unsigned char alpha = ClampTo8(accum[3]);
235138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
236138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // Make sure the alpha channel doesn't come out smaller than any of the
237138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // color channels. We use premultipled alpha channels, so this should
238138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // never happen, but rounding errors will cause this from time to time.
239138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // These "impossible" colors will cause overflows (and hence random pixel
240138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // values) when the resulting bitmap is drawn to the screen.
241138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                //
242138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // We only need to do this when generating the final output row (here).
243138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                int maxColorChannel = SkTMax(outRow[byteOffset + 0],
2441f3c73825b8a1752abc6b74fbce978a430de6473skia.committer@gmail.com                                               SkTMax(outRow[byteOffset + 1],
245138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                                      outRow[byteOffset + 2]));
246138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                if (alpha < maxColorChannel) {
247138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    outRow[byteOffset + 3] = maxColorChannel;
248138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                } else {
249138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    outRow[byteOffset + 3] = alpha;
250138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                }
251138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            } else {
252138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // No alpha channel, the image is opaque.
253138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                outRow[byteOffset + 3] = 0xff;
254138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
255138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
256138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
257138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
258138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
259138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            int filterLength,
260138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            unsigned char* const* sourceDataRows,
261138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            int pixelWidth,
262138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            unsigned char* outRow,
263138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            bool sourceHasAlpha) {
264138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        if (sourceHasAlpha) {
265138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            ConvolveVertically<true>(filterValues, filterLength,
266138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                     sourceDataRows, pixelWidth,
267138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                     outRow);
268138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        } else {
269138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            ConvolveVertically<false>(filterValues, filterLength,
270138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                      sourceDataRows, pixelWidth,
271138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                      outRow);
272138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
273138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
274138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
275138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}  // namespace
276138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
277138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com// SkConvolutionFilter1D ---------------------------------------------------------
278138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
279138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comSkConvolutionFilter1D::SkConvolutionFilter1D()
280138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com: fMaxFilter(0) {
281138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}
282138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
283138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comSkConvolutionFilter1D::~SkConvolutionFilter1D() {
284138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}
285138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
286138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comvoid SkConvolutionFilter1D::AddFilter(int filterOffset,
287138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                      const float* filterValues,
288138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                      int filterLength) {
289138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    SkASSERT(filterLength > 0);
290138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
291138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    SkTArray<ConvolutionFixed> fixedValues;
292138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    fixedValues.reset(filterLength);
293138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
294138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    for (int i = 0; i < filterLength; ++i) {
295138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        fixedValues.push_back(FloatToFixed(filterValues[i]));
296138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
297138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
298138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    AddFilter(filterOffset, &fixedValues[0], filterLength);
299138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}
300138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
301138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comvoid SkConvolutionFilter1D::AddFilter(int filterOffset,
302138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                      const ConvolutionFixed* filterValues,
303138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                      int filterLength) {
304138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // It is common for leading/trailing filter values to be zeros. In such
305138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // cases it is beneficial to only store the central factors.
306138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
307138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // a 1080p image this optimization gives a ~10% speed improvement.
308138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int filterSize = filterLength;
309138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int firstNonZero = 0;
310138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
311138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        firstNonZero++;
312138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
313138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
314138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    if (firstNonZero < filterLength) {
315138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Here we have at least one non-zero factor.
316138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int lastNonZero = filterLength - 1;
317138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
318138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            lastNonZero--;
319138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
320138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
321138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        filterOffset += firstNonZero;
322138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        filterLength = lastNonZero + 1 - firstNonZero;
323138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        SkASSERT(filterLength > 0);
324138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
325138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        for (int i = firstNonZero; i <= lastNonZero; i++) {
326d7a9fcc61f2c513064f67b125c0b2eb918768bb7rmistry@google.com            fFilterValues.push_back(filterValues[i]);
327138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
328138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    } else {
329138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Here all the factors were zeroes.
330138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        filterLength = 0;
331138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
332138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
333138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    FilterInstance instance;
334138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
335138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // We pushed filterLength elements onto fFilterValues
336138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -
337138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                               filterLength);
338138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    instance.fOffset = filterOffset;
339138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    instance.fTrimmedLength = filterLength;
340138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    instance.fLength = filterSize;
341138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    fFilters.push_back(instance);
342138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
343138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    fMaxFilter = SkTMax(fMaxFilter, filterLength);
344138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}
345138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
346138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comconst SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter(
347138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                        int* specifiedFilterlength,
348138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                        int* filterOffset,
349138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                        int* filterLength) const {
350138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    const FilterInstance& filter = fFilters[0];
351138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    *filterOffset = filter.fOffset;
352138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    *filterLength = filter.fTrimmedLength;
353138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    *specifiedFilterlength = filter.fLength;
354138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    if (filter.fTrimmedLength == 0) {
355138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        return NULL;
356138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
357138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
358138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    return &fFilterValues[filter.fDataLocation];
359138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}
360138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
361138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.comvoid BGRAConvolve2D(const unsigned char* sourceData,
362138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    int sourceByteRowStride,
363138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    bool sourceHasAlpha,
364138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    const SkConvolutionFilter1D& filterX,
365138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    const SkConvolutionFilter1D& filterY,
366138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    int outputByteRowStride,
367138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    unsigned char* output,
368fed04b34315ed72dbb20e630908638d1c829c760reed@google.com                    const SkConvolutionProcs& convolveProcs,
369138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    bool useSimdIfPossible) {
370138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
371138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int maxYFilterSize = filterY.maxFilter();
372138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
373138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // The next row in the input that we will generate a horizontally
374138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // convolved row for. If the filter doesn't start at the beginning of the
375138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // image (this is the case when we are only resizing a subset), then we
376138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // don't want to generate any output rows before that. Compute the starting
377138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // row for convolution as the first pixel for the first vertical filter.
378138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int filterOffset, filterLength;
379138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
380138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        filterY.FilterForValue(0, &filterOffset, &filterLength);
381138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int nextXRow = filterOffset;
382138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
383138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // We loop over each row in the input doing a horizontal convolution. This
384138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // will result in a horizontally convolved image. We write the results into
385138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // a circular buffer of convolved rows and do vertical convolution as rows
386138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // are available. This prevents us from having to store the entire
387138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // intermediate image and helps cache coherency.
388138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // We will need four extra rows to allow horizontal convolution could be done
389138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // simultaneously. We also pad each row in row buffer to be aligned-up to
390138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // 16 bytes.
391138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // TODO(jiesun): We do not use aligned load from row buffer in vertical
392138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // convolution pass yet. Somehow Windows does not like it.
393138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;
394138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int rowBufferHeight = maxYFilterSize +
395fed04b34315ed72dbb20e630908638d1c829c760reed@google.com                          (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0);
396138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    CircularRowBuffer rowBuffer(rowBufferWidth,
397138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                rowBufferHeight,
398138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                filterOffset);
399138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
400138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // Loop over every possible output row, processing just enough horizontal
401138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // convolutions to run each subsequent vertical convolution.
402138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    SkASSERT(outputByteRowStride >= filterX.numValues() * 4);
403138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int numOutputRows = filterY.numValues();
404138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
405138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // We need to check which is the last line to convolve before we advance 4
406138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // lines in one iteration.
407138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    int lastFilterOffset, lastFilterLength;
408138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
409138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // SSE2 can access up to 3 extra pixels past the end of the
410138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // buffer. At the bottom of the image, we have to be careful
411138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // not to access data past the end of the buffer. Normally
412138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // we fall back to the C++ implementation for the last row.
413138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // If the last row is less than 3 pixels wide, we may have to fall
414138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // back to the C++ version for more rows. Compute how many
415138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    // rows we need to avoid the SSE implementation for here.
416138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset,
417138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                           &lastFilterLength);
418fed04b34315ed72dbb20e630908638d1c829c760reed@google.com    int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads /
419138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        (lastFilterOffset + lastFilterLength);
420138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
421138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
422138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                           &lastFilterLength);
423138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
424138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    for (int outY = 0; outY < numOutputRows; outY++) {
425138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        filterValues = filterY.FilterForValue(outY,
426138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                              &filterOffset, &filterLength);
427138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
428138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Generate output rows until we have enough to run the current filter.
429138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        while (nextXRow < filterOffset + filterLength) {
430fed04b34315ed72dbb20e630908638d1c829c760reed@google.com            if (convolveProcs.fConvolve4RowsHorizontally &&
431138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                nextXRow + 3 < lastFilterOffset + lastFilterLength -
432138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                avoidSimdRows) {
433138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                const unsigned char* src[4];
434138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                unsigned char* outRow[4];
435138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                for (int i = 0; i < 4; ++i) {
43635fcd15d0598bca6a265100ace5d10a0d992cf9csugoi                    src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];
437138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    outRow[i] = rowBuffer.advanceRow();
438138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                }
439fed04b34315ed72dbb20e630908638d1c829c760reed@google.com                convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow);
440138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                nextXRow += 4;
441138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            } else {
442138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                // Check if we need to avoid SSE2 for this row.
443fed04b34315ed72dbb20e630908638d1c829c760reed@google.com                if (convolveProcs.fConvolveHorizontally &&
444138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    nextXRow < lastFilterOffset + lastFilterLength -
445138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    avoidSimdRows) {
446fed04b34315ed72dbb20e630908638d1c829c760reed@google.com                    convolveProcs.fConvolveHorizontally(
44735fcd15d0598bca6a265100ace5d10a0d992cf9csugoi                        &sourceData[(uint64_t)nextXRow * sourceByteRowStride],
448138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                        filterX, rowBuffer.advanceRow(), sourceHasAlpha);
449138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                } else {
450138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    if (sourceHasAlpha) {
4510cf27818e0259b08b155cae48a429ad681230348mtklein                        ConvolveHorizontallyAlpha(
45235fcd15d0598bca6a265100ace5d10a0d992cf9csugoi                            &sourceData[(uint64_t)nextXRow * sourceByteRowStride],
453138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            filterX, rowBuffer.advanceRow());
454138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    } else {
4550cf27818e0259b08b155cae48a429ad681230348mtklein                        ConvolveHorizontallyNoAlpha(
45635fcd15d0598bca6a265100ace5d10a0d992cf9csugoi                            &sourceData[(uint64_t)nextXRow * sourceByteRowStride],
457138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                            filterX, rowBuffer.advanceRow());
458138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                    }
459138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                }
460138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                nextXRow++;
461138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            }
462138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
463138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
464138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Compute where in the output image this row of final data will go.
465c197c8a47eb9cf09b389f4dd5d235510feec18cbsugoi        unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
466138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
467138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Get the list of rows that the circular buffer has, in order.
468138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        int firstRowInCircularBuffer;
469138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        unsigned char* const* rowsToConvolve =
470138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
471138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
472138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // Now compute the start of the subset of those rows that the filter
473138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        // needs.
474138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        unsigned char* const* firstRowForFilter =
475138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
476138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com
477fed04b34315ed72dbb20e630908638d1c829c760reed@google.com        if (convolveProcs.fConvolveVertically) {
478fed04b34315ed72dbb20e630908638d1c829c760reed@google.com            convolveProcs.fConvolveVertically(filterValues, filterLength,
479138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                               firstRowForFilter,
480138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                               filterX.numValues(), curOutputRow,
481138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                                               sourceHasAlpha);
482138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        } else {
483138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com            ConvolveVertically(filterValues, filterLength,
484138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                               firstRowForFilter,
485138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                               filterX.numValues(), curOutputRow,
486138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com                               sourceHasAlpha);
487138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com        }
488138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com    }
489138ebc3e4061cf533ea2f7f3717239670fdc6e43humper@google.com}
490