SkBitmapProcState_matrixProcs.cpp revision 9cfc83cc8ac2ee50a7ce889e65a707941f48bdea
1/* NEON optimized code (C) COPYRIGHT 2009 Motorola
2 *
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7#include "SkBitmapProcState.h"
8#include "SkPerspIter.h"
9#include "SkShader.h"
10#include "SkUtils.h"
11#include "SkUtilsArm.h"
12
13// Helper to ensure that when we shift down, we do it w/o sign-extension
14// so the caller doesn't have to manually mask off the top 16 bits
15//
static unsigned SK_USHIFT16(unsigned x) {
    // The operand is unsigned, so the shift is logical: the vacated top bits
    // are zero-filled and the result is just the upper 16 bits of x.
    const unsigned upperHalf = x >> 16;
    return upperHalf;
}
19
20/*  returns 0...(n-1) given any x (positive or negative).
21
22    As an example, if n (which is always positive) is 5...
23
24          x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
25    returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
26 */
27static inline int sk_int_mod(int x, int n) {
28    SkASSERT(n > 0);
29    if ((unsigned)x >= (unsigned)n) {
30        if (x < 0) {
31            x = n + ~(~x % n);
32        } else {
33            x = x % n;
34        }
35    }
36    return x;
37}
38
39/*
40 *  The decal_ functions require that
41 *  1. dx > 0
42 *  2. [fx, fx+dx, fx+2dx, fx+3dx, ... fx+(count-1)dx] are all <= maxX
43 *
44 *  In addition, we use SkFractionalInt to keep more fractional precision than
45 *  just SkFixed, so we will abort the decal_ call if dx is very small, since
46 *  the decal_ function just operates on SkFixed. If that were changed, we could
47 *  skip the very_small test here.
48 */
49static inline bool can_truncate_to_fixed_for_decal(SkFractionalInt frX,
50                                                   SkFractionalInt frDx,
51                                                   int count, unsigned max) {
52    SkFixed dx = SkFractionalIntToFixed(frDx);
53
54    // if decal_ kept SkFractionalInt precision, this would just be dx <= 0
55    // I just made up the 1/256. Just don't want to perceive accumulated error
56    // if we truncate frDx and lose its low bits.
57    if (dx <= SK_Fixed1 / 256) {
58        return false;
59    }
60
61    // We cast to unsigned so we don't have to check for negative values, which
62    // will now appear as very large positive values, and thus fail our test!
63    SkFixed fx = SkFractionalIntToFixed(frX);
64    return (unsigned)SkFixedFloorToInt(fx) <= max &&
65           (unsigned)SkFixedFloorToInt(fx + dx * (count - 1)) < max;
66}
67
// Forward declarations of the decal fast paths; both are defined further down
// in this file. Each takes the destination buffer, a starting SkFixed
// coordinate (fx), a per-sample SkFixed step (dx), and the number of samples.
// NOTE(review): they are declared ahead of the "SkBitmapProcState_matrix.h"
// inclusions below, presumably because that template calls them when
// CHECK_FOR_DECAL is defined -- confirm against the header.
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);

// Compile neon code paths if needed
#if !SK_ARM_NEON_IS_NONE

// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
// They are the NEON counterparts of the ClampX_ClampY / RepeatX_RepeatY proc
// tables; chooseMatrixProc() selects between the variants through
// SK_ARM_NEON_WRAP.
extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];

#endif // !SK_ARM_NEON_IS_NONE
79
80// Compile non-neon code path if needed
#if !SK_ARM_NEON_IS_ALWAYS
// First instantiation of the matrix-proc template: clamp tiling in X and Y.
// MAKENAME pastes a suffix onto "ClampX_ClampY"; chooseMatrixProc() indexes
// the resulting ClampX_ClampY_Procs table.
// NOTE(review): the template header presumably #undefs these macros after
// use, since they are redefined below without an #undef here -- confirm.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
// Integer texel coordinate: drop the 16 fractional bits, then clamp to max.
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
// Top 4 bits of the 16-bit fraction (bits 12..15 of the fixed-point value).
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Only the clamp/clamp instantiation defines CHECK_FOR_DECAL.
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"

// Second instantiation: repeat tiling in X and Y. The coordinate is reduced to
// its 16-bit fraction and scaled by (max + 1) before the integer part
// (SK_USHIFT16) or the next 4 bits below it (>> 12 & 0xF) are extracted.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
#define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
#endif
97
// Third instantiation of the matrix-proc template: arbitrary tile modes. The
// tiling is done through function pointers stored on the SkBitmapProcState
// (fTileProcX/Y, fTileLowBitsProcX/Y), which chooseMatrixProc() fills in
// before returning an entry of GeneralXY_Procs.
#define MAKENAME(suffix)        GeneralXY ## suffix
// PREAMBLE copies the four tile procs out of the state into locals; the
// (void) casts silence unused-variable warnings for procs that do not use
// every local.
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Extra parameter / argument lists used to pass those locals along.
// NOTE(review): presumably consumed by helper functions inside the template
// header -- confirm there.
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
// The tile proc's result is scaled by (max + 1) and the integer part taken
// with SK_USHIFT16.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// The low-bits procs receive the raw coordinate and the scale (max + 1).
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
112
113static inline U16CPU fixed_clamp(SkFixed x)
114{
115#ifdef SK_CPU_HAS_CONDITIONAL_INSTR
116    if (x < 0)
117        x = 0;
118    if (x >> 16)
119        x = 0xFFFF;
120#else
121    if (x >> 16)
122    {
123#if 0   // is this faster?
124        x = (~x >> 31) & 0xFFFF;
125#else
126        if (x < 0)
127            x = 0;
128        else
129            x = 0xFFFF;
130#endif
131    }
132#endif
133    return x;
134}
135
136static inline U16CPU fixed_repeat(SkFixed x)
137{
138    return x & 0xFFFF;
139}
140
141// Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
142// See http://code.google.com/p/skia/issues/detail?id=472
143#if defined(_MSC_VER) && (_MSC_VER >= 1600)
144#pragma optimize("", off)
145#endif
146
147static inline U16CPU fixed_mirror(SkFixed x)
148{
149    SkFixed s = x << 15 >> 31;
150    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
151    return (x ^ s) & 0xFFFF;
152}
153
154#if defined(_MSC_VER) && (_MSC_VER >= 1600)
155#pragma optimize("", on)
156#endif
157
158static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
159{
160    if (SkShader::kClamp_TileMode == m)
161        return fixed_clamp;
162    if (SkShader::kRepeat_TileMode == m)
163        return fixed_repeat;
164    SkASSERT(SkShader::kMirror_TileMode == m);
165    return fixed_mirror;
166}
167
168static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
169    return (x >> 12) & 0xF;
170}
171
172static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
173    return ((x * scale) >> 12) & 0xF;
174}
175
176static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
177    if (SkShader::kClamp_TileMode == m) {
178        return fixed_clamp_lowbits;
179    } else {
180        SkASSERT(SkShader::kMirror_TileMode == m ||
181                 SkShader::kRepeat_TileMode == m);
182        // mirror and repeat have the same behavior for the low bits.
183        return fixed_repeat_or_mirrow_lowbits;
184    }
185}
186
187static inline U16CPU int_clamp(int x, int n) {
188#ifdef SK_CPU_HAS_CONDITIONAL_INSTR
189    if (x >= n)
190        x = n - 1;
191    if (x < 0)
192        x = 0;
193#else
194    if ((unsigned)x >= (unsigned)n) {
195        if (x < 0) {
196            x = 0;
197        } else {
198            x = n - 1;
199        }
200    }
201#endif
202    return x;
203}
204
205static inline U16CPU int_repeat(int x, int n) {
206    return sk_int_mod(x, n);
207}
208
209static inline U16CPU int_mirror(int x, int n) {
210    x = sk_int_mod(x, 2 * n);
211    if (x >= n) {
212        x = n + ~(x - n);
213    }
214    return x;
215}
216
217#if 0
218static void test_int_tileprocs() {
219    for (int i = -8; i <= 8; i++) {
220        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
221    }
222}
223#endif
224
225static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
226    if (SkShader::kClamp_TileMode == tm)
227        return int_clamp;
228    if (SkShader::kRepeat_TileMode == tm)
229        return int_repeat;
230    SkASSERT(SkShader::kMirror_TileMode == tm);
231    return int_mirror;
232}
233
234//////////////////////////////////////////////////////////////////////////////
235
236void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
237{
238    int i;
239
240    for (i = (count >> 2); i > 0; --i)
241    {
242        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
243        fx += dx+dx;
244        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
245        fx += dx+dx;
246    }
247    count &= 3;
248
249    uint16_t* xx = (uint16_t*)dst;
250    for (i = count; i > 0; --i) {
251        *xx++ = SkToU16(fx >> 16); fx += dx;
252    }
253}
254
255void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
256{
257
258
259    if (count & 1)
260    {
261        SkASSERT((fx >> (16 + 14)) == 0);
262        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
263        fx += dx;
264    }
265    while ((count -= 2) >= 0)
266    {
267        SkASSERT((fx >> (16 + 14)) == 0);
268        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
269        fx += dx;
270
271        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
272        fx += dx;
273    }
274}
275
276///////////////////////////////////////////////////////////////////////////////
277// stores the same as SCALE, but is cheaper to compute. Also since there is no
278// scale, we don't need/have a FILTER version
279
280static void fill_sequential(uint16_t xptr[], int start, int count) {
281#if 1
282    if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
283        *xptr++ = start++;
284        count -= 1;
285    }
286    if (count > 3) {
287        uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
288        uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
289        uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
290        start += count & ~3;
291        int qcount = count >> 2;
292        do {
293            *xxptr++ = pattern0;
294            pattern0 += 0x40004;
295            *xxptr++ = pattern1;
296            pattern1 += 0x40004;
297        } while (--qcount != 0);
298        xptr = reinterpret_cast<uint16_t*>(xxptr);
299        count &= 3;
300    }
301    while (--count >= 0) {
302        *xptr++ = start++;
303    }
304#else
305    for (int i = 0; i < count; i++) {
306        *xptr++ = start++;
307    }
308#endif
309}
310
311static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
312                                   int x, int y) {
313    SkPoint pt;
314    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
315               SkIntToScalar(y) + SK_ScalarHalf, &pt);
316    **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
317                           s.fBitmap->height());
318    *xy += 1;   // bump the ptr
319    // return our starting X position
320    return SkScalarToFixed(pt.fX) >> 16;
321}
322
323static void clampx_nofilter_trans(const SkBitmapProcState& s,
324                                  uint32_t xy[], int count, int x, int y) {
325    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
326
327    int xpos = nofilter_trans_preamble(s, &xy, x, y);
328    const int width = s.fBitmap->width();
329    if (1 == width) {
330        // all of the following X values must be 0
331        memset(xy, 0, count * sizeof(uint16_t));
332        return;
333    }
334
335    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
336    int n;
337
338    // fill before 0 as needed
339    if (xpos < 0) {
340        n = -xpos;
341        if (n > count) {
342            n = count;
343        }
344        memset(xptr, 0, n * sizeof(uint16_t));
345        count -= n;
346        if (0 == count) {
347            return;
348        }
349        xptr += n;
350        xpos = 0;
351    }
352
353    // fill in 0..width-1 if needed
354    if (xpos < width) {
355        n = width - xpos;
356        if (n > count) {
357            n = count;
358        }
359        fill_sequential(xptr, xpos, n);
360        count -= n;
361        if (0 == count) {
362            return;
363        }
364        xptr += n;
365    }
366
367    // fill the remaining with the max value
368    sk_memset16(xptr, width - 1, count);
369}
370
371static void repeatx_nofilter_trans(const SkBitmapProcState& s,
372                                   uint32_t xy[], int count, int x, int y) {
373    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
374
375    int xpos = nofilter_trans_preamble(s, &xy, x, y);
376    const int width = s.fBitmap->width();
377    if (1 == width) {
378        // all of the following X values must be 0
379        memset(xy, 0, count * sizeof(uint16_t));
380        return;
381    }
382
383    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
384    int start = sk_int_mod(xpos, width);
385    int n = width - start;
386    if (n > count) {
387        n = count;
388    }
389    fill_sequential(xptr, start, n);
390    xptr += n;
391    count -= n;
392
393    while (count >= width) {
394        fill_sequential(xptr, 0, width);
395        xptr += width;
396        count -= width;
397    }
398
399    if (count > 0) {
400        fill_sequential(xptr, 0, count);
401    }
402}
403
404static void fill_backwards(uint16_t xptr[], int pos, int count) {
405    for (int i = 0; i < count; i++) {
406        SkASSERT(pos >= 0);
407        xptr[i] = pos--;
408    }
409}
410
411static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
412                                   uint32_t xy[], int count, int x, int y) {
413    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
414
415    int xpos = nofilter_trans_preamble(s, &xy, x, y);
416    const int width = s.fBitmap->width();
417    if (1 == width) {
418        // all of the following X values must be 0
419        memset(xy, 0, count * sizeof(uint16_t));
420        return;
421    }
422
423    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
424    // need to know our start, and our initial phase (forward or backward)
425    bool forward;
426    int n;
427    int start = sk_int_mod(xpos, 2 * width);
428    if (start >= width) {
429        start = width + ~(start - width);
430        forward = false;
431        n = start + 1;  // [start .. 0]
432    } else {
433        forward = true;
434        n = width - start;  // [start .. width)
435    }
436    if (n > count) {
437        n = count;
438    }
439    if (forward) {
440        fill_sequential(xptr, start, n);
441    } else {
442        fill_backwards(xptr, start, n);
443    }
444    forward = !forward;
445    xptr += n;
446    count -= n;
447
448    while (count >= width) {
449        if (forward) {
450            fill_sequential(xptr, 0, width);
451        } else {
452            fill_backwards(xptr, width - 1, width);
453        }
454        forward = !forward;
455        xptr += width;
456        count -= width;
457    }
458
459    if (count > 0) {
460        if (forward) {
461            fill_sequential(xptr, 0, count);
462        } else {
463            fill_backwards(xptr, width - 1, count);
464        }
465    }
466}
467
468///////////////////////////////////////////////////////////////////////////////
469
470SkBitmapProcState::MatrixProc
471SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
472//    test_int_tileprocs();
473    // check for our special case when there is no scale/affine/perspective
474    if (trivial_matrix) {
475        SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel);
476        fIntTileProcY = choose_int_tile_proc(fTileModeY);
477        switch (fTileModeX) {
478            case SkShader::kClamp_TileMode:
479                return clampx_nofilter_trans;
480            case SkShader::kRepeat_TileMode:
481                return repeatx_nofilter_trans;
482            case SkShader::kMirror_TileMode:
483                return mirrorx_nofilter_trans;
484        }
485    }
486
487    int index = 0;
488    if (fFilterLevel != SkPaint::kNone_FilterLevel) {
489        index = 1;
490    }
491    if (fInvType & SkMatrix::kPerspective_Mask) {
492        index += 4;
493    } else if (fInvType & SkMatrix::kAffine_Mask) {
494        index += 2;
495    }
496
497    if (SkShader::kClamp_TileMode == fTileModeX &&
498        SkShader::kClamp_TileMode == fTileModeY)
499    {
500        // clamp gets special version of filterOne
501        fFilterOneX = SK_Fixed1;
502        fFilterOneY = SK_Fixed1;
503        return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
504    }
505
506    // all remaining procs use this form for filterOne
507    fFilterOneX = SK_Fixed1 / fBitmap->width();
508    fFilterOneY = SK_Fixed1 / fBitmap->height();
509
510    if (SkShader::kRepeat_TileMode == fTileModeX &&
511        SkShader::kRepeat_TileMode == fTileModeY)
512    {
513        return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
514    }
515
516    fTileProcX = choose_tile_proc(fTileModeX);
517    fTileProcY = choose_tile_proc(fTileModeY);
518    fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
519    fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
520    return GeneralXY_Procs[index];
521}
522