1/* NEON optimized code (C) COPYRIGHT 2009 Motorola
2 *
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7#include "SkBitmapProcState.h"
8#include "SkPerspIter.h"
9#include "SkShader.h"
10#include "SkUtils.h"
11#include "SkUtilsArm.h"
12#include "SkBitmapProcState_utils.h"
13
14/*  returns 0...(n-1) given any x (positive or negative).
15
16    As an example, if n (which is always positive) is 5...
17
18          x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
19    returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
20 */
21static inline int sk_int_mod(int x, int n) {
22    SkASSERT(n > 0);
23    if ((unsigned)x >= (unsigned)n) {
24        if (x < 0) {
25            x = n + ~(~x % n);
26        } else {
27            x = x % n;
28        }
29    }
30    return x;
31}
32
// Forward declarations of the decal helpers that are defined further down in
// this file. They are declared here, ahead of the #include of
// SkBitmapProcState_matrix_template.h that follows.
// NOTE(review): presumably the included templates call them - confirm.
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
35
36#include "SkBitmapProcState_matrix_template.h"
37
38///////////////////////////////////////////////////////////////////////////////
39
40// Compile neon code paths if needed
#if !SK_ARM_NEON_IS_NONE

// NEON proc tables. chooseMatrixProc() reaches them through SK_ARM_NEON_WRAP
// and indexes them exactly like the non-NEON tables in this file.
// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];

#endif // !SK_ARM_NEON_IS_NONE
48
49// Compile non-neon code path if needed
50#if !SK_ARM_NEON_IS_ALWAYS
// Configuration of SkBitmapProcState_matrix.h (included at the end of this
// run of macros) for the clamp/clamp tile-mode pair.
// NOTE(review): presumably the header uses MAKENAME to emit the
// ClampX_ClampY_filter_* procs used by the proc table, and #undefs these
// macros again (they are re-#defined later in this file) - confirm against
// the header.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
// Integer tile index: shift off the low 16 bits, then pass through SkClampMax.
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
// Bits 12..15 of the coordinate (a 4-bit value); 'max' is unused for clamp.
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// NOTE(review): presumably enables the decal fast path inside the header;
// only this clamp configuration defines it - confirm.
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"
58
59struct ClampTileProcs {
60    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
61        return SkClampMax(fx >> 16, max);
62    }
63    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
64        return SkClampMax(fy >> 16, max);
65    }
66};
67
68// Referenced in opts_check_x86.cpp
69void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
70                                  int count, int x, int y) {
71    return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
72}
73void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
74                                  int count, int x, int y) {
75    return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
76}
77
78static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
79    // only clamp lives in the right coord space to check for decal
80    ClampX_ClampY_nofilter_scale,
81    ClampX_ClampY_filter_scale,
82    ClampX_ClampY_nofilter_affine,
83    ClampX_ClampY_filter_affine,
84    NoFilterProc_Persp<ClampTileProcs>,
85    ClampX_ClampY_filter_persp
86};
87
// Configuration of SkBitmapProcState_matrix.h (included at the end of this
// run of macros) for the repeat/repeat tile-mode pair.
// NOTE(review): presumably the header uses MAKENAME to emit the
// RepeatX_RepeatY_filter_* procs used by the proc table - confirm.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
// Integer tile index: keep the low 16 bits of the coordinate, multiply by
// (max + 1), then apply SK_USHIFT16 (presumably an unsigned >> 16 - confirm).
#define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Bits 12..15 of the same product (a 4-bit value); '*' binds tighter than
// '>>', so the shift applies to the whole product.
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
94
95struct RepeatTileProcs {
96    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
97        return SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1));
98    }
99    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
100        return SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1));
101    }
102};
103
104static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
105    NoFilterProc_Scale<RepeatTileProcs, false>,
106    RepeatX_RepeatY_filter_scale,
107    NoFilterProc_Affine<RepeatTileProcs>,
108    RepeatX_RepeatY_filter_affine,
109    NoFilterProc_Persp<RepeatTileProcs>,
110    RepeatX_RepeatY_filter_persp
111};
112#endif
113
// Configuration of SkBitmapProcState_matrix.h (included at the end of this
// run of macros) for the general case, where tiling is done through the
// function pointers stored on the proc state.
// NOTE(review): presumably the header uses MAKENAME to emit the
// GeneralXY_filter_* procs used by the proc table - confirm.
#define MAKENAME(suffix)        GeneralXY ## suffix
// Copies the four tile procs from the state into locals. The (void) casts
// presumably silence unused-variable warnings in procs that use only some of
// them.
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Extra parameter and argument lists (note the leading comma) naming the
// locals above.
// NOTE(review): presumably used to thread them into helpers inside the
// included header - confirm.
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
// Integer tile index: tile the coordinate with the chosen proc, multiply by
// (max + 1), then apply SK_USHIFT16.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// Low bits come from the chosen low-bits proc, which receives the untiled
// coordinate and (max + 1).
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
128
129struct GeneralTileProcs {
130    static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
131        return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
132    }
133    static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
134        return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
135    }
136};
137
138static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
139    NoFilterProc_Scale<GeneralTileProcs, false>,
140    GeneralXY_filter_scale,
141    NoFilterProc_Affine<GeneralTileProcs>,
142    GeneralXY_filter_affine,
143    NoFilterProc_Persp<GeneralTileProcs>,
144    GeneralXY_filter_persp
145};
146
147///////////////////////////////////////////////////////////////////////////////
148
149static inline U16CPU fixed_clamp(SkFixed x) {
150    if (x < 0) {
151        x = 0;
152    }
153    if (x >> 16) {
154        x = 0xFFFF;
155    }
156    return x;
157}
158
159static inline U16CPU fixed_repeat(SkFixed x) {
160    return x & 0xFFFF;
161}
162
163// Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
164// See http://code.google.com/p/skia/issues/detail?id=472
165#if defined(_MSC_VER) && (_MSC_VER >= 1600)
166#pragma optimize("", off)
167#endif
168
169static inline U16CPU fixed_mirror(SkFixed x) {
170    SkFixed s = x << 15 >> 31;
171    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
172    return (x ^ s) & 0xFFFF;
173}
174
175#if defined(_MSC_VER) && (_MSC_VER >= 1600)
176#pragma optimize("", on)
177#endif
178
179static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
180    if (SkShader::kClamp_TileMode == m) {
181        return fixed_clamp;
182    }
183    if (SkShader::kRepeat_TileMode == m) {
184        return fixed_repeat;
185    }
186    SkASSERT(SkShader::kMirror_TileMode == m);
187    return fixed_mirror;
188}
189
190static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
191    return (x >> 12) & 0xF;
192}
193
194static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
195    return ((x * scale) >> 12) & 0xF;
196}
197
198static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
199    if (SkShader::kClamp_TileMode == m) {
200        return fixed_clamp_lowbits;
201    } else {
202        SkASSERT(SkShader::kMirror_TileMode == m ||
203                 SkShader::kRepeat_TileMode == m);
204        // mirror and repeat have the same behavior for the low bits.
205        return fixed_repeat_or_mirrow_lowbits;
206    }
207}
208
209static inline U16CPU int_clamp(int x, int n) {
210    if (x >= n) {
211        x = n - 1;
212    }
213    if (x < 0) {
214        x = 0;
215    }
216    return x;
217}
218
219static inline U16CPU int_repeat(int x, int n) {
220    return sk_int_mod(x, n);
221}
222
223static inline U16CPU int_mirror(int x, int n) {
224    x = sk_int_mod(x, 2 * n);
225    if (x >= n) {
226        x = n + ~(x - n);
227    }
228    return x;
229}
230
// Disabled debugging aid (compiled out by #if 0): prints int_mirror(i, 3) for
// every i in [-8, 8] via SkDebugf.
#if 0
static void test_int_tileprocs() {
    for (int i = -8; i <= 8; i++) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    }
}
#endif
238
239static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
240    if (SkShader::kClamp_TileMode == tm)
241        return int_clamp;
242    if (SkShader::kRepeat_TileMode == tm)
243        return int_repeat;
244    SkASSERT(SkShader::kMirror_TileMode == tm);
245    return int_mirror;
246}
247
248//////////////////////////////////////////////////////////////////////////////
249
250void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
251    int i;
252
253    for (i = (count >> 2); i > 0; --i) {
254        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
255        fx += dx+dx;
256        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
257        fx += dx+dx;
258    }
259    count &= 3;
260
261    uint16_t* xx = (uint16_t*)dst;
262    for (i = count; i > 0; --i) {
263        *xx++ = SkToU16(fx >> 16); fx += dx;
264    }
265}
266
267void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
268    if (count & 1) {
269        SkASSERT((fx >> (16 + 14)) == 0);
270        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
271        fx += dx;
272    }
273    while ((count -= 2) >= 0) {
274        SkASSERT((fx >> (16 + 14)) == 0);
275        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
276        fx += dx;
277
278        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
279        fx += dx;
280    }
281}
282
283///////////////////////////////////////////////////////////////////////////////
// The translate-only procs below store their output in the same layout as the
// SCALE procs, but it is cheaper to compute. Because there is no scaling,
// there is no need for (and we do not have) a FILTER version.
286
287static void fill_sequential(uint16_t xptr[], int start, int count) {
288#if 1
289    if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
290        *xptr++ = start++;
291        count -= 1;
292    }
293    if (count > 3) {
294        uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
295        uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
296        uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
297        start += count & ~3;
298        int qcount = count >> 2;
299        do {
300            *xxptr++ = pattern0;
301            pattern0 += 0x40004;
302            *xxptr++ = pattern1;
303            pattern1 += 0x40004;
304        } while (--qcount != 0);
305        xptr = reinterpret_cast<uint16_t*>(xxptr);
306        count &= 3;
307    }
308    while (--count >= 0) {
309        *xptr++ = start++;
310    }
311#else
312    for (int i = 0; i < count; i++) {
313        *xptr++ = start++;
314    }
315#endif
316}
317
318static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
319                                   int x, int y) {
320    SkPoint pt;
321    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
322               SkIntToScalar(y) + SK_ScalarHalf, &pt);
323    **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
324                           s.fBitmap->height());
325    *xy += 1;   // bump the ptr
326    // return our starting X position
327    return SkScalarToFixed(pt.fX) >> 16;
328}
329
330static void clampx_nofilter_trans(const SkBitmapProcState& s,
331                                  uint32_t xy[], int count, int x, int y) {
332    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
333
334    int xpos = nofilter_trans_preamble(s, &xy, x, y);
335    const int width = s.fBitmap->width();
336    if (1 == width) {
337        // all of the following X values must be 0
338        memset(xy, 0, count * sizeof(uint16_t));
339        return;
340    }
341
342    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
343    int n;
344
345    // fill before 0 as needed
346    if (xpos < 0) {
347        n = -xpos;
348        if (n > count) {
349            n = count;
350        }
351        memset(xptr, 0, n * sizeof(uint16_t));
352        count -= n;
353        if (0 == count) {
354            return;
355        }
356        xptr += n;
357        xpos = 0;
358    }
359
360    // fill in 0..width-1 if needed
361    if (xpos < width) {
362        n = width - xpos;
363        if (n > count) {
364            n = count;
365        }
366        fill_sequential(xptr, xpos, n);
367        count -= n;
368        if (0 == count) {
369            return;
370        }
371        xptr += n;
372    }
373
374    // fill the remaining with the max value
375    sk_memset16(xptr, width - 1, count);
376}
377
378static void repeatx_nofilter_trans(const SkBitmapProcState& s,
379                                   uint32_t xy[], int count, int x, int y) {
380    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
381
382    int xpos = nofilter_trans_preamble(s, &xy, x, y);
383    const int width = s.fBitmap->width();
384    if (1 == width) {
385        // all of the following X values must be 0
386        memset(xy, 0, count * sizeof(uint16_t));
387        return;
388    }
389
390    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
391    int start = sk_int_mod(xpos, width);
392    int n = width - start;
393    if (n > count) {
394        n = count;
395    }
396    fill_sequential(xptr, start, n);
397    xptr += n;
398    count -= n;
399
400    while (count >= width) {
401        fill_sequential(xptr, 0, width);
402        xptr += width;
403        count -= width;
404    }
405
406    if (count > 0) {
407        fill_sequential(xptr, 0, count);
408    }
409}
410
411static void fill_backwards(uint16_t xptr[], int pos, int count) {
412    for (int i = 0; i < count; i++) {
413        SkASSERT(pos >= 0);
414        xptr[i] = pos--;
415    }
416}
417
418static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
419                                   uint32_t xy[], int count, int x, int y) {
420    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
421
422    int xpos = nofilter_trans_preamble(s, &xy, x, y);
423    const int width = s.fBitmap->width();
424    if (1 == width) {
425        // all of the following X values must be 0
426        memset(xy, 0, count * sizeof(uint16_t));
427        return;
428    }
429
430    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
431    // need to know our start, and our initial phase (forward or backward)
432    bool forward;
433    int n;
434    int start = sk_int_mod(xpos, 2 * width);
435    if (start >= width) {
436        start = width + ~(start - width);
437        forward = false;
438        n = start + 1;  // [start .. 0]
439    } else {
440        forward = true;
441        n = width - start;  // [start .. width)
442    }
443    if (n > count) {
444        n = count;
445    }
446    if (forward) {
447        fill_sequential(xptr, start, n);
448    } else {
449        fill_backwards(xptr, start, n);
450    }
451    forward = !forward;
452    xptr += n;
453    count -= n;
454
455    while (count >= width) {
456        if (forward) {
457            fill_sequential(xptr, 0, width);
458        } else {
459            fill_backwards(xptr, width - 1, width);
460        }
461        forward = !forward;
462        xptr += width;
463        count -= width;
464    }
465
466    if (count > 0) {
467        if (forward) {
468            fill_sequential(xptr, 0, count);
469        } else {
470            fill_backwards(xptr, width - 1, count);
471        }
472    }
473}
474
475///////////////////////////////////////////////////////////////////////////////
476
477SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
478//    test_int_tileprocs();
479    // check for our special case when there is no scale/affine/perspective
480    if (trivial_matrix) {
481        SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel);
482        fIntTileProcY = choose_int_tile_proc(fTileModeY);
483        switch (fTileModeX) {
484            case SkShader::kClamp_TileMode:
485                return clampx_nofilter_trans;
486            case SkShader::kRepeat_TileMode:
487                return repeatx_nofilter_trans;
488            case SkShader::kMirror_TileMode:
489                return mirrorx_nofilter_trans;
490        }
491    }
492
493    int index = 0;
494    if (fFilterLevel != SkPaint::kNone_FilterLevel) {
495        index = 1;
496    }
497    if (fInvType & SkMatrix::kPerspective_Mask) {
498        index += 4;
499    } else if (fInvType & SkMatrix::kAffine_Mask) {
500        index += 2;
501    }
502
503    if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
504        // clamp gets special version of filterOne
505        fFilterOneX = SK_Fixed1;
506        fFilterOneY = SK_Fixed1;
507        return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
508    }
509
510    // all remaining procs use this form for filterOne
511    fFilterOneX = SK_Fixed1 / fBitmap->width();
512    fFilterOneY = SK_Fixed1 / fBitmap->height();
513
514    if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
515        return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
516    }
517
518    fTileProcX = choose_tile_proc(fTileModeX);
519    fTileProcY = choose_tile_proc(fTileModeY);
520    fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
521    fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
522    return GeneralXY_Procs[index];
523}
524