1/*
2 * Copyright 2008 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8// The copyright below was added in 2009, but I see no record of moto contributions...?
9
10/* NEON optimized code (C) COPYRIGHT 2009 Motorola
11 *
12 * Use of this source code is governed by a BSD-style license that can be
13 * found in the LICENSE file.
14 */
15
16#include "SkBitmapProcState.h"
17#include "SkShader.h"
18#include "SkUtils.h"
19#include "SkUtilsArm.h"
20#include "SkBitmapProcState_utils.h"
21
22/*  returns 0...(n-1) given any x (positive or negative).
23
24    As an example, if n (which is always positive) is 5...
25
26          x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
27    returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
28 */
29static inline int sk_int_mod(int x, int n) {
30    SkASSERT(n > 0);
31    if ((unsigned)x >= (unsigned)n) {
32        if (x < 0) {
33            x = n + ~(~x % n);
34        } else {
35            x = x % n;
36        }
37    }
38    return x;
39}
40
41void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
42void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
43
44#include "SkBitmapProcState_matrix_template.h"
45
46///////////////////////////////////////////////////////////////////////////////
47
48// Compile neon code paths if needed
49#if defined(SK_ARM_HAS_NEON)
50
51// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
52extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
53extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
54
55#endif // defined(SK_ARM_HAS_NEON)
56
57// Compile non-neon code path if needed
58#if !defined(SK_ARM_HAS_NEON)
59#define MAKENAME(suffix)         ClampX_ClampY ## suffix
60#define TILEX_PROCF(fx, max)     SkClampMax((fx) >> 16, max)
61#define TILEY_PROCF(fy, max)     SkClampMax((fy) >> 16, max)
62#define EXTRACT_LOW_BITS(v, max) (((v) >> 12) & 0xF)
63#define CHECK_FOR_DECAL
64#include "SkBitmapProcState_matrix.h"
65
66struct ClampTileProcs {
67    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
68        return SkClampMax(fx >> 16, max);
69    }
70    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
71        return SkClampMax(fy >> 16, max);
72    }
73};
74
75// Referenced in opts_check_x86.cpp
76void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
77                                  int count, int x, int y) {
78    return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
79}
80
81static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
82    // only clamp lives in the right coord space to check for decal
83    ClampX_ClampY_nofilter_scale,
84    ClampX_ClampY_filter_scale,
85};
86
87#define MAKENAME(suffix)         RepeatX_RepeatY ## suffix
88#define TILEX_PROCF(fx, max)     SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1))
89#define TILEY_PROCF(fy, max)     SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1))
90#define EXTRACT_LOW_BITS(v, max) (((unsigned)((v) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
91#include "SkBitmapProcState_matrix.h"
92
93struct RepeatTileProcs {
94    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
95        SkASSERT(max < 65535);
96        return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1));
97    }
98    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
99        SkASSERT(max < 65535);
100        return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1));
101    }
102};
103
104static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
105    NoFilterProc_Scale<RepeatTileProcs, false>,
106    RepeatX_RepeatY_filter_scale,
107};
108#endif
109
110#define MAKENAME(suffix)        GeneralXY ## suffix
111#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
112                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY;
113#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX
114#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY
115#define PREAMBLE_ARG_X          , tileProcX
116#define PREAMBLE_ARG_Y          , tileProcY
117#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
118#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
119#define EXTRACT_LOW_BITS(v, max) (((v * (max + 1)) >> 12) & 0xF)
120#include "SkBitmapProcState_matrix.h"
121
122struct GeneralTileProcs {
123    static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
124        return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
125    }
126    static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
127        return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
128    }
129};
130
131static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
132    NoFilterProc_Scale<GeneralTileProcs, false>,
133    GeneralXY_filter_scale,
134};
135
136///////////////////////////////////////////////////////////////////////////////
137
138static inline U16CPU fixed_clamp(SkFixed x) {
139    if (x < 0) {
140        x = 0;
141    }
142    if (x >> 16) {
143        x = 0xFFFF;
144    }
145    return x;
146}
147
148static inline U16CPU fixed_repeat(SkFixed x) {
149    return x & 0xFFFF;
150}
151
152static inline U16CPU fixed_mirror(SkFixed x) {
153    SkFixed s = SkLeftShift(x, 15) >> 31;
154    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
155    return (x ^ s) & 0xFFFF;
156}
157
158static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
159    if (SkShader::kClamp_TileMode == m) {
160        return fixed_clamp;
161    }
162    if (SkShader::kRepeat_TileMode == m) {
163        return fixed_repeat;
164    }
165    SkASSERT(SkShader::kMirror_TileMode == m);
166    return fixed_mirror;
167}
168
169static inline U16CPU int_clamp(int x, int n) {
170    if (x >= n) {
171        x = n - 1;
172    }
173    if (x < 0) {
174        x = 0;
175    }
176    return x;
177}
178
179static inline U16CPU int_repeat(int x, int n) {
180    return sk_int_mod(x, n);
181}
182
183static inline U16CPU int_mirror(int x, int n) {
184    x = sk_int_mod(x, 2 * n);
185    if (x >= n) {
186        x = n + ~(x - n);
187    }
188    return x;
189}
190
#if 0
// Disabled debugging aid: prints int_mirror(v, 3) for v in [-8, 8].
static void test_int_tileprocs() {
    int v = -8;
    while (v <= 8) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", v, int_mirror(v, 3));
        v++;
    }
}
#endif
198
199static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
200    if (SkShader::kClamp_TileMode == tm)
201        return int_clamp;
202    if (SkShader::kRepeat_TileMode == tm)
203        return int_repeat;
204    SkASSERT(SkShader::kMirror_TileMode == tm);
205    return int_mirror;
206}
207
208//////////////////////////////////////////////////////////////////////////////
209
210void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
211    int i;
212
213    for (i = (count >> 2); i > 0; --i) {
214        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
215        fx += dx+dx;
216        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
217        fx += dx+dx;
218    }
219    count &= 3;
220
221    uint16_t* xx = (uint16_t*)dst;
222    for (i = count; i > 0; --i) {
223        *xx++ = SkToU16(fx >> 16); fx += dx;
224    }
225}
226
227void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
228    if (count & 1) {
229        SkASSERT((fx >> (16 + 14)) == 0);
230        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
231        fx += dx;
232    }
233    while ((count -= 2) >= 0) {
234        SkASSERT((fx >> (16 + 14)) == 0);
235        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
236        fx += dx;
237
238        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
239        fx += dx;
240    }
241}
242
243///////////////////////////////////////////////////////////////////////////////
244// stores the same as SCALE, but is cheaper to compute. Also since there is no
245// scale, we don't need/have a FILTER version
246
// Stores the consecutive values start, start + 1, ... into xptr[0 .. count-1],
// each truncated to 16 bits. Does nothing when count <= 0.
//
// This is deliberately a plain loop. The earlier hand-packed version wrote
// pairs through a uint32_t* alias of the uint16_t buffer, and its alignment
// prologue stored one element even when count was 0. A simple loop has
// neither problem, needs no endian-specific packing, and is easy for the
// compiler to vectorize.
static void fill_sequential(uint16_t xptr[], int start, int count) {
    for (int i = 0; i < count; ++i) {
        *xptr++ = static_cast<uint16_t>(start++);
    }
}
277
278static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
279                                   int x, int y) {
280    const SkBitmapProcStateAutoMapper mapper(s, x, y);
281    **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height());
282    *xy += 1;   // bump the ptr
283    // return our starting X position
284    return mapper.intX();
285}
286
287static void clampx_nofilter_trans(const SkBitmapProcState& s,
288                                  uint32_t xy[], int count, int x, int y) {
289    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
290
291    int xpos = nofilter_trans_preamble(s, &xy, x, y);
292    const int width = s.fPixmap.width();
293    if (1 == width) {
294        // all of the following X values must be 0
295        memset(xy, 0, count * sizeof(uint16_t));
296        return;
297    }
298
299    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
300    int n;
301
302    // fill before 0 as needed
303    if (xpos < 0) {
304        n = -xpos;
305        if (n > count) {
306            n = count;
307        }
308        memset(xptr, 0, n * sizeof(uint16_t));
309        count -= n;
310        if (0 == count) {
311            return;
312        }
313        xptr += n;
314        xpos = 0;
315    }
316
317    // fill in 0..width-1 if needed
318    if (xpos < width) {
319        n = width - xpos;
320        if (n > count) {
321            n = count;
322        }
323        fill_sequential(xptr, xpos, n);
324        count -= n;
325        if (0 == count) {
326            return;
327        }
328        xptr += n;
329    }
330
331    // fill the remaining with the max value
332    sk_memset16(xptr, width - 1, count);
333}
334
335static void repeatx_nofilter_trans(const SkBitmapProcState& s,
336                                   uint32_t xy[], int count, int x, int y) {
337    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
338
339    int xpos = nofilter_trans_preamble(s, &xy, x, y);
340    const int width = s.fPixmap.width();
341    if (1 == width) {
342        // all of the following X values must be 0
343        memset(xy, 0, count * sizeof(uint16_t));
344        return;
345    }
346
347    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
348    int start = sk_int_mod(xpos, width);
349    int n = width - start;
350    if (n > count) {
351        n = count;
352    }
353    fill_sequential(xptr, start, n);
354    xptr += n;
355    count -= n;
356
357    while (count >= width) {
358        fill_sequential(xptr, 0, width);
359        xptr += width;
360        count -= width;
361    }
362
363    if (count > 0) {
364        fill_sequential(xptr, 0, count);
365    }
366}
367
368static void fill_backwards(uint16_t xptr[], int pos, int count) {
369    for (int i = 0; i < count; i++) {
370        SkASSERT(pos >= 0);
371        xptr[i] = pos--;
372    }
373}
374
375static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
376                                   uint32_t xy[], int count, int x, int y) {
377    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
378
379    int xpos = nofilter_trans_preamble(s, &xy, x, y);
380    const int width = s.fPixmap.width();
381    if (1 == width) {
382        // all of the following X values must be 0
383        memset(xy, 0, count * sizeof(uint16_t));
384        return;
385    }
386
387    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
388    // need to know our start, and our initial phase (forward or backward)
389    bool forward;
390    int n;
391    int start = sk_int_mod(xpos, 2 * width);
392    if (start >= width) {
393        start = width + ~(start - width);
394        forward = false;
395        n = start + 1;  // [start .. 0]
396    } else {
397        forward = true;
398        n = width - start;  // [start .. width)
399    }
400    if (n > count) {
401        n = count;
402    }
403    if (forward) {
404        fill_sequential(xptr, start, n);
405    } else {
406        fill_backwards(xptr, start, n);
407    }
408    forward = !forward;
409    xptr += n;
410    count -= n;
411
412    while (count >= width) {
413        if (forward) {
414            fill_sequential(xptr, 0, width);
415        } else {
416            fill_backwards(xptr, width - 1, width);
417        }
418        forward = !forward;
419        xptr += width;
420        count -= width;
421    }
422
423    if (count > 0) {
424        if (forward) {
425            fill_sequential(xptr, 0, count);
426        } else {
427            fill_backwards(xptr, width - 1, count);
428        }
429    }
430}
431
432///////////////////////////////////////////////////////////////////////////////
433
434SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
435    SkASSERT((fInvType & (SkMatrix::kAffine_Mask | SkMatrix::kPerspective_Mask)) == 0);
436
437//    test_int_tileprocs();
438    // check for our special case when there is no scale/affine/perspective
439    if (trivial_matrix && kNone_SkFilterQuality == fFilterQuality) {
440        fIntTileProcY = choose_int_tile_proc(fTileModeY);
441        switch (fTileModeX) {
442            case SkShader::kClamp_TileMode:
443                return clampx_nofilter_trans;
444            case SkShader::kRepeat_TileMode:
445                return repeatx_nofilter_trans;
446            case SkShader::kMirror_TileMode:
447                return mirrorx_nofilter_trans;
448            case SkShader::kDecal_TileMode:
449                SkASSERT(false);    // should never get here, handled by stages
450                return clampx_nofilter_trans;
451        }
452    }
453
454    int index = 0;
455    if (fFilterQuality != kNone_SkFilterQuality) {
456        index = 1;
457    }
458
459    if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
460        // clamp gets special version of filterOne
461        fFilterOneX = SK_Fixed1;
462        fFilterOneY = SK_Fixed1;
463        return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
464    }
465
466    // all remaining procs use this form for filterOne
467    fFilterOneX = SK_Fixed1 / fPixmap.width();
468    fFilterOneY = SK_Fixed1 / fPixmap.height();
469
470    if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
471        return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
472    }
473
474    fTileProcX = choose_tile_proc(fTileModeX);
475    fTileProcY = choose_tile_proc(fTileModeY);
476    return GeneralXY_Procs[index];
477}
478