1/*
2 * Copyright 2008 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8// The copyright below was added in 2009, but I see no record of moto contributions...?
9
10/* NEON optimized code (C) COPYRIGHT 2009 Motorola
11 *
12 * Use of this source code is governed by a BSD-style license that can be
13 * found in the LICENSE file.
14 */
15
16#include "SkBitmapProcState.h"
17#include "SkShader.h"
18#include "SkUtils.h"
19#include "SkUtilsArm.h"
20#include "SkBitmapProcState_utils.h"
21
22/*  returns 0...(n-1) given any x (positive or negative).
23
24    As an example, if n (which is always positive) is 5...
25
26          x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
27    returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
28 */
29static inline int sk_int_mod(int x, int n) {
30    SkASSERT(n > 0);
31    if ((unsigned)x >= (unsigned)n) {
32        if (x < 0) {
33            x = n + ~(~x % n);
34        } else {
35            x = x % n;
36        }
37    }
38    return x;
39}
40
41void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
42void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
43
44#include "SkBitmapProcState_matrix_template.h"
45
46///////////////////////////////////////////////////////////////////////////////
47
48// Compile neon code paths if needed
49#if defined(SK_ARM_HAS_NEON)
50
51// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
52extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
53extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
54
55#endif // defined(SK_ARM_HAS_NEON)
56
57// Compile non-neon code path if needed
58#if !defined(SK_ARM_HAS_NEON)
59#define MAKENAME(suffix)         ClampX_ClampY ## suffix
60#define TILEX_PROCF(fx, max)     SkClampMax((fx) >> 16, max)
61#define TILEY_PROCF(fy, max)     SkClampMax((fy) >> 16, max)
62#define EXTRACT_LOW_BITS(v, max) (((v) >> 12) & 0xF)
63#define CHECK_FOR_DECAL
64#include "SkBitmapProcState_matrix.h"
65
66struct ClampTileProcs {
67    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
68        return SkClampMax(fx >> 16, max);
69    }
70    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
71        return SkClampMax(fy >> 16, max);
72    }
73};
74
75// Referenced in opts_check_x86.cpp
76void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
77                                  int count, int x, int y) {
78    return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
79}
80void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
81                                  int count, int x, int y) {
82    return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
83}
84
85static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
86    // only clamp lives in the right coord space to check for decal
87    ClampX_ClampY_nofilter_scale,
88    ClampX_ClampY_filter_scale,
89    ClampX_ClampY_nofilter_affine,
90    ClampX_ClampY_filter_affine,
91};
92
93#define MAKENAME(suffix)         RepeatX_RepeatY ## suffix
94#define TILEX_PROCF(fx, max)     SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1))
95#define TILEY_PROCF(fy, max)     SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1))
96#define EXTRACT_LOW_BITS(v, max) (((unsigned)((v) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
97#include "SkBitmapProcState_matrix.h"
98
99struct RepeatTileProcs {
100    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
101        SkASSERT(max < 65535);
102        return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1));
103    }
104    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
105        SkASSERT(max < 65535);
106        return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1));
107    }
108};
109
110static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
111    NoFilterProc_Scale<RepeatTileProcs, false>,
112    RepeatX_RepeatY_filter_scale,
113    NoFilterProc_Affine<RepeatTileProcs>,
114    RepeatX_RepeatY_filter_affine,
115};
116#endif
117
118#define MAKENAME(suffix)        GeneralXY ## suffix
119#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
120                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY;
121#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX
122#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY
123#define PREAMBLE_ARG_X          , tileProcX
124#define PREAMBLE_ARG_Y          , tileProcY
125#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
126#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
127#define EXTRACT_LOW_BITS(v, max) (((v * (max + 1)) >> 12) & 0xF)
128#include "SkBitmapProcState_matrix.h"
129
130struct GeneralTileProcs {
131    static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
132        return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
133    }
134    static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
135        return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
136    }
137};
138
139static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
140    NoFilterProc_Scale<GeneralTileProcs, false>,
141    GeneralXY_filter_scale,
142    NoFilterProc_Affine<GeneralTileProcs>,
143    GeneralXY_filter_affine,
144};
145
146///////////////////////////////////////////////////////////////////////////////
147
148static inline U16CPU fixed_clamp(SkFixed x) {
149    if (x < 0) {
150        x = 0;
151    }
152    if (x >> 16) {
153        x = 0xFFFF;
154    }
155    return x;
156}
157
158static inline U16CPU fixed_repeat(SkFixed x) {
159    return x & 0xFFFF;
160}
161
162static inline U16CPU fixed_mirror(SkFixed x) {
163    SkFixed s = SkLeftShift(x, 15) >> 31;
164    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
165    return (x ^ s) & 0xFFFF;
166}
167
168static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
169    if (SkShader::kClamp_TileMode == m) {
170        return fixed_clamp;
171    }
172    if (SkShader::kRepeat_TileMode == m) {
173        return fixed_repeat;
174    }
175    SkASSERT(SkShader::kMirror_TileMode == m);
176    return fixed_mirror;
177}
178
179static inline U16CPU int_clamp(int x, int n) {
180    if (x >= n) {
181        x = n - 1;
182    }
183    if (x < 0) {
184        x = 0;
185    }
186    return x;
187}
188
189static inline U16CPU int_repeat(int x, int n) {
190    return sk_int_mod(x, n);
191}
192
193static inline U16CPU int_mirror(int x, int n) {
194    x = sk_int_mod(x, 2 * n);
195    if (x >= n) {
196        x = n + ~(x - n);
197    }
198    return x;
199}
200
201#if 0
202static void test_int_tileprocs() {
203    for (int i = -8; i <= 8; i++) {
204        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
205    }
206}
207#endif
208
209static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
210    if (SkShader::kClamp_TileMode == tm)
211        return int_clamp;
212    if (SkShader::kRepeat_TileMode == tm)
213        return int_repeat;
214    SkASSERT(SkShader::kMirror_TileMode == tm);
215    return int_mirror;
216}
217
218//////////////////////////////////////////////////////////////////////////////
219
220void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
221    int i;
222
223    for (i = (count >> 2); i > 0; --i) {
224        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
225        fx += dx+dx;
226        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
227        fx += dx+dx;
228    }
229    count &= 3;
230
231    uint16_t* xx = (uint16_t*)dst;
232    for (i = count; i > 0; --i) {
233        *xx++ = SkToU16(fx >> 16); fx += dx;
234    }
235}
236
237void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
238    if (count & 1) {
239        SkASSERT((fx >> (16 + 14)) == 0);
240        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
241        fx += dx;
242    }
243    while ((count -= 2) >= 0) {
244        SkASSERT((fx >> (16 + 14)) == 0);
245        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
246        fx += dx;
247
248        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
249        fx += dx;
250    }
251}
252
253///////////////////////////////////////////////////////////////////////////////
// The translate-only ("trans") procs below store their output in the same
// layout as the SCALE procs, but are cheaper to compute. Since there is no
// scaling, no FILTER version is needed or provided.
256
257static void fill_sequential(uint16_t xptr[], int start, int count) {
258#if 1
259    if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
260        *xptr++ = start++;
261        count -= 1;
262    }
263    if (count > 3) {
264        uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
265        uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
266        uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
267        start += count & ~3;
268        int qcount = count >> 2;
269        do {
270            *xxptr++ = pattern0;
271            pattern0 += 0x40004;
272            *xxptr++ = pattern1;
273            pattern1 += 0x40004;
274        } while (--qcount != 0);
275        xptr = reinterpret_cast<uint16_t*>(xxptr);
276        count &= 3;
277    }
278    while (--count >= 0) {
279        *xptr++ = start++;
280    }
281#else
282    for (int i = 0; i < count; i++) {
283        *xptr++ = start++;
284    }
285#endif
286}
287
288static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
289                                   int x, int y) {
290    const SkBitmapProcStateAutoMapper mapper(s, x, y);
291    **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height());
292    *xy += 1;   // bump the ptr
293    // return our starting X position
294    return mapper.intX();
295}
296
297static void clampx_nofilter_trans(const SkBitmapProcState& s,
298                                  uint32_t xy[], int count, int x, int y) {
299    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
300
301    int xpos = nofilter_trans_preamble(s, &xy, x, y);
302    const int width = s.fPixmap.width();
303    if (1 == width) {
304        // all of the following X values must be 0
305        memset(xy, 0, count * sizeof(uint16_t));
306        return;
307    }
308
309    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
310    int n;
311
312    // fill before 0 as needed
313    if (xpos < 0) {
314        n = -xpos;
315        if (n > count) {
316            n = count;
317        }
318        memset(xptr, 0, n * sizeof(uint16_t));
319        count -= n;
320        if (0 == count) {
321            return;
322        }
323        xptr += n;
324        xpos = 0;
325    }
326
327    // fill in 0..width-1 if needed
328    if (xpos < width) {
329        n = width - xpos;
330        if (n > count) {
331            n = count;
332        }
333        fill_sequential(xptr, xpos, n);
334        count -= n;
335        if (0 == count) {
336            return;
337        }
338        xptr += n;
339    }
340
341    // fill the remaining with the max value
342    sk_memset16(xptr, width - 1, count);
343}
344
345static void repeatx_nofilter_trans(const SkBitmapProcState& s,
346                                   uint32_t xy[], int count, int x, int y) {
347    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
348
349    int xpos = nofilter_trans_preamble(s, &xy, x, y);
350    const int width = s.fPixmap.width();
351    if (1 == width) {
352        // all of the following X values must be 0
353        memset(xy, 0, count * sizeof(uint16_t));
354        return;
355    }
356
357    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
358    int start = sk_int_mod(xpos, width);
359    int n = width - start;
360    if (n > count) {
361        n = count;
362    }
363    fill_sequential(xptr, start, n);
364    xptr += n;
365    count -= n;
366
367    while (count >= width) {
368        fill_sequential(xptr, 0, width);
369        xptr += width;
370        count -= width;
371    }
372
373    if (count > 0) {
374        fill_sequential(xptr, 0, count);
375    }
376}
377
378static void fill_backwards(uint16_t xptr[], int pos, int count) {
379    for (int i = 0; i < count; i++) {
380        SkASSERT(pos >= 0);
381        xptr[i] = pos--;
382    }
383}
384
385static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
386                                   uint32_t xy[], int count, int x, int y) {
387    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
388
389    int xpos = nofilter_trans_preamble(s, &xy, x, y);
390    const int width = s.fPixmap.width();
391    if (1 == width) {
392        // all of the following X values must be 0
393        memset(xy, 0, count * sizeof(uint16_t));
394        return;
395    }
396
397    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
398    // need to know our start, and our initial phase (forward or backward)
399    bool forward;
400    int n;
401    int start = sk_int_mod(xpos, 2 * width);
402    if (start >= width) {
403        start = width + ~(start - width);
404        forward = false;
405        n = start + 1;  // [start .. 0]
406    } else {
407        forward = true;
408        n = width - start;  // [start .. width)
409    }
410    if (n > count) {
411        n = count;
412    }
413    if (forward) {
414        fill_sequential(xptr, start, n);
415    } else {
416        fill_backwards(xptr, start, n);
417    }
418    forward = !forward;
419    xptr += n;
420    count -= n;
421
422    while (count >= width) {
423        if (forward) {
424            fill_sequential(xptr, 0, width);
425        } else {
426            fill_backwards(xptr, width - 1, width);
427        }
428        forward = !forward;
429        xptr += width;
430        count -= width;
431    }
432
433    if (count > 0) {
434        if (forward) {
435            fill_sequential(xptr, 0, count);
436        } else {
437            fill_backwards(xptr, width - 1, count);
438        }
439    }
440}
441
442///////////////////////////////////////////////////////////////////////////////
443
444SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
445    SkASSERT((fInvType & SkMatrix::kPerspective_Mask) == 0);
446
447//    test_int_tileprocs();
448    // check for our special case when there is no scale/affine/perspective
449    if (trivial_matrix && kNone_SkFilterQuality == fFilterQuality) {
450        fIntTileProcY = choose_int_tile_proc(fTileModeY);
451        switch (fTileModeX) {
452            case SkShader::kClamp_TileMode:
453                return clampx_nofilter_trans;
454            case SkShader::kRepeat_TileMode:
455                return repeatx_nofilter_trans;
456            case SkShader::kMirror_TileMode:
457                return mirrorx_nofilter_trans;
458        }
459    }
460
461    int index = 0;
462    if (fFilterQuality != kNone_SkFilterQuality) {
463        index = 1;
464    }
465    if (fInvType & SkMatrix::kAffine_Mask) {
466        index += 2;
467    }
468
469    if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
470        // clamp gets special version of filterOne
471        fFilterOneX = SK_Fixed1;
472        fFilterOneY = SK_Fixed1;
473        return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
474    }
475
476    // all remaining procs use this form for filterOne
477    fFilterOneX = SK_Fixed1 / fPixmap.width();
478    fFilterOneY = SK_Fixed1 / fPixmap.height();
479
480    if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
481        return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
482    }
483
484    fTileProcX = choose_tile_proc(fTileModeX);
485    fTileProcY = choose_tile_proc(fTileModeY);
486    return GeneralXY_Procs[index];
487}
488