1/*
2 * Copyright 2008 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8// The copyright below was added in 2009, but I see no record of moto contributions...?
9
10/* NEON optimized code (C) COPYRIGHT 2009 Motorola
11 *
12 * Use of this source code is governed by a BSD-style license that can be
13 * found in the LICENSE file.
14 */
15
16#include "SkBitmapProcState.h"
17#include "SkPerspIter.h"
18#include "SkShader.h"
19#include "SkUtils.h"
20#include "SkUtilsArm.h"
21#include "SkBitmapProcState_utils.h"
22
23/*  returns 0...(n-1) given any x (positive or negative).
24
25    As an example, if n (which is always positive) is 5...
26
27          x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
28    returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
29 */
30static inline int sk_int_mod(int x, int n) {
31    SkASSERT(n > 0);
32    if ((unsigned)x >= (unsigned)n) {
33        if (x < 0) {
34            x = n + ~(~x % n);
35        } else {
36            x = x % n;
37        }
38    }
39    return x;
40}
41
42void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
43void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
44
45#include "SkBitmapProcState_matrix_template.h"
46
47///////////////////////////////////////////////////////////////////////////////
48
49// Compile neon code paths if needed
50#if !SK_ARM_NEON_IS_NONE
51
52// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
53extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
54extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
55
56#endif // !SK_ARM_NEON_IS_NONE
57
58// Compile non-neon code path if needed
59#if !SK_ARM_NEON_IS_ALWAYS
// Macro parameters for the SkBitmapProcState_matrix.h include below, set up for
// clamp tiling on both axes. The ClampX_ClampY_filter_* procs named in the
// table further down are not defined in this file; presumably the include
// generates them through MAKENAME -- NOTE(review): header not visible here.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
// Tile index: shift off the low 16 bits of the fixed-point coordinate, then
// pass the result and max to SkClampMax.
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
// Low bits: bits 12..15 of the coordinate (a 4-bit value); max is unused.
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Only the clamp variant defines this before the include (see the "decal"
// note on the proc table below).
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"
67
68struct ClampTileProcs {
69    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
70        return SkClampMax(fx >> 16, max);
71    }
72    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
73        return SkClampMax(fy >> 16, max);
74    }
75};
76
77// Referenced in opts_check_x86.cpp
78void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
79                                  int count, int x, int y) {
80    return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
81}
82void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
83                                  int count, int x, int y) {
84    return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
85}
86
// Matrix procs for clamp tiling on both axes. chooseMatrixProc() indexes this
// table with: +1 when filtering, +2 for an affine matrix, +4 for perspective.
static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
    // only clamp lives in the right coord space to check for decal
    ClampX_ClampY_nofilter_scale,           // 0: scale,       no filter
    ClampX_ClampY_filter_scale,             // 1: scale,       filter
    ClampX_ClampY_nofilter_affine,          // 2: affine,      no filter
    ClampX_ClampY_filter_affine,            // 3: affine,      filter
    NoFilterProc_Persp<ClampTileProcs>,     // 4: perspective, no filter
    ClampX_ClampY_filter_persp              // 5: perspective, filter
};
96
// Macro parameters for a second include of SkBitmapProcState_matrix.h, this
// time for repeat tiling on both axes. The macros are redefined here without
// an #undef in this file; presumably the header undefines them at its end --
// NOTE(review): header not visible here.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
// Tile index: keep the low 16 bits of the coordinate, multiply by (max + 1),
// and apply SK_USHIFT16 to the product.
#define TILEX_PROCF(fx, max)    SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1))
// Low bits: bits 12..15 of that same unsigned product.
#define TILEX_LOW_BITS(fx, max) (((unsigned)((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((unsigned)((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
103
104struct RepeatTileProcs {
105    static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
106        SkASSERT(max < 65535);
107        return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1));
108    }
109    static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
110        SkASSERT(max < 65535);
111        return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1));
112    }
113};
114
// Matrix procs for repeat tiling on both axes. chooseMatrixProc() indexes this
// table with: +1 when filtering, +2 for an affine matrix, +4 for perspective.
static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
    NoFilterProc_Scale<RepeatTileProcs, false>,     // 0: scale,       no filter
    RepeatX_RepeatY_filter_scale,                   // 1: scale,       filter
    NoFilterProc_Affine<RepeatTileProcs>,           // 2: affine,      no filter
    RepeatX_RepeatY_filter_affine,                  // 3: affine,      filter
    NoFilterProc_Persp<RepeatTileProcs>,            // 4: perspective, no filter
    RepeatX_RepeatY_filter_persp                    // 5: perspective, filter
};
123#endif
124
// Macro parameters for the general-case include of SkBitmapProcState_matrix.h.
// Here the tiling behaviour is not baked into the macros; it is read at run
// time from the function pointers stored on the state (fTileProcX/Y and
// fTileLowBitsProcX/Y, which chooseMatrixProc() assigns).
#define MAKENAME(suffix)        GeneralXY ## suffix
// PREAMBLE copies the four function pointers into locals. The "(void) name"
// after each one presumably silences unused-variable warnings in generated
// procs that use only some of them -- NOTE(review): confirm against the header.
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Extra parameter / argument lists carrying the per-axis function pointers.
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
// Tile index: run the coordinate through the tile proc, multiply the result by
// (max + 1), and apply SK_USHIFT16 to the product.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// Low bits: delegated to the low-bits proc, which receives (max + 1) as its
// second argument.
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
139
140struct GeneralTileProcs {
141    static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
142        return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
143    }
144    static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
145        return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
146    }
147};
148
// Matrix procs for every tile-mode combination that is neither clamp/clamp nor
// repeat/repeat. chooseMatrixProc() indexes this table with: +1 when
// filtering, +2 for an affine matrix, +4 for perspective.
static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
    NoFilterProc_Scale<GeneralTileProcs, false>,    // 0: scale,       no filter
    GeneralXY_filter_scale,                         // 1: scale,       filter
    NoFilterProc_Affine<GeneralTileProcs>,          // 2: affine,      no filter
    GeneralXY_filter_affine,                        // 3: affine,      filter
    NoFilterProc_Persp<GeneralTileProcs>,           // 4: perspective, no filter
    GeneralXY_filter_persp                          // 5: perspective, filter
};
157
158///////////////////////////////////////////////////////////////////////////////
159
160static inline U16CPU fixed_clamp(SkFixed x) {
161    if (x < 0) {
162        x = 0;
163    }
164    if (x >> 16) {
165        x = 0xFFFF;
166    }
167    return x;
168}
169
170static inline U16CPU fixed_repeat(SkFixed x) {
171    return x & 0xFFFF;
172}
173
174// Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
175// See http://code.google.com/p/skia/issues/detail?id=472
176#if defined(_MSC_VER) && (_MSC_VER >= 1600)
177#pragma optimize("", off)
178#endif
179
180static inline U16CPU fixed_mirror(SkFixed x) {
181    SkFixed s = SkLeftShift(x, 15) >> 31;
182    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
183    return (x ^ s) & 0xFFFF;
184}
185
186#if defined(_MSC_VER) && (_MSC_VER >= 1600)
187#pragma optimize("", on)
188#endif
189
190static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
191    if (SkShader::kClamp_TileMode == m) {
192        return fixed_clamp;
193    }
194    if (SkShader::kRepeat_TileMode == m) {
195        return fixed_repeat;
196    }
197    SkASSERT(SkShader::kMirror_TileMode == m);
198    return fixed_mirror;
199}
200
201static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
202    return (x >> 12) & 0xF;
203}
204
205static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
206    return ((x * scale) >> 12) & 0xF;
207}
208
209static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
210    if (SkShader::kClamp_TileMode == m) {
211        return fixed_clamp_lowbits;
212    } else {
213        SkASSERT(SkShader::kMirror_TileMode == m ||
214                 SkShader::kRepeat_TileMode == m);
215        // mirror and repeat have the same behavior for the low bits.
216        return fixed_repeat_or_mirrow_lowbits;
217    }
218}
219
220static inline U16CPU int_clamp(int x, int n) {
221    if (x >= n) {
222        x = n - 1;
223    }
224    if (x < 0) {
225        x = 0;
226    }
227    return x;
228}
229
230static inline U16CPU int_repeat(int x, int n) {
231    return sk_int_mod(x, n);
232}
233
234static inline U16CPU int_mirror(int x, int n) {
235    x = sk_int_mod(x, 2 * n);
236    if (x >= n) {
237        x = n + ~(x - n);
238    }
239    return x;
240}
241
242#if 0
// Debug helper, compiled out by the surrounding #if 0: prints
// int_mirror(i, 3) through SkDebugf for every i in [-8, 8]. No active caller
// is visible in this file.
static void test_int_tileprocs() {
    for (int i = -8; i <= 8; i++) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    }
}
248#endif
249
250static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
251    if (SkShader::kClamp_TileMode == tm)
252        return int_clamp;
253    if (SkShader::kRepeat_TileMode == tm)
254        return int_repeat;
255    SkASSERT(SkShader::kMirror_TileMode == tm);
256    return int_mirror;
257}
258
259//////////////////////////////////////////////////////////////////////////////
260
261void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
262    int i;
263
264    for (i = (count >> 2); i > 0; --i) {
265        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
266        fx += dx+dx;
267        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
268        fx += dx+dx;
269    }
270    count &= 3;
271
272    uint16_t* xx = (uint16_t*)dst;
273    for (i = count; i > 0; --i) {
274        *xx++ = SkToU16(fx >> 16); fx += dx;
275    }
276}
277
278void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
279    if (count & 1) {
280        SkASSERT((fx >> (16 + 14)) == 0);
281        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
282        fx += dx;
283    }
284    while ((count -= 2) >= 0) {
285        SkASSERT((fx >> (16 + 14)) == 0);
286        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
287        fx += dx;
288
289        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
290        fx += dx;
291    }
292}
293
294///////////////////////////////////////////////////////////////////////////////
295// stores the same as SCALE, but is cheaper to compute. Also since there is no
296// scale, we don't need/have a FILTER version
297
298static void fill_sequential(uint16_t xptr[], int start, int count) {
299#if 1
300    if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
301        *xptr++ = start++;
302        count -= 1;
303    }
304    if (count > 3) {
305        uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
306        uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
307        uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
308        start += count & ~3;
309        int qcount = count >> 2;
310        do {
311            *xxptr++ = pattern0;
312            pattern0 += 0x40004;
313            *xxptr++ = pattern1;
314            pattern1 += 0x40004;
315        } while (--qcount != 0);
316        xptr = reinterpret_cast<uint16_t*>(xxptr);
317        count &= 3;
318    }
319    while (--count >= 0) {
320        *xptr++ = start++;
321    }
322#else
323    for (int i = 0; i < count; i++) {
324        *xptr++ = start++;
325    }
326#endif
327}
328
329static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
330                                   int x, int y) {
331    const SkBitmapProcStateAutoMapper mapper(s, x, y);
332    **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height());
333    *xy += 1;   // bump the ptr
334    // return our starting X position
335    return mapper.intX();
336}
337
338static void clampx_nofilter_trans(const SkBitmapProcState& s,
339                                  uint32_t xy[], int count, int x, int y) {
340    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
341
342    int xpos = nofilter_trans_preamble(s, &xy, x, y);
343    const int width = s.fPixmap.width();
344    if (1 == width) {
345        // all of the following X values must be 0
346        memset(xy, 0, count * sizeof(uint16_t));
347        return;
348    }
349
350    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
351    int n;
352
353    // fill before 0 as needed
354    if (xpos < 0) {
355        n = -xpos;
356        if (n > count) {
357            n = count;
358        }
359        memset(xptr, 0, n * sizeof(uint16_t));
360        count -= n;
361        if (0 == count) {
362            return;
363        }
364        xptr += n;
365        xpos = 0;
366    }
367
368    // fill in 0..width-1 if needed
369    if (xpos < width) {
370        n = width - xpos;
371        if (n > count) {
372            n = count;
373        }
374        fill_sequential(xptr, xpos, n);
375        count -= n;
376        if (0 == count) {
377            return;
378        }
379        xptr += n;
380    }
381
382    // fill the remaining with the max value
383    sk_memset16(xptr, width - 1, count);
384}
385
386static void repeatx_nofilter_trans(const SkBitmapProcState& s,
387                                   uint32_t xy[], int count, int x, int y) {
388    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
389
390    int xpos = nofilter_trans_preamble(s, &xy, x, y);
391    const int width = s.fPixmap.width();
392    if (1 == width) {
393        // all of the following X values must be 0
394        memset(xy, 0, count * sizeof(uint16_t));
395        return;
396    }
397
398    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
399    int start = sk_int_mod(xpos, width);
400    int n = width - start;
401    if (n > count) {
402        n = count;
403    }
404    fill_sequential(xptr, start, n);
405    xptr += n;
406    count -= n;
407
408    while (count >= width) {
409        fill_sequential(xptr, 0, width);
410        xptr += width;
411        count -= width;
412    }
413
414    if (count > 0) {
415        fill_sequential(xptr, 0, count);
416    }
417}
418
419static void fill_backwards(uint16_t xptr[], int pos, int count) {
420    for (int i = 0; i < count; i++) {
421        SkASSERT(pos >= 0);
422        xptr[i] = pos--;
423    }
424}
425
426static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
427                                   uint32_t xy[], int count, int x, int y) {
428    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
429
430    int xpos = nofilter_trans_preamble(s, &xy, x, y);
431    const int width = s.fPixmap.width();
432    if (1 == width) {
433        // all of the following X values must be 0
434        memset(xy, 0, count * sizeof(uint16_t));
435        return;
436    }
437
438    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
439    // need to know our start, and our initial phase (forward or backward)
440    bool forward;
441    int n;
442    int start = sk_int_mod(xpos, 2 * width);
443    if (start >= width) {
444        start = width + ~(start - width);
445        forward = false;
446        n = start + 1;  // [start .. 0]
447    } else {
448        forward = true;
449        n = width - start;  // [start .. width)
450    }
451    if (n > count) {
452        n = count;
453    }
454    if (forward) {
455        fill_sequential(xptr, start, n);
456    } else {
457        fill_backwards(xptr, start, n);
458    }
459    forward = !forward;
460    xptr += n;
461    count -= n;
462
463    while (count >= width) {
464        if (forward) {
465            fill_sequential(xptr, 0, width);
466        } else {
467            fill_backwards(xptr, width - 1, width);
468        }
469        forward = !forward;
470        xptr += width;
471        count -= width;
472    }
473
474    if (count > 0) {
475        if (forward) {
476            fill_sequential(xptr, 0, count);
477        } else {
478            fill_backwards(xptr, width - 1, count);
479        }
480    }
481}
482
483///////////////////////////////////////////////////////////////////////////////
484
485SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
486//    test_int_tileprocs();
487    // check for our special case when there is no scale/affine/perspective
488    if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) {
489        fIntTileProcY = choose_int_tile_proc(fTileModeY);
490        switch (fTileModeX) {
491            case SkShader::kClamp_TileMode:
492                return clampx_nofilter_trans;
493            case SkShader::kRepeat_TileMode:
494                return repeatx_nofilter_trans;
495            case SkShader::kMirror_TileMode:
496                return mirrorx_nofilter_trans;
497        }
498    }
499
500    int index = 0;
501    if (fFilterLevel != kNone_SkFilterQuality) {
502        index = 1;
503    }
504    if (fInvType & SkMatrix::kPerspective_Mask) {
505        index += 4;
506    } else if (fInvType & SkMatrix::kAffine_Mask) {
507        index += 2;
508    }
509
510    if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
511        // clamp gets special version of filterOne
512        fFilterOneX = SK_Fixed1;
513        fFilterOneY = SK_Fixed1;
514        return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
515    }
516
517    // all remaining procs use this form for filterOne
518    fFilterOneX = SK_Fixed1 / fPixmap.width();
519    fFilterOneY = SK_Fixed1 / fPixmap.height();
520
521    if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
522        return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
523    }
524
525    fTileProcX = choose_tile_proc(fTileModeX);
526    fTileProcY = choose_tile_proc(fTileModeY);
527    fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
528    fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
529    return GeneralXY_Procs[index];
530}
531