1/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include <arm_neon.h>
9
// Names of the two matrix procs defined in this file. MAKENAME is not defined
// here; it has to be provided before this file is included.
#define SCALE_NOFILTER_NAME     MAKENAME(_nofilter_scale)
#define SCALE_FILTER_NAME       MAKENAME(_filter_scale)

// Names of the coordinate-packing helpers: scalar X/Y versions and the
// four-lane NEON (X4/Y4) versions.
#define PACK_FILTER_X_NAME  MAKENAME(_pack_filter_x)
#define PACK_FILTER_Y_NAME  MAKENAME(_pack_filter_y)
#define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4)
#define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4)

// Optional hooks. When PREAMBLE has not been defined beforehand, the preamble
// statement and the extra parameter/argument hooks all expand to nothing.
#ifndef PREAMBLE
    #define PREAMBLE(state)
    #define PREAMBLE_PARAM_X
    #define PREAMBLE_PARAM_Y
    #define PREAMBLE_ARG_X
    #define PREAMBLE_ARG_Y
#endif
25
/**
 *  Matrix proc for scale/translate matrices without filtering.
 *
 *  Output layout: one 32-bit Y coordinate (the result of TILEY_PROCF) stored
 *  in xy[0], followed by `count` X coordinates (the results of TILEX_PROCF)
 *  stored as consecutive 16-bit values.
 *
 *  @param s      proc state supplying the pixmap size and the inverse X step
 *  @param xy     output buffer
 *  @param count  number of X coordinates to produce
 *  @param x, y   device position handed to SkBitmapProcStateAutoMapper
 */
static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
                                uint32_t xy[], int count, int x, int y) {
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);

    PREAMBLE(s);

    const unsigned maxX = s.fPixmap.width() - 1;
    SkFractionalInt fx;
    {
        // The single Y entry comes first; the mapper also yields the start X.
        const SkBitmapProcStateAutoMapper mapper(s, x, y);
        const unsigned maxY = s.fPixmap.height() - 1;
        xy[0] = TILEY_PROCF(mapper.fixedY(), maxY);
        ++xy;
        fx = mapper.fractionalIntX();
    }

    if (0 == maxX) {
        // The pixmap is one pixel wide, so every X coordinate is 0.
        memset(xy, 0, count * sizeof(uint16_t));
        return;
    }

    const SkFractionalInt dx = s.fInvSxFractionalInt;

#ifdef CHECK_FOR_DECAL
    // Skip the tile proc entirely when the decal helper says it is safe.
    const SkFixed fixedFx = SkFractionalIntToFixed(fx);
    const SkFixed fixedDx = SkFractionalIntToFixed(dx);
    if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
        decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count);
        return;
    }
#endif

    if (count >= 8) {
        const SkFractionalInt dx4 = dx + dx + dx + dx;
        const SkFractionalInt dx8 = dx4 + dx4;

        // Per-lane advance between the low and high halves, and per iteration.
        const int32x4_t step4 = vdupq_n_s32(SkFractionalIntToFixed(dx4));
        const int32x4_t step8 = vdupq_n_s32(SkFractionalIntToFixed(dx8));

        // Low half holds fx, fx+dx, fx+2dx, fx+3dx; high half is 4*dx further.
        int32x4_t lowHalf = vdupq_n_s32(SkFractionalIntToFixed(fx));
        lowHalf = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx), lowHalf, 1);
        lowHalf = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx), lowHalf, 2);
        lowHalf = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx + dx), lowHalf, 3);
        int32x4_t highHalf = vaddq_s32(lowHalf, step4);

        int16_t* out16 = (int16_t*)xy;

        // count >= 8 is guaranteed on entry, so a do/while is equivalent.
        do {
            const int16x8_t tiled = TILEX_PROCF_NEON8(lowHalf, highHalf, maxX);
            vst1q_s16(out16, tiled);

            lowHalf = vaddq_s32(lowHalf, step8);
            highHalf = vaddq_s32(highHalf, step8);
            out16 += 8;
            count -= 8;
            // Keep the scalar accumulator in step for the tail loop below.
            fx += dx8;
        } while (count >= 8);

        xy = (uint32_t*)out16;
    }

    // Scalar tail: whatever is left after the 8-wide loop.
    uint16_t* out = (uint16_t*)xy;
    while (count-- > 0) {
        *out++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
        fx += dx;
    }
}
106
107static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
108                                          SkFixed one PREAMBLE_PARAM_Y) {
109    unsigned i = TILEY_PROCF(f, max);
110    i = (i << 4) | EXTRACT_LOW_BITS(f, max);
111    return (i << 14) | (TILEY_PROCF((f + one), max));
112}
113
114static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
115                                          SkFixed one PREAMBLE_PARAM_X) {
116    unsigned i = TILEX_PROCF(f, max);
117    i = (i << 4) | EXTRACT_LOW_BITS(f, max);
118    return (i << 14) | (TILEX_PROCF((f + one), max));
119}
120
/**
 *  Four-lane NEON counterpart of PACK_FILTER_X_NAME.
 *
 *  For each lane of `f` the result is
 *      (TILEX(f) << 18) | (LOW_BITS(f) << 14) | TILEX(f + one)
 *  using TILEX_PROCF_NEON4 and EXTRACT_LOW_BITS_NEON4.
 *
 *  The lane-wise add uses the vaddq_s32 intrinsic rather than the `+` operator
 *  so the code does not depend on compiler vector extensions, matching the
 *  other NEON additions in this file.
 */
static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
                                          SkFixed one PREAMBLE_PARAM_X) {
    const int32x4_t wide_one = vdupq_n_s32(one);

    // First coordinate of each pair.
    const int32x4_t first = TILEX_PROCF_NEON4(f, max);

    // vsliq_n_s32 shifts `first` left by 4 and inserts it above the low
    // 4 bits of `packed`, i.e. (first << 4) | low_bits.
    int32x4_t packed = EXTRACT_LOW_BITS_NEON4(f, max);
    packed = vsliq_n_s32(packed, first, 4);

    // Second coordinate of each pair goes into the low 14 bits.
    const int32x4_t second = TILEX_PROCF_NEON4(vaddq_s32(f, wide_one), max);
    return vorrq_s32(vshlq_n_s32(packed, 14), second);
}
141
/**
 *  Four-lane NEON counterpart of PACK_FILTER_Y_NAME.
 *
 *  For each lane of `f` the result is
 *      (TILEY(f) << 18) | (LOW_BITS(f) << 14) | TILEY(f + one)
 *  using TILEY_PROCF_NEON4 and EXTRACT_LOW_BITS_NEON4.
 *
 *  The extra-parameter hook is PREAMBLE_PARAM_Y, matching the scalar Y helper;
 *  it expands to nothing unless the includer defines PREAMBLE. The lane-wise
 *  add uses the vaddq_s32 intrinsic rather than the `+` operator so the code
 *  does not depend on compiler vector extensions.
 */
static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
                                          SkFixed one PREAMBLE_PARAM_Y) {
    const int32x4_t wide_one = vdupq_n_s32(one);

    // First coordinate of each pair.
    const int32x4_t first = TILEY_PROCF_NEON4(f, max);

    // vsliq_n_s32 shifts `first` left by 4 and inserts it above the low
    // 4 bits of `packed`, i.e. (first << 4) | low_bits.
    int32x4_t packed = EXTRACT_LOW_BITS_NEON4(f, max);
    packed = vsliq_n_s32(packed, first, 4);

    // Second coordinate of each pair goes into the low 14 bits.
    const int32x4_t second = TILEY_PROCF_NEON4(vaddq_s32(f, wide_one), max);
    return vorrq_s32(vshlq_n_s32(packed, 14), second);
}
162
/**
 *  Matrix proc for scale/translate matrices with filtering.
 *
 *  Output layout: xy[0] receives the packed Y pair from PACK_FILTER_Y_NAME,
 *  followed by `count` 32-bit packed X pairs (PACK_FILTER_X_NAME, or its
 *  four-lane NEON version for groups of four).
 *
 *  @param s      proc state supplying the pixmap size, the inverse X step and
 *                the filter "one" offsets
 *  @param xy     output buffer
 *  @param count  number of X entries to produce
 *  @param x, y   device position handed to SkBitmapProcStateAutoMapper
 */
static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
                              uint32_t xy[], int count, int x, int y) {
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);
    SkASSERT(s.fInvKy == 0);

    PREAMBLE(s);

    const unsigned maxX = s.fPixmap.width() - 1;
    const SkFixed one = s.fFilterOneX;
    const SkFractionalInt dx = s.fInvSxFractionalInt;
    SkFractionalInt fx;

    {
        const SkBitmapProcStateAutoMapper mapper(s, x, y);
        const SkFixed fy = mapper.fixedY();
        const unsigned maxY = s.fPixmap.height() - 1;
        // Both Y values are computed once, up front.
        *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
        // The mapper also provides the starting X.
        fx = mapper.fractionalIntX();
    }

#ifdef CHECK_FOR_DECAL
    // Skip the tile proc entirely when the decal helper says it is safe.
    const SkFixed fixedFx = SkFractionalIntToFixed(fx);
    const SkFixed fixedDx = SkFractionalIntToFixed(dx);
    if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
        decal_filter_scale_neon(xy, fixedFx, fixedDx, count);
        return;
    }
#endif

    if (count >= 4) {
        const SkFractionalInt dx4 = dx + dx + dx + dx;
        // Loop-invariant per-iteration advance, built once.
        const int32x4_t wide_dx4 = vdupq_n_s32(SkFractionalIntToFixed(dx4));

        // Lanes hold fx, fx+dx, fx+2dx, fx+3dx.
        int32x4_t wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
        wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx), wide_fx, 1);
        wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx), wide_fx, 2);
        wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx + dx + dx + dx), wide_fx, 3);

        while (count >= 4) {
            const int32x4_t res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
            vst1q_u32(xy, vreinterpretq_u32_s32(res));

            // Use the intrinsic instead of the `+=` vector-extension operator.
            wide_fx = vaddq_s32(wide_fx, wide_dx4);
            // Keep the scalar accumulator in step for the tail loop below.
            fx += dx4;
            xy += 4;
            count -= 4;
        }
    }

    // Scalar tail: whatever is left after the 4-wide loop.
    while (--count >= 0) {
        *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
        fx += dx;
    }
}
226
// Table of the matrix procs defined in this file: entry 0 is the no-filter
// scale proc, entry 1 is the filtering scale proc.
const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
    SCALE_NOFILTER_NAME,
    SCALE_FILTER_NAME,
};
231
// Clean up every macro this file consumed or defined, so nothing leaks past
// the end of the file.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef EXTRACT_LOW_BITS_NEON4

#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
#ifdef CHECK_FOR_DECAL
    #undef CHECK_FOR_DECAL
#endif

#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME

// The packing-helper name macros are defined at the top of this file and
// expand through MAKENAME, which is gone at this point; drop them as well.
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

#undef EXTRACT_LOW_BITS
255