1fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com/* NEON optimized code (C) COPYRIGHT 2009 Motorola
2fce02aca62525c3041226501574f740f7ea3714bdigit@google.com *
3fce02aca62525c3041226501574f740f7ea3714bdigit@google.com * Use of this source code is governed by a BSD-style license that can be
4fce02aca62525c3041226501574f740f7ea3714bdigit@google.com * found in the LICENSE file.
5fce02aca62525c3041226501574f740f7ea3714bdigit@google.com */
6fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
7fce02aca62525c3041226501574f740f7ea3714bdigit@google.com#include "SkBitmapProcState.h"
8fce02aca62525c3041226501574f740f7ea3714bdigit@google.com#include "SkPerspIter.h"
9fce02aca62525c3041226501574f740f7ea3714bdigit@google.com#include "SkShader.h"
10fce02aca62525c3041226501574f740f7ea3714bdigit@google.com#include "SkUtilsArm.h"
11a8c09668f9e602f77422a344dfa4d13155c91fd3commit-bot@chromium.org#include "SkBitmapProcState_utils.h"
12fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
13a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#include <arm_neon.h>
14a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
// Tables of matrix procs, one per tiling mode. Their names match what
// MAKENAME(_Procs) expands to further down in this file, so they are
// presumably defined by the SkBitmapProcState_matrix_neon.h instantiations
// below -- TODO confirm against that header.
15fce02aca62525c3041226501574f740f7ea3714bdigit@google.comextern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
16fce02aca62525c3041226501574f740f7ea3714bdigit@google.comextern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
17fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
// Forward declarations of the decal helpers defined at the end of this file.
// They are declared ahead of the template includes so that included code can
// call them (presumably the CHECK_FOR_DECAL path -- TODO confirm).
18fce02aca62525c3041226501574f740f7ea3714bdigit@google.comstatic void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
19fce02aca62525c3041226501574f740f7ea3714bdigit@google.comstatic void decal_filter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
20fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
/**
 *  NEON form of the scalar clamp tiler
 *      TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
 *  applied to eight 16.16 fixed-point coordinates at once.
 *
 *  @param low   coordinates that produce result lanes 0..3
 *  @param high  coordinates that produce result lanes 4..7
 *  @param max   largest valid index; only its low 16 bits are used
 *  @return      eight 16-bit indices, each clamped to [0, max]
 */
static inline int16x8_t sbpsm_clamp_tile8(int32x4_t low, int32x4_t high, unsigned max) {
    // Un-zipping the 16-bit halves of both inputs gathers the upper half of
    // every 32-bit lane (i.e. fx >> 16) into val[1].
    int16x8_t res = vuzpq_s16(vreinterpretq_s16_s32(low),
                              vreinterpretq_s16_s32(high)).val[1];

    // Lower bound: negative coordinates clamp to 0 (needs a signed compare).
    res = vmaxq_s16(res, vdupq_n_s16(0));

    // Upper bound: every lane is now in [0, 0x7FFF], so it can safely be
    // compared as unsigned. A signed compare against vdupq_n_s16(max) would
    // see a negative bound whenever max > 0x7FFF and turn every lane negative;
    // the unsigned compare stays correct for any max up to 0xFFFF.
    const uint16x8_t clamped = vminq_u16(vreinterpretq_u16_s16(res),
                                         vdupq_n_u16(static_cast<uint16_t>(max)));

    return vreinterpretq_s16_u16(clamped);
}
34a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
/**
 *  Four-lane, 32-bit NEON form of the scalar clamp tiler
 *      TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
 *
 *  @param f    four 16.16 fixed-point coordinates
 *  @param max  largest valid index
 *  @return     (f >> 16) per lane, clamped to [0, max]
 */
static inline int32x4_t sbpsm_clamp_tile4(int32x4_t f, unsigned max) {
    const int32x4_t lowerBound = vdupq_n_s32(0);
    const int32x4_t upperBound = vdupq_n_s32(max);

    // Arithmetic shift keeps the sign, so negative coordinates stay negative
    // and are caught by the lower bound.
    const int32x4_t whole = vshrq_n_s32(f, 16);

    return vminq_s32(vmaxq_s32(whole, lowerBound), upperBound);
}
48a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
/**
 *  NEON form of the scalar low-bits extractor for clamp tiling
 *      TILEY_LOW_BITS(fy, max)         (((fy) >> 12) & 0xF)
 *
 *  Only the shift is performed. The "& 0xF" of the scalar form is left out on
 *  purpose: per the original author's note, the caller overwrites the bits
 *  outside the mask, so applying
 *      vandq_s32(ret, vdupq_n_s32(0xF))
 *  here would be redundant work.
 *
 *  @param fx  four 16.16 fixed-point coordinates
 *  @return    fx >> 12 per lane (arithmetic shift), unmasked
 */
static inline int32x4_t sbpsm_clamp_tile4_low_bits(int32x4_t fx) {
    return vshrq_n_s32(fx, 12);
}
62a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
/**
 *  NEON form of the scalar repeat tiler
 *      TILEX_PROCF(fx, max) (((fx)&0xFFFF)*((max)+1)>> 16)
 *  applied to eight 16.16 fixed-point coordinates at once.
 *
 *  NOTE(review): (max + 1) is narrowed to 16 bits by vdup_n_u16, so this
 *  relies on max < 0xFFFF -- confirm that callers guarantee it.
 *
 *  @param low   coordinates that produce result lanes 0..3
 *  @param high  coordinates that produce result lanes 4..7
 *  @param max   largest valid index
 *  @return      eight 16-bit tiled indices
 */
static inline int16x8_t sbpsm_repeat_tile8(int32x4_t low, int32x4_t high, unsigned max) {
    const uint16x4_t scale = vdup_n_u16(max+1);

    // val[0] of the un-zip gathers the lower 16 bits (the fractional part)
    // of every 32-bit lane.
    const uint16x8_t frac = vuzpq_u16(vreinterpretq_u16_s32(low),
                                      vreinterpretq_u16_s32(high)).val[0];

    // Plain widening 16x16 -> 32 multiply, not SkFixedMul.
    const uint32x4_t prodLow  = vmull_u16(vget_low_u16(frac), scale);
    const uint32x4_t prodHigh = vmull_u16(vget_high_u16(frac), scale);

    // val[1] of the un-zip keeps the upper 16 bits of each product (>> 16).
    const uint16x8_t index = vuzpq_u16(vreinterpretq_u16_u32(prodLow),
                                       vreinterpretq_u16_u32(prodHigh)).val[1];

    return vreinterpretq_s16_u16(index);
}
80a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
/**
 *  Four-lane, 32-bit NEON form of the scalar repeat tiler
 *      TILEX_PROCF(fx, max) (((fx)&0xFFFF)*((max)+1)>> 16)
 *
 *  NOTE(review): (max + 1) is narrowed to 16 bits by vdup_n_u16, so this
 *  relies on max < 0xFFFF -- confirm that callers guarantee it.
 *
 *  @param f    four 16.16 fixed-point coordinates
 *  @param max  largest valid index
 *  @return     four tiled indices, one per 32-bit lane
 */
static inline int32x4_t sbpsm_repeat_tile4(int32x4_t f, unsigned max) {
    // Narrowing keeps only the lower 16 bits of each lane (fx & 0xFFFF).
    const uint16x4_t frac = vmovn_u32(vreinterpretq_u32_s32(f));

    // Plain widening 16x16 -> 32 multiply, not SkFixedMul.
    const uint32x4_t scaled = vmull_u16(frac, vdup_n_u16(max+1));

    // Keep the upper 16 bits of each product.
    return vreinterpretq_s32_u32(vshrq_n_u32(scaled, 16));
}
97a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
/**
 *  NEON form of the scalar low-bits extractor for repeat tiling
 *      TILEX_LOW_BITS(fx, max)         ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
 *
 *  Only the multiply and the shift are performed. The "& 0xF" of the scalar
 *  form is left out on purpose: per the original author's note, the caller
 *  overwrites the bits outside the mask, so applying
 *      vandq_s32(ret, vdupq_n_s32(0xF))
 *  here would be redundant work.
 *
 *  NOTE(review): (max + 1) is narrowed to 16 bits by vdup_n_u16, so this
 *  relies on max < 0xFFFF -- confirm that callers guarantee it.
 *
 *  @param fx   four 16.16 fixed-point coordinates
 *  @param max  largest valid index
 *  @return     ((fx & 0xFFFF) * (max + 1)) >> 12 per lane, unmasked
 */
static inline int32x4_t sbpsm_repeat_tile4_low_bits(int32x4_t fx, unsigned max) {
    // Narrowing keeps only the lower 16 bits of each lane (fx & 0xFFFF).
    const uint16x4_t frac = vmovn_u32(vreinterpretq_u32_s32(fx));

    // Plain widening 16x16 -> 32 multiply, not SkFixedMul.
    const uint32x4_t scaled = vmull_u16(frac, vdup_n_u16(max + 1));

    // The shift is done on the signed reinterpretation of the product.
    return vshrq_n_s32(vreinterpretq_s32_u32(scaled), 12);
}
120a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
// ---------------------------------------------------------------------------
// Clamp tiling: parameter macros for SkBitmapProcState_matrix_neon.h.
// The header is included once per tiling mode and is presumably written in
// terms of these macros -- its contents are not visible from this file.
// ---------------------------------------------------------------------------
// MAKENAME pastes its argument between "ClampX_ClampY" and "_neon", e.g.
// MAKENAME(_Procs) -> ClampX_ClampY_Procs_neon.
121a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define MAKENAME(suffix)                ClampX_ClampY ## suffix ## _neon
// Scalar tilers: integer part of the 16.16 coordinate, clamped by SkClampMax.
122a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_PROCF(fx, max)            SkClampMax((fx) >> 16, max)
123a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_PROCF(fy, max)            SkClampMax((fy) >> 16, max)
// Eight-lane and four-lane NEON tilers; X and Y share the same helpers.
124a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_PROCF_NEON8(l, h, max)    sbpsm_clamp_tile8(l, h, max)
125a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_PROCF_NEON8(l, h, max)    sbpsm_clamp_tile8(l, h, max)
126a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_PROCF_NEON4(fx, max)      sbpsm_clamp_tile4(fx, max)
127a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_PROCF_NEON4(fy, max)      sbpsm_clamp_tile4(fy, max)
// Scalar low-bits extractors: bits 12..15 of the coordinate. "max" is unused.
128a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_LOW_BITS(fx, max)         (((fx) >> 12) & 0xF)
129a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_LOW_BITS(fy, max)         (((fy) >> 12) & 0xF)
// NEON low-bits extractors: "max" is dropped, and the helper does not apply
// the 0xF mask (see sbpsm_clamp_tile4_low_bits).
130a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_LOW_BITS_NEON4(fx, max)   sbpsm_clamp_tile4_low_bits(fx)
131a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_LOW_BITS_NEON4(fy, max)   sbpsm_clamp_tile4_low_bits(fy)
// Defined only for the clamp instantiation; presumably makes the header use
// the decal_*_scale_neon fast paths -- TODO confirm in the header.
132fce02aca62525c3041226501574f740f7ea3714bdigit@google.com#define CHECK_FOR_DECAL
133a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#include "SkBitmapProcState_matrix_neon.h"
134a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org
// ---------------------------------------------------------------------------
// Repeat tiling: parameter macros for a second inclusion of
// SkBitmapProcState_matrix_neon.h.
// NOTE(review): the macros below are defined again without an #undef in this
// file, so the header presumably #undefs them (and CHECK_FOR_DECAL) at its
// end -- confirm.
// ---------------------------------------------------------------------------
// MAKENAME pastes its argument between "RepeatX_RepeatY" and "_neon", e.g.
// MAKENAME(_Procs) -> RepeatX_RepeatY_Procs_neon.
135a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define MAKENAME(suffix)                RepeatX_RepeatY ## suffix ## _neon
// Scalar tilers: fractional part of the coordinate scaled by (max + 1),
// keeping the upper 16 bits of the product.
136a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_PROCF(fx, max)            SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
137a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_PROCF(fy, max)            SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Eight-lane and four-lane NEON tilers; X and Y share the same helpers.
138a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_PROCF_NEON8(l, h, max)    sbpsm_repeat_tile8(l, h, max)
139a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_PROCF_NEON8(l, h, max)    sbpsm_repeat_tile8(l, h, max)
140a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_PROCF_NEON4(fx, max)      sbpsm_repeat_tile4(fx, max)
141a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_PROCF_NEON4(fy, max)      sbpsm_repeat_tile4(fy, max)
// Scalar low-bits extractors: same product, shifted by 12 and masked to 4 bits.
142a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_LOW_BITS(fx, max)         ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
143a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_LOW_BITS(fy, max)         ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
// NEON low-bits extractors: the helper does not apply the 0xF mask
// (see sbpsm_repeat_tile4_low_bits).
144a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEX_LOW_BITS_NEON4(fx, max)   sbpsm_repeat_tile4_low_bits(fx, max)
145a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#define TILEY_LOW_BITS_NEON4(fy, max)   sbpsm_repeat_tile4_low_bits(fy, max)
// CHECK_FOR_DECAL is not defined for this instantiation.
146a96176dc0315d786c187bfa9be5dccf2f08feba2commit-bot@chromium.org#include "SkBitmapProcState_matrix_neon.h"
147fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
148fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
149fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
/**
 *  Writes `count` 16-bit values into the buffer at dst: the integer part
 *  (fx >> 16) of a 16.16 fixed-point coordinate that starts at fx and advances
 *  by dx per value. No clamping or tiling is applied here.
 *
 *  @param dst    output buffer, written as packed uint16_t values
 *  @param fx     starting coordinate (SkFixed, 16.16)
 *  @param dx     per-value step (SkFixed, 16.16)
 *  @param count  number of 16-bit values to produce
 */
void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
    if (count >= 8) {
        // One vector iteration covers eight consecutive coordinates.
        const SkFixed step8 = dx * 8;
        const int32x4_t vstep8 = vdupq_n_s32(step8);

        // first4 = {fx, fx+dx, fx+2dx, fx+3dx}; next4 is the following four.
        int32x4_t first4 = vdupq_n_s32(fx);
        first4 = vsetq_lane_s32(fx + dx, first4, 1);
        first4 = vsetq_lane_s32(fx + dx + dx, first4, 2);
        first4 = vsetq_lane_s32(fx + dx + dx + dx, first4, 3);
        int32x4_t next4 = vaddq_s32(first4, vdupq_n_s32(4 * dx));

        do {
            // Gather the upper 16 bits of all eight coordinates and store
            // them as one 128-bit vector.
            const int16x8_t packed = vuzpq_s16(vreinterpretq_s16_s32(first4),
                                               vreinterpretq_s16_s32(next4)).val[1];
            vst1q_u32(dst, vreinterpretq_u32_s16(packed));

            // Eight 16-bit results occupy only four uint32_t slots.
            dst += 4;

            // Advance to the next group of eight; fx is kept in step so the
            // scalar tail below resumes at the right coordinate.
            first4 = vaddq_s32(first4, vstep8);
            next4 = vaddq_s32(next4, vstep8);
            fx += step8;
            count -= 8;
        } while (count >= 8);
    }

    // Scalar tail for the remaining (fewer than eight) values.
    uint16_t* out16 = reinterpret_cast<uint16_t*>(dst);
    int remaining = count;
    while (remaining > 0) {
        *out16++ = SkToU16(fx >> 16);
        fx += dx;
        --remaining;
    }
}
184fce02aca62525c3041226501574f740f7ea3714bdigit@google.com
/**
 *  Writes `count` packed 32-bit values into dst, one per coordinate, for a
 *  16.16 fixed-point coordinate that starts at fx and advances by dx.
 *  Each value is
 *      ((fx >> 12) << 14) | ((fx >> 16) + 1)
 *  i.e. fx >> 12 placed from bit 14 upwards, OR-ed with the integer part
 *  plus one. (Presumably the x0 / 4-bit fraction / x1 layout consumed by the
 *  filtering sample procs -- TODO confirm against the consumer.)
 *
 *  @param dst    output buffer, one uint32_t per coordinate
 *  @param fx     starting coordinate (SkFixed, 16.16)
 *  @param dx     per-value step (SkFixed, 16.16)
 *  @param count  number of values to produce
 */
void decal_filter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
    if (count >= 8) {
        // One vector iteration covers eight consecutive coordinates.
        const SkFixed step8 = dx * 8;
        const int32x4_t vstep8 = vdupq_n_s32(step8);
        const int32x4_t vone = vdupq_n_s32(1);

        // fxA = {fx, fx+dx, fx+2dx, fx+3dx}; fxB is the following four.
        int32x4_t fxA = vdupq_n_s32(fx);
        fxA = vsetq_lane_s32(fx + dx, fxA, 1);
        fxA = vsetq_lane_s32(fx + dx + dx, fxA, 2);
        fxA = vsetq_lane_s32(fx + dx + dx + dx, fxA, 3);
        int32x4_t fxB = vaddq_s32(fxA, vdupq_n_s32(4 * dx));

        do {
            // Same packing as the scalar tail, four lanes at a time.
            const int32x4_t outA = vorrq_s32(
                    vshlq_n_s32(vshrq_n_s32(fxA, 12), 14),
                    vaddq_s32(vshrq_n_s32(fxA, 16), vone));
            const int32x4_t outB = vorrq_s32(
                    vshlq_n_s32(vshrq_n_s32(fxB, 12), 14),
                    vaddq_s32(vshrq_n_s32(fxB, 16), vone));

            vst1q_u32(dst, vreinterpretq_u32_s32(outA));
            vst1q_u32(dst + 4, vreinterpretq_u32_s32(outB));
            dst += 8;

            // fx is kept in step so the scalar tail resumes at the right
            // coordinate.
            fxA = vaddq_s32(fxA, vstep8);
            fxB = vaddq_s32(fxB, vstep8);
            fx += step8;
            count -= 8;
        } while (count >= 8);
    }

    // Scalar tail: one value if the remainder is odd, then the rest in pairs.
    if (count & 1) {
        SkASSERT((fx >> (16 + 14)) == 0);
        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
        fx += dx;
    }
    for (int pairs = count / 2; pairs > 0; --pairs) {
        // Only the first coordinate of each pair is range-checked.
        SkASSERT((fx >> (16 + 14)) == 0);
        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
        fx += dx;

        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
        fx += dx;
    }
}
235