SkBlitMask_opts_arm_neon.cpp revision 910f694aefb0b671dd8522a9afe9b6be645701c1
1
2#include "SkBlitMask.h"
3#include "SkColor_opts_neon.h"
4
5static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
6                              const void* SK_RESTRICT maskPtr, size_t maskRB,
7                              SkColor, int width, int height) {
8    SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
9    const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
10
11    maskRB -= width;
12    dstRB -= (width << 2);
13    do {
14        int w = width;
15        while (w >= 8) {
16            uint8x8_t vmask = vld1_u8(mask);
17            uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
18            uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
19
20            vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
21            vdevice.val[NEON_A] += vmask;
22
23            vst4_u8((uint8_t*)device, vdevice);
24
25            mask += 8;
26            device += 8;
27            w -= 8;
28        }
29        while (w-- > 0) {
30            unsigned aa = *mask++;
31            *device = (aa << SK_A32_SHIFT)
32                        + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
33            device += 1;
34        };
35        device = (uint32_t*)((char*)device + dstRB);
36        mask += maskRB;
37    } while (--height != 0);
38}
39
40template <bool isColor>
41static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
42                                     const void* SK_RESTRICT maskPtr, size_t maskRB,
43                                     SkColor color, int width, int height) {
44    SkPMColor pmc = SkPreMultiplyColor(color);
45    SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
46    const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
47    uint8x8x4_t vpmc;
48
49    maskRB -= width;
50    dstRB -= (width << 2);
51
52    if (width >= 8) {
53        vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
54        vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
55        vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
56        vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
57    }
58    do {
59        int w = width;
60        while (w >= 8) {
61            uint8x8_t vmask = vld1_u8(mask);
62            uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
63            if (isColor) {
64                vscale = vsubw_u8(vdupq_n_u16(256),
65                            SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
66            } else {
67                vscale = vsubw_u8(vdupq_n_u16(256), vmask);
68            }
69            uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
70
71            vdev.val[NEON_A] =   SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
72                               + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
73            vdev.val[NEON_R] =   SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
74                               + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
75            vdev.val[NEON_G] =   SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
76                               + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
77            vdev.val[NEON_B] =   SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
78                               + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
79
80            vst4_u8((uint8_t*)device, vdev);
81
82            mask += 8;
83            device += 8;
84            w -= 8;
85        }
86
87        while (w--) {
88            unsigned aa = *mask++;
89            if (isColor) {
90                *device = SkBlendARGB32(pmc, *device, aa);
91            } else {
92                *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
93                            + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
94            }
95            device += 1;
96        };
97
98        device = (uint32_t*)((char*)device + dstRB);
99        mask += maskRB;
100
101    } while (--height != 0);
102}
103
104static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
105                               const void* SK_RESTRICT maskPtr, size_t maskRB,
106                               SkColor color, int width, int height) {
107    D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height);
108}
109
110static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
111                              const void* SK_RESTRICT maskPtr, size_t maskRB,
112                              SkColor color, int width, int height) {
113    D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height);
114}
115
116SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
117    if (SK_ColorBLACK == color) {
118        return D32_A8_Black_neon;
119    } else if (0xFF == SkColorGetA(color)) {
120        return D32_A8_Opaque_neon;
121    } else {
122        return D32_A8_Color_neon;
123    }
124}
125
126////////////////////////////////////////////////////////////////////////////////
127
128void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
129                                        SkColor color, int width,
130                                        SkPMColor opaqueDst) {
131    int colR = SkColorGetR(color);
132    int colG = SkColorGetG(color);
133    int colB = SkColorGetB(color);
134
135    uint8x8_t vcolR, vcolG, vcolB;
136    uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB;
137
138    if (width >= 8) {
139        vcolR = vdup_n_u8(colR);
140        vcolG = vdup_n_u8(colG);
141        vcolB = vdup_n_u8(colB);
142        vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
143        vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
144        vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
145        vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
146    }
147
148    while (width >= 8) {
149        uint8x8x4_t vdst;
150        uint16x8_t vmask;
151        uint16x8_t vmaskR, vmaskG, vmaskB;
152        uint8x8_t vsel_trans, vsel_opq;
153
154        vdst = vld4_u8((uint8_t*)dst);
155        vmask = vld1q_u16(src);
156
157        // Prepare compare masks
158        vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
159        vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
160
161        // Get all the color masks on 5 bits
162        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
163        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
164                             SK_B16_BITS + SK_R16_BITS + 1);
165        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
166
167        // Upscale to 0..32
168        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
169        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
170        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
171
172        vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
173        vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
174
175        vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
176        vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
177        vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
178
179        vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
180        vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
181        vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
182
183        vst4_u8((uint8_t*)dst, vdst);
184
185        dst += 8;
186        src += 8;
187        width -= 8;
188    }
189
190    // Leftovers
191    for (int i = 0; i < width; i++) {
192        dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
193                                    opaqueDst);
194    }
195}
196
197void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
198                                   SkColor color, int width, SkPMColor) {
199    int colA = SkColorGetA(color);
200    int colR = SkColorGetR(color);
201    int colG = SkColorGetG(color);
202    int colB = SkColorGetB(color);
203
204    colA = SkAlpha255To256(colA);
205
206    uint8x8_t vcolR, vcolG, vcolB;
207    uint16x8_t vcolA;
208
209    if (width >= 8) {
210        vcolA = vdupq_n_u16(colA);
211        vcolR = vdup_n_u8(colR);
212        vcolG = vdup_n_u8(colG);
213        vcolB = vdup_n_u8(colB);
214    }
215
216    while (width >= 8) {
217        uint8x8x4_t vdst;
218        uint16x8_t vmask;
219        uint16x8_t vmaskR, vmaskG, vmaskB;
220
221        vdst = vld4_u8((uint8_t*)dst);
222        vmask = vld1q_u16(src);
223
224        // Get all the color masks on 5 bits
225        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
226        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
227                             SK_B16_BITS + SK_R16_BITS + 1);
228        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
229
230        // Upscale to 0..32
231        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
232        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
233        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
234
235        vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
236        vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
237        vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
238
239        vdst.val[NEON_A] = vdup_n_u8(0xFF);
240        vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
241        vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
242        vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
243
244        vst4_u8((uint8_t*)dst, vdst);
245
246        dst += 8;
247        src += 8;
248        width -= 8;
249    }
250
251    for (int i = 0; i < width; i++) {
252        dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
253    }
254}
255