10a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger#ifndef SkColor_opts_neon_DEFINED
20a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger#define SkColor_opts_neon_DEFINED
30a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
40a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger#include "SkTypes.h"
5910f694aefb0b671dd8522a9afe9b6be645701c1Derek Sollenberger#include "SkColorPriv.h"
60a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
70a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger#include <arm_neon.h>
80a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
/* Position of each color channel inside a uint8x8x4_t pixel group: the
 * channel's bit offset within a 32-bit pixel (SK_x32_SHIFT) divided by 8.
 * The helpers in this header use these values to index the val[] array.
 */
#define NEON_A (SK_A32_SHIFT / 8)
#define NEON_R (SK_R32_SHIFT / 8)
#define NEON_G (SK_G32_SHIFT / 8)
#define NEON_B (SK_B32_SHIFT / 8)
130a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
/* Widens eight 8-bit alpha values to 16 bits and adds one to every lane,
 * so an input range of [0, 255] becomes [1, 256].
 */
static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
    const uint16x8_t one = vdupq_n_u16(1);

    return vaddw_u8(one, alpha);
}
170a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
/* Scales eight 8-bit color values. Each lane of color is widened to 16 bits
 * and multiplied by the matching lane of scale; the product is then shifted
 * right by 8 and narrowed back to 8 bits.
 */
static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
    const uint16x8_t product = vmulq_u16(vmovl_u8(color), scale);

    return vshrn_n_u16(product, 8);
}
210a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
/* Applies SkAlphaMul_neon8 with the same scale to the alpha, red, green and
 * blue channels of a group of eight pixels and returns the scaled group.
 */
static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
    const uint8x8_t a = SkAlphaMul_neon8(color.val[NEON_A], scale);
    const uint8x8_t r = SkAlphaMul_neon8(color.val[NEON_R], scale);
    const uint8x8_t g = SkAlphaMul_neon8(color.val[NEON_G], scale);
    const uint8x8_t b = SkAlphaMul_neon8(color.val[NEON_B], scale);

    uint8x8x4_t scaled;
    scaled.val[NEON_A] = a;
    scaled.val[NEON_R] = r;
    scaled.val[NEON_G] = g;
    scaled.val[NEON_B] = b;
    return scaled;
}
320a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger
/* Expands eight RGB565 pixels (R, G, B from the high bits to the low bits)
 * into eight SkPMColor pixels with alpha set to 0xFF. Every SkPMColor channel
 * ordering is supported, and the result is computed in the exact same way as
 * SkPixel16ToPixel32.
 */
static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) {
    /* Move each field to the bottom of its 16-bit lane. For green, the left
     * shift discards the red bits and the right shift discards the blue bits.
     */
    const uint16x8_t r16 = vshrq_n_u16(vsrc, SK_R16_SHIFT);
    const uint16x8_t g16 = vshrq_n_u16(vshlq_n_u16(vsrc, SK_R16_BITS),
                                       SK_R16_BITS + SK_B16_BITS);
    const uint16x8_t b16 = vandq_u16(vsrc, vdupq_n_u16(SK_B16_MASK));

    const uint8x8_t r = vmovn_u16(r16);
    const uint8x8_t g = vmovn_u16(g16);
    const uint8x8_t b = vmovn_u16(b16);

    /* Stretch each field to a full byte: shift it to the top of the byte and
     * fill the vacated low bits with a copy of its most significant bits.
     */
    uint8x8x4_t pixels;
    pixels.val[NEON_A] = vdup_n_u8(0xFF);
    pixels.val[NEON_R] = vorr_u8(vshl_n_u8(r, 8 - SK_R16_BITS),
                                 vshr_n_u8(r, 2 * SK_R16_BITS - 8));
    pixels.val[NEON_G] = vorr_u8(vshl_n_u8(g, 8 - SK_G16_BITS),
                                 vshr_n_u8(g, 2 * SK_G16_BITS - 8));
    pixels.val[NEON_B] = vorr_u8(vshl_n_u8(b, 8 - SK_B16_BITS),
                                 vshr_n_u8(b, 2 * SK_B16_BITS - 8));
    return pixels;
}
53910f694aefb0b671dd8522a9afe9b6be645701c1Derek Sollenberger
/* Packs eight SkPMColor pixels (every channel ordering is supported) into
 * eight RGB565 pixels (R, G, B from the high bits to the low bits), in the
 * exact same way as SkPixel32ToPixel16.
 */
static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) {
    /* Put every 8-bit channel in the upper byte of a 16-bit lane. */
    const uint16x8_t r_hi = vshll_n_u8(vsrc.val[NEON_R], 8);
    const uint16x8_t g_hi = vshll_n_u8(vsrc.val[NEON_G], 8);
    const uint16x8_t b_hi = vshll_n_u8(vsrc.val[NEON_B], 8);

    /* Shift-right-and-insert leaves the bits above the insertion point
     * untouched, so red keeps its top SK_R16_BITS bits when green is inserted
     * below them, and blue is then inserted below red and green.
     */
    const uint16x8_t rg = vsriq_n_u16(r_hi, g_hi, SK_R16_BITS);

    return vsriq_n_u16(rg, b_hi, SK_R16_BITS + SK_G16_BITS);
}
68910f694aefb0b671dd8522a9afe9b6be645701c1Derek Sollenberger
/* Blends eight values of a single channel in the exact same way as
 * SkBlend32. Per lane the result is the low 8 bits of
 *     dst + (((src - dst) * scale) >> 5)
 * evaluated with signed 16-bit arithmetic.
 * NOTE(review): the shift by 5 suggests scale is expected in [0, 32];
 * confirm against the callers.
 */
static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
    const int16x8_t s = vreinterpretq_s16_u16(vmovl_u8(src));
    const int16x8_t d = vreinterpretq_s16_u16(vmovl_u8(dst));
    const int16x8_t weight = vreinterpretq_s16_u16(scale);

    /* Signed lanes are needed because src - dst may be negative. */
    const int16x8_t weighted = vmulq_s16(vsubq_s16(s, d), weight);
    const int16x8_t blended = vaddq_s16(d, vshrq_n_s16(weighted, 5));

    return vmovn_u16(vreinterpretq_u16_s16(blended));
}
84910f694aefb0b671dd8522a9afe9b6be645701c1Derek Sollenberger
/* Interpolates between two packed 32-bit pixels, one byte at a time. Each
 * result byte is the low 8 bits of
 *     dst + (((src - dst) * srcScale) >> 8)
 * srcScale must be at most 256 (checked with SkASSERT).
 */
static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
                                                 unsigned srcScale) {
    SkASSERT(srcScale <= 256);

    const int16x8_t weight = vdupq_n_s16(srcScale);

    /* Each pixel is duplicated into both 32-bit halves of a 64-bit vector and
     * its bytes widened to signed 16-bit lanes; only the low half is returned.
     */
    const int16x8_t s = vreinterpretq_s16_u16(
            vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
    const int16x8_t d = vreinterpretq_s16_u16(
            vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));

    /* The 16-bit product can wrap for large differences and scales. This is
     * harmless: only the low 8 bits of the final sum are kept, and those are
     * unaffected by the wrap.
     */
    const int16x8_t weighted = vmulq_s16(vsubq_s16(s, d), weight);
    const int16x8_t blended = vaddq_s16(d, vshrq_n_s16(weighted, 8));

    const uint8x8_t bytes = vmovn_u16(vreinterpretq_u16_s16(blended));

    return vget_lane_u32(vreinterpret_u32_u8(bytes), 0);
}
106910f694aefb0b671dd8522a9afe9b6be645701c1Derek Sollenberger
/* Interpolates between two packed 32-bit pixels using a weight of at most
 * 255 (checked with SkASSERT). The weight is converted with SkAlpha255To256
 * and the work is delegated to SkFourByteInterp256_neon.
 */
static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
                                              U8CPU srcWeight) {
    SkASSERT(srcWeight <= 255);

    return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
}
113910f694aefb0b671dd8522a9afe9b6be645701c1Derek Sollenberger
1140a657bbc2c6fc9daf699942e023050536d5ec95fDerek Sollenberger#endif /* #ifndef SkColor_opts_neon_DEFINED */
115