1/*
2 * Copyright 2006 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkColorData_DEFINED
9#define SkColorData_DEFINED
10
// Turn this on for extra debug checking when blending onto 565.
// It is defined automatically whenever SK_DEBUG is defined.
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif
15
16#include "SkColor.h"
17#include "SkColorPriv.h"
18
19//////////////////////////////////////////////////////////////////////////////
20
// Asserts (through SkASSERT) that x has no bits set outside the low 8 bits.
#define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
22
/*
 *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
 *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
 *
 *  For easier compatibility with Skia's GPU backend, we further restrict these
 *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take endianness
 *  into account.
 *
 *  Here we enforce this constraint.
 */

// Bit shift of each channel for the BGRA memory-byte-order. The values are
// mirrored between the SK_CPU_BENDIAN configuration and the default one.
#ifdef SK_CPU_BENDIAN
    #define SK_BGRA_B32_SHIFT   24
    #define SK_BGRA_G32_SHIFT   16
    #define SK_BGRA_R32_SHIFT   8
    #define SK_BGRA_A32_SHIFT   0
#else
    #define SK_BGRA_B32_SHIFT   0
    #define SK_BGRA_G32_SHIFT   8
    #define SK_BGRA_R32_SHIFT   16
    #define SK_BGRA_A32_SHIFT   24
#endif
46
// The two layout flags are mutually exclusive; defining both is a build error.
#if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
    #error "can't define PMCOLOR to be RGBA and BGRA"
#endif
50
// True when all four SK_*32_SHIFT values equal their SK_RGBA_*32_SHIFT
// counterparts (the SK_RGBA_* values are not defined in the visible part of
// this header; presumably they come from an included header).
#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
    (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_RGBA_B32_SHIFT)

// True when all four SK_*32_SHIFT values equal the SK_BGRA_*32_SHIFT values.
#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
    (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_BGRA_B32_SHIFT)
62
63
// Per-channel byte index, obtained by dividing the channel's bit shift by 8.
#define SK_A_INDEX  (SK_A32_SHIFT/8)
#define SK_R_INDEX  (SK_R32_SHIFT/8)
#define SK_G_INDEX  (SK_G32_SHIFT/8)
#define SK_B_INDEX  (SK_B32_SHIFT/8)
68
// An explicitly requested RGBA layout must agree with the SK_*32_SHIFT values.
#if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
    #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
#endif

// An explicitly requested BGRA layout must agree with the SK_*32_SHIFT values.
#if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
    #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
#endif

#if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
    // deduce which to define from the _SHIFT defines; any shift combination
    // that is neither RGBA nor BGRA is rejected

    #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
        #define SK_PMCOLOR_IS_RGBA
    #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
        #define SK_PMCOLOR_IS_BGRA
    #else
        #error "need 32bit packing to be either RGBA or BGRA"
    #endif
#endif
88
// hide these now that we're done: the LOCAL_* helpers are removed so they are
// not visible to code that follows
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
92
93//////////////////////////////////////////////////////////////////////////////
94
95// Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
96// pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
97// no need to pass in the colortype to this function.
98static inline uint32_t SkSwizzle_RB(uint32_t c) {
99    static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
100
101    unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
102    unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
103    return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
104}
105
106static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
107    SkASSERT_IS_BYTE(a);
108    SkASSERT_IS_BYTE(r);
109    SkASSERT_IS_BYTE(g);
110    SkASSERT_IS_BYTE(b);
111    return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
112           (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
113}
114
115static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
116    SkASSERT_IS_BYTE(a);
117    SkASSERT_IS_BYTE(r);
118    SkASSERT_IS_BYTE(g);
119    SkASSERT_IS_BYTE(b);
120    return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
121           (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
122}
123
124static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
125#ifdef SK_PMCOLOR_IS_RGBA
126    return c;
127#else
128    return SkSwizzle_RB(c);
129#endif
130}
131
132static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
133#ifdef SK_PMCOLOR_IS_BGRA
134    return c;
135#else
136    return SkSwizzle_RB(c);
137#endif
138}
139
140//////////////////////////////////////////////////////////////////////////////
141
///@{
/** Luminance weights for red, green and blue; the three values sum to 1.
    See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
#define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
#define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
#define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
///@}

///@{
/** A float value which specifies this channel's contribution to luminance.
    These are aliases of the BT.709 coefficients defined just before them. */
#define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
#define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
#define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
///@}
155
156/** Computes the luminance from the given r, g, and b in accordance with
157    SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
158*/
159static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
160    //The following is
161    //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
162    //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
163    return (r * 54 + g * 183 + b * 19) >> 8;
164}
165
166/**
167 *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
168 *  This is slightly more accurate than SkAlpha255To256.
169 */
170static inline unsigned Sk255To256(U8CPU value) {
171    SkASSERT(SkToU8(value) == value);
172    return value + (value >> 7);
173}
174
175/** Calculates 256 - (value * alpha256) / 255 in range [0,256],
176 *  for [0,255] value and [0,256] alpha256.
177 */
178static inline U16CPU SkAlphaMulInv256(U16CPU value, U16CPU alpha256) {
179    unsigned prod = 0xFFFF - value * alpha256;
180    return (prod + (prod >> 8)) >> 8;
181}
182
183//  The caller may want negative values, so keep all params signed (int)
184//  so we don't accidentally slip into unsigned math and lose the sign
185//  extension when we shift (in SkAlphaMul)
186static inline int SkAlphaBlend(int src, int dst, int scale256) {
187    SkASSERT((unsigned)scale256 <= 256);
188    return dst + SkAlphaMul(src - dst, scale256);
189}
190
191/**
192 *  Returns (src * alpha + dst * (255 - alpha)) / 255
193 *
194 *  This is more accurate than SkAlphaBlend, but slightly slower
195 */
196static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
197    SkASSERT((int16_t)src == src);
198    SkASSERT((int16_t)dst == dst);
199    SkASSERT((uint8_t)alpha == alpha);
200
201    int prod = (src - dst) * alpha + 128;
202    prod = (prod + (prod >> 8)) >> 8;
203    return dst + prod;
204}
205
// Assert that a component does not exceed its SK_*16_MASK. The argument is
// cast to unsigned first, so a negative value is compared as a large unsigned one.
#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
209
210static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
211    SkASSERT(r <= SK_R16_MASK);
212    SkASSERT(g <= SK_G16_MASK);
213    SkASSERT(b <= SK_B16_MASK);
214
215    return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
216}
217
// Each component's mask shifted to that component's position in a packed 16-bit pixel.
#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
221
222/** Expand the 16bit color into a 32bit value that can be scaled all at once
223    by a value up to 32. Used in conjunction with SkCompact_rgb_16.
224*/
225static inline uint32_t SkExpand_rgb_16(U16CPU c) {
226    SkASSERT(c == (uint16_t)c);
227
228    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
229}
230
231/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
232    color value. The computation yields only 16bits of valid data, but we claim
233    to return 32bits, so that the compiler won't generate extra instructions to
234    "clean" the top 16bits. However, the top 16 can contain garbage, so it is
235    up to the caller to safely ignore them.
236*/
237static inline U16CPU SkCompact_rgb_16(uint32_t c) {
238    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
239}
240
241/** Scale the 16bit color value by the 0..256 scale parameter.
242    The computation yields only 16bits of valid data, but we claim
243    to return 32bits, so that the compiler won't generate extra instructions to
244    "clean" the top 16bits.
245*/
246static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
247    return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
248}
249
// this helper explicitly returns a clean 16bit value (but slower): it casts
// the result of SkAlphaMulRGB16 to uint16_t
#define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
252
253/** Blend pre-expanded RGB32 with 16bit color value by the 0..32 scale parameter.
254    The computation yields only 16bits of valid data, but we claim to return
255    32bits, so that the compiler won't generate extra instructions to "clean"
256    the top 16bits.
257*/
258static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) {
259    uint32_t dst_expand = SkExpand_rgb_16(dst) * scale;
260    return SkCompact_rgb_16((src_expand + dst_expand) >> 5);
261}
262
263/** Blend src and dst 16bit colors by the 0..256 scale parameter.
264    The computation yields only 16bits of valid data, but we claim
265    to return 32bits, so that the compiler won't generate extra instructions to
266    "clean" the top 16bits.
267*/
268static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
269    SkASSERT((unsigned)srcScale <= 256);
270
271    srcScale >>= 3;
272
273    uint32_t src32 = SkExpand_rgb_16(src);
274    uint32_t dst32 = SkExpand_rgb_16(dst);
275    return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
276}
277
278static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
279                                int srcScale, int count) {
280    SkASSERT(count > 0);
281    SkASSERT((unsigned)srcScale <= 256);
282
283    srcScale >>= 3;
284
285    do {
286        uint32_t src32 = SkExpand_rgb_16(*src++);
287        uint32_t dst32 = SkExpand_rgb_16(*dst);
288        *dst++ = static_cast<uint16_t>(
289            SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
290    } while (--count > 0);
291}
292
#ifdef SK_DEBUG
    // Debug build: a real function, so that each component sum can be checked
    // against its mask before the packed values are added.
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
        SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
        SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);

        const U16CPU sum = a + b;
        return sum;
    }
#else
    // Non-debug build: a plain addition without any checking.
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif
304
305///////////////////////////////////////////////////////////////////////////////
306
// SkPMColorAssert(color): with SK_DEBUG, asserts that the alpha component
// passes SkA32Assert and that the R, G and B components do not exceed alpha.
// The argument is evaluated once into a local. Without SK_DEBUG the macro
// expands to nothing.
#ifdef SK_DEBUG
    #define SkPMColorAssert(color_value)                                    \
        do {                                                                \
            SkPMColor pm_color_value = (color_value);                       \
            uint32_t alpha_color_value = SkGetPackedA32(pm_color_value);    \
            SkA32Assert(alpha_color_value);                                 \
            SkASSERT(SkGetPackedR32(pm_color_value) <= alpha_color_value);  \
            SkASSERT(SkGetPackedG32(pm_color_value) <= alpha_color_value);  \
            SkASSERT(SkGetPackedB32(pm_color_value) <= alpha_color_value);  \
        } while (false)
#else
    #define SkPMColorAssert(c)
#endif
320
321static inline bool SkPMColorValid(SkPMColor c) {
322    auto a = SkGetPackedA32(c);
323    bool valid = a <= SK_A32_MASK
324              && SkGetPackedR32(c) <= a
325              && SkGetPackedG32(c) <= a
326              && SkGetPackedB32(c) <= a;
327    if (valid) {
328        SkPMColorAssert(c);  // Make sure we're consistent when it counts.
329    }
330    return valid;
331}
332
333static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
334    return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
335                              SkGetPackedG32(c), SkGetPackedB32(c));
336}
337
338static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
339    return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
340                              SkGetPackedG32(c), SkGetPackedB32(c));
341}
342
343/**
344 * Abstract 4-byte interpolation, implemented on top of SkPMColor
345 * utility functions. Third parameter controls blending of the first two:
346 *   (src, dst, 0) returns dst
347 *   (src, dst, 0xFF) returns src
348 *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
349 */
350static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
351                                         unsigned scale) {
352    unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
353    unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
354    unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
355    unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
356
357    return SkPackARGB32(a, r, g, b);
358}
359
360/**
361 * Abstract 4-byte interpolation, implemented on top of SkPMColor
362 * utility functions. Third parameter controls blending of the first two:
363 *   (src, dst, 0) returns dst
364 *   (src, dst, 0xFF) returns src
365 */
366static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
367                                         U8CPU srcWeight) {
368    unsigned scale = SkAlpha255To256(srcWeight);
369    return SkFourByteInterp256(src, dst, scale);
370}
371
/**
 * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
 *
 * Writes bytes 3 and 1 of color to *ag and bytes 2 and 0 to *rb, each byte
 * in the low half of its own 16-bit lane.
 */
static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    const uint32_t kLowBytes = 0x00FF00FF;
    const uint32_t shifted = color >> 8;
    *ag = shifted & kLowBytes;
    *rb = color & kLowBytes;
}
380
/**
 * 0xAARRGGBB -> 0x00AA00GG00RR00BB
 * (note, ARGB -> AGRB)
 *
 * Same spreading as the two-register form, packed into a single 64-bit value.
 */
static inline uint64_t SkSplay(uint32_t color) {
    const uint32_t kLowBytes = 0x00FF00FF;
    const uint64_t ag = (color >> 8) & kLowBytes;  // 0x0000000000AA00GG
    const uint64_t rb = color & kLowBytes;         // 0x0000000000RR00BB
    return (ag << 32) | rb;                        // 0x00AA00GG00RR00BB
}
392
/**
 * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
 *
 * Keeps the high byte of each 16-bit lane of ag and rb and interleaves them.
 */
static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    const uint32_t kHighBytes = 0xFF00FF00;
    const uint32_t ag_bits = ag & kHighBytes;
    const uint32_t rb_bits = (rb & kHighBytes) >> 8;
    return ag_bits | rb_bits;
}
400
401/**
402 * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
403 * (note, AGRB -> ARGB)
404 */
405static inline uint32_t SkUnsplay(uint64_t agrb) {
406    const uint32_t mask = 0xFF00FF00;
407    return SkPMColor(
408        ((agrb & mask) >> 8) |   // 0x00RR00BB
409        ((agrb >> 32) & mask));  // 0xAARRGGBB
410}
411
412static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
413    SkASSERT(scale <= 256);
414
415    // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
416    uint32_t src_ag, src_rb, dst_ag, dst_rb;
417    SkSplay(src, &src_ag, &src_rb);
418    SkSplay(dst, &dst_ag, &dst_rb);
419
420    const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
421    const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
422
423    return SkUnsplay(ret_ag, ret_rb);
424}
425
426static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
427    SkASSERT(scale <= 256);
428    // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
429    return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
430}
431
432// TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
433
434/**
435 * Same as SkFourByteInterp256, but faster.
436 */
437static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
438    // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
439    if (sizeof(void*) == 4) {
440        return SkFastFourByteInterp256_32(src, dst, scale);
441    } else {
442        return SkFastFourByteInterp256_64(src, dst, scale);
443    }
444}
445
446/**
447 * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
448 * srcWeight scaling to [0, 256].
449 */
450static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
451                                             SkPMColor dst,
452                                             U8CPU srcWeight) {
453    SkASSERT(srcWeight <= 255);
454    // scale = srcWeight + (srcWeight >> 7) is more accurate than
455    // scale = srcWeight + 1, but 7% slower
456    return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
457}
458
459/**
460 * Interpolates between colors src and dst using [0,256] scale.
461 */
462static inline SkPMColor SkPMLerp(SkPMColor src, SkPMColor dst, unsigned scale) {
463    return SkFastFourByteInterp256(src, dst, scale);
464}
465
466static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
467    SkASSERT((unsigned)aa <= 255);
468
469    unsigned src_scale = SkAlpha255To256(aa);
470    unsigned dst_scale = SkAlphaMulInv256(SkGetPackedA32(src), src_scale);
471
472    const uint32_t mask = 0xFF00FF;
473
474    uint32_t src_rb = (src & mask) * src_scale;
475    uint32_t src_ag = ((src >> 8) & mask) * src_scale;
476
477    uint32_t dst_rb = (dst & mask) * dst_scale;
478    uint32_t dst_ag = ((dst >> 8) & mask) * dst_scale;
479
480    return (((src_rb + dst_rb) >> 8) & mask) | ((src_ag + dst_ag) & ~mask);
481}
482
483////////////////////////////////////////////////////////////////////////////////////////////
484// Convert a 32bit pixel to a 16bit pixel (no dither)
485
// Narrow a component from SK_x32_BITS to SK_x16_BITS by shifting out the
// difference in low bits (truncation, no rounding).
#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
489
#ifdef SK_DEBUG
    // Debug build: functions, so the input can be range-checked before it is
    // narrowed with the matching _MACRO.
    static inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        const unsigned narrowed = SkR32ToR16_MACRO(r);
        return narrowed;
    }
    static inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        const unsigned narrowed = SkG32ToG16_MACRO(g);
        return narrowed;
    }
    static inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        const unsigned narrowed = SkB32ToB16_MACRO(b);
        return narrowed;
    }
#else
    // Non-debug build: direct aliases of the _MACRO forms.
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif
508
// Extract one component from a packed 32-bit pixel at 16-bit-format width:
// shift so that the component's top SK_x16_BITS bits land at bit 0, then mask.
#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
512
513static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
514    unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
515    unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
516    unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
517    return r | g | b;
518}
519
520static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
521    return  (SkR32ToR16(r) << SK_R16_SHIFT) |
522            (SkG32ToG16(g) << SK_G16_SHIFT) |
523            (SkB32ToB16(b) << SK_B16_SHIFT);
524}
525
// Same conversion as SkPixel32ToPixel16, with the result passed through SkToU16.
#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
527
528/////////////////////////////////////////////////////////////////////////////////////////
529// Fast dither from 32->16
530
// Evaluates to 1 when the low bits of x and y differ and to 0 otherwise,
// i.e. a checkerboard pattern over (x, y).
#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
532
533static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
534    r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
535    g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
536    b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
537
538    return SkPackRGB16(r, g, b);
539}
540
541static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
542    return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
543}
544
545/*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
546    It is now suitable for combining with a scaled expanded_rgb_16 color
547    as in SkSrcOver32To16().
548    We must do this 565 high-bit replication, in order for the subsequent add
549    to saturate properly (and not overflow). If we take the 8 bits as is, it is
550    possible to overflow.
551*/
552static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
553    unsigned sr = SkPacked32ToR16(c);
554    unsigned sg = SkPacked32ToG16(c);
555    unsigned sb = SkPacked32ToB16(c);
556
557    sr = (sr << 5) | sr;
558    sg = (sg << 5) | (sg >> 1);
559    sb = (sb << 5) | sb;
560    return (sr << 11) | (sg << 21) | (sb << 0);
561}
562
563/*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
564    (with dirt in the high 16bits, so caller beware).
565*/
566static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
567    unsigned sr = SkGetPackedR32(src);
568    unsigned sg = SkGetPackedG32(src);
569    unsigned sb = SkGetPackedB32(src);
570
571    unsigned dr = SkGetPackedR16(dst);
572    unsigned dg = SkGetPackedG16(dst);
573    unsigned db = SkGetPackedB16(dst);
574
575    unsigned isa = 255 - SkGetPackedA32(src);
576
577    dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
578    dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
579    db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
580
581    return SkPackRGB16(dr, dg, db);
582}
583
584static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
585    SkASSERT(src == SkToU16(src));
586
587    unsigned    r = SkPacked16ToR32(src);
588    unsigned    g = SkPacked16ToG32(src);
589    unsigned    b = SkPacked16ToB32(src);
590
591    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
592    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
593    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
594
595    return SkPackARGB32(0xFF, r, g, b);
596}
597
598// similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
599static inline SkColor SkPixel16ToColor(U16CPU src) {
600    SkASSERT(src == SkToU16(src));
601
602    unsigned    r = SkPacked16ToR32(src);
603    unsigned    g = SkPacked16ToG32(src);
604    unsigned    b = SkPacked16ToB32(src);
605
606    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
607    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
608    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
609
610    return SkColorSetRGB(r, g, b);
611}
612
613///////////////////////////////////////////////////////////////////////////////
614
// 16-bit pixel holding four 4-bit components (see the SK_*4444_SHIFT values).
typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a): bit position of each 4-bit component.
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4
622
// Reduce a component to 4 bits by shifting out its low 4 bits (truncation).
#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)
627
628static inline U8CPU SkReplicateNibble(unsigned nib) {
629    SkASSERT(nib <= 0xF);
630    return (nib << 4) | nib;
631}
632
// Expand a 4-bit component to 8 bits with SkReplicateNibble.
#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

// Extract one 4-bit component from a packed 4444 pixel.
#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

// Extract one component from a packed 4444 pixel and expand it to 8 bits.
#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
647
#ifdef SK_DEBUG
// Debug build: asserts that alpha fits in 4 bits and that none of the R, G
// and B components of the packed 4444 pixel exceeds alpha.
static inline void SkPMColor16Assert(U16CPU c) {
    const unsigned alpha = SkGetPackedA4444(c);
    const unsigned red   = SkGetPackedR4444(c);
    const unsigned green = SkGetPackedG4444(c);
    const unsigned blue  = SkGetPackedB4444(c);

    SkASSERT(alpha <= 0xF);
    SkASSERT(red <= alpha);
    SkASSERT(green <= alpha);
    SkASSERT(blue <= alpha);
}
#else
// Non-debug build: expands to nothing.
#define SkPMColor16Assert(c)
#endif
663
664static inline unsigned SkAlpha15To16(unsigned a) {
665    SkASSERT(a <= 0xF);
666    return a + (a >> 3);
667}
668
#ifdef SK_DEBUG
    // Debug build: multiplies value by scale/16 after asserting scale is in 0..16.
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);
        const int product = value * scale;
        return product >> 4;
    }
#else
    // Non-debug build: the same computation, without the range check.
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif
677
678static inline unsigned SkR4444ToR565(unsigned r) {
679    SkASSERT(r <= 0xF);
680    return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
681}
682
683static inline unsigned SkG4444ToG565(unsigned g) {
684    SkASSERT(g <= 0xF);
685    return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
686}
687
688static inline unsigned SkB4444ToB565(unsigned b) {
689    SkASSERT(b <= 0xF);
690    return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
691}
692
693static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
694                                         unsigned g, unsigned b) {
695    SkASSERT(a <= 0xF);
696    SkASSERT(r <= a);
697    SkASSERT(g <= a);
698    SkASSERT(b <= a);
699
700    return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
701                         (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
702}
703
704static inline SkPMColor16 SkAlphaMulQ4(SkPMColor16 c, int scale) {
705    SkASSERT(scale <= 16);
706
707    const unsigned mask = 0xF0F;    //gMask_0F0F;
708
709#if 0
710    unsigned rb = ((c & mask) * scale) >> 4;
711    unsigned ag = ((c >> 4) & mask) * scale;
712    return (rb & mask) | (ag & ~mask);
713#else
714    unsigned expanded_c = (c & mask) | ((c & (mask << 4)) << 12);
715    unsigned scaled_c = (expanded_c * scale) >> 4;
716    return (scaled_c & mask) | ((scaled_c >> 12) & (mask << 4));
717#endif
718}
719
720/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
721    once by a value up to 16.
722*/
723static inline uint32_t SkExpand_4444(U16CPU c) {
724    SkASSERT(c == (uint16_t)c);
725
726    const unsigned mask = 0xF0F;    //gMask_0F0F;
727    return (c & mask) | ((c & ~mask) << 12);
728}
729
730static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
731    unsigned sa = SkGetPackedA4444(s);
732    unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
733    unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
734    unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
735
736    // To avoid overflow, we have to clear the low bit of the synthetic sg
737    // if the src alpha is <= 7.
738    // to see why, try blending 0x4444 on top of 565-white and watch green
739    // overflow (sum == 64)
740    sg &= ~(~(sa >> 3) & 1);
741
742    unsigned scale = SkAlpha15To16(15 - sa);
743    unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
744    unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
745    unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
746
747#if 0
748    if (sg + dg > 63) {
749        SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
750    }
751#endif
752    return SkPackRGB16(sr + dr, sg + dg, sb + db);
753}
754
755static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
756    SkASSERT((unsigned)scale16 <= 16);
757
758    return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
759}
760
761static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
762    uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
763                 (SkGetPackedR4444(c) << SK_R32_SHIFT) |
764                 (SkGetPackedG4444(c) << SK_G32_SHIFT) |
765                 (SkGetPackedB4444(c) << SK_B32_SHIFT);
766    return d | (d << 4);
767}
768
769static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
770    return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
771    (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
772    (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
773    (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
774}
775
776// cheap 2x2 dither
777static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
778                                               U8CPU g, U8CPU b) {
779    // to ensure that we stay a legal premultiplied color, we take the max()
780    // of the truncated and dithered alpha values. If we didn't, cases like
781    // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
782    // which is not legal premultiplied, since a < color
783    unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
784    a = SkMax32(a >> 4, dithered_a);
785    // these we just dither in place
786    r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
787    g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
788    b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
789
790    return SkPackARGB4444(a, r, g, b);
791}
792
793static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
794    return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
795                                SkGetPackedG32(c), SkGetPackedB32(c));
796}
797
798/*  Assumes 16bit is in standard RGBA order.
799    Transforms a normal ARGB_8888 into the same byte order as
800    expanded ARGB_4444, but keeps each component 8bits
801*/
802static inline uint32_t SkExpand_8888(SkPMColor c) {
803    return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
804            (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
805            (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
806            (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
807}
808
809/*  Undo the operation of SkExpand_8888, turning the argument back into
810    a SkPMColor.
811*/
812static inline SkPMColor SkCompact_8888(uint32_t c) {
813    return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
814            (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
815            (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
816            (((c >>  0) & 0xFF) << SK_A32_SHIFT);
817}
818
819/*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
820    but this routine just keeps the high 4bits of each component in the low
821    4bits of the result (just like a newly expanded PMColor16).
822*/
823static inline uint32_t SkExpand32_4444(SkPMColor c) {
824    return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
825            (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
826            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
827            (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
828}
829
// takes two values and alternates them as part of a memset16
// used for cheap 2x2 dithering when the colors are opaque
832void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
833
834///////////////////////////////////////////////////////////////////////////////
835
836static inline int SkUpscale31To32(int value) {
837    SkASSERT((unsigned)value <= 31);
838    return value + (value >> 4);
839}
840
841static inline int SkBlend32(int src, int dst, int scale) {
842    SkASSERT((unsigned)src <= 0xFF);
843    SkASSERT((unsigned)dst <= 0xFF);
844    SkASSERT((unsigned)scale <= 32);
845    return dst + ((src - dst) * scale >> 5);
846}
847
848static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
849                                     SkPMColor dst, uint16_t mask) {
850    if (mask == 0) {
851        return dst;
852    }
853
854    /*  We want all of these in 5bits, hence the shifts in case one of them
855     *  (green) is 6bits.
856     */
857    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
858    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
859    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
860
861    // Now upscale them to 0..32, so we can use blend32
862    maskR = SkUpscale31To32(maskR);
863    maskG = SkUpscale31To32(maskG);
864    maskB = SkUpscale31To32(maskB);
865
866    // srcA has been upscaled to 256 before passed into this function
867    maskR = maskR * srcA >> 8;
868    maskG = maskG * srcA >> 8;
869    maskB = maskB * srcA >> 8;
870
871    int dstR = SkGetPackedR32(dst);
872    int dstG = SkGetPackedG32(dst);
873    int dstB = SkGetPackedB32(dst);
874
875    // LCD blitting is only supported if the dst is known/required
876    // to be opaque
877    return SkPackARGB32(0xFF,
878                        SkBlend32(srcR, dstR, maskR),
879                        SkBlend32(srcG, dstG, maskG),
880                        SkBlend32(srcB, dstB, maskB));
881}
882
883static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
884                                           SkPMColor dst, uint16_t mask,
885                                           SkPMColor opaqueDst) {
886    if (mask == 0) {
887        return dst;
888    }
889
890    if (0xFFFF == mask) {
891        return opaqueDst;
892    }
893
894    /*  We want all of these in 5bits, hence the shifts in case one of them
895     *  (green) is 6bits.
896     */
897    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
898    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
899    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
900
901    // Now upscale them to 0..32, so we can use blend32
902    maskR = SkUpscale31To32(maskR);
903    maskG = SkUpscale31To32(maskG);
904    maskB = SkUpscale31To32(maskB);
905
906    int dstR = SkGetPackedR32(dst);
907    int dstG = SkGetPackedG32(dst);
908    int dstB = SkGetPackedB32(dst);
909
910    // LCD blitting is only supported if the dst is known/required
911    // to be opaque
912    return SkPackARGB32(0xFF,
913                        SkBlend32(srcR, dstR, maskR),
914                        SkBlend32(srcG, dstG, maskG),
915                        SkBlend32(srcB, dstB, maskB));
916}
917
918static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
919                                  SkColor src, int width, SkPMColor) {
920    int srcA = SkColorGetA(src);
921    int srcR = SkColorGetR(src);
922    int srcG = SkColorGetG(src);
923    int srcB = SkColorGetB(src);
924
925    srcA = SkAlpha255To256(srcA);
926
927    for (int i = 0; i < width; i++) {
928        dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
929    }
930}
931
932static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
933                                        SkColor src, int width,
934                                        SkPMColor opaqueDst) {
935    int srcR = SkColorGetR(src);
936    int srcG = SkColorGetG(src);
937    int srcB = SkColorGetB(src);
938
939    for (int i = 0; i < width; i++) {
940        dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
941                                    opaqueDst);
942    }
943}
944
945#endif
946