/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkColorPriv_DEFINED
#define SkColorPriv_DEFINED

// turn this on for extra debug checking when blending onto 565
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif

#include "SkColor.h"
#include "SkMath.h"

//////////////////////////////////////////////////////////////////////////////

#define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))

/*
 *  Skia's 32bit backend only supports one swizzle order at a time (compile-time).
 *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
 *
 *  For easier compatibility with Skia's GPU backend, we further restrict these
 *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take
 *  endianness into account.
 *
 *  Here we enforce this constraint.
 */

#ifdef SK_CPU_BENDIAN
    #define SK_RGBA_R32_SHIFT   24
    #define SK_RGBA_G32_SHIFT   16
    #define SK_RGBA_B32_SHIFT   8
    #define SK_RGBA_A32_SHIFT   0

    #define SK_BGRA_B32_SHIFT   24
    #define SK_BGRA_G32_SHIFT   16
    #define SK_BGRA_R32_SHIFT   8
    #define SK_BGRA_A32_SHIFT   0
#else
    #define SK_RGBA_R32_SHIFT   0
    #define SK_RGBA_G32_SHIFT   8
    #define SK_RGBA_B32_SHIFT   16
    #define SK_RGBA_A32_SHIFT   24

    #define SK_BGRA_B32_SHIFT   0
    #define SK_BGRA_G32_SHIFT   8
    #define SK_BGRA_R32_SHIFT   16
    #define SK_BGRA_A32_SHIFT   24
#endif

#if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
    #error "can't define PMCOLOR to be RGBA and BGRA"
#endif

#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
    (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_RGBA_B32_SHIFT)

#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
    (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_BGRA_B32_SHIFT)


#if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
    #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
#endif

#if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
    #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
#endif

#if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
    // deduce which to define from the _SHIFT defines

    #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
        #define SK_PMCOLOR_IS_RGBA
    #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
        #define SK_PMCOLOR_IS_BGRA
    #else
        #error "need 32bit packing to be either RGBA or BGRA"
    #endif
#endif

// hide these now that we're done
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA

//////////////////////////////////////////////////////////////////////////////

// Reverse the bytes corresponding to RED and BLUE in a packed pixel. Note that
// the pair of them occupy the same two slots in both RGBA and BGRA, so there is
// no need to pass in the colortype to this function.
static inline uint32_t SkSwizzle_RB(uint32_t c) {
    static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);

    unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
    unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
    return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
}

static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkASSERT_IS_BYTE(a);
    SkASSERT_IS_BYTE(r);
    SkASSERT_IS_BYTE(g);
    SkASSERT_IS_BYTE(b);
    return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
           (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
}

static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkASSERT_IS_BYTE(a);
    SkASSERT_IS_BYTE(r);
    SkASSERT_IS_BYTE(g);
    SkASSERT_IS_BYTE(b);
    return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
           (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
}

static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
#ifdef SK_PMCOLOR_IS_RGBA
    return c;
#else
    return SkSwizzle_RB(c);
#endif
}

static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
#ifdef SK_PMCOLOR_IS_BGRA
    return c;
#else
    return SkSwizzle_RB(c);
#endif
}

//////////////////////////////////////////////////////////////////////////////

///@{
/** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
#define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
#define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
#define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
///@}

///@{
/** A float value which specifies this channel's contribution to luminance. */
#define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
#define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
#define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
///@}

/** Computes the luminance from the given r, g, and b in accordance with
    SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
*/
static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
    //The following is
    //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
    //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
    return (r * 54 + g * 183 + b * 19) >> 8;
}
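// Worked example: for opaque white, r = g = b = 0xFF gives
// (255*54 + 255*183 + 255*19) >> 8 == (255*256) >> 8 == 255, since the three
// fixed-point coefficients were adjusted to sum to exactly 256.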

/** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    byte into a scale value, so that we can say scale * value >> 8 instead of
    alpha * value / 255.

    In debugging, asserts that alpha is 0..255
*/
static inline unsigned SkAlpha255To256(U8CPU alpha) {
    SkASSERT(SkToU8(alpha) == alpha);
    // this one assumes that blending on top of an opaque dst keeps it that way
    // even though it is less accurate than a+(a>>7) for non-opaque dsts
    return alpha + 1;
}
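// For example, alpha 0xFF maps to the scale 256, so (value * scale) >> 8 returns
// value unchanged; alpha 0 maps to 1, so the result collapses to 0 for any byte.
// Either way the divide in (alpha * value / 255) is avoided.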

/**
 *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
 *  This is slightly more accurate than SkAlpha255To256.
 */
static inline unsigned Sk255To256(U8CPU value) {
    SkASSERT(SkToU8(value) == value);
    return value + (value >> 7);
}

/** Multiply value by 0..256, and shift the result down 8
    (i.e. return (value * alpha256) >> 8)
 */
#define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)

//  The caller may want negative values, so keep all params signed (int)
//  so we don't accidentally slip into unsigned math and lose the sign
//  extension when we shift (in SkAlphaMul)
static inline int SkAlphaBlend(int src, int dst, int scale256) {
    SkASSERT((unsigned)scale256 <= 256);
    return dst + SkAlphaMul(src - dst, scale256);
}

/**
 *  Returns (src * alpha + dst * (255 - alpha)) / 255
 *
 *  This is more accurate than SkAlphaBlend, but slightly slower
 */
static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    SkASSERT((int16_t)src == src);
    SkASSERT((int16_t)dst == dst);
    SkASSERT((uint8_t)alpha == alpha);

    int prod = SkMulS16(src - dst, alpha) + 128;
    prod = (prod + (prod >> 8)) >> 8;
    return dst + prod;
}

#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)

static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    SkASSERT(r <= SK_R16_MASK);
    SkASSERT(g <= SK_G16_MASK);
    SkASSERT(b <= SK_B16_MASK);

    return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
}
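// For example, SkPackRGB16(SK_R16_MASK, SK_G16_MASK, SK_B16_MASK) yields 0xFFFF,
// and SkPackRGB16(0x1F, 0, 0) yields 0xF800: red lands in the top five bits
// because SK_R16_SHIFT is SK_B16_BITS + SK_G16_BITS == 11.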

#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)

/** Expand the 16bit color into a 32bit value that can be scaled all at once
    by a value up to 32. Used in conjunction with SkCompact_rgb_16.
*/
static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
}
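// For example, 0xFFFF expands to 0x07E0F81F: green moves into the high half while
// red and blue stay in the low half, so each field has at least 5 bits of empty
// space above it and a single 32-bit multiply by 0..32 cannot carry between them.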

/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    color value. The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    up to the caller to safely ignore them.
*/
static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
}

/** Scale the 16bit color value by the 0..256 scale parameter.
    The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits.
*/
static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
}

// this helper explicitly returns a clean 16bit value (but slower)
#define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)

/** Blend src and dst 16bit colors by the 0..256 scale parameter.
    The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits.
*/
static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    SkASSERT((unsigned)srcScale <= 256);

    srcScale >>= 3;

    uint32_t src32 = SkExpand_rgb_16(src);
    uint32_t dst32 = SkExpand_rgb_16(dst);
    return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
}

static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
                                int srcScale, int count) {
    SkASSERT(count > 0);
    SkASSERT((unsigned)srcScale <= 256);

    srcScale >>= 3;

    do {
        uint32_t src32 = SkExpand_rgb_16(*src++);
        uint32_t dst32 = SkExpand_rgb_16(*dst);
        *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    } while (--count > 0);
}

#ifdef SK_DEBUG
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
        SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
        SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);

        return a + b;
    }
#else
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif

///////////////////////////////////////////////////////////////////////////////

#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

#define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
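// Each of these is equivalent to ((packed) >> SK_X32_SHIFT) & 0xFF; phrasing it as
// a left shift followed by an unsigned right shift by 24 needs no mask constant,
// and for the component whose shift is 24 it reduces to a single shift.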

#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)

#ifdef SK_DEBUG
    static inline void SkPMColorAssert(SkPMColor c) {
        unsigned a = SkGetPackedA32(c);
        unsigned r = SkGetPackedR32(c);
        unsigned g = SkGetPackedG32(c);
        unsigned b = SkGetPackedB32(c);

        SkA32Assert(a);
        SkASSERT(r <= a);
        SkASSERT(g <= a);
        SkASSERT(b <= a);
    }
#else
    #define SkPMColorAssert(c)
#endif

/**
 *  Pack the components into a SkPMColor, checking (in the debug version) that
 *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
 */
static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkA32Assert(a);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);

    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
}

static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
    return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
                              SkGetPackedG32(c), SkGetPackedB32(c));
}

static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
    return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
                              SkGetPackedG32(c), SkGetPackedB32(c));
}

/**
 * Abstract 4-byte interpolation, implemented on top of SkPMColor
 * utility functions. Third parameter controls blending of the first two:
 *   (src, dst, 0) returns dst
 *   (src, dst, 256) returns src
 *   scale is [0..256], unlike SkFourByteInterp which takes srcWeight in [0..255]
 */
static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
                                         unsigned scale) {
    unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);

    return SkPackARGB32(a, r, g, b);
}

/**
 * Abstract 4-byte interpolation, implemented on top of SkPMColor
 * utility functions. Third parameter controls blending of the first two:
 *   (src, dst, 0) returns dst
 *   (src, dst, 0xFF) returns src
 */
static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
                                         U8CPU srcWeight) {
    unsigned scale = SkAlpha255To256(srcWeight);
    return SkFourByteInterp256(src, dst, scale);
}

/**
 * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
 */
static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    const uint32_t mask = 0x00FF00FF;
    *ag = (color >> 8) & mask;
    *rb = color & mask;
}

/**
 * 0xAARRGGBB -> 0x00AA00GG00RR00BB
 * (note, ARGB -> AGRB)
 */
static inline uint64_t SkSplay(uint32_t color) {
    const uint32_t mask = 0x00FF00FF;
    uint64_t agrb = (color >> 8) & mask;  // 0x0000000000AA00GG
    agrb <<= 32;                          // 0x00AA00GG00000000
    agrb |= color & mask;                 // 0x00AA00GG00RR00BB
    return agrb;
}
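// For example, SkSplay(0xAARRGGBB) produces 0x00AA00GG00RR00BB: every byte gets
// its own 16-bit lane, so a later multiply by a scale up to 256 can touch all
// four channels at once without their products overlapping.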

/**
 * 0xAAxxGGxx, 0xRRxxBBxx -> 0xAARRGGBB
 */
static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    const uint32_t mask = 0xFF00FF00;
    return (ag & mask) | ((rb & mask) >> 8);
}

/**
 * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
 * (note, AGRB -> ARGB)
 */
static inline uint32_t SkUnsplay(uint64_t agrb) {
    const uint32_t mask = 0xFF00FF00;
    return SkPMColor(
        ((agrb & mask) >> 8) |   // 0x00RR00BB
        ((agrb >> 32) & mask));  // 0xAARRGGBB
}

static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    SkASSERT(scale <= 256);

    // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
    uint32_t src_ag, src_rb, dst_ag, dst_rb;
    SkSplay(src, &src_ag, &src_rb);
    SkSplay(dst, &dst_ag, &dst_rb);

    const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;

    return SkUnsplay(ret_ag, ret_rb);
}

static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    SkASSERT(scale <= 256);
    // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
    return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
}
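// The empty byte between lanes is what keeps the single multiply safe: each
// channel contributes at most 255*scale + 255*(256 - scale) == 255*256, which
// still fits in 16 bits, so nothing carries into the neighboring lane before
// SkUnsplay takes the high byte of each lane back out.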

// TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.

/**
 * Same as SkFourByteInterp256, but faster.
 */
static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
    if (sizeof(void*) == 4) {
        return SkFastFourByteInterp256_32(src, dst, scale);
    } else {
        return SkFastFourByteInterp256_64(src, dst, scale);
    }
}

/**
 * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
 * srcWeight scaling to [0, 256].
 */
static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
                                             SkPMColor dst,
                                             U8CPU srcWeight) {
    SkASSERT(srcWeight <= 255);
    // scale = srcWeight + (srcWeight >> 7) is more accurate than
    // scale = srcWeight + 1, but 7% slower
    return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
}

/**
 *  Same as SkPackARGB32, but this version guarantees to not check that the
 *  values are premultiplied in the debug version.
 */
static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
}

static inline
SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkA32Assert(a);
    SkR32Assert(r);
    SkG32Assert(g);
    SkB32Assert(b);

    if (a != 255) {
        r = SkMulDiv255Round(r, a);
        g = SkMulDiv255Round(g, a);
        b = SkMulDiv255Round(b, a);
    }
    return SkPackARGB32(a, r, g, b);
}
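// For example, with a = 0x80 and r = 0xFF, SkMulDiv255Round(0xFF, 0x80) == 0x80,
// so every premultiplied component comes out <= a, which is exactly the invariant
// SkPackARGB32 asserts above.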

// When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
// inlined; forcing inlining significantly improves performance.
static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    uint32_t mask = 0xFF00FF;

    uint32_t rb = ((c & mask) * scale) >> 8;
    uint32_t ag = ((c >> 8) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
}
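// For example, SkAlphaMulQ(0xFFFFFFFF, 128) == 0x7F7F7F7F: red/blue are scaled in
// one multiply and alpha/green in another, with the zero bytes in the 0xFF00FF
// pattern absorbing the intermediate carries between channels.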

static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
}
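// This is the premultiplied src-over rule, result = src + dst * (1 - srcAlpha),
// with the (1 - srcAlpha) factor expressed as the 0..256 scale from SkAlpha255To256.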

static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    SkASSERT((unsigned)aa <= 255);

    unsigned src_scale = SkAlpha255To256(aa);
    unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));

    return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
}

////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 32bit pixel to a 16bit pixel (no dither)

#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))

#ifdef SK_DEBUG
    static inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        return SkR32ToR16_MACRO(r);
    }
    static inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        return SkG32ToG16_MACRO(g);
    }
    static inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        return SkB32ToB16_MACRO(b);
    }
#else
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif

#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)

static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    return r | g | b;
}
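// For example, opaque white 0xFFFFFFFF becomes 0xFFFF, and a pixel with
// r == g == b == 0x80 becomes SkPackRGB16(0x10, 0x20, 0x10) == 0x8410: each
// component simply loses its low 3 bits (2 bits for green).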

static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    return  (SkR32ToR16(r) << SK_R16_SHIFT) |
            (SkG32ToG16(g) << SK_G16_SHIFT) |
            (SkB32ToB16(b) << SK_B16_SHIFT);
}

#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))

/////////////////////////////////////////////////////////////////////////////////////////
// Fast dither from 32->16

#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)

static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);

    return SkPackRGB16(r, g, b);
}

static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
}

/*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits).
    It is now suitable for combining with a scaled expanded_rgb_16 color
    as in SkSrcOver32To16().
    We must do this 565 high-bit replication in order for the subsequent add
    to saturate properly (and not overflow). If we take the 8 bits as is, it is
    possible to overflow.
*/
static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    unsigned sr = SkPacked32ToR16(c);
    unsigned sg = SkPacked32ToG16(c);
    unsigned sb = SkPacked32ToB16(c);

    sr = (sr << 5) | sr;
    sg = (sg << 5) | (sg >> 1);
    sb = (sb << 5) | sb;
    return (sr << 11) | (sg << 21) | (sb << 0);
}

/*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    (with dirt in the high 16bits, so caller beware).
*/
static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    unsigned sr = SkGetPackedR32(src);
    unsigned sg = SkGetPackedG32(src);
    unsigned sb = SkGetPackedB32(src);

    unsigned dr = SkGetPackedR16(dst);
    unsigned dg = SkGetPackedG16(dst);
    unsigned db = SkGetPackedB16(dst);

    unsigned isa = 255 - SkGetPackedA32(src);

    dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);

    return SkPackRGB16(dr, dg, db);
}

////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 16bit pixel to a 32bit pixel

static inline unsigned SkR16ToR32(unsigned r) {
    return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
}

static inline unsigned SkG16ToG32(unsigned g) {
    return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
}

static inline unsigned SkB16ToB32(unsigned b) {
    return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
}
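// The replicated high bits let the 16bit maximum reach the full 8bit maximum:
// a 5-bit 0x1F becomes (0x1F << 3) | (0x1F >> 2) == 0xFF, and a 6-bit 0x3F
// becomes (0x3F << 2) | (0x3F >> 4) == 0xFF, instead of stopping at 0xF8 / 0xFC
// as a plain shift would.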

#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))

static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    SkASSERT(src == SkToU16(src));

    unsigned    r = SkPacked16ToR32(src);
    unsigned    g = SkPacked16ToG32(src);
    unsigned    b = SkPacked16ToB32(src);

    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));

    return SkPackARGB32(0xFF, r, g, b);
}

// similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
static inline SkColor SkPixel16ToColor(U16CPU src) {
    SkASSERT(src == SkToU16(src));

    unsigned    r = SkPacked16ToR32(src);
    unsigned    g = SkPacked16ToG32(src);
    unsigned    b = SkPacked16ToB32(src);

    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));

    return SkColorSetRGB(r, g, b);
}

///////////////////////////////////////////////////////////////////////////////

typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a)
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)

static inline U8CPU SkReplicateNibble(unsigned nib) {
    SkASSERT(nib <= 0xF);
    return (nib << 4) | nib;
}

#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))

#ifdef SK_DEBUG
static inline void SkPMColor16Assert(U16CPU c) {
    unsigned a = SkGetPackedA4444(c);
    unsigned r = SkGetPackedR4444(c);
    unsigned g = SkGetPackedG4444(c);
    unsigned b = SkGetPackedB4444(c);

    SkASSERT(a <= 0xF);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);
}
#else
#define SkPMColor16Assert(c)
#endif

static inline unsigned SkAlpha15To16(unsigned a) {
    SkASSERT(a <= 0xF);
    return a + (a >> 3);
}

#ifdef SK_DEBUG
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);
        return value * scale >> 4;
    }
#else
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif

static inline unsigned SkR4444ToR565(unsigned r) {
    SkASSERT(r <= 0xF);
    return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
}

static inline unsigned SkG4444ToG565(unsigned g) {
    SkASSERT(g <= 0xF);
    return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
}

static inline unsigned SkB4444ToB565(unsigned b) {
    SkASSERT(b <= 0xF);
    return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
}

static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
                                         unsigned g, unsigned b) {
    SkASSERT(a <= 0xF);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);

    return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
                         (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
}

static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    SkASSERT(scale <= 16);

    const unsigned mask = 0xF0F;    //gMask_0F0F;

#if 0
    unsigned rb = ((c & mask) * scale) >> 4;
    unsigned ag = ((c >> 4) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
#else
    c = (c & mask) | ((c & (mask << 4)) << 12);
    c = c * scale >> 4;
    return (c & mask) | ((c >> 12) & (mask << 4));
#endif
}

/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    once by a value up to 16. Used in conjunction with SkCompact_4444.
*/
static inline uint32_t SkExpand_4444(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c & ~mask) << 12);
}
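// For example, 0xFFFF expands to 0x0F0F0F0F: the red and blue nibbles move up by
// 12 bits, giving every 4-bit component 4 bits of headroom so a single 32-bit
// multiply by 0..16 cannot carry between components (the 4444 analogue of
// SkExpand_rgb_16).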

/** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    It does this for speed, since if it is being written directly to 16bits of
    memory, the top 16bits will be ignored. Casting the result to uint16_t here
    would add 2 more instructions and slow us down. It is up to the caller to
    perform the cast if needed.
*/
static inline U16CPU SkCompact_4444(uint32_t c) {
    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c >> 12) & ~mask);
}

static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    unsigned sa = SkGetPackedA4444(s);
    unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));

    // To avoid overflow, we have to clear the low bit of the synthetic sg
    // if the src alpha is <= 7.
    // To see why, try blending 0x4444 on top of 565-white and watch green
    // overflow (sum == 64)
    sg &= ~(~(sa >> 3) & 1);

    unsigned scale = SkAlpha15To16(15 - sa);
    unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);

#if 0
    if (sg + dg > 63) {
        SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    }
#endif
    return SkPackRGB16(sr + dr, sg + dg, sb + db);
}

static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
}

static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    uint32_t src32 = SkExpand_4444(src) * scale16;
    // the scaled srcAlpha is the bottom byte
#ifdef SK_DEBUG
    {
        unsigned srcA = SkGetPackedA4444(src) * scale16;
        SkASSERT(srcA == (src32 & 0xFF));
    }
#endif
    unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    return SkCompact_4444((src32 + dst32) >> 4);
}

static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
                 (SkGetPackedR4444(c) << SK_R32_SHIFT) |
                 (SkGetPackedG4444(c) << SK_G32_SHIFT) |
                 (SkGetPackedB4444(c) << SK_B32_SHIFT);
    return d | (d << 4);
}

static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
}

// cheap 2x2 dither
static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
                                               U8CPU g, U8CPU b) {
    // to ensure that we stay a legal premultiplied color, we take the max()
    // of the truncated and dithered alpha values. If we didn't, cases like
    // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    // which is not legal premultiplied, since a < color
    unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    a = SkMax32(a >> 4, dithered_a);
    // these we just dither in place
    r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;

    return SkPackARGB4444(a, r, g, b);
}

static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
                                SkGetPackedG32(c), SkGetPackedB32(c));
}

/*  Assumes 16bit is in standard RGBA order.
    Transforms a normal ARGB_8888 into the same byte order as
    expanded ARGB_4444, but keeps each component 8bits
*/
static inline uint32_t SkExpand_8888(SkPMColor c) {
    return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
            (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
            (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
            (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
}

/*  Undo the operation of SkExpand_8888, turning the argument back into
    a SkPMColor.
*/
static inline SkPMColor SkCompact_8888(uint32_t c) {
    return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
            (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
            (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
            (((c >>  0) & 0xFF) << SK_A32_SHIFT);
}

/*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    but this routine just keeps the high 4bits of each component in the low
    4bits of the result (just like a newly expanded PMColor16).
*/
static inline uint32_t SkExpand32_4444(SkPMColor c) {
    return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
            (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
}

// takes two values and alternates them as part of a memset16
// used for cheap 2x2 dithering when the colors are opaque
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);

///////////////////////////////////////////////////////////////////////////////

static inline int SkUpscale31To32(int value) {
    SkASSERT((unsigned)value <= 31);
    return value + (value >> 4);
}

static inline int SkBlend32(int src, int dst, int scale) {
    SkASSERT((unsigned)src <= 0xFF);
    SkASSERT((unsigned)dst <= 0xFF);
    SkASSERT((unsigned)scale <= 32);
    return dst + ((src - dst) * scale >> 5);
}

static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
                                     SkPMColor dst, uint16_t mask) {
    if (mask == 0) {
        return dst;
    }

    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = SkUpscale31To32(maskR);
    maskG = SkUpscale31To32(maskG);
    maskB = SkUpscale31To32(maskB);

    // srcA has been upscaled to 256 before being passed into this function
    maskR = maskR * srcA >> 8;
    maskG = maskG * srcA >> 8;
    maskB = maskB * srcA >> 8;

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, dstR, maskR),
                        SkBlend32(srcG, dstG, maskG),
                        SkBlend32(srcB, dstB, maskB));
}

static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
                                           SkPMColor dst, uint16_t mask,
                                           SkPMColor opaqueDst) {
    if (mask == 0) {
        return dst;
    }

    if (0xFFFF == mask) {
        return opaqueDst;
    }

    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = SkUpscale31To32(maskR);
    maskG = SkUpscale31To32(maskG);
    maskB = SkUpscale31To32(maskB);

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, dstR, maskR),
                        SkBlend32(srcG, dstG, maskG),
                        SkBlend32(srcB, dstB, maskB));
}

static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
                                  SkColor src, int width, SkPMColor) {
    int srcA = SkColorGetA(src);
    int srcR = SkColorGetR(src);
    int srcG = SkColorGetG(src);
    int srcB = SkColorGetB(src);

    srcA = SkAlpha255To256(srcA);

    for (int i = 0; i < width; i++) {
        dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
    }
}

static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
                                        SkColor src, int width,
                                        SkPMColor opaqueDst) {
    int srcR = SkColorGetR(src);
    int srcG = SkColorGetG(src);
    int srcB = SkColorGetB(src);

    for (int i = 0; i < width; i++) {
        dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
                                    opaqueDst);
    }
}

#endif
