SkColorPriv.h revision 76692c34c7e2d69f4e43133e516c580f08caed99
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef SkColorPriv_DEFINED
18#define SkColorPriv_DEFINED
19
// Turn this on for extra debug checking when blending onto 565.
// It is defined automatically whenever SK_DEBUG is defined.
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif
24
25#include "SkColor.h"
26#include "SkMath.h"
27
28/** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
29    byte into a scale value, so that we can say scale * value >> 8 instead of
30    alpha * value / 255.
31
32    In debugging, asserts that alpha is 0..255
33*/
34static inline unsigned SkAlpha255To256(U8CPU alpha) {
35    SkASSERT(SkToU8(alpha) == alpha);
36    return alpha + (alpha >> 7);
37}
38
/** Multiply value by a 0..256 scale, and shift the result down 8
    (i.e. return (value * alpha256) >> 8). The product itself is formed by
    SkMulS16.
 */
#define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
43
44//  The caller may want negative values, so keep all params signed (int)
45//  so we don't accidentally slip into unsigned math and lose the sign
46//  extension when we shift (in SkAlphaMul)
47inline int SkAlphaBlend(int src, int dst, int scale256) {
48    SkASSERT((unsigned)scale256 <= 256);
49    return dst + SkAlphaMul(src - dst, scale256);
50}
51
// Bit widths of the red, green and blue fields of a packed 16bit (565) color
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit position of each field inside the 16bit value: blue sits in the low
// bits, green above it, red at the top
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Right-aligned masks, one per field width
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract one field of a packed 16bit color as a right-aligned value
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Debug checks that a component value fits inside its 565 field
#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
71
72static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
73    SkASSERT(r <= SK_R16_MASK);
74    SkASSERT(g <= SK_G16_MASK);
75    SkASSERT(b <= SK_B16_MASK);
76
77    return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
78}
79
// Each field's mask shifted up to the position the field occupies inside a
// packed 16bit color
#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
83
84/** Expand the 16bit color into a 32bit value that can be scaled all at once
85    by a value up to 32. Used in conjunction with SkCompact_rgb_16.
86*/
87static inline uint32_t SkExpand_rgb_16(U16CPU c) {
88    SkASSERT(c == (uint16_t)c);
89
90    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
91}
92
93/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
94    color value. The computation yields only 16bits of valid data, but we claim
95    to return 32bits, so that the compiler won't generate extra instructions to
96    "clean" the top 16bits. However, the top 16 can contain garbage, so it is
97    up to the caller to safely ignore them.
98*/
99static inline U16CPU SkCompact_rgb_16(uint32_t c) {
100    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
101}
102
103/** Scale the 16bit color value by the 0..256 scale parameter.
104    The computation yields only 16bits of valid data, but we claim
105    to return 32bits, so that the compiler won't generate extra instructions to
106    "clean" the top 16bits.
107*/
108static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
109    return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
110}
111
// This helper explicitly returns a clean 16bit value (but slower).
// The whole replacement is parenthesized so the cast and the call always act
// as one operand, whatever expression the macro is used in.
#define SkAlphaMulRGB16_ToU16(c, s)  ((uint16_t)SkAlphaMulRGB16(c, s))
114
115/** Blend src and dst 16bit colors by the 0..256 scale parameter.
116    The computation yields only 16bits of valid data, but we claim
117    to return 32bits, so that the compiler won't generate extra instructions to
118    "clean" the top 16bits.
119*/
120static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
121    SkASSERT((unsigned)srcScale <= 256);
122
123    srcScale >>= 3;
124
125    uint32_t src32 = SkExpand_rgb_16(src);
126    uint32_t dst32 = SkExpand_rgb_16(dst);
127    return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
128}
129
130static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
131                                int srcScale, int count) {
132    SkASSERT(count > 0);
133    SkASSERT((unsigned)srcScale <= 256);
134
135    srcScale >>= 3;
136
137    do {
138        uint32_t src32 = SkExpand_rgb_16(*src++);
139        uint32_t dst32 = SkExpand_rgb_16(*dst);
140        *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
141    } while (--count > 0);
142}
143
// Add two packed 16bit colors with one integer add. Debug builds first check,
// field by field, that the sum still fits inside the field; release builds
// reduce to the bare addition.
#ifdef SK_DEBUG
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        const unsigned rSum = SkGetPackedR16(a) + SkGetPackedR16(b);
        const unsigned gSum = SkGetPackedG16(a) + SkGetPackedG16(b);
        const unsigned bSum = SkGetPackedB16(a) + SkGetPackedB16(b);

        SkASSERT(rSum <= SK_R16_MASK);
        SkASSERT(gSum <= SK_G16_MASK);
        SkASSERT(bSum <= SK_B16_MASK);

        return a + b;
    }
#else
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif
155
156/////////////////////////////////////////////////////////////////////////////////////////////
157
// Bit widths of the alpha, red, green and blue components of a 32bit pixel
#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

/* We check to see if the SHIFT value has already been defined (SkUserConfig.h).
    If not, we define it ourselves to some default values. We default to OpenGL
    order (in memory: r,g,b,a)
*/
#ifndef SK_A32_SHIFT
    #ifdef SK_CPU_BENDIAN
        #define SK_R32_SHIFT    24
        #define SK_G32_SHIFT    16
        #define SK_B32_SHIFT    8
        #define SK_A32_SHIFT    0
    #else
        #define SK_R32_SHIFT    0
        #define SK_G32_SHIFT    8
        #define SK_B32_SHIFT    16
        #define SK_A32_SHIFT    24
    #endif
#endif

// Right-aligned masks, one per component width
#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

/*  Extract one 8bit component from a packed 32bit pixel: the component is
    shifted up into the top byte and then back down into the bottom byte.
    The argument is converted to uint32_t BEFORE the left shift, so the shift
    is always performed on an unsigned 32bit value; a signed argument is never
    left-shifted into its sign bit.
*/
#define SkGetPackedA32(packed)      (((uint32_t)(packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      (((uint32_t)(packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      (((uint32_t)(packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      (((uint32_t)(packed) << (24 - SK_B32_SHIFT)) >> 24)

// Debug checks that a component value fits in its 8bit field
#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
195
// Debug-only check of a premultiplied 32bit color: alpha must fit in its
// field, and no color component may be larger than alpha. In release builds
// the call expands to nothing.
#ifdef SK_DEBUG
    inline void SkPMColorAssert(SkPMColor c) {
        const unsigned alpha = SkGetPackedA32(c);

        SkA32Assert(alpha);
        SkASSERT(SkGetPackedR32(c) <= alpha);
        SkASSERT(SkGetPackedG32(c) <= alpha);
        SkASSERT(SkGetPackedB32(c) <= alpha);
    }
#else
    #define SkPMColorAssert(c)
#endif
211
212inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
213    SkA32Assert(a);
214    SkASSERT(r <= a);
215    SkASSERT(g <= a);
216    SkASSERT(b <= a);
217
218    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
219           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
220}
221
extern const uint32_t gMask_00FF00FF;

// Scale all four bytes of c by scale using two multiplies, two bytes at a
// time. The byte-pair mask is read from gMask_00FF00FF (presumably 0x00FF00FF,
// going by its name) rather than written as a literal.
// NOTE(review): scale looks like it is meant to be 0..256, as produced by
// SkAlpha255To256; nothing here asserts that.
inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    const uint32_t pairMask = gMask_00FF00FF;

    // the byte pair selected by the mask: scale it, then shift the products
    // back down into place
    const uint32_t lowPair = ((c & pairMask) * scale) >> 8;
    // the other byte pair: moved down 8 before scaling, so the products
    // already land in those bytes' own positions
    const uint32_t highPair = ((c >> 8) & pairMask) * scale;

    return (lowPair & pairMask) | (highPair & ~pairMask);
}
232
233inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
234    return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
235}
236
237inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
238    SkASSERT((unsigned)aa <= 255);
239
240    unsigned src_scale = SkAlpha255To256(aa);
241    unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
242
243    return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
244}
245
246////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 32bit pixel to a 16bit pixel (no dither)

// Reduce an 8bit component to its 565 width by discarding the low bits
#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))

// Debug builds route the conversions through functions so that the input can
// be range-checked first; release builds use the macros directly.
#ifdef SK_DEBUG
    inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        return SkR32ToR16_MACRO(r);
    }

    inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        return SkG32ToG16_MACRO(g);
    }

    inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        return SkB32ToB16_MACRO(b);
    }
#else
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif
274
275#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
276#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
277#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
278
279inline U16CPU SkPixel32ToPixel16(SkPMColor c)
280{
281    unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
282    unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
283    unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
284    return r | g | b;
285}
286
287inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b)
288{
289    return  (SkR32ToR16(r) << SK_R16_SHIFT) |
290            (SkG32ToG16(g) << SK_G16_SHIFT) |
291            (SkB32ToB16(b) << SK_B16_SHIFT);
292}
293
294#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
295
296/////////////////////////////////////////////////////////////////////////////////////////
297// Fast dither from 32->16
298
299#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
300
301inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b)
302{
303    r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
304    g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
305    b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
306
307    return SkPackRGB16(r, g, b);
308}
309
310inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c)
311{
312    return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
313}
314
315/*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
316    It is now suitable for combining with a scaled expanded_rgb_16 color
317    as in SkSrcOver32To16().
318    We must do this 565 high-bit replication, in order for the subsequent add
319    to saturate properly (and not overflow). If we take the 8 bits as is, it is
320    possible to overflow.
321*/
322static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c)
323{
324    unsigned sr = SkPacked32ToR16(c);
325    unsigned sg = SkPacked32ToG16(c);
326    unsigned sb = SkPacked32ToB16(c);
327
328    sr = (sr << 5) | sr;
329    sg = (sg << 5) | (sg >> 1);
330    sb = (sb << 5) | sb;
331    return (sr << 11) | (sg << 21) | (sb << 0);
332}
333
334/*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
335    (with dirt in the high 16bits, so caller beware).
336*/
337static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
338    unsigned sr = SkGetPackedR32(src);
339    unsigned sg = SkGetPackedG32(src);
340    unsigned sb = SkGetPackedB32(src);
341
342    unsigned dr = SkGetPackedR16(dst);
343    unsigned dg = SkGetPackedG16(dst);
344    unsigned db = SkGetPackedB16(dst);
345
346    unsigned isa = 255 - SkGetPackedA32(src);
347
348    dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
349    dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
350    db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
351
352    return SkPackRGB16(dr, dg, db);
353}
354
355////////////////////////////////////////////////////////////////////////////////////////////
356// Convert a 16bit pixel to a 32bit pixel
357
358inline unsigned SkR16ToR32(unsigned r)
359{
360    return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
361}
362inline unsigned SkG16ToG32(unsigned g)
363{
364    return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
365}
366inline unsigned SkB16ToB32(unsigned b)
367{
368    return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
369}
370
371#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
372#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
373#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
374
375inline SkPMColor SkPixel16ToPixel32(U16CPU src)
376{
377    SkASSERT(src == SkToU16(src));
378
379    unsigned    r = SkPacked16ToR32(src);
380    unsigned    g = SkPacked16ToG32(src);
381    unsigned    b = SkPacked16ToB32(src);
382
383    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
384    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
385    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
386
387    return SkPackARGB32(0xFF, r, g, b);
388}
389
390///////////////////////////////////////////////////////////////////////////////
391
// 16bit pixel holding four 4bit components (see the SK_*4444_SHIFT values
// below); the asserts elsewhere in this file treat it as premultiplied
typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a): red in the top nibble, alpha in the bottom
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

// Reduce an 8bit component to 4 bits by keeping its high nibble
#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)
404
405static inline U8CPU SkReplicateNibble(unsigned nib)
406{
407    SkASSERT(nib <= 0xF);
408    return (nib << 4) | nib;
409}
410
411#define SkA4444ToA32(a)     SkReplicateNibble(a)
412#define SkR4444ToR32(r)     SkReplicateNibble(r)
413#define SkG4444ToG32(g)     SkReplicateNibble(g)
414#define SkB4444ToB32(b)     SkReplicateNibble(b)
415
416#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
417#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
418#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
419#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
420
421#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
422#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
423#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
424#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
425
// Debug-only check of a premultiplied 4444 color: alpha must fit in 4 bits and
// no color component may be larger than alpha. In release builds the call
// expands to nothing.
#ifdef SK_DEBUG
static inline void SkPMColor16Assert(U16CPU c) {
    const unsigned alpha = SkGetPackedA4444(c);

    SkASSERT(alpha <= 0xF);
    SkASSERT(SkGetPackedR4444(c) <= alpha);
    SkASSERT(SkGetPackedG4444(c) <= alpha);
    SkASSERT(SkGetPackedB4444(c) <= alpha);
}
#else
#define SkPMColor16Assert(c)
#endif
442
443static inline unsigned SkAlpha15To16(unsigned a)
444{
445    SkASSERT(a <= 0xF);
446    return a + (a >> 3);
447}
448
// Multiply value by a 0..16 scale and shift the product down 4. The debug
// build is a function so that scale can be range-checked; the release build
// is the bare expression.
#ifdef SK_DEBUG
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);

        const int product = value * scale;
        return product >> 4;
    }
#else
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif
458
459static inline unsigned SkR4444ToR565(unsigned r)
460{
461    SkASSERT(r <= 0xF);
462    return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
463}
464
465static inline unsigned SkG4444ToG565(unsigned g)
466{
467    SkASSERT(g <= 0xF);
468    return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
469}
470
471static inline unsigned SkB4444ToB565(unsigned b)
472{
473    SkASSERT(b <= 0xF);
474    return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
475}
476
477static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
478                                         unsigned g, unsigned b)
479{
480    SkASSERT(a <= 0xF);
481    SkASSERT(r <= a);
482    SkASSERT(g <= a);
483    SkASSERT(b <= a);
484
485    return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
486                         (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
487}
488
489extern const uint16_t gMask_0F0F;
490
491inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale)
492{
493    SkASSERT(scale <= 16);
494
495    const unsigned mask = 0xF0F;    //gMask_0F0F;
496
497#if 0
498    unsigned rb = ((c & mask) * scale) >> 4;
499    unsigned ag = ((c >> 4) & mask) * scale;
500    return (rb & mask) | (ag & ~mask);
501#else
502    c = (c & mask) | ((c & (mask << 4)) << 12);
503    c = c * scale >> 4;
504    return (c & mask) | ((c >> 12) & (mask << 4));
505#endif
506}
507
508/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
509    once by a value up to 16. Used in conjunction with SkCompact_4444.
510*/
511inline uint32_t SkExpand_4444(U16CPU c)
512{
513    SkASSERT(c == (uint16_t)c);
514
515    const unsigned mask = 0xF0F;    //gMask_0F0F;
516    return (c & mask) | ((c & ~mask) << 12);
517}
518
519/** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
520    NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
521    It does this for speed, since if it is being written directly to 16bits of
522    memory, the top 16bits will be ignored. Casting the result to uint16_t here
523    would add 2 more instructions, slow us down. It is up to the caller to
524    perform the cast if needed.
525*/
526static inline U16CPU SkCompact_4444(uint32_t c)
527{
528    const unsigned mask = 0xF0F;    //gMask_0F0F;
529    return (c & mask) | ((c >> 12) & ~mask);
530}
531
532static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d)
533{
534    unsigned sa = SkGetPackedA4444(s);
535    unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
536    unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
537    unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
538
539    // To avoid overflow, we have to clear the low bit of the synthetic sg
540    // if the src alpha is <= 7.
541    // to see why, try blending 0x4444 on top of 565-white and watch green
542    // overflow (sum == 64)
543    sg &= ~(~(sa >> 3) & 1);
544
545    unsigned scale = SkAlpha15To16(15 - sa);
546    unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
547    unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
548    unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
549
550#if 0
551    if (sg + dg > 63) {
552        SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
553    }
554#endif
555    return SkPackRGB16(sr + dr, sg + dg, sb + db);
556}
557
558static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16)
559{
560    SkASSERT((unsigned)scale16 <= 16);
561
562    return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
563}
564
565static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16)
566{
567    SkASSERT((unsigned)scale16 <= 16);
568
569    uint32_t src32 = SkExpand_4444(src) * scale16;
570    // the scaled srcAlpha is the bottom byte
571#ifdef SK_DEBUG
572    {
573        unsigned srcA = SkGetPackedA4444(src) * scale16;
574        SkASSERT(srcA == (src32 & 0xFF));
575    }
576#endif
577    unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
578    uint32_t dst32 = SkExpand_4444(dst) * dstScale;
579    return SkCompact_4444((src32 + dst32) >> 4);
580}
581
582static inline SkPMColor SkPixel4444ToPixel32(U16CPU c)
583{
584    uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
585                 (SkGetPackedR4444(c) << SK_R32_SHIFT) |
586                 (SkGetPackedG4444(c) << SK_G32_SHIFT) |
587                 (SkGetPackedB4444(c) << SK_B32_SHIFT);
588    return d | (d << 4);
589}
590
591static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c)
592{
593    return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
594    (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
595    (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
596    (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
597}
598
599// cheap 2x2 dither
600static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
601                                               U8CPU g, U8CPU b)
602{
603    a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
604    r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
605    g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
606    b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
607
608    return SkPackARGB4444(a, r, g, b);
609}
610
611static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c)
612{
613    return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
614                                SkGetPackedG32(c), SkGetPackedB32(c));
615}
616
617/*  Assumes 16bit is in standard RGBA order.
618    Transforms a normal ARGB_8888 into the same byte order as
619    expanded ARGB_4444, but keeps each component 8bits
620*/
621static inline uint32_t SkExpand_8888(SkPMColor c)
622{
623    return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
624            (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
625            (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
626            (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
627}
628
629/*  Undo the operation of SkExpand_8888, turning the argument back into
630    a SkPMColor.
631*/
632static inline SkPMColor SkCompact_8888(uint32_t c)
633{
634    return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
635            (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
636            (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
637            (((c >>  0) & 0xFF) << SK_A32_SHIFT);
638}
639
640/*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
641    but this routine just keeps the high 4bits of each component in the low
642    4bits of the result (just like a newly expanded PMColor16).
643*/
644static inline uint32_t SkExpand32_4444(SkPMColor c)
645{
646    return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
647            (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
648            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
649            (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
650}
651
// Takes two values and alternates them as part of a memset16.
// Used for cheap 2x2 dithering when the colors are opaque.
// Declaration only; the definition is not in this header.
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
655
656#endif
657
658