SkColorPriv.h revision 137a4ca42423bbb6d683067ea544c9a48f18f06c
/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SkColorPriv_DEFINED
#define SkColorPriv_DEFINED

// turn this on for extra debug checking when blending onto 565
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif

#include "SkColor.h"
#include "SkMath.h"

/** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    byte into a scale value, so that we can say scale * value >> 8 instead of
    alpha * value / 255.

    In debugging, asserts that alpha is 0..255
*/
static inline unsigned SkAlpha255To256(U8CPU alpha) {
    SkASSERT(SkToU8(alpha) == alpha);
    // this one assures that blending on top of an opaque dst keeps it that way
    // even though it is less accurate than a+(a>>7) for non-opaque dsts
    return alpha + 1;
}

/** Multiply value by 0..256, and shift the result down 8
    (i.e. return (value * alpha256) >> 8)
 */
#define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)

//  The caller may want negative values, so keep all params signed (int)
//  so we don't accidentally slip into unsigned math and lose the sign
//  extension when we shift (in SkAlphaMul)
static inline int SkAlphaBlend(int src, int dst, int scale256) {
    SkASSERT((unsigned)scale256 <= 256);
    return dst + SkAlphaMul(src - dst, scale256);
}
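
/*  Worked example (illustrative, not part of the original header): blending
    src=200 over dst=100 at alpha 0x80 with the 0..256 scale trick above.

        scale = SkAlpha255To256(0x80);          // 129
        SkAlphaMul(200 - 100, scale);           // (100 * 129) >> 8 == 50
        SkAlphaBlend(200, 100, scale);          // 100 + 50 == 150

    The exact answer, (200*128 + 100*127) / 255, is about 150.2, so the >> 8
    costs only a small rounding error while avoiding a divide by 255.
*/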

#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)

static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    SkASSERT(r <= SK_R16_MASK);
    SkASSERT(g <= SK_G16_MASK);
    SkASSERT(b <= SK_B16_MASK);

    return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
}

#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)

/** Expand the 16bit color into a 32bit value that can be scaled all at once
    by a value up to 32. Used in conjunction with SkCompact_rgb_16.
*/
static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
}

/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    color value. The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    up to the caller to safely ignore them.
*/
static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
}

/** Scale the 16bit color value by the 0..256 scale parameter.
    The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits.
*/
static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
}

// this helper explicitly returns a clean 16bit value (but slower)
#define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
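
/*  Why the expand/compact trick above works (sketch): SkExpand_rgb_16 parks G
    in the high half-word, leaving at least 5 clear bits above each of R, G and
    B, so one 32bit multiply can scale all three fields by 0..32 without a
    field carrying into its neighbor. Example with assumed values:

        SkAlphaMulRGB16(0xFFFF, 128)    // 565 white at scale 128/256
        // expand  -> 0x07E0F81F,  * (128 >> 3) -> 0x7E0F81F0,  >> 5 -> 0x03F07C0F
        // compact -> low 16 bits 0x7BEF, i.e. r=15 g=31 b=15 (half of 565 white)
*/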

/** Blend src and dst 16bit colors by the 0..256 scale parameter.
    The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits.
*/
static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    SkASSERT((unsigned)srcScale <= 256);

    srcScale >>= 3;

    uint32_t src32 = SkExpand_rgb_16(src);
    uint32_t dst32 = SkExpand_rgb_16(dst);
    return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
}

static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
                                int srcScale, int count) {
    SkASSERT(count > 0);
    SkASSERT((unsigned)srcScale <= 256);

    srcScale >>= 3;

    do {
        uint32_t src32 = SkExpand_rgb_16(*src++);
        uint32_t dst32 = SkExpand_rgb_16(*dst);
        *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    } while (--count > 0);
}

#ifdef SK_DEBUG
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
        SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
        SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);

        return a + b;
    }
#else
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif

/////////////////////////////////////////////////////////////////////////////////////////////

#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8
/* We check to see if the SHIFT values have already been defined (SkUserConfig.h);
    if not, we define them ourselves to some default values. We default to OpenGL
    order (in memory: r,g,b,a)
*/
#ifndef SK_A32_SHIFT
    #ifdef SK_CPU_BENDIAN
        #define SK_R32_SHIFT    24
        #define SK_G32_SHIFT    16
        #define SK_B32_SHIFT    8
        #define SK_A32_SHIFT    0
    #else
        #define SK_R32_SHIFT    0
        #define SK_G32_SHIFT    8
        #define SK_B32_SHIFT    16
        #define SK_A32_SHIFT    24
    #endif
#endif

#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

#define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)

#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)

#ifdef SK_DEBUG
    static inline void SkPMColorAssert(SkPMColor c) {
        unsigned a = SkGetPackedA32(c);
        unsigned r = SkGetPackedR32(c);
        unsigned g = SkGetPackedG32(c);
        unsigned b = SkGetPackedB32(c);

        SkA32Assert(a);
        SkASSERT(r <= a);
        SkASSERT(g <= a);
        SkASSERT(b <= a);
    }
#else
    #define SkPMColorAssert(c)
#endif

/**
 *  Pack the components into a SkPMColor, checking (in the debug version) that
 *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
 */
static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkA32Assert(a);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);

    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
}
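
/*  Usage sketch (illustrative): a 50%-opaque red must be premultiplied before
    packing, otherwise the debug asserts above fire.

        // SkPackARGB32(0x80, 0xFF, 0, 0) would assert: r > a
        SkPMColor halfRed = SkPackARGB32(0x80, 0x80, 0, 0);
        SkGetPackedA32(halfRed);    // 0x80
        SkGetPackedR32(halfRed);    // 0x80
*/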

/**
 *  Same as SkPackARGB32, but this version does not check that the values are
 *  premultiplied, even in the debug build.
 */
static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
}

SK_API extern const uint32_t gMask_00FF00FF;

static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    uint32_t mask = gMask_00FF00FF;
//    uint32_t mask = 0xFF00FF;

    uint32_t rb = ((c & mask) * scale) >> 8;
    uint32_t ag = ((c >> 8) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
}
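
/*  How the 00FF00FF mask trick works (sketch): c & mask isolates two of the
    four bytes with 8 clear bits above each, so a single 32bit multiply scales
    both at once; the other two bytes get the same treatment after a shift.
    With assumed values, SkAlphaMulQ(0x80402010, 128) == 0x40201008, i.e.
    every byte halved, whichever channel happens to live in which byte.
*/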

static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
}

static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    SkASSERT((unsigned)aa <= 255);

    unsigned src_scale = SkAlpha255To256(aa);
    unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));

    return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
}
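
/*  Usage sketch (illustrative): classic premultiplied src-over.

        SkPMColor src = SkPackARGB32(0x80, 0x80, 0x00, 0x00);   // 50% red
        SkPMColor dst = SkPackARGB32(0xFF, 0x00, 0x00, 0xFF);   // opaque blue
        SkPMColor out = SkPMSrcOver(src, dst);
        // out alpha == 0x80 + SkAlphaMul(0xFF, SkAlpha255To256(0x7F)) == 0xFF

    SkBlendARGB32(src, dst, aa) is effectively the same operation with the src
    first attenuated by the 0..255 antialiasing coverage aa.
*/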

////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 32bit pixel to a 16bit pixel (no dither)

#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))

#ifdef SK_DEBUG
    static inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        return SkR32ToR16_MACRO(r);
    }
    static inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        return SkG32ToG16_MACRO(g);
    }
    static inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        return SkB32ToB16_MACRO(b);
    }
#else
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif

#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)

static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    return r | g | b;
}

static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    return  (SkR32ToR16(r) << SK_R16_SHIFT) |
            (SkG32ToG16(g) << SK_G16_SHIFT) |
            (SkB32ToB16(b) << SK_B16_SHIFT);
}

#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
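
/*  Worked example (illustrative): an opaque mid-gray converts by simple
    truncation of each channel to 5/6/5 bits.

        SkPixel32ToPixel16(SkPackARGB32(0xFF, 0x80, 0x80, 0x80))
        // r = 0x80 >> 3 == 16, g = 0x80 >> 2 == 32, b = 0x80 >> 3 == 16
        // == SkPackRGB16(16, 32, 16) == 0x8410
*/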

/////////////////////////////////////////////////////////////////////////////////////////
// Fast dither from 32->16

#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)

static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);

    return SkPackRGB16(r, g, b);
}

static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
}
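
/*  What the dither formula above does (sketch): ((r >> 3 << 3) | (r >> 5)) is
    the 8bit round-trip of the truncated 5bit value, so the expression adds the
    quantization error back in before truncating. For example r = 0x80 becomes
    (256 - 132) >> 3 == 15 instead of the plain 0x80 >> 3 == 16; callers that
    alternate the two forms on the checkerboard given by SkShouldDitherXY
    average out to about 15.5, close to the exact 128 * 31 / 255 == 15.6.
*/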

/*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits).
    It is now suitable for combining with a scaled expanded_rgb_16 color
    as in SkSrcOver32To16().
    We must do this 565 high-bit replication in order for the subsequent add
    to saturate properly (and not overflow). If we took the 8 bits as is, it
    would be possible to overflow.
*/
static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    unsigned sr = SkPacked32ToR16(c);
    unsigned sg = SkPacked32ToG16(c);
    unsigned sb = SkPacked32ToB16(c);

    sr = (sr << 5) | sr;
    sg = (sg << 5) | (sg >> 1);
    sb = (sb << 5) | sb;
    return (sr << 11) | (sg << 21) | (sb << 0);
}

/*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    (with dirt in the high 16bits, so caller beware).
*/
static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    unsigned sr = SkGetPackedR32(src);
    unsigned sg = SkGetPackedG32(src);
    unsigned sb = SkGetPackedB32(src);

    unsigned dr = SkGetPackedR16(dst);
    unsigned dg = SkGetPackedG16(dst);
    unsigned db = SkGetPackedB16(dst);

    unsigned isa = 255 - SkGetPackedA32(src);

    dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);

    return SkPackRGB16(dr, dg, db);
}
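
/*  Usage sketch (illustrative, values assumed): src-over a premultiplied 32bit
    color onto one 565 pixel, e.g. in a blitter loop. Each dst channel is
    scaled by (255 - srcAlpha) at 5/6 bit precision, the 8bit src channel is
    added, and the sum is shifted back down to 565.

        uint16_t dst565 = SkPackRGB16(16, 32, 16);              // mid-gray
        SkPMColor src = SkPackARGB32(0xFF, 0xFF, 0xFF, 0xFF);   // opaque white
        uint16_t out = (uint16_t)SkSrcOver32To16(src, dst565);  // == 0xFFFF
*/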

////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 16bit pixel to a 32bit pixel

static inline unsigned SkR16ToR32(unsigned r) {
    return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
}

static inline unsigned SkG16ToG32(unsigned g) {
    return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
}

static inline unsigned SkB16ToB32(unsigned b) {
    return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
}

#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))

static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    SkASSERT(src == SkToU16(src));

    unsigned    r = SkPacked16ToR32(src);
    unsigned    g = SkPacked16ToG32(src);
    unsigned    b = SkPacked16ToB32(src);

    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));

    return SkPackARGB32(0xFF, r, g, b);
}

// similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
static inline SkColor SkPixel16ToColor(U16CPU src) {
    SkASSERT(src == SkToU16(src));

    unsigned    r = SkPacked16ToR32(src);
    unsigned    g = SkPacked16ToG32(src);
    unsigned    b = SkPacked16ToB32(src);

    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));

    return SkColorSetRGB(r, g, b);
}
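
/*  Why the shift-and-or in SkR16ToR32 and friends beats a bare shift (sketch):
    replicating the high bits into the vacated low bits maps the endpoints
    exactly:

        SkR16ToR32(31) == (31 << 3) | (31 >> 2) == 248 | 7 == 255
        SkG16ToG32(63) == (63 << 2) | (63 >> 4) == 252 | 3 == 255

    whereas a plain (r << 3) tops out at 248, which would turn 565 white
    slightly gray after conversion.
*/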

///////////////////////////////////////////////////////////////////////////////

typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a)
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)

static inline U8CPU SkReplicateNibble(unsigned nib) {
    SkASSERT(nib <= 0xF);
    return (nib << 4) | nib;
}

#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))

#ifdef SK_DEBUG
static inline void SkPMColor16Assert(U16CPU c) {
    unsigned a = SkGetPackedA4444(c);
    unsigned r = SkGetPackedR4444(c);
    unsigned g = SkGetPackedG4444(c);
    unsigned b = SkGetPackedB4444(c);

    SkASSERT(a <= 0xF);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);
}
#else
#define SkPMColor16Assert(c)
#endif

static inline unsigned SkAlpha15To16(unsigned a) {
    SkASSERT(a <= 0xF);
    return a + (a >> 3);
}

#ifdef SK_DEBUG
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);
        return value * scale >> 4;
    }
#else
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif

static inline unsigned SkR4444ToR565(unsigned r) {
    SkASSERT(r <= 0xF);
    return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
}

static inline unsigned SkG4444ToG565(unsigned g) {
    SkASSERT(g <= 0xF);
    return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
}

static inline unsigned SkB4444ToB565(unsigned b) {
    SkASSERT(b <= 0xF);
    return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
}

static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
                                         unsigned g, unsigned b) {
    SkASSERT(a <= 0xF);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);

    return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
                         (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
}

extern const uint16_t gMask_0F0F;

static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    SkASSERT(scale <= 16);

    const unsigned mask = 0xF0F;    //gMask_0F0F;

#if 0
    unsigned rb = ((c & mask) * scale) >> 4;
    unsigned ag = ((c >> 4) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
#else
    c = (c & mask) | ((c & (mask << 4)) << 12);
    c = c * scale >> 4;
    return (c & mask) | ((c >> 12) & (mask << 4));
#endif
}

/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    once by a value up to 16. Used in conjunction with SkCompact_4444.
*/
static inline uint32_t SkExpand_4444(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c & ~mask) << 12);
}

/** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    It does this for speed, since if it is being written directly to 16bits of
    memory, the top 16bits will be ignored. Casting the result to uint16_t here
    would add 2 more instructions and slow us down. It is up to the caller to
    perform the cast if needed.
*/
static inline U16CPU SkCompact_4444(uint32_t c) {
    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c >> 12) & ~mask);
}
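
/*  Same single-multiply trick as the 565 case, just with nibbles (sketch):
    after expansion every 4bit field has at least 4 clear bits above it, so a
    0..16 scale cannot carry into a neighboring field. With assumed values:

        SkAlphaMulQ4(0xFFFF, 8)
        // expand -> 0x0F0F0F0F, * 8 -> 0x78787878, >> 4 -> 0x07878787
        // recombine -> 0x7777, i.e. every nibble halved
*/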

static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    unsigned sa = SkGetPackedA4444(s);
    unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));

    // To avoid overflow, we have to clear the low bit of the synthetic sg
    // if the src alpha is <= 7.
    // To see why, try blending 0x4444 on top of 565-white and watch green
    // overflow (sum == 64).
    sg &= ~(~(sa >> 3) & 1);

    unsigned scale = SkAlpha15To16(15 - sa);
    unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);

#if 0
    if (sg + dg > 63) {
        SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    }
#endif
    return SkPackRGB16(sr + dr, sg + dg, sb + db);
}

static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
}

static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    uint32_t src32 = SkExpand_4444(src) * scale16;
    // the scaled srcAlpha is the bottom byte
#ifdef SK_DEBUG
    {
        unsigned srcA = SkGetPackedA4444(src) * scale16;
        SkASSERT(srcA == (src32 & 0xFF));
    }
#endif
    unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    return SkCompact_4444((src32 + dst32) >> 4);
}

static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
                 (SkGetPackedR4444(c) << SK_R32_SHIFT) |
                 (SkGetPackedG4444(c) << SK_G32_SHIFT) |
                 (SkGetPackedB4444(c) << SK_B32_SHIFT);
    return d | (d << 4);
}

static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
            (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
}

// cheap 2x2 dither
static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
                                               U8CPU g, U8CPU b) {
    // To ensure that we stay a legal premultiplied color, we take the max()
    // of the truncated and dithered alpha values. If we didn't, cases like
    // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...),
    // which is not legal premultiplied, since a < color.
    unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    a = SkMax32(a >> 4, dithered_a);
    // these we just dither in place
    r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;

    return SkPackARGB4444(a, r, g, b);
}
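
/*  Worked numbers for the comment above (illustrative): with a = 0x31 the
    dithered alpha is (0x62 - 0x33) >> 4 == 2 while truncation gives 3, and
    with r = 0x2E the dithered red is (0x5C - 0x22) >> 4 == 3. Taking
    SkMax32(3, 2) for the alpha keeps r <= a, so the result stays a legal
    premultiplied 4444 color.
*/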

static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
                                SkGetPackedG32(c), SkGetPackedB32(c));
}

/*  Assumes 16bit is in standard RGBA order.
    Transforms a normal ARGB_8888 into the same byte order as
    expanded ARGB_4444, but keeps each component 8bits
*/
static inline uint32_t SkExpand_8888(SkPMColor c) {
    return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
            (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
            (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
            (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
}

/*  Undo the operation of SkExpand_8888, turning the argument back into
    a SkPMColor.
*/
static inline SkPMColor SkCompact_8888(uint32_t c) {
    return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
            (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
            (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
            (((c >>  0) & 0xFF) << SK_A32_SHIFT);
}

/*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    but this routine just keeps the high 4bits of each component in the low
    4bits of the result (just like a newly expanded PMColor16).
*/
static inline uint32_t SkExpand32_4444(SkPMColor c) {
    return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
            (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
}

// takes two values and alternates them as part of a memset16
// used for cheap 2x2 dithering when the colors are opaque
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
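
/*  Usage sketch (illustrative; the variable names are assumed, not part of
    this header): filling a scanline with an opaque color dithered between its
    two nearest 565 representations.

        uint16_t lo = SkPixel32ToPixel16_ToU16(c);
        uint16_t hi = SkDitherPixel32ToPixel16(c);
        // presumably swap hi/lo based on SkShouldDitherXY(x, y) so the 2x2
        // pattern stays aligned from row to row
        sk_dither_memset16(row + x, hi, lo, width);
*/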

#endif