1 2/* 3 * Copyright 2006 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10#ifndef SkColorPriv_DEFINED 11#define SkColorPriv_DEFINED 12 13// turn this own for extra debug checking when blending onto 565 14#ifdef SK_DEBUG 15 #define CHECK_FOR_565_OVERFLOW 16#endif 17 18#include "SkColor.h" 19#include "SkMath.h" 20 21///@{ 22/** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/ 23#define SK_ITU_BT709_LUM_COEFF_R (0.2126f) 24#define SK_ITU_BT709_LUM_COEFF_G (0.7152f) 25#define SK_ITU_BT709_LUM_COEFF_B (0.0722f) 26///@} 27 28///@{ 29/** A float value which specifies this channel's contribution to luminance. */ 30#define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R 31#define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G 32#define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B 33///@} 34 35/** Computes the luminance from the given r, g, and b in accordance with 36 SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space. 37*/ 38static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) { 39 //The following is 40 //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B 41 //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256). 42 return (r * 54 + g * 183 + b * 19) >> 8; 43} 44 45/** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a 46 byte into a scale value, so that we can say scale * value >> 8 instead of 47 alpha * value / 255. 48 49 In debugging, asserts that alpha is 0..255 50*/ 51static inline unsigned SkAlpha255To256(U8CPU alpha) { 52 SkASSERT(SkToU8(alpha) == alpha); 53 // this one assues that blending on top of an opaque dst keeps it that way 54 // even though it is less accurate than a+(a>>7) for non-opaque dsts 55 return alpha + 1; 56} 57 58/** Multiplify value by 0..256, and shift the result down 8 59 (i.e. return (value * alpha256) >> 8) 60 */ 61#define SkAlphaMul(value, alpha256) (SkMulS16(value, alpha256) >> 8) 62 63// The caller may want negative values, so keep all params signed (int) 64// so we don't accidentally slip into unsigned math and lose the sign 65// extension when we shift (in SkAlphaMul) 66static inline int SkAlphaBlend(int src, int dst, int scale256) { 67 SkASSERT((unsigned)scale256 <= 256); 68 return dst + SkAlphaMul(src - dst, scale256); 69} 70 71/** 72 * Returns (src * alpha + dst * (255 - alpha)) / 255 73 * 74 * This is more accurate than SkAlphaBlend, but slightly slower 75 */ 76static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) { 77 SkASSERT((int16_t)src == src); 78 SkASSERT((int16_t)dst == dst); 79 SkASSERT((uint8_t)alpha == alpha); 80 81 int prod = SkMulS16(src - dst, alpha) + 128; 82 prod = (prod + (prod >> 8)) >> 8; 83 return dst + prod; 84} 85 86#define SK_R16_BITS 5 87#define SK_G16_BITS 6 88#define SK_B16_BITS 5 89 90#define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS) 91#define SK_G16_SHIFT (SK_B16_BITS) 92#define SK_B16_SHIFT 0 93 94#define SK_R16_MASK ((1 << SK_R16_BITS) - 1) 95#define SK_G16_MASK ((1 << SK_G16_BITS) - 1) 96#define SK_B16_MASK ((1 << SK_B16_BITS) - 1) 97 98#define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK) 99#define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK) 100#define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK) 101 102#define SkR16Assert(r) SkASSERT((unsigned)(r) <= SK_R16_MASK) 103#define SkG16Assert(g) SkASSERT((unsigned)(g) <= SK_G16_MASK) 104#define SkB16Assert(b) SkASSERT((unsigned)(b) <= SK_B16_MASK) 105 106static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) { 107 SkASSERT(r <= SK_R16_MASK); 108 SkASSERT(g <= SK_G16_MASK); 109 SkASSERT(b <= SK_B16_MASK); 110 111 return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT)); 112} 113 114#define SK_R16_MASK_IN_PLACE (SK_R16_MASK << SK_R16_SHIFT) 115#define SK_G16_MASK_IN_PLACE (SK_G16_MASK << SK_G16_SHIFT) 116#define SK_B16_MASK_IN_PLACE (SK_B16_MASK << SK_B16_SHIFT) 117 118/** Expand the 16bit color into a 32bit value that can be scaled all at once 119 by a value up to 32. Used in conjunction with SkCompact_rgb_16. 120*/ 121static inline uint32_t SkExpand_rgb_16(U16CPU c) { 122 SkASSERT(c == (uint16_t)c); 123 124 return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE); 125} 126 127/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit 128 color value. The computation yields only 16bits of valid data, but we claim 129 to return 32bits, so that the compiler won't generate extra instructions to 130 "clean" the top 16bits. However, the top 16 can contain garbage, so it is 131 up to the caller to safely ignore them. 132*/ 133static inline U16CPU SkCompact_rgb_16(uint32_t c) { 134 return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE); 135} 136 137/** Scale the 16bit color value by the 0..256 scale parameter. 138 The computation yields only 16bits of valid data, but we claim 139 to return 32bits, so that the compiler won't generate extra instructions to 140 "clean" the top 16bits. 141*/ 142static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) { 143 return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5); 144} 145 146// this helper explicitly returns a clean 16bit value (but slower) 147#define SkAlphaMulRGB16_ToU16(c, s) (uint16_t)SkAlphaMulRGB16(c, s) 148 149/** Blend src and dst 16bit colors by the 0..256 scale parameter. 150 The computation yields only 16bits of valid data, but we claim 151 to return 32bits, so that the compiler won't generate extra instructions to 152 "clean" the top 16bits. 153*/ 154static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) { 155 SkASSERT((unsigned)srcScale <= 256); 156 157 srcScale >>= 3; 158 159 uint32_t src32 = SkExpand_rgb_16(src); 160 uint32_t dst32 = SkExpand_rgb_16(dst); 161 return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)); 162} 163 164static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[], 165 int srcScale, int count) { 166 SkASSERT(count > 0); 167 SkASSERT((unsigned)srcScale <= 256); 168 169 srcScale >>= 3; 170 171 do { 172 uint32_t src32 = SkExpand_rgb_16(*src++); 173 uint32_t dst32 = SkExpand_rgb_16(*dst); 174 *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)); 175 } while (--count > 0); 176} 177 178#ifdef SK_DEBUG 179 static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) { 180 SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK); 181 SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK); 182 SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK); 183 184 return a + b; 185 } 186#else 187 #define SkRGB16Add(a, b) ((a) + (b)) 188#endif 189 190/////////////////////////////////////////////////////////////////////////////// 191 192#define SK_A32_BITS 8 193#define SK_R32_BITS 8 194#define SK_G32_BITS 8 195#define SK_B32_BITS 8 196 197#define SK_A32_MASK ((1 << SK_A32_BITS) - 1) 198#define SK_R32_MASK ((1 << SK_R32_BITS) - 1) 199#define SK_G32_MASK ((1 << SK_G32_BITS) - 1) 200#define SK_B32_MASK ((1 << SK_B32_BITS) - 1) 201 202#define SkGetPackedA32(packed) ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24) 203#define SkGetPackedR32(packed) ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24) 204#define SkGetPackedG32(packed) ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24) 205#define SkGetPackedB32(packed) ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24) 206 207#define SkA32Assert(a) SkASSERT((unsigned)(a) <= SK_A32_MASK) 208#define SkR32Assert(r) SkASSERT((unsigned)(r) <= SK_R32_MASK) 209#define SkG32Assert(g) SkASSERT((unsigned)(g) <= SK_G32_MASK) 210#define SkB32Assert(b) SkASSERT((unsigned)(b) <= SK_B32_MASK) 211 212#ifdef SK_DEBUG 213 static inline void SkPMColorAssert(SkPMColor c) { 214 unsigned a = SkGetPackedA32(c); 215 unsigned r = SkGetPackedR32(c); 216 unsigned g = SkGetPackedG32(c); 217 unsigned b = SkGetPackedB32(c); 218 219 SkA32Assert(a); 220 SkASSERT(r <= a); 221 SkASSERT(g <= a); 222 SkASSERT(b <= a); 223 } 224#else 225 #define SkPMColorAssert(c) 226#endif 227 228/** 229 * Pack the components into a SkPMColor, checking (in the debug version) that 230 * the components are 0..255, and are already premultiplied (i.e. alpha >= color) 231 */ 232static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) { 233 SkA32Assert(a); 234 SkASSERT(r <= a); 235 SkASSERT(g <= a); 236 SkASSERT(b <= a); 237 238 return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) | 239 (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT); 240} 241 242/** 243 * Abstract 4-byte interpolation, implemented on top of SkPMColor 244 * utility functions. Third parameter controls blending of the first two: 245 * (src, dst, 0) returns dst 246 * (src, dst, 0xFF) returns src 247 * srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255] 248 */ 249static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst, 250 unsigned scale) { 251 unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale); 252 unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale); 253 unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale); 254 unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale); 255 256 return SkPackARGB32(a, r, g, b); 257} 258 259/** 260 * Abstract 4-byte interpolation, implemented on top of SkPMColor 261 * utility functions. Third parameter controls blending of the first two: 262 * (src, dst, 0) returns dst 263 * (src, dst, 0xFF) returns src 264 */ 265static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst, 266 U8CPU srcWeight) { 267 unsigned scale = SkAlpha255To256(srcWeight); 268 return SkFourByteInterp256(src, dst, scale); 269} 270 271/** 272 * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB 273 */ 274static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) { 275 const uint32_t mask = 0x00FF00FF; 276 *ag = (color >> 8) & mask; 277 *rb = color & mask; 278} 279 280/** 281 * 0xAARRGGBB -> 0x00AA00GG00RR00BB 282 * (note, ARGB -> AGRB) 283 */ 284static inline uint64_t SkSplay(uint32_t color) { 285 const uint32_t mask = 0x00FF00FF; 286 uint64_t agrb = (color >> 8) & mask; // 0x0000000000AA00GG 287 agrb <<= 32; // 0x00AA00GG00000000 288 agrb |= color & mask; // 0x00AA00GG00RR00BB 289 return agrb; 290} 291 292/** 293 * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB 294 */ 295static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) { 296 const uint32_t mask = 0xFF00FF00; 297 return (ag & mask) | ((rb & mask) >> 8); 298} 299 300/** 301 * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB 302 * (note, AGRB -> ARGB) 303 */ 304static inline uint32_t SkUnsplay(uint64_t agrb) { 305 const uint32_t mask = 0xFF00FF00; 306 return SkPMColor( 307 ((agrb & mask) >> 8) | // 0x00RR00BB 308 ((agrb >> 32) & mask)); // 0xAARRGGBB 309} 310 311static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) { 312 SkASSERT(scale <= 256); 313 314 // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide. 315 uint32_t src_ag, src_rb, dst_ag, dst_rb; 316 SkSplay(src, &src_ag, &src_rb); 317 SkSplay(dst, &dst_ag, &dst_rb); 318 319 const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag; 320 const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb; 321 322 return SkUnsplay(ret_ag, ret_rb); 323} 324 325static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) { 326 SkASSERT(scale <= 256); 327 // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide. 328 return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst)); 329} 330 331// TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere. 332 333/** 334 * Same as SkFourByteInterp256, but faster. 335 */ 336static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) { 337 // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine. 338 if (sizeof(void*) == 4) { 339 return SkFastFourByteInterp256_32(src, dst, scale); 340 } else { 341 return SkFastFourByteInterp256_64(src, dst, scale); 342 } 343} 344 345/** 346 * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better 347 * srcWeight scaling to [0, 256]. 348 */ 349static inline SkPMColor SkFastFourByteInterp(SkPMColor src, 350 SkPMColor dst, 351 U8CPU srcWeight) { 352 SkASSERT(srcWeight <= 255); 353 // scale = srcWeight + (srcWeight >> 7) is more accurate than 354 // scale = srcWeight + 1, but 7% slower 355 return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7)); 356} 357 358/** 359 * Same as SkPackARGB32, but this version guarantees to not check that the 360 * values are premultiplied in the debug version. 361 */ 362static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) { 363 return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) | 364 (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT); 365} 366 367static inline 368SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) { 369 SkA32Assert(a); 370 SkR32Assert(r); 371 SkG32Assert(g); 372 SkB32Assert(b); 373 374 if (a != 255) { 375 r = SkMulDiv255Round(r, a); 376 g = SkMulDiv255Round(g, a); 377 b = SkMulDiv255Round(b, a); 378 } 379 return SkPackARGB32(a, r, g, b); 380} 381 382SK_API extern const uint32_t gMask_00FF00FF; 383 384static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) { 385 uint32_t mask = gMask_00FF00FF; 386 387 uint32_t rb = ((c & mask) * scale) >> 8; 388 uint32_t ag = ((c >> 8) & mask) * scale; 389 return (rb & mask) | (ag & ~mask); 390} 391 392static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) { 393 return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src))); 394} 395 396static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) { 397 SkASSERT((unsigned)aa <= 255); 398 399 unsigned src_scale = SkAlpha255To256(aa); 400 unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale)); 401 402 return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale); 403} 404 405//////////////////////////////////////////////////////////////////////////////////////////// 406// Convert a 32bit pixel to a 16bit pixel (no dither) 407 408#define SkR32ToR16_MACRO(r) ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS)) 409#define SkG32ToG16_MACRO(g) ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS)) 410#define SkB32ToB16_MACRO(b) ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS)) 411 412#ifdef SK_DEBUG 413 static inline unsigned SkR32ToR16(unsigned r) { 414 SkR32Assert(r); 415 return SkR32ToR16_MACRO(r); 416 } 417 static inline unsigned SkG32ToG16(unsigned g) { 418 SkG32Assert(g); 419 return SkG32ToG16_MACRO(g); 420 } 421 static inline unsigned SkB32ToB16(unsigned b) { 422 SkB32Assert(b); 423 return SkB32ToB16_MACRO(b); 424 } 425#else 426 #define SkR32ToR16(r) SkR32ToR16_MACRO(r) 427 #define SkG32ToG16(g) SkG32ToG16_MACRO(g) 428 #define SkB32ToB16(b) SkB32ToB16_MACRO(b) 429#endif 430 431#define SkPacked32ToR16(c) (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK) 432#define SkPacked32ToG16(c) (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK) 433#define SkPacked32ToB16(c) (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK) 434 435static inline U16CPU SkPixel32ToPixel16(SkPMColor c) { 436 unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT; 437 unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT; 438 unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT; 439 return r | g | b; 440} 441 442static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) { 443 return (SkR32ToR16(r) << SK_R16_SHIFT) | 444 (SkG32ToG16(g) << SK_G16_SHIFT) | 445 (SkB32ToB16(b) << SK_B16_SHIFT); 446} 447 448#define SkPixel32ToPixel16_ToU16(src) SkToU16(SkPixel32ToPixel16(src)) 449 450///////////////////////////////////////////////////////////////////////////////////////// 451// Fast dither from 32->16 452 453#define SkShouldDitherXY(x, y) (((x) ^ (y)) & 1) 454 455static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) { 456 r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS); 457 g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS); 458 b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS); 459 460 return SkPackRGB16(r, g, b); 461} 462 463static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) { 464 return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c)); 465} 466 467/* Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits) 468 It is now suitable for combining with a scaled expanded_rgb_16 color 469 as in SkSrcOver32To16(). 470 We must do this 565 high-bit replication, in order for the subsequent add 471 to saturate properly (and not overflow). If we take the 8 bits as is, it is 472 possible to overflow. 473*/ 474static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) { 475 unsigned sr = SkPacked32ToR16(c); 476 unsigned sg = SkPacked32ToG16(c); 477 unsigned sb = SkPacked32ToB16(c); 478 479 sr = (sr << 5) | sr; 480 sg = (sg << 5) | (sg >> 1); 481 sb = (sb << 5) | sb; 482 return (sr << 11) | (sg << 21) | (sb << 0); 483} 484 485/* SrcOver the 32bit src color with the 16bit dst, returning a 16bit value 486 (with dirt in the high 16bits, so caller beware). 487*/ 488static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) { 489 unsigned sr = SkGetPackedR32(src); 490 unsigned sg = SkGetPackedG32(src); 491 unsigned sb = SkGetPackedB32(src); 492 493 unsigned dr = SkGetPackedR16(dst); 494 unsigned dg = SkGetPackedG16(dst); 495 unsigned db = SkGetPackedB16(dst); 496 497 unsigned isa = 255 - SkGetPackedA32(src); 498 499 dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS); 500 dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS); 501 db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS); 502 503 return SkPackRGB16(dr, dg, db); 504} 505 506//////////////////////////////////////////////////////////////////////////////////////////// 507// Convert a 16bit pixel to a 32bit pixel 508 509static inline unsigned SkR16ToR32(unsigned r) { 510 return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8)); 511} 512 513static inline unsigned SkG16ToG32(unsigned g) { 514 return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8)); 515} 516 517static inline unsigned SkB16ToB32(unsigned b) { 518 return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8)); 519} 520 521#define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c)) 522#define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c)) 523#define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c)) 524 525static inline SkPMColor SkPixel16ToPixel32(U16CPU src) { 526 SkASSERT(src == SkToU16(src)); 527 528 unsigned r = SkPacked16ToR32(src); 529 unsigned g = SkPacked16ToG32(src); 530 unsigned b = SkPacked16ToB32(src); 531 532 SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src)); 533 SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src)); 534 SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src)); 535 536 return SkPackARGB32(0xFF, r, g, b); 537} 538 539// similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor 540static inline SkColor SkPixel16ToColor(U16CPU src) { 541 SkASSERT(src == SkToU16(src)); 542 543 unsigned r = SkPacked16ToR32(src); 544 unsigned g = SkPacked16ToG32(src); 545 unsigned b = SkPacked16ToB32(src); 546 547 SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src)); 548 SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src)); 549 SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src)); 550 551 return SkColorSetRGB(r, g, b); 552} 553 554/////////////////////////////////////////////////////////////////////////////// 555 556typedef uint16_t SkPMColor16; 557 558// Put in OpenGL order (r g b a) 559#define SK_A4444_SHIFT 0 560#define SK_R4444_SHIFT 12 561#define SK_G4444_SHIFT 8 562#define SK_B4444_SHIFT 4 563 564#define SkA32To4444(a) ((unsigned)(a) >> 4) 565#define SkR32To4444(r) ((unsigned)(r) >> 4) 566#define SkG32To4444(g) ((unsigned)(g) >> 4) 567#define SkB32To4444(b) ((unsigned)(b) >> 4) 568 569static inline U8CPU SkReplicateNibble(unsigned nib) { 570 SkASSERT(nib <= 0xF); 571 return (nib << 4) | nib; 572} 573 574#define SkA4444ToA32(a) SkReplicateNibble(a) 575#define SkR4444ToR32(r) SkReplicateNibble(r) 576#define SkG4444ToG32(g) SkReplicateNibble(g) 577#define SkB4444ToB32(b) SkReplicateNibble(b) 578 579#define SkGetPackedA4444(c) (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF) 580#define SkGetPackedR4444(c) (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF) 581#define SkGetPackedG4444(c) (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF) 582#define SkGetPackedB4444(c) (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF) 583 584#define SkPacked4444ToA32(c) SkReplicateNibble(SkGetPackedA4444(c)) 585#define SkPacked4444ToR32(c) SkReplicateNibble(SkGetPackedR4444(c)) 586#define SkPacked4444ToG32(c) SkReplicateNibble(SkGetPackedG4444(c)) 587#define SkPacked4444ToB32(c) SkReplicateNibble(SkGetPackedB4444(c)) 588 589#ifdef SK_DEBUG 590static inline void SkPMColor16Assert(U16CPU c) { 591 unsigned a = SkGetPackedA4444(c); 592 unsigned r = SkGetPackedR4444(c); 593 unsigned g = SkGetPackedG4444(c); 594 unsigned b = SkGetPackedB4444(c); 595 596 SkASSERT(a <= 0xF); 597 SkASSERT(r <= a); 598 SkASSERT(g <= a); 599 SkASSERT(b <= a); 600} 601#else 602#define SkPMColor16Assert(c) 603#endif 604 605static inline unsigned SkAlpha15To16(unsigned a) { 606 SkASSERT(a <= 0xF); 607 return a + (a >> 3); 608} 609 610#ifdef SK_DEBUG 611 static inline int SkAlphaMul4(int value, int scale) { 612 SkASSERT((unsigned)scale <= 0x10); 613 return value * scale >> 4; 614 } 615#else 616 #define SkAlphaMul4(value, scale) ((value) * (scale) >> 4) 617#endif 618 619static inline unsigned SkR4444ToR565(unsigned r) { 620 SkASSERT(r <= 0xF); 621 return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS)); 622} 623 624static inline unsigned SkG4444ToG565(unsigned g) { 625 SkASSERT(g <= 0xF); 626 return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS)); 627} 628 629static inline unsigned SkB4444ToB565(unsigned b) { 630 SkASSERT(b <= 0xF); 631 return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS)); 632} 633 634static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r, 635 unsigned g, unsigned b) { 636 SkASSERT(a <= 0xF); 637 SkASSERT(r <= a); 638 SkASSERT(g <= a); 639 SkASSERT(b <= a); 640 641 return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) | 642 (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT)); 643} 644 645extern const uint16_t gMask_0F0F; 646 647static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) { 648 SkASSERT(scale <= 16); 649 650 const unsigned mask = 0xF0F; //gMask_0F0F; 651 652#if 0 653 unsigned rb = ((c & mask) * scale) >> 4; 654 unsigned ag = ((c >> 4) & mask) * scale; 655 return (rb & mask) | (ag & ~mask); 656#else 657 c = (c & mask) | ((c & (mask << 4)) << 12); 658 c = c * scale >> 4; 659 return (c & mask) | ((c >> 12) & (mask << 4)); 660#endif 661} 662 663/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at 664 once by a value up to 16. Used in conjunction with SkCompact_4444. 665*/ 666static inline uint32_t SkExpand_4444(U16CPU c) { 667 SkASSERT(c == (uint16_t)c); 668 669 const unsigned mask = 0xF0F; //gMask_0F0F; 670 return (c & mask) | ((c & ~mask) << 12); 671} 672 673/** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16. 674 NOTE: this explicitly does not clean the top 16 bits (which may be garbage). 675 It does this for speed, since if it is being written directly to 16bits of 676 memory, the top 16bits will be ignored. Casting the result to uint16_t here 677 would add 2 more instructions, slow us down. It is up to the caller to 678 perform the cast if needed. 679*/ 680static inline U16CPU SkCompact_4444(uint32_t c) { 681 const unsigned mask = 0xF0F; //gMask_0F0F; 682 return (c & mask) | ((c >> 12) & ~mask); 683} 684 685static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) { 686 unsigned sa = SkGetPackedA4444(s); 687 unsigned sr = SkR4444ToR565(SkGetPackedR4444(s)); 688 unsigned sg = SkG4444ToG565(SkGetPackedG4444(s)); 689 unsigned sb = SkB4444ToB565(SkGetPackedB4444(s)); 690 691 // To avoid overflow, we have to clear the low bit of the synthetic sg 692 // if the src alpha is <= 7. 693 // to see why, try blending 0x4444 on top of 565-white and watch green 694 // overflow (sum == 64) 695 sg &= ~(~(sa >> 3) & 1); 696 697 unsigned scale = SkAlpha15To16(15 - sa); 698 unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale); 699 unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale); 700 unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale); 701 702#if 0 703 if (sg + dg > 63) { 704 SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg); 705 } 706#endif 707 return SkPackRGB16(sr + dr, sg + dg, sb + db); 708} 709 710static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) { 711 SkASSERT((unsigned)scale16 <= 16); 712 713 return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst); 714} 715 716static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) { 717 SkASSERT((unsigned)scale16 <= 16); 718 719 uint32_t src32 = SkExpand_4444(src) * scale16; 720 // the scaled srcAlpha is the bottom byte 721#ifdef SK_DEBUG 722 { 723 unsigned srcA = SkGetPackedA4444(src) * scale16; 724 SkASSERT(srcA == (src32 & 0xFF)); 725 } 726#endif 727 unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4; 728 uint32_t dst32 = SkExpand_4444(dst) * dstScale; 729 return SkCompact_4444((src32 + dst32) >> 4); 730} 731 732static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) { 733 uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) | 734 (SkGetPackedR4444(c) << SK_R32_SHIFT) | 735 (SkGetPackedG4444(c) << SK_G32_SHIFT) | 736 (SkGetPackedB4444(c) << SK_B32_SHIFT); 737 return d | (d << 4); 738} 739 740static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) { 741 return (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) | 742 (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) | 743 (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) | 744 (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT); 745} 746 747// cheap 2x2 dither 748static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r, 749 U8CPU g, U8CPU b) { 750 // to ensure that we stay a legal premultiplied color, we take the max() 751 // of the truncated and dithered alpha values. If we didn't, cases like 752 // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...) 753 // which is not legal premultiplied, since a < color 754 unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4; 755 a = SkMax32(a >> 4, dithered_a); 756 // these we just dither in place 757 r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4; 758 g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4; 759 b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4; 760 761 return SkPackARGB4444(a, r, g, b); 762} 763 764static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) { 765 return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c), 766 SkGetPackedG32(c), SkGetPackedB32(c)); 767} 768 769/* Assumes 16bit is in standard RGBA order. 770 Transforms a normal ARGB_8888 into the same byte order as 771 expanded ARGB_4444, but keeps each component 8bits 772*/ 773static inline uint32_t SkExpand_8888(SkPMColor c) { 774 return (((c >> SK_R32_SHIFT) & 0xFF) << 24) | 775 (((c >> SK_G32_SHIFT) & 0xFF) << 8) | 776 (((c >> SK_B32_SHIFT) & 0xFF) << 16) | 777 (((c >> SK_A32_SHIFT) & 0xFF) << 0); 778} 779 780/* Undo the operation of SkExpand_8888, turning the argument back into 781 a SkPMColor. 782*/ 783static inline SkPMColor SkCompact_8888(uint32_t c) { 784 return (((c >> 24) & 0xFF) << SK_R32_SHIFT) | 785 (((c >> 8) & 0xFF) << SK_G32_SHIFT) | 786 (((c >> 16) & 0xFF) << SK_B32_SHIFT) | 787 (((c >> 0) & 0xFF) << SK_A32_SHIFT); 788} 789 790/* Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format, 791 but this routine just keeps the high 4bits of each component in the low 792 4bits of the result (just like a newly expanded PMColor16). 793*/ 794static inline uint32_t SkExpand32_4444(SkPMColor c) { 795 return (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) | 796 (((c >> (SK_G32_SHIFT + 4)) & 0xF) << 8) | 797 (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) | 798 (((c >> (SK_A32_SHIFT + 4)) & 0xF) << 0); 799} 800 801// takes two values and alternamtes them as part of a memset16 802// used for cheap 2x2 dithering when the colors are opaque 803void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n); 804 805/////////////////////////////////////////////////////////////////////////////// 806 807static inline int SkUpscale31To32(int value) { 808 SkASSERT((unsigned)value <= 31); 809 return value + (value >> 4); 810} 811 812static inline int SkBlend32(int src, int dst, int scale) { 813 SkASSERT((unsigned)src <= 0xFF); 814 SkASSERT((unsigned)dst <= 0xFF); 815 SkASSERT((unsigned)scale <= 32); 816 return dst + ((src - dst) * scale >> 5); 817} 818 819static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB, 820 SkPMColor dst, uint16_t mask) { 821 if (mask == 0) { 822 return dst; 823 } 824 825 /* We want all of these in 5bits, hence the shifts in case one of them 826 * (green) is 6bits. 827 */ 828 int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5); 829 int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5); 830 int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5); 831 832 // Now upscale them to 0..32, so we can use blend32 833 maskR = SkUpscale31To32(maskR); 834 maskG = SkUpscale31To32(maskG); 835 maskB = SkUpscale31To32(maskB); 836 837 // srcA has been upscaled to 256 before passed into this function 838 maskR = maskR * srcA >> 8; 839 maskG = maskG * srcA >> 8; 840 maskB = maskB * srcA >> 8; 841 842 int dstR = SkGetPackedR32(dst); 843 int dstG = SkGetPackedG32(dst); 844 int dstB = SkGetPackedB32(dst); 845 846 // LCD blitting is only supported if the dst is known/required 847 // to be opaque 848 return SkPackARGB32(0xFF, 849 SkBlend32(srcR, dstR, maskR), 850 SkBlend32(srcG, dstG, maskG), 851 SkBlend32(srcB, dstB, maskB)); 852} 853 854static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB, 855 SkPMColor dst, uint16_t mask, 856 SkPMColor opaqueDst) { 857 if (mask == 0) { 858 return dst; 859 } 860 861 if (0xFFFF == mask) { 862 return opaqueDst; 863 } 864 865 /* We want all of these in 5bits, hence the shifts in case one of them 866 * (green) is 6bits. 867 */ 868 int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5); 869 int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5); 870 int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5); 871 872 // Now upscale them to 0..32, so we can use blend32 873 maskR = SkUpscale31To32(maskR); 874 maskG = SkUpscale31To32(maskG); 875 maskB = SkUpscale31To32(maskB); 876 877 int dstR = SkGetPackedR32(dst); 878 int dstG = SkGetPackedG32(dst); 879 int dstB = SkGetPackedB32(dst); 880 881 // LCD blitting is only supported if the dst is known/required 882 // to be opaque 883 return SkPackARGB32(0xFF, 884 SkBlend32(srcR, dstR, maskR), 885 SkBlend32(srcG, dstG, maskG), 886 SkBlend32(srcB, dstB, maskB)); 887} 888 889static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[], 890 SkColor src, int width, SkPMColor) { 891 int srcA = SkColorGetA(src); 892 int srcR = SkColorGetR(src); 893 int srcG = SkColorGetG(src); 894 int srcB = SkColorGetB(src); 895 896 srcA = SkAlpha255To256(srcA); 897 898 for (int i = 0; i < width; i++) { 899 dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]); 900 } 901} 902 903static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[], 904 SkColor src, int width, 905 SkPMColor opaqueDst) { 906 int srcR = SkColorGetR(src); 907 int srcG = SkColorGetG(src); 908 int srcB = SkColorGetB(src); 909 910 for (int i = 0; i < width; i++) { 911 dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i], 912 opaqueDst); 913 } 914} 915 916#endif 917