SkBitmapProcState.cpp revision ce1f3cc1e22a50caaaaded7b91d9492b3ae5901c
1 2/* 3 * Copyright 2011 Google Inc. 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8#include "SkBitmapProcState.h" 9#include "SkColorPriv.h" 10#include "SkFilterProc.h" 11#include "SkPaint.h" 12#include "SkShader.h" // for tilemodes 13#include "SkUtilsArm.h" 14 15#if !SK_ARM_NEON_IS_NONE 16// These are defined in src/opts/SkBitmapProcState_arm_neon.cpp 17extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[]; 18extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[]; 19extern void S16_D16_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, uint16_t*); 20extern void Clamp_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int); 21extern void Repeat_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int); 22extern void SI8_opaque_D32_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, SkPMColor*); 23extern void SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int); 24extern void Clamp_SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int); 25#endif 26 27#if !SK_ARM_NEON_IS_ALWAYS 28#define NAME_WRAP(x) x 29#include "SkBitmapProcState_filter.h" 30#include "SkBitmapProcState_procs.h" 31#endif 32 33/////////////////////////////////////////////////////////////////////////////// 34 35/** 36 * For the purposes of drawing bitmaps, if a matrix is "almost" translate 37 * go ahead and treat it as if it were, so that subsequent code can go fast. 
 */
// Returns true if |matrix| can be treated as a pure integer translate when
// tiling is clamp/clamp: rotate/skew/perspective disqualify it, and a scale
// qualifies only if it maps the bitmap back onto its own exact dimensions
// (so sampling visits the same pixels as an unscaled draw would).
static bool just_trans_clamp(const SkMatrix& matrix, const SkBitmap& bitmap) {
    SkMatrix::TypeMask mask = matrix.getType();

    if (mask & (SkMatrix::kAffine_Mask | SkMatrix::kPerspective_Mask)) {
        return false;
    }
    if (mask & SkMatrix::kScale_Mask) {
        // does the scale round the bitmap's dimensions back onto themselves?
        SkScalar sx = matrix[SkMatrix::kMScaleX];
        SkScalar sy = matrix[SkMatrix::kMScaleY];
        int w = bitmap.width();
        int h = bitmap.height();
        int sw = SkScalarRound(SkScalarMul(sx, SkIntToScalar(w)));
        int sh = SkScalarRound(SkScalarMul(sy, SkIntToScalar(h)));
        return sw == w && sh == h;
    }
    // if we got here, we're either kTranslate_Mask or identity
    return true;
}

// Returns true if |matrix| is translate/identity, tolerating per-axis scale
// factors within 1/32768 of 1. Used for tile modes other than clamp/clamp,
// where just_trans_clamp's exact-dimension trick does not apply.
static bool just_trans_general(const SkMatrix& matrix) {
    SkMatrix::TypeMask mask = matrix.getType();

    if (mask & (SkMatrix::kAffine_Mask | SkMatrix::kPerspective_Mask)) {
        return false;
    }
    if (mask & SkMatrix::kScale_Mask) {
        const SkScalar tol = SK_Scalar1 / 32768;

        if (!SkScalarNearlyZero(matrix[SkMatrix::kMScaleX] - SK_Scalar1, tol)) {
            return false;
        }
        if (!SkScalarNearlyZero(matrix[SkMatrix::kMScaleY] - SK_Scalar1, tol)) {
            return false;
        }
    }
    // if we got here, treat us as either kTranslate_Mask or identity
    return true;
}

///////////////////////////////////////////////////////////////////////////////

static bool valid_for_filtering(unsigned dimension) {
    // for filtering, width and height must fit in 14 bits, since we steal
    // 2 bits from each to store our 4-bit subpixel data
    return (dimension & ~0x3FFF) == 0;
}

// Finishes configuring this state from the inverse matrix and paint:
// - picks fBitmap (possibly a mip level of fOrigBitmap),
// - snaps an "almost translate" matrix to an exact integer translate,
// - caches the inverse-matrix derived fields (fInvSx, fInvKy, ...),
// - selects fMatrixProc, fSampleProc16/32 and optional fShaderProc16/32.
// Returns false if the bitmap is empty, no matrix proc can be chosen, or the
// bitmap config is unsupported.
bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
    if (fOrigBitmap.width() == 0 || fOrigBitmap.height() == 0) {
        return false;
    }

    const SkMatrix* m;
    bool trivial_matrix = (inv.getType() & ~SkMatrix::kTranslate_Mask) == 0;
    bool clamp_clamp = SkShader::kClamp_TileMode == fTileModeX &&
                       SkShader::kClamp_TileMode == fTileModeY;

    if (clamp_clamp || trivial_matrix) {
        m = &inv;
    } else {
        // non-clamp tiling wants a matrix in unit space, so divide out the
        // bitmap's dimensions
        fUnitInvMatrix = inv;
        fUnitInvMatrix.postIDiv(fOrigBitmap.width(), fOrigBitmap.height());
        m = &fUnitInvMatrix;
    }

    fBitmap = &fOrigBitmap;
    if (fOrigBitmap.hasMipMap()) {
        int shift = fOrigBitmap.extractMipLevel(&fMipBitmap,
                                                SkScalarToFixed(m->getScaleX()),
                                                SkScalarToFixed(m->getSkewY()));

        if (shift > 0) {
            if (m != &fUnitInvMatrix) {
                fUnitInvMatrix = *m;
                m = &fUnitInvMatrix;
            }

            // compensate the matrix for the smaller mip level we selected
            SkScalar scale = SkFixedToScalar(SK_Fixed1 >> shift);
            fUnitInvMatrix.postScale(scale, scale);

            // now point here instead of fOrigBitmap
            fBitmap = &fMipBitmap;
        }
    }

    // whack our matrix to exactly no-scale, if we're really close to begin with
    {
        bool fixupMatrix = clamp_clamp ?
                           just_trans_clamp(*m, *fBitmap) : just_trans_general(*m);
        if (fixupMatrix) {
#ifdef SK_IGNORE_TRANS_CLAMP_FIX
            if (m != &fUnitInvMatrix) {    // can't mutate the original
                fUnitInvMatrix = inv;
                m = &fUnitInvMatrix;
            }
            fUnitInvMatrix.set(SkMatrix::kMScaleX, SK_Scalar1);
            fUnitInvMatrix.set(SkMatrix::kMScaleY, SK_Scalar1);
#else
            // If we can be treated just like translate, construct that inverse
            // such that we landed in the proper place. Given that m may have
            // some slight scale, we have to invert it to compute this new
            // matrix.
            SkMatrix forward;
            if (m->invert(&forward)) {
                SkScalar tx = -SkScalarRoundToScalar(forward.getTranslateX());
                SkScalar ty = -SkScalarRoundToScalar(forward.getTranslateY());
                fUnitInvMatrix.setTranslate(tx, ty);
                m = &fUnitInvMatrix;
                // now the following code will sniff m, and decide to take the
                // fast case (since m is purely translate).
            }
#endif
        }
    }

    // Below this point, we should never refer to the inv parameter, since we
    // may be using a munged version for "our" inverse.

    fInvMatrix = m;
    fInvProc = m->getMapXYProc();
    fInvType = m->getType();
    fInvSx = SkScalarToFixed(m->getScaleX());
    fInvSxFractionalInt = SkScalarToFractionalInt(m->getScaleX());
    fInvKy = SkScalarToFixed(m->getSkewY());
    fInvKyFractionalInt = SkScalarToFractionalInt(m->getSkewY());

    fAlphaScale = SkAlpha255To256(paint.getAlpha());

    // pick-up filtering from the paint, but only if the matrix is
    // more complex than identity/translate (i.e. no need to pay the cost
    // of filtering if we're not scaled etc.).
    // note: we explicitly check inv, since m might be scaled due to unitinv
    // trickery, but we don't want to see that for this test
    fDoFilter = paint.isFilterBitmap() &&
                (fInvType > SkMatrix::kTranslate_Mask &&
                 valid_for_filtering(fBitmap->width() | fBitmap->height()));

    fShaderProc32 = NULL;
    fShaderProc16 = NULL;
    fSampleProc32 = NULL;
    fSampleProc16 = NULL;

    fMatrixProc = this->chooseMatrixProc(trivial_matrix);
    if (NULL == fMatrixProc) {
        return false;
    }

    ///////////////////////////////////////////////////////////////////////

    // Build the dispatch index for the sample-proc tables below:
    //   bit 0: alpha-scaled paint (D32 only)
    //   bit 1: scale/translate-only matrix (vs. general affine/perspective)
    //   bit 2: filtering
    //   bits 3..5: source bitmap format
    int index = 0;
    if (fAlphaScale < 256) {  // note: this distinction is not used for D16
        index |= 1;
    }
    if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
        index |= 2;
    }
    if (fDoFilter) {
        index |= 4;
    }
    // bits 3,4,5 encoding the source bitmap format
    switch (fBitmap->config()) {
        case SkBitmap::kARGB_8888_Config:
            index |= 0;
            break;
        case SkBitmap::kRGB_565_Config:
            index |= 8;
            break;
        case SkBitmap::kIndex8_Config:
            index |= 16;
            break;
        case SkBitmap::kARGB_4444_Config:
            index |= 24;
            break;
        case SkBitmap::kA8_Config:
            index |= 32;
            fPaintPMColor = SkPreMultiplyColor(paint.getColor());
            break;
        default:
            return false;
    }

#if !SK_ARM_NEON_IS_ALWAYS
    // Table order must match the index bits built above; the NEON build uses
    // the parallel gSkBitmapProcStateSample32_neon table instead.
    static const SampleProc32 gSkBitmapProcStateSample32[] = {
        S32_opaque_D32_nofilter_DXDY,
        S32_alpha_D32_nofilter_DXDY,
        S32_opaque_D32_nofilter_DX,
        S32_alpha_D32_nofilter_DX,
        S32_opaque_D32_filter_DXDY,
        S32_alpha_D32_filter_DXDY,
        S32_opaque_D32_filter_DX,
        S32_alpha_D32_filter_DX,

        S16_opaque_D32_nofilter_DXDY,
        S16_alpha_D32_nofilter_DXDY,
        S16_opaque_D32_nofilter_DX,
        S16_alpha_D32_nofilter_DX,
        S16_opaque_D32_filter_DXDY,
        S16_alpha_D32_filter_DXDY,
        S16_opaque_D32_filter_DX,
        S16_alpha_D32_filter_DX,

        SI8_opaque_D32_nofilter_DXDY,
        SI8_alpha_D32_nofilter_DXDY,
        SI8_opaque_D32_nofilter_DX,
        SI8_alpha_D32_nofilter_DX,
        SI8_opaque_D32_filter_DXDY,
        SI8_alpha_D32_filter_DXDY,
        SI8_opaque_D32_filter_DX,
        SI8_alpha_D32_filter_DX,

        S4444_opaque_D32_nofilter_DXDY,
        S4444_alpha_D32_nofilter_DXDY,
        S4444_opaque_D32_nofilter_DX,
        S4444_alpha_D32_nofilter_DX,
        S4444_opaque_D32_filter_DXDY,
        S4444_alpha_D32_filter_DXDY,
        S4444_opaque_D32_filter_DX,
        S4444_alpha_D32_filter_DX,

        // A8 treats alpha/opaque the same (equally efficient)
        SA8_alpha_D32_nofilter_DXDY,
        SA8_alpha_D32_nofilter_DXDY,
        SA8_alpha_D32_nofilter_DX,
        SA8_alpha_D32_nofilter_DX,
        SA8_alpha_D32_filter_DXDY,
        SA8_alpha_D32_filter_DXDY,
        SA8_alpha_D32_filter_DX,
        SA8_alpha_D32_filter_DX
    };

    static const SampleProc16 gSkBitmapProcStateSample16[] = {
        S32_D16_nofilter_DXDY,
        S32_D16_nofilter_DX,
        S32_D16_filter_DXDY,
        S32_D16_filter_DX,

        S16_D16_nofilter_DXDY,
        S16_D16_nofilter_DX,
        S16_D16_filter_DXDY,
        S16_D16_filter_DX,

        SI8_D16_nofilter_DXDY,
        SI8_D16_nofilter_DX,
        SI8_D16_filter_DXDY,
        SI8_D16_filter_DX,

        // Don't support 4444 -> 565
        NULL, NULL, NULL, NULL,
        // Don't support A8 -> 565
        NULL, NULL, NULL, NULL
    };
#endif

    fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index];
    index >>= 1;    // shift away any opaque/alpha distinction
    fSampleProc16 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample16)[index];

    // our special-case shaderprocs
    if (SK_ARM_NEON_WRAP(S16_D16_filter_DX) == fSampleProc16) {
        if (clamp_clamp) {
            fShaderProc16 = SK_ARM_NEON_WRAP(Clamp_S16_D16_filter_DX_shaderproc);
        } else if (SkShader::kRepeat_TileMode == fTileModeX &&
                   SkShader::kRepeat_TileMode == fTileModeY) {
            fShaderProc16 = SK_ARM_NEON_WRAP(Repeat_S16_D16_filter_DX_shaderproc);
        }
    } else if (SK_ARM_NEON_WRAP(SI8_opaque_D32_filter_DX) == fSampleProc32 && clamp_clamp) {
        fShaderProc32 = SK_ARM_NEON_WRAP(Clamp_SI8_opaque_D32_filter_DX_shaderproc);
    }

    if (NULL == fShaderProc32) {
        fShaderProc32 = this->chooseShaderProc32();
    }

    // see if our platform has any accelerated overrides
    this->platformProcs();
    return true;
}

// ShaderProc32 for a pure integer-translate matrix with clamp/clamp tiling
// and no filtering. The span is handled in three segments: replicate the
// leftmost pixel while x < 0, memcpy the in-range middle, then replicate the
// rightmost pixel for the remainder.
static void Clamp_S32_D32_nofilter_trans_shaderproc(const SkBitmapProcState& s,
                                                    int x, int y,
                                                    SkPMColor* SK_RESTRICT colors,
                                                    int count) {
    SkASSERT(((s.fInvType & ~SkMatrix::kTranslate_Mask)) == 0);
    SkASSERT(s.fInvKy == 0);
    SkASSERT(count > 0 && colors != NULL);
    SkASSERT(!s.fDoFilter);

    const int maxX = s.fBitmap->width() - 1;
    const int maxY = s.fBitmap->height() - 1;
    // fFilterOneX/Y were repurposed by setupForTranslate() to hold the
    // integer device->src offsets
    int ix = s.fFilterOneX + x;
    int iy = SkClampMax(s.fFilterOneY + y, maxY);
#ifdef SK_DEBUG
    {
        // verify the fast offsets agree with the full inverse-matrix mapping
        SkPoint pt;
        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                   SkIntToScalar(y) + SK_ScalarHalf, &pt);
        int iy2 = SkClampMax(SkScalarFloorToInt(pt.fY), maxY);
        int ix2 = SkScalarFloorToInt(pt.fX);

        SkASSERT(iy == iy2);
        SkASSERT(ix == ix2);
    }
#endif
    const SkPMColor* row = s.fBitmap->getAddr32(0, iy);

    // clamp to the left
    if (ix < 0) {
        int n = SkMin32(-ix, count);
        sk_memset32(colors, row[0], n);
        count -= n;
        if (0 == count) {
            return;
        }
        colors += n;
        SkASSERT(-ix == n);
        ix = 0;
    }
    // copy the middle
    if (ix <= maxX) {
        int n = SkMin32(maxX - ix + 1, count);
        memcpy(colors, row + ix, n * sizeof(SkPMColor));
        count -= n;
        if (0 == count) {
            return;
        }
        colors += n;
    }
    SkASSERT(count > 0);
    // clamp to the right
    sk_memset32(colors, row[maxX], count);
}

// Returns x mod n with the result always in [0, n). The negative branch is
// written as n + ~(~x % n) so that x is never negated directly, which would
// overflow for x == INT_MIN.
static inline int sk_int_mod(int x, int n) {
    SkASSERT(n > 0);
    if ((unsigned)x >= (unsigned)n) {
        if (x < 0) {
            x = n + ~(~x % n);
        } else {
            x = x % n;
        }
    }
    return x;
}

// ShaderProc32 for a pure integer-translate matrix with repeat/repeat tiling
// and no filtering: wraps the starting column with sk_int_mod, then memcpys
// row segments, restarting at column 0 each time the right edge is reached.
static void Repeat_S32_D32_nofilter_trans_shaderproc(const SkBitmapProcState& s,
                                                     int x, int y,
                                                     SkPMColor* SK_RESTRICT colors,
                                                     int count) {
    SkASSERT(((s.fInvType & ~SkMatrix::kTranslate_Mask)) == 0);
    SkASSERT(s.fInvKy == 0);
    SkASSERT(count > 0 && colors != NULL);
    SkASSERT(!s.fDoFilter);

    const int stopX = s.fBitmap->width();
    const int stopY = s.fBitmap->height();
    int ix = s.fFilterOneX + x;
    int iy = sk_int_mod(s.fFilterOneY + y, stopY);
#ifdef SK_DEBUG
    {
        // verify the fast offsets agree with the full inverse-matrix mapping
        SkPoint pt;
        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                   SkIntToScalar(y) + SK_ScalarHalf, &pt);
        int iy2 = sk_int_mod(SkScalarFloorToInt(pt.fY), stopY);
        int ix2 = SkScalarFloorToInt(pt.fX);

        SkASSERT(iy == iy2);
        SkASSERT(ix == ix2);
    }
#endif
    const SkPMColor* row = s.fBitmap->getAddr32(0, iy);

    ix = sk_int_mod(ix, stopX);
    for (;;) {
        int n = SkMin32(stopX - ix, count);
        memcpy(colors, row + ix, n * sizeof(SkPMColor));
        count -= n;
        if (0 == count) {
            return;
        }
        colors += n;
        ix = 0;
    }
}

static void DoNothing_shaderproc(const SkBitmapProcState&, int x, int y,
                                 SkPMColor* SK_RESTRICT colors, int count) {
    // if we get called, the matrix is too tricky, so we just draw nothing
    sk_memset32(colors, 0, count);
}

// Precomputes the integer device->src offsets for the translate-only fast
// paths, stashing them in fFilterOneX/fFilterOneY. Returns false when the
// translation is too large to manipulate safely as an int.
bool SkBitmapProcState::setupForTranslate() {
    SkPoint pt;
    fInvProc(*fInvMatrix, SK_ScalarHalf, SK_ScalarHalf, &pt);

    /*
     *  if the translate is larger than our ints, we can get random results, or
     *  worse, we might get 0x80000000, which wreaks havoc on us, since we can't
     *  negate it.
     */
    const SkScalar too_big = SkIntToScalar(1 << 30);
    if (SkScalarAbs(pt.fX) > too_big || SkScalarAbs(pt.fY) > too_big) {
        return false;
    }

    // Since we know we're not filtered, we re-purpose these fields to allow
    // us to go from device -> src coordinates w/ just an integer add,
    // rather than running through the inverse-matrix
    fFilterOneX = SkScalarFloorToInt(pt.fX);
    fFilterOneY = SkScalarFloorToInt(pt.fY);
    return true;
}

// Returns a whole-span shader proc when the state is the simplest possible
// case (opaque paint, translate-only matrix, no filtering, 8888 source) with
// clamp/clamp or repeat/repeat tiling; otherwise NULL so the caller falls
// back to the per-sample procs.
SkBitmapProcState::ShaderProc32 SkBitmapProcState::chooseShaderProc32() {
    if (fAlphaScale < 256) {
        return NULL;
    }
    if (fInvType > SkMatrix::kTranslate_Mask) {
        return NULL;
    }
    if (fDoFilter) {
        return NULL;
    }
    if (SkBitmap::kARGB_8888_Config != fBitmap->config()) {
        return NULL;
    }

    SkShader::TileMode tx = (SkShader::TileMode)fTileModeX;
    SkShader::TileMode ty = (SkShader::TileMode)fTileModeY;

    if (SkShader::kClamp_TileMode == tx && SkShader::kClamp_TileMode == ty) {
        if (this->setupForTranslate()) {
            return Clamp_S32_D32_nofilter_trans_shaderproc;
        }
        return DoNothing_shaderproc;
    }
    if (SkShader::kRepeat_TileMode == tx && SkShader::kRepeat_TileMode == ty) {
        if (this->setupForTranslate()) {
            return Repeat_S32_D32_nofilter_trans_shaderproc;
        }
        return DoNothing_shaderproc;
    }
    return NULL;
}

///////////////////////////////////////////////////////////////////////////////

#ifdef SK_DEBUG

// Debug validator for the scale/translate nofilter format: one 32-bit Y
// value followed by count 16-bit X values.
static void check_scale_nofilter(uint32_t bitmapXY[], int count,
                                 unsigned mx, unsigned my) {
    unsigned y = *bitmapXY++;
    SkASSERT(y < my);

    const uint16_t* xptr = reinterpret_cast<const uint16_t*>(bitmapXY);
    for (int i = 0; i < count; ++i) {
        SkASSERT(xptr[i] < mx);
    }
}

498static void check_scale_filter(uint32_t bitmapXY[], int count, 499 unsigned mx, unsigned my) { 500 uint32_t YY = *bitmapXY++; 501 unsigned y0 = YY >> 18; 502 unsigned y1 = YY & 0x3FFF; 503 SkASSERT(y0 < my); 504 SkASSERT(y1 < my); 505 506 for (int i = 0; i < count; ++i) { 507 uint32_t XX = bitmapXY[i]; 508 unsigned x0 = XX >> 18; 509 unsigned x1 = XX & 0x3FFF; 510 SkASSERT(x0 < mx); 511 SkASSERT(x1 < mx); 512 } 513} 514 515static void check_affine_nofilter(uint32_t bitmapXY[], int count, 516 unsigned mx, unsigned my) { 517 for (int i = 0; i < count; ++i) { 518 uint32_t XY = bitmapXY[i]; 519 unsigned x = XY & 0xFFFF; 520 unsigned y = XY >> 16; 521 SkASSERT(x < mx); 522 SkASSERT(y < my); 523 } 524} 525 526static void check_affine_filter(uint32_t bitmapXY[], int count, 527 unsigned mx, unsigned my) { 528 for (int i = 0; i < count; ++i) { 529 uint32_t YY = *bitmapXY++; 530 unsigned y0 = YY >> 18; 531 unsigned y1 = YY & 0x3FFF; 532 SkASSERT(y0 < my); 533 SkASSERT(y1 < my); 534 535 uint32_t XX = *bitmapXY++; 536 unsigned x0 = XX >> 18; 537 unsigned x1 = XX & 0x3FFF; 538 SkASSERT(x0 < mx); 539 SkASSERT(x1 < mx); 540 } 541} 542 543void SkBitmapProcState::DebugMatrixProc(const SkBitmapProcState& state, 544 uint32_t bitmapXY[], int count, 545 int x, int y) { 546 SkASSERT(bitmapXY); 547 SkASSERT(count > 0); 548 549 state.fMatrixProc(state, bitmapXY, count, x, y); 550 551 void (*proc)(uint32_t bitmapXY[], int count, unsigned mx, unsigned my); 552 553 // There are four formats possible: 554 // scale -vs- affine 555 // filter -vs- nofilter 556 if (state.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) { 557 proc = state.fDoFilter ? check_scale_filter : check_scale_nofilter; 558 } else { 559 proc = state.fDoFilter ? 
check_affine_filter : check_affine_nofilter; 560 } 561 proc(bitmapXY, count, state.fBitmap->width(), state.fBitmap->height()); 562} 563 564SkBitmapProcState::MatrixProc SkBitmapProcState::getMatrixProc() const { 565 return DebugMatrixProc; 566} 567 568#endif 569 570/////////////////////////////////////////////////////////////////////////////// 571/* 572 The storage requirements for the different matrix procs are as follows, 573 where each X or Y is 2 bytes, and N is the number of pixels/elements: 574 575 scale/translate nofilter Y(4bytes) + N * X 576 affine/perspective nofilter N * (X Y) 577 scale/translate filter Y Y + N * (X X) 578 affine/perspective filter N * (Y Y X X) 579 */ 580int SkBitmapProcState::maxCountForBufferSize(size_t bufferSize) const { 581 int32_t size = static_cast<int32_t>(bufferSize); 582 583 size &= ~3; // only care about 4-byte aligned chunks 584 if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) { 585 size -= 4; // the shared Y (or YY) coordinate 586 if (size < 0) { 587 size = 0; 588 } 589 size >>= 1; 590 } else { 591 size >>= 2; 592 } 593 594 if (fDoFilter) { 595 size >>= 1; 596 } 597 598 return size; 599} 600 601