1/* 2 * Copyright 2008 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8// The copyright below was added in 2009, but I see no record of moto contributions...? 9 10/* NEON optimized code (C) COPYRIGHT 2009 Motorola 11 * 12 * Use of this source code is governed by a BSD-style license that can be 13 * found in the LICENSE file. 14 */ 15 16#include "SkBitmapProcState.h" 17#include "SkShader.h" 18#include "SkUtils.h" 19#include "SkUtilsArm.h" 20#include "SkBitmapProcState_utils.h" 21 22/* returns 0...(n-1) given any x (positive or negative). 23 24 As an example, if n (which is always positive) is 5... 25 26 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 27 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 28 */ 29static inline int sk_int_mod(int x, int n) { 30 SkASSERT(n > 0); 31 if ((unsigned)x >= (unsigned)n) { 32 if (x < 0) { 33 x = n + ~(~x % n); 34 } else { 35 x = x % n; 36 } 37 } 38 return x; 39} 40 41void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 42void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 43 44#include "SkBitmapProcState_matrix_template.h" 45 46/////////////////////////////////////////////////////////////////////////////// 47 48// Compile neon code paths if needed 49#if defined(SK_ARM_HAS_NEON) 50 51// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 52extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 53extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 54 55#endif // defined(SK_ARM_HAS_NEON) 56 57// Compile non-neon code path if needed 58#if !defined(SK_ARM_HAS_NEON) 59#define MAKENAME(suffix) ClampX_ClampY ## suffix 60#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 61#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 62#define EXTRACT_LOW_BITS(v, max) (((v) >> 12) & 0xF) 63#define CHECK_FOR_DECAL 64#include "SkBitmapProcState_matrix.h" 65 66struct ClampTileProcs { 67 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 68 return SkClampMax(fx >> 16, max); 69 } 70 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 71 return SkClampMax(fy >> 16, max); 72 } 73}; 74 75// Referenced in opts_check_x86.cpp 76void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[], 77 int count, int x, int y) { 78 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y); 79} 80void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[], 81 int count, int x, int y) { 82 return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y); 83} 84 85static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = { 86 // only clamp lives in the right coord space to check for decal 87 ClampX_ClampY_nofilter_scale, 88 ClampX_ClampY_filter_scale, 89 ClampX_ClampY_nofilter_affine, 90 ClampX_ClampY_filter_affine, 91}; 92 93#define MAKENAME(suffix) RepeatX_RepeatY ## suffix 94#define TILEX_PROCF(fx, max) SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1)) 95#define TILEY_PROCF(fy, max) SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1)) 96#define EXTRACT_LOW_BITS(v, max) (((unsigned)((v) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 97#include "SkBitmapProcState_matrix.h" 98 99struct RepeatTileProcs { 100 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 101 SkASSERT(max < 65535); 102 return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1)); 103 } 104 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 105 SkASSERT(max < 65535); 106 return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1)); 107 } 108}; 109 110static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = { 111 NoFilterProc_Scale<RepeatTileProcs, false>, 112 RepeatX_RepeatY_filter_scale, 113 NoFilterProc_Affine<RepeatTileProcs>, 114 RepeatX_RepeatY_filter_affine, 115}; 116#endif 117 118#define MAKENAME(suffix) GeneralXY ## suffix 119#define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 120 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; 121#define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX 122#define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY 123#define PREAMBLE_ARG_X , tileProcX 124#define PREAMBLE_ARG_Y , tileProcY 125#define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 126#define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 127#define EXTRACT_LOW_BITS(v, max) (((v * (max + 1)) >> 12) & 0xF) 128#include "SkBitmapProcState_matrix.h" 129 130struct GeneralTileProcs { 131 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) { 132 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1)); 133 } 134 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) { 135 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1)); 136 } 137}; 138 139static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = { 140 NoFilterProc_Scale<GeneralTileProcs, false>, 141 GeneralXY_filter_scale, 142 NoFilterProc_Affine<GeneralTileProcs>, 143 GeneralXY_filter_affine, 144}; 145 146/////////////////////////////////////////////////////////////////////////////// 147 148static inline U16CPU fixed_clamp(SkFixed x) { 149 if (x < 0) { 150 x = 0; 151 } 152 if (x >> 16) { 153 x = 0xFFFF; 154 } 155 return x; 156} 157 158static inline U16CPU fixed_repeat(SkFixed x) { 159 return x & 0xFFFF; 160} 161 162static inline U16CPU fixed_mirror(SkFixed x) { 163 SkFixed s = SkLeftShift(x, 15) >> 31; 164 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 165 return (x ^ s) & 0xFFFF; 166} 167 168static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) { 169 if (SkShader::kClamp_TileMode == m) { 170 return fixed_clamp; 171 } 172 if (SkShader::kRepeat_TileMode == m) { 173 return fixed_repeat; 174 } 175 SkASSERT(SkShader::kMirror_TileMode == m); 176 return fixed_mirror; 177} 178 179static inline U16CPU int_clamp(int x, int n) { 180 if (x >= n) { 181 x = n - 1; 182 } 183 if (x < 0) { 184 x = 0; 185 } 186 return x; 187} 188 189static inline U16CPU int_repeat(int x, int n) { 190 return sk_int_mod(x, n); 191} 192 193static inline U16CPU int_mirror(int x, int n) { 194 x = sk_int_mod(x, 2 * n); 195 if (x >= n) { 196 x = n + ~(x - n); 197 } 198 return x; 199} 200 201#if 0 202static void test_int_tileprocs() { 203 for (int i = -8; i <= 8; i++) { 204 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 205 } 206} 207#endif 208 209static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 210 if (SkShader::kClamp_TileMode == tm) 211 return int_clamp; 212 if (SkShader::kRepeat_TileMode == tm) 213 return int_repeat; 214 SkASSERT(SkShader::kMirror_TileMode == tm); 215 return int_mirror; 216} 217 218////////////////////////////////////////////////////////////////////////////// 219 220void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 221 int i; 222 223 for (i = (count >> 2); i > 0; --i) { 224 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 225 fx += dx+dx; 226 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 227 fx += dx+dx; 228 } 229 count &= 3; 230 231 uint16_t* xx = (uint16_t*)dst; 232 for (i = count; i > 0; --i) { 233 *xx++ = SkToU16(fx >> 16); fx += dx; 234 } 235} 236 237void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 238 if (count & 1) { 239 SkASSERT((fx >> (16 + 14)) == 0); 240 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 241 fx += dx; 242 } 243 while ((count -= 2) >= 0) { 244 SkASSERT((fx >> (16 + 14)) == 0); 245 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 246 fx += dx; 247 248 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 249 fx += dx; 250 } 251} 252 253/////////////////////////////////////////////////////////////////////////////// 254// stores the same as SCALE, but is cheaper to compute. Also since there is no 255// scale, we don't need/have a FILTER version 256 257static void fill_sequential(uint16_t xptr[], int start, int count) { 258#if 1 259 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 260 *xptr++ = start++; 261 count -= 1; 262 } 263 if (count > 3) { 264 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 265 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 266 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 267 start += count & ~3; 268 int qcount = count >> 2; 269 do { 270 *xxptr++ = pattern0; 271 pattern0 += 0x40004; 272 *xxptr++ = pattern1; 273 pattern1 += 0x40004; 274 } while (--qcount != 0); 275 xptr = reinterpret_cast<uint16_t*>(xxptr); 276 count &= 3; 277 } 278 while (--count >= 0) { 279 *xptr++ = start++; 280 } 281#else 282 for (int i = 0; i < count; i++) { 283 *xptr++ = start++; 284 } 285#endif 286} 287 288static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 289 int x, int y) { 290 const SkBitmapProcStateAutoMapper mapper(s, x, y); 291 **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height()); 292 *xy += 1; // bump the ptr 293 // return our starting X position 294 return mapper.intX(); 295} 296 297static void clampx_nofilter_trans(const SkBitmapProcState& s, 298 uint32_t xy[], int count, int x, int y) { 299 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 300 301 int xpos = nofilter_trans_preamble(s, &xy, x, y); 302 const int width = s.fPixmap.width(); 303 if (1 == width) { 304 // all of the following X values must be 0 305 memset(xy, 0, count * sizeof(uint16_t)); 306 return; 307 } 308 309 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 310 int n; 311 312 // fill before 0 as needed 313 if (xpos < 0) { 314 n = -xpos; 315 if (n > count) { 316 n = count; 317 } 318 memset(xptr, 0, n * sizeof(uint16_t)); 319 count -= n; 320 if (0 == count) { 321 return; 322 } 323 xptr += n; 324 xpos = 0; 325 } 326 327 // fill in 0..width-1 if needed 328 if (xpos < width) { 329 n = width - xpos; 330 if (n > count) { 331 n = count; 332 } 333 fill_sequential(xptr, xpos, n); 334 count -= n; 335 if (0 == count) { 336 return; 337 } 338 xptr += n; 339 } 340 341 // fill the remaining with the max value 342 sk_memset16(xptr, width - 1, count); 343} 344 345static void repeatx_nofilter_trans(const SkBitmapProcState& s, 346 uint32_t xy[], int count, int x, int y) { 347 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 348 349 int xpos = nofilter_trans_preamble(s, &xy, x, y); 350 const int width = s.fPixmap.width(); 351 if (1 == width) { 352 // all of the following X values must be 0 353 memset(xy, 0, count * sizeof(uint16_t)); 354 return; 355 } 356 357 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 358 int start = sk_int_mod(xpos, width); 359 int n = width - start; 360 if (n > count) { 361 n = count; 362 } 363 fill_sequential(xptr, start, n); 364 xptr += n; 365 count -= n; 366 367 while (count >= width) { 368 fill_sequential(xptr, 0, width); 369 xptr += width; 370 count -= width; 371 } 372 373 if (count > 0) { 374 fill_sequential(xptr, 0, count); 375 } 376} 377 378static void fill_backwards(uint16_t xptr[], int pos, int count) { 379 for (int i = 0; i < count; i++) { 380 SkASSERT(pos >= 0); 381 xptr[i] = pos--; 382 } 383} 384 385static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 386 uint32_t xy[], int count, int x, int y) { 387 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 388 389 int xpos = nofilter_trans_preamble(s, &xy, x, y); 390 const int width = s.fPixmap.width(); 391 if (1 == width) { 392 // all of the following X values must be 0 393 memset(xy, 0, count * sizeof(uint16_t)); 394 return; 395 } 396 397 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 398 // need to know our start, and our initial phase (forward or backward) 399 bool forward; 400 int n; 401 int start = sk_int_mod(xpos, 2 * width); 402 if (start >= width) { 403 start = width + ~(start - width); 404 forward = false; 405 n = start + 1; // [start .. 0] 406 } else { 407 forward = true; 408 n = width - start; // [start .. width) 409 } 410 if (n > count) { 411 n = count; 412 } 413 if (forward) { 414 fill_sequential(xptr, start, n); 415 } else { 416 fill_backwards(xptr, start, n); 417 } 418 forward = !forward; 419 xptr += n; 420 count -= n; 421 422 while (count >= width) { 423 if (forward) { 424 fill_sequential(xptr, 0, width); 425 } else { 426 fill_backwards(xptr, width - 1, width); 427 } 428 forward = !forward; 429 xptr += width; 430 count -= width; 431 } 432 433 if (count > 0) { 434 if (forward) { 435 fill_sequential(xptr, 0, count); 436 } else { 437 fill_backwards(xptr, width - 1, count); 438 } 439 } 440} 441 442/////////////////////////////////////////////////////////////////////////////// 443 444SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 445 SkASSERT((fInvType & SkMatrix::kPerspective_Mask) == 0); 446 447// test_int_tileprocs(); 448 // check for our special case when there is no scale/affine/perspective 449 if (trivial_matrix && kNone_SkFilterQuality == fFilterQuality) { 450 fIntTileProcY = choose_int_tile_proc(fTileModeY); 451 switch (fTileModeX) { 452 case SkShader::kClamp_TileMode: 453 return clampx_nofilter_trans; 454 case SkShader::kRepeat_TileMode: 455 return repeatx_nofilter_trans; 456 case SkShader::kMirror_TileMode: 457 return mirrorx_nofilter_trans; 458 } 459 } 460 461 int index = 0; 462 if (fFilterQuality != kNone_SkFilterQuality) { 463 index = 1; 464 } 465 if (fInvType & SkMatrix::kAffine_Mask) { 466 index += 2; 467 } 468 469 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) { 470 // clamp gets special version of filterOne 471 fFilterOneX = SK_Fixed1; 472 fFilterOneY = SK_Fixed1; 473 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 474 } 475 476 // all remaining procs use this form for filterOne 477 fFilterOneX = SK_Fixed1 / fPixmap.width(); 478 fFilterOneY = SK_Fixed1 / fPixmap.height(); 479 480 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) { 481 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 482 } 483 484 fTileProcX = choose_tile_proc(fTileModeX); 485 fTileProcY = choose_tile_proc(fTileModeY); 486 return GeneralXY_Procs[index]; 487} 488