1/* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "libyuv/scale.h" 12 13#include <assert.h> 14#include <string.h> 15 16#include "libyuv/cpu_id.h" 17#include "libyuv/planar_functions.h" // For CopyARGB 18#include "libyuv/row.h" 19#include "libyuv/scale_row.h" 20 21#ifdef __cplusplus 22namespace libyuv { 23extern "C" { 24#endif 25 26static __inline int Abs(int v) { 27 return v >= 0 ? v : -v; 28} 29 30// ScaleARGB ARGB, 1/2 31// This is an optimized version for scaling down a ARGB to 1/2 of 32// its original size. 33static void ScaleARGBDown2(int src_width, 34 int src_height, 35 int dst_width, 36 int dst_height, 37 int src_stride, 38 int dst_stride, 39 const uint8* src_argb, 40 uint8* dst_argb, 41 int x, 42 int dx, 43 int y, 44 int dy, 45 enum FilterMode filtering) { 46 int j; 47 int row_stride = src_stride * (dy >> 16); 48 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, 49 uint8* dst_argb, int dst_width) = 50 filtering == kFilterNone 51 ? ScaleARGBRowDown2_C 52 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C 53 : ScaleARGBRowDown2Box_C); 54 (void)src_width; 55 (void)src_height; 56 (void)dx; 57 assert(dx == 65536 * 2); // Test scale factor of 2. 58 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. 59 // Advance to odd row, even column. 60 if (filtering == kFilterBilinear) { 61 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 62 } else { 63 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; 64 } 65 66#if defined(HAS_SCALEARGBROWDOWN2_SSE2) 67 if (TestCpuFlag(kCpuHasSSE2)) { 68 ScaleARGBRowDown2 = 69 filtering == kFilterNone 70 ? ScaleARGBRowDown2_Any_SSE2 71 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 72 : ScaleARGBRowDown2Box_Any_SSE2); 73 if (IS_ALIGNED(dst_width, 4)) { 74 ScaleARGBRowDown2 = 75 filtering == kFilterNone 76 ? ScaleARGBRowDown2_SSE2 77 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 78 : ScaleARGBRowDown2Box_SSE2); 79 } 80 } 81#endif 82#if defined(HAS_SCALEARGBROWDOWN2_NEON) 83 if (TestCpuFlag(kCpuHasNEON)) { 84 ScaleARGBRowDown2 = 85 filtering == kFilterNone 86 ? ScaleARGBRowDown2_Any_NEON 87 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON 88 : ScaleARGBRowDown2Box_Any_NEON); 89 if (IS_ALIGNED(dst_width, 8)) { 90 ScaleARGBRowDown2 = 91 filtering == kFilterNone 92 ? ScaleARGBRowDown2_NEON 93 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON 94 : ScaleARGBRowDown2Box_NEON); 95 } 96 } 97#endif 98#if defined(HAS_SCALEARGBROWDOWN2_MSA) 99 if (TestCpuFlag(kCpuHasMSA)) { 100 ScaleARGBRowDown2 = 101 filtering == kFilterNone 102 ? ScaleARGBRowDown2_Any_MSA 103 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA 104 : ScaleARGBRowDown2Box_Any_MSA); 105 if (IS_ALIGNED(dst_width, 4)) { 106 ScaleARGBRowDown2 = 107 filtering == kFilterNone 108 ? ScaleARGBRowDown2_MSA 109 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA 110 : ScaleARGBRowDown2Box_MSA); 111 } 112 } 113#endif 114 115 if (filtering == kFilterLinear) { 116 src_stride = 0; 117 } 118 for (j = 0; j < dst_height; ++j) { 119 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); 120 src_argb += row_stride; 121 dst_argb += dst_stride; 122 } 123} 124 125// ScaleARGB ARGB, 1/4 126// This is an optimized version for scaling down a ARGB to 1/4 of 127// its original size. 128static void ScaleARGBDown4Box(int src_width, 129 int src_height, 130 int dst_width, 131 int dst_height, 132 int src_stride, 133 int dst_stride, 134 const uint8* src_argb, 135 uint8* dst_argb, 136 int x, 137 int dx, 138 int y, 139 int dy) { 140 int j; 141 // Allocate 2 rows of ARGB. 142 const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; 143 align_buffer_64(row, kRowSize * 2); 144 int row_stride = src_stride * (dy >> 16); 145 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, 146 uint8* dst_argb, int dst_width) = 147 ScaleARGBRowDown2Box_C; 148 // Advance to odd row, even column. 149 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 150 (void)src_width; 151 (void)src_height; 152 (void)dx; 153 assert(dx == 65536 * 4); // Test scale factor of 4. 154 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. 155#if defined(HAS_SCALEARGBROWDOWN2_SSE2) 156 if (TestCpuFlag(kCpuHasSSE2)) { 157 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2; 158 if (IS_ALIGNED(dst_width, 4)) { 159 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; 160 } 161 } 162#endif 163#if defined(HAS_SCALEARGBROWDOWN2_NEON) 164 if (TestCpuFlag(kCpuHasNEON)) { 165 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON; 166 if (IS_ALIGNED(dst_width, 8)) { 167 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; 168 } 169 } 170#endif 171 172 for (j = 0; j < dst_height; ++j) { 173 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); 174 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, 175 dst_width * 2); 176 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); 177 src_argb += row_stride; 178 dst_argb += dst_stride; 179 } 180 free_aligned_buffer_64(row); 181} 182 183// ScaleARGB ARGB Even 184// This is an optimized version for scaling down a ARGB to even 185// multiple of its original size. 186static void ScaleARGBDownEven(int src_width, 187 int src_height, 188 int dst_width, 189 int dst_height, 190 int src_stride, 191 int dst_stride, 192 const uint8* src_argb, 193 uint8* dst_argb, 194 int x, 195 int dx, 196 int y, 197 int dy, 198 enum FilterMode filtering) { 199 int j; 200 int col_step = dx >> 16; 201 int row_stride = (dy >> 16) * src_stride; 202 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, 203 int src_step, uint8* dst_argb, int dst_width) = 204 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; 205 (void)src_width; 206 (void)src_height; 207 assert(IS_ALIGNED(src_width, 2)); 208 assert(IS_ALIGNED(src_height, 2)); 209 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 210#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) 211 if (TestCpuFlag(kCpuHasSSE2)) { 212 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 213 : ScaleARGBRowDownEven_Any_SSE2; 214 if (IS_ALIGNED(dst_width, 4)) { 215 ScaleARGBRowDownEven = 216 filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2; 217 } 218 } 219#endif 220#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) 221 if (TestCpuFlag(kCpuHasNEON)) { 222 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON 223 : ScaleARGBRowDownEven_Any_NEON; 224 if (IS_ALIGNED(dst_width, 4)) { 225 ScaleARGBRowDownEven = 226 filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON; 227 } 228 } 229#endif 230#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) 231 if (TestCpuFlag(kCpuHasMSA)) { 232 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA 233 : ScaleARGBRowDownEven_Any_MSA; 234 if (IS_ALIGNED(dst_width, 4)) { 235 ScaleARGBRowDownEven = 236 filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA; 237 } 238 } 239#endif 240 241 if (filtering == kFilterLinear) { 242 src_stride = 0; 243 } 244 for (j = 0; j < dst_height; ++j) { 245 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); 246 src_argb += row_stride; 247 dst_argb += dst_stride; 248 } 249} 250 251// Scale ARGB down with bilinear interpolation. 252static void ScaleARGBBilinearDown(int src_width, 253 int src_height, 254 int dst_width, 255 int dst_height, 256 int src_stride, 257 int dst_stride, 258 const uint8* src_argb, 259 uint8* dst_argb, 260 int x, 261 int dx, 262 int y, 263 int dy, 264 enum FilterMode filtering) { 265 int j; 266 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, 267 ptrdiff_t src_stride, int dst_width, 268 int source_y_fraction) = InterpolateRow_C; 269 void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, 270 int dst_width, int x, int dx) = 271 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; 272 int64 xlast = x + (int64)(dst_width - 1) * dx; 273 int64 xl = (dx >= 0) ? x : xlast; 274 int64 xr = (dx >= 0) ? xlast : x; 275 int clip_src_width; 276 xl = (xl >> 16) & ~3; // Left edge aligned. 277 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. 278 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel. 279 if (xr > src_width) { 280 xr = src_width; 281 } 282 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. 283 src_argb += xl * 4; 284 x -= (int)(xl << 16); 285#if defined(HAS_INTERPOLATEROW_SSSE3) 286 if (TestCpuFlag(kCpuHasSSSE3)) { 287 InterpolateRow = InterpolateRow_Any_SSSE3; 288 if (IS_ALIGNED(clip_src_width, 16)) { 289 InterpolateRow = InterpolateRow_SSSE3; 290 } 291 } 292#endif 293#if defined(HAS_INTERPOLATEROW_AVX2) 294 if (TestCpuFlag(kCpuHasAVX2)) { 295 InterpolateRow = InterpolateRow_Any_AVX2; 296 if (IS_ALIGNED(clip_src_width, 32)) { 297 InterpolateRow = InterpolateRow_AVX2; 298 } 299 } 300#endif 301#if defined(HAS_INTERPOLATEROW_NEON) 302 if (TestCpuFlag(kCpuHasNEON)) { 303 InterpolateRow = InterpolateRow_Any_NEON; 304 if (IS_ALIGNED(clip_src_width, 16)) { 305 InterpolateRow = InterpolateRow_NEON; 306 } 307 } 308#endif 309#if defined(HAS_INTERPOLATEROW_DSPR2) 310 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && 311 IS_ALIGNED(src_stride, 4)) { 312 InterpolateRow = InterpolateRow_Any_DSPR2; 313 if (IS_ALIGNED(clip_src_width, 4)) { 314 InterpolateRow = InterpolateRow_DSPR2; 315 } 316 } 317#endif 318#if defined(HAS_INTERPOLATEROW_MSA) 319 if (TestCpuFlag(kCpuHasMSA)) { 320 InterpolateRow = InterpolateRow_Any_MSA; 321 if (IS_ALIGNED(clip_src_width, 32)) { 322 InterpolateRow = InterpolateRow_MSA; 323 } 324 } 325#endif 326#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 327 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 328 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 329 } 330#endif 331#if defined(HAS_SCALEARGBFILTERCOLS_NEON) 332 if (TestCpuFlag(kCpuHasNEON)) { 333 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; 334 if (IS_ALIGNED(dst_width, 4)) { 335 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; 336 } 337 } 338#endif 339 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 340 // Allocate a row of ARGB. 341 { 342 align_buffer_64(row, clip_src_width * 4); 343 344 const int max_y = (src_height - 1) << 16; 345 if (y > max_y) { 346 y = max_y; 347 } 348 for (j = 0; j < dst_height; ++j) { 349 int yi = y >> 16; 350 const uint8* src = src_argb + yi * src_stride; 351 if (filtering == kFilterLinear) { 352 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); 353 } else { 354 int yf = (y >> 8) & 255; 355 InterpolateRow(row, src, src_stride, clip_src_width, yf); 356 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); 357 } 358 dst_argb += dst_stride; 359 y += dy; 360 if (y > max_y) { 361 y = max_y; 362 } 363 } 364 free_aligned_buffer_64(row); 365 } 366} 367 368// Scale ARGB up with bilinear interpolation. 369static void ScaleARGBBilinearUp(int src_width, 370 int src_height, 371 int dst_width, 372 int dst_height, 373 int src_stride, 374 int dst_stride, 375 const uint8* src_argb, 376 uint8* dst_argb, 377 int x, 378 int dx, 379 int y, 380 int dy, 381 enum FilterMode filtering) { 382 int j; 383 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, 384 ptrdiff_t src_stride, int dst_width, 385 int source_y_fraction) = InterpolateRow_C; 386 void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, 387 int dst_width, int x, int dx) = 388 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; 389 const int max_y = (src_height - 1) << 16; 390#if defined(HAS_INTERPOLATEROW_SSSE3) 391 if (TestCpuFlag(kCpuHasSSSE3)) { 392 InterpolateRow = InterpolateRow_Any_SSSE3; 393 if (IS_ALIGNED(dst_width, 4)) { 394 InterpolateRow = InterpolateRow_SSSE3; 395 } 396 } 397#endif 398#if defined(HAS_INTERPOLATEROW_AVX2) 399 if (TestCpuFlag(kCpuHasAVX2)) { 400 InterpolateRow = InterpolateRow_Any_AVX2; 401 if (IS_ALIGNED(dst_width, 8)) { 402 InterpolateRow = InterpolateRow_AVX2; 403 } 404 } 405#endif 406#if defined(HAS_INTERPOLATEROW_NEON) 407 if (TestCpuFlag(kCpuHasNEON)) { 408 InterpolateRow = InterpolateRow_Any_NEON; 409 if (IS_ALIGNED(dst_width, 4)) { 410 InterpolateRow = InterpolateRow_NEON; 411 } 412 } 413#endif 414#if defined(HAS_INTERPOLATEROW_DSPR2) 415 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) && 416 IS_ALIGNED(dst_stride, 4)) { 417 InterpolateRow = InterpolateRow_DSPR2; 418 } 419#endif 420#if defined(HAS_INTERPOLATEROW_MSA) 421 if (TestCpuFlag(kCpuHasMSA)) { 422 InterpolateRow = InterpolateRow_Any_MSA; 423 if (IS_ALIGNED(dst_width, 8)) { 424 InterpolateRow = InterpolateRow_MSA; 425 } 426 } 427#endif 428 if (src_width >= 32768) { 429 ScaleARGBFilterCols = 430 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C; 431 } 432#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 433 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 434 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 435 } 436#endif 437#if defined(HAS_SCALEARGBFILTERCOLS_NEON) 438 if (filtering && TestCpuFlag(kCpuHasNEON)) { 439 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; 440 if (IS_ALIGNED(dst_width, 4)) { 441 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; 442 } 443 } 444#endif 445#if defined(HAS_SCALEARGBCOLS_SSE2) 446 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 447 ScaleARGBFilterCols = ScaleARGBCols_SSE2; 448 } 449#endif 450#if defined(HAS_SCALEARGBCOLS_NEON) 451 if (!filtering && TestCpuFlag(kCpuHasNEON)) { 452 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON; 453 if (IS_ALIGNED(dst_width, 8)) { 454 ScaleARGBFilterCols = ScaleARGBCols_NEON; 455 } 456 } 457#endif 458 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 459 ScaleARGBFilterCols = ScaleARGBColsUp2_C; 460#if defined(HAS_SCALEARGBCOLSUP2_SSE2) 461 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 462 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; 463 } 464#endif 465 } 466 467 if (y > max_y) { 468 y = max_y; 469 } 470 471 { 472 int yi = y >> 16; 473 const uint8* src = src_argb + yi * src_stride; 474 475 // Allocate 2 rows of ARGB. 476 const int kRowSize = (dst_width * 4 + 31) & ~31; 477 align_buffer_64(row, kRowSize * 2); 478 479 uint8* rowptr = row; 480 int rowstride = kRowSize; 481 int lasty = yi; 482 483 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); 484 if (src_height > 1) { 485 src += src_stride; 486 } 487 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); 488 src += src_stride; 489 490 for (j = 0; j < dst_height; ++j) { 491 yi = y >> 16; 492 if (yi != lasty) { 493 if (y > max_y) { 494 y = max_y; 495 yi = y >> 16; 496 src = src_argb + yi * src_stride; 497 } 498 if (yi != lasty) { 499 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); 500 rowptr += rowstride; 501 rowstride = -rowstride; 502 lasty = yi; 503 src += src_stride; 504 } 505 } 506 if (filtering == kFilterLinear) { 507 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); 508 } else { 509 int yf = (y >> 8) & 255; 510 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); 511 } 512 dst_argb += dst_stride; 513 y += dy; 514 } 515 free_aligned_buffer_64(row); 516 } 517} 518 519#ifdef YUVSCALEUP 520// Scale YUV to ARGB up with bilinear interpolation. 521static void ScaleYUVToARGBBilinearUp(int src_width, 522 int src_height, 523 int dst_width, 524 int dst_height, 525 int src_stride_y, 526 int src_stride_u, 527 int src_stride_v, 528 int dst_stride_argb, 529 const uint8* src_y, 530 const uint8* src_u, 531 const uint8* src_v, 532 uint8* dst_argb, 533 int x, 534 int dx, 535 int y, 536 int dy, 537 enum FilterMode filtering) { 538 int j; 539 void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, 540 const uint8* v_buf, uint8* rgb_buf, int width) = 541 I422ToARGBRow_C; 542#if defined(HAS_I422TOARGBROW_SSSE3) 543 if (TestCpuFlag(kCpuHasSSSE3)) { 544 I422ToARGBRow = I422ToARGBRow_Any_SSSE3; 545 if (IS_ALIGNED(src_width, 8)) { 546 I422ToARGBRow = I422ToARGBRow_SSSE3; 547 } 548 } 549#endif 550#if defined(HAS_I422TOARGBROW_AVX2) 551 if (TestCpuFlag(kCpuHasAVX2)) { 552 I422ToARGBRow = I422ToARGBRow_Any_AVX2; 553 if (IS_ALIGNED(src_width, 16)) { 554 I422ToARGBRow = I422ToARGBRow_AVX2; 555 } 556 } 557#endif 558#if defined(HAS_I422TOARGBROW_NEON) 559 if (TestCpuFlag(kCpuHasNEON)) { 560 I422ToARGBRow = I422ToARGBRow_Any_NEON; 561 if (IS_ALIGNED(src_width, 8)) { 562 I422ToARGBRow = I422ToARGBRow_NEON; 563 } 564 } 565#endif 566#if defined(HAS_I422TOARGBROW_DSPR2) 567 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) && 568 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && 569 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && 570 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && 571 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 572 I422ToARGBRow = I422ToARGBRow_DSPR2; 573 } 574#endif 575#if defined(HAS_I422TOARGBROW_MSA) 576 if (TestCpuFlag(kCpuHasMSA)) { 577 I422ToARGBRow = I422ToARGBRow_Any_MSA; 578 if (IS_ALIGNED(src_width, 8)) { 579 I422ToARGBRow = I422ToARGBRow_MSA; 580 } 581 } 582#endif 583 584 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, 585 ptrdiff_t src_stride, int dst_width, 586 int source_y_fraction) = InterpolateRow_C; 587#if defined(HAS_INTERPOLATEROW_SSSE3) 588 if (TestCpuFlag(kCpuHasSSSE3)) { 589 InterpolateRow = InterpolateRow_Any_SSSE3; 590 if (IS_ALIGNED(dst_width, 4)) { 591 InterpolateRow = InterpolateRow_SSSE3; 592 } 593 } 594#endif 595#if defined(HAS_INTERPOLATEROW_AVX2) 596 if (TestCpuFlag(kCpuHasAVX2)) { 597 InterpolateRow = InterpolateRow_Any_AVX2; 598 if (IS_ALIGNED(dst_width, 8)) { 599 InterpolateRow = InterpolateRow_AVX2; 600 } 601 } 602#endif 603#if defined(HAS_INTERPOLATEROW_NEON) 604 if (TestCpuFlag(kCpuHasNEON)) { 605 InterpolateRow = InterpolateRow_Any_NEON; 606 if (IS_ALIGNED(dst_width, 4)) { 607 InterpolateRow = InterpolateRow_NEON; 608 } 609 } 610#endif 611#if defined(HAS_INTERPOLATEROW_DSPR2) 612 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) && 613 IS_ALIGNED(dst_stride_argb, 4)) { 614 InterpolateRow = InterpolateRow_DSPR2; 615 } 616#endif 617#if defined(HAS_INTERPOLATEROW_MSA) 618 if (TestCpuFlag(kCpuHasMSA)) { 619 InterpolateRow = InterpolateRow_Any_MSA; 620 if (IS_ALIGNED(dst_width, 8)) { 621 InterpolateRow = InterpolateRow_MSA; 622 } 623 } 624#endif 625 626 void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, 627 int dst_width, int x, int dx) = 628 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; 629 if (src_width >= 32768) { 630 ScaleARGBFilterCols = 631 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C; 632 } 633#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 634 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 635 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 636 } 637#endif 638#if defined(HAS_SCALEARGBFILTERCOLS_NEON) 639 if (filtering && TestCpuFlag(kCpuHasNEON)) { 640 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; 641 if (IS_ALIGNED(dst_width, 4)) { 642 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; 643 } 644 } 645#endif 646#if defined(HAS_SCALEARGBCOLS_SSE2) 647 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 648 ScaleARGBFilterCols = ScaleARGBCols_SSE2; 649 } 650#endif 651#if defined(HAS_SCALEARGBCOLS_NEON) 652 if (!filtering && TestCpuFlag(kCpuHasNEON)) { 653 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON; 654 if (IS_ALIGNED(dst_width, 8)) { 655 ScaleARGBFilterCols = ScaleARGBCols_NEON; 656 } 657 } 658#endif 659 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 660 ScaleARGBFilterCols = ScaleARGBColsUp2_C; 661#if defined(HAS_SCALEARGBCOLSUP2_SSE2) 662 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 663 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; 664 } 665#endif 666 } 667 668 const int max_y = (src_height - 1) << 16; 669 if (y > max_y) { 670 y = max_y; 671 } 672 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. 673 int yi = y >> 16; 674 int uv_yi = yi >> kYShift; 675 const uint8* src_row_y = src_y + yi * src_stride_y; 676 const uint8* src_row_u = src_u + uv_yi * src_stride_u; 677 const uint8* src_row_v = src_v + uv_yi * src_stride_v; 678 679 // Allocate 2 rows of ARGB. 680 const int kRowSize = (dst_width * 4 + 31) & ~31; 681 align_buffer_64(row, kRowSize * 2); 682 683 // Allocate 1 row of ARGB for source conversion. 684 align_buffer_64(argb_row, src_width * 4); 685 686 uint8* rowptr = row; 687 int rowstride = kRowSize; 688 int lasty = yi; 689 690 // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 691 ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); 692 if (src_height > 1) { 693 src_row_y += src_stride_y; 694 if (yi & 1) { 695 src_row_u += src_stride_u; 696 src_row_v += src_stride_v; 697 } 698 } 699 ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx); 700 if (src_height > 2) { 701 src_row_y += src_stride_y; 702 if (!(yi & 1)) { 703 src_row_u += src_stride_u; 704 src_row_v += src_stride_v; 705 } 706 } 707 708 for (j = 0; j < dst_height; ++j) { 709 yi = y >> 16; 710 if (yi != lasty) { 711 if (y > max_y) { 712 y = max_y; 713 yi = y >> 16; 714 uv_yi = yi >> kYShift; 715 src_row_y = src_y + yi * src_stride_y; 716 src_row_u = src_u + uv_yi * src_stride_u; 717 src_row_v = src_v + uv_yi * src_stride_v; 718 } 719 if (yi != lasty) { 720 // TODO(fbarchard): Convert the clipped region of row. 721 I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width); 722 ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx); 723 rowptr += rowstride; 724 rowstride = -rowstride; 725 lasty = yi; 726 src_row_y += src_stride_y; 727 if (yi & 1) { 728 src_row_u += src_stride_u; 729 src_row_v += src_stride_v; 730 } 731 } 732 } 733 if (filtering == kFilterLinear) { 734 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); 735 } else { 736 int yf = (y >> 8) & 255; 737 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); 738 } 739 dst_argb += dst_stride_argb; 740 y += dy; 741 } 742 free_aligned_buffer_64(row); 743 free_aligned_buffer_64(row_argb); 744} 745#endif 746 747// Scale ARGB to/from any dimensions, without interpolation. 748// Fixed point math is used for performance: The upper 16 bits 749// of x and dx is the integer part of the source position and 750// the lower 16 bits are the fixed decimal part. 751 752static void ScaleARGBSimple(int src_width, 753 int src_height, 754 int dst_width, 755 int dst_height, 756 int src_stride, 757 int dst_stride, 758 const uint8* src_argb, 759 uint8* dst_argb, 760 int x, 761 int dx, 762 int y, 763 int dy) { 764 int j; 765 void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width, 766 int x, int dx) = 767 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; 768 (void)src_height; 769#if defined(HAS_SCALEARGBCOLS_SSE2) 770 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 771 ScaleARGBCols = ScaleARGBCols_SSE2; 772 } 773#endif 774#if defined(HAS_SCALEARGBCOLS_NEON) 775 if (TestCpuFlag(kCpuHasNEON)) { 776 ScaleARGBCols = ScaleARGBCols_Any_NEON; 777 if (IS_ALIGNED(dst_width, 8)) { 778 ScaleARGBCols = ScaleARGBCols_NEON; 779 } 780 } 781#endif 782 if (src_width * 2 == dst_width && x < 0x8000) { 783 ScaleARGBCols = ScaleARGBColsUp2_C; 784#if defined(HAS_SCALEARGBCOLSUP2_SSE2) 785 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 786 ScaleARGBCols = ScaleARGBColsUp2_SSE2; 787 } 788#endif 789 } 790 791 for (j = 0; j < dst_height; ++j) { 792 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, 793 dx); 794 dst_argb += dst_stride; 795 y += dy; 796 } 797} 798 799// ScaleARGB a ARGB. 800// This function in turn calls a scaling function 801// suitable for handling the desired resolutions. 802static void ScaleARGB(const uint8* src, 803 int src_stride, 804 int src_width, 805 int src_height, 806 uint8* dst, 807 int dst_stride, 808 int dst_width, 809 int dst_height, 810 int clip_x, 811 int clip_y, 812 int clip_width, 813 int clip_height, 814 enum FilterMode filtering) { 815 // Initial source x/y coordinate and step values as 16.16 fixed point. 816 int x = 0; 817 int y = 0; 818 int dx = 0; 819 int dy = 0; 820 // ARGB does not support box filter yet, but allow the user to pass it. 821 // Simplify filtering when possible. 822 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, 823 filtering); 824 825 // Negative src_height means invert the image. 826 if (src_height < 0) { 827 src_height = -src_height; 828 src = src + (src_height - 1) * src_stride; 829 src_stride = -src_stride; 830 } 831 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, 832 &dx, &dy); 833 src_width = Abs(src_width); 834 if (clip_x) { 835 int64 clipf = (int64)(clip_x)*dx; 836 x += (clipf & 0xffff); 837 src += (clipf >> 16) * 4; 838 dst += clip_x * 4; 839 } 840 if (clip_y) { 841 int64 clipf = (int64)(clip_y)*dy; 842 y += (clipf & 0xffff); 843 src += (clipf >> 16) * src_stride; 844 dst += clip_y * dst_stride; 845 } 846 847 // Special case for integer step values. 848 if (((dx | dy) & 0xffff) == 0) { 849 if (!dx || !dy) { // 1 pixel wide and/or tall. 850 filtering = kFilterNone; 851 } else { 852 // Optimized even scale down. ie 2, 4, 6, 8, 10x. 853 if (!(dx & 0x10000) && !(dy & 0x10000)) { 854 if (dx == 0x20000) { 855 // Optimized 1/2 downsample. 856 ScaleARGBDown2(src_width, src_height, clip_width, clip_height, 857 src_stride, dst_stride, src, dst, x, dx, y, dy, 858 filtering); 859 return; 860 } 861 if (dx == 0x40000 && filtering == kFilterBox) { 862 // Optimized 1/4 box downsample. 863 ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height, 864 src_stride, dst_stride, src, dst, x, dx, y, dy); 865 return; 866 } 867 ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, 868 src_stride, dst_stride, src, dst, x, dx, y, dy, 869 filtering); 870 return; 871 } 872 // Optimized odd scale down. ie 3, 5, 7, 9x. 873 if ((dx & 0x10000) && (dy & 0x10000)) { 874 filtering = kFilterNone; 875 if (dx == 0x10000 && dy == 0x10000) { 876 // Straight copy. 877 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, 878 dst, dst_stride, clip_width, clip_height); 879 return; 880 } 881 } 882 } 883 } 884 if (dx == 0x10000 && (x & 0xffff) == 0) { 885 // Arbitrary scale vertically, but unscaled vertically. 886 ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, 887 dst_stride, src, dst, x, y, dy, 4, filtering); 888 return; 889 } 890 if (filtering && dy < 65536) { 891 ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, 892 src_stride, dst_stride, src, dst, x, dx, y, dy, 893 filtering); 894 return; 895 } 896 if (filtering) { 897 ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, 898 src_stride, dst_stride, src, dst, x, dx, y, dy, 899 filtering); 900 return; 901 } 902 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride, 903 dst_stride, src, dst, x, dx, y, dy); 904} 905 906LIBYUV_API 907int ARGBScaleClip(const uint8* src_argb, 908 int src_stride_argb, 909 int src_width, 910 int src_height, 911 uint8* dst_argb, 912 int dst_stride_argb, 913 int dst_width, 914 int dst_height, 915 int clip_x, 916 int clip_y, 917 int clip_width, 918 int clip_height, 919 enum FilterMode filtering) { 920 if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb || 921 dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 || 922 clip_width > 32768 || clip_height > 32768 || 923 (clip_x + clip_width) > dst_width || 924 (clip_y + clip_height) > dst_height) { 925 return -1; 926 } 927 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, 928 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width, 929 clip_height, filtering); 930 return 0; 931} 932 933// Scale an ARGB image. 934LIBYUV_API 935int ARGBScale(const uint8* src_argb, 936 int src_stride_argb, 937 int src_width, 938 int src_height, 939 uint8* dst_argb, 940 int dst_stride_argb, 941 int dst_width, 942 int dst_height, 943 enum FilterMode filtering) { 944 if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 || 945 src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) { 946 return -1; 947 } 948 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, 949 dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height, 950 filtering); 951 return 0; 952} 953 954// Scale with YUV conversion to ARGB and clipping. 955LIBYUV_API 956int YUVToARGBScaleClip(const uint8* src_y, 957 int src_stride_y, 958 const uint8* src_u, 959 int src_stride_u, 960 const uint8* src_v, 961 int src_stride_v, 962 uint32 src_fourcc, 963 int src_width, 964 int src_height, 965 uint8* dst_argb, 966 int dst_stride_argb, 967 uint32 dst_fourcc, 968 int dst_width, 969 int dst_height, 970 int clip_x, 971 int clip_y, 972 int clip_width, 973 int clip_height, 974 enum FilterMode filtering) { 975 uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4); 976 int r; 977 (void)src_fourcc; // TODO(fbarchard): implement and/or assert. 978 (void)dst_fourcc; 979 I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, 980 argb_buffer, src_width * 4, src_width, src_height); 981 982 r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb, 983 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, 984 clip_width, clip_height, filtering); 985 free(argb_buffer); 986 return r; 987} 988 989#ifdef __cplusplus 990} // extern "C" 991} // namespace libyuv 992#endif 993