1/* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "libyuv/scale.h" 12 13#include <assert.h> 14#include <string.h> 15 16#include "libyuv/cpu_id.h" 17#include "libyuv/planar_functions.h" // For CopyPlane 18#include "libyuv/row.h" 19#include "libyuv/scale_row.h" 20 21#ifdef __cplusplus 22namespace libyuv { 23extern "C" { 24#endif 25 26static __inline int Abs(int v) { 27 return v >= 0 ? v : -v; 28} 29 30#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) 31 32// Scale plane, 1/2 33// This is an optimized version for scaling down a plane to 1/2 of 34// its original size. 35 36static void ScalePlaneDown2(int src_width, 37 int src_height, 38 int dst_width, 39 int dst_height, 40 int src_stride, 41 int dst_stride, 42 const uint8* src_ptr, 43 uint8* dst_ptr, 44 enum FilterMode filtering) { 45 int y; 46 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, 47 uint8* dst_ptr, int dst_width) = 48 filtering == kFilterNone 49 ? ScaleRowDown2_C 50 : (filtering == kFilterLinear ? ScaleRowDown2Linear_C 51 : ScaleRowDown2Box_C); 52 int row_stride = src_stride << 1; 53 (void)src_width; 54 (void)src_height; 55 if (!filtering) { 56 src_ptr += src_stride; // Point to odd rows. 57 src_stride = 0; 58 } 59 60#if defined(HAS_SCALEROWDOWN2_NEON) 61 if (TestCpuFlag(kCpuHasNEON)) { 62 ScaleRowDown2 = 63 filtering == kFilterNone 64 ? ScaleRowDown2_Any_NEON 65 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON 66 : ScaleRowDown2Box_Any_NEON); 67 if (IS_ALIGNED(dst_width, 16)) { 68 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON 69 : (filtering == kFilterLinear 70 ? ScaleRowDown2Linear_NEON 71 : ScaleRowDown2Box_NEON); 72 } 73 } 74#endif 75#if defined(HAS_SCALEROWDOWN2_SSSE3) 76 if (TestCpuFlag(kCpuHasSSSE3)) { 77 ScaleRowDown2 = 78 filtering == kFilterNone 79 ? ScaleRowDown2_Any_SSSE3 80 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 81 : ScaleRowDown2Box_Any_SSSE3); 82 if (IS_ALIGNED(dst_width, 16)) { 83 ScaleRowDown2 = 84 filtering == kFilterNone 85 ? ScaleRowDown2_SSSE3 86 : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 87 : ScaleRowDown2Box_SSSE3); 88 } 89 } 90#endif 91#if defined(HAS_SCALEROWDOWN2_AVX2) 92 if (TestCpuFlag(kCpuHasAVX2)) { 93 ScaleRowDown2 = 94 filtering == kFilterNone 95 ? ScaleRowDown2_Any_AVX2 96 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 97 : ScaleRowDown2Box_Any_AVX2); 98 if (IS_ALIGNED(dst_width, 32)) { 99 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 100 : (filtering == kFilterLinear 101 ? ScaleRowDown2Linear_AVX2 102 : ScaleRowDown2Box_AVX2); 103 } 104 } 105#endif 106#if defined(HAS_SCALEROWDOWN2_DSPR2) 107 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && 108 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && 109 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 110 ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2; 111 } 112#endif 113#if defined(HAS_SCALEROWDOWN2_MSA) 114 if (TestCpuFlag(kCpuHasMSA)) { 115 ScaleRowDown2 = 116 filtering == kFilterNone 117 ? ScaleRowDown2_Any_MSA 118 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA 119 : ScaleRowDown2Box_Any_MSA); 120 if (IS_ALIGNED(dst_width, 32)) { 121 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA 122 : (filtering == kFilterLinear 123 ? ScaleRowDown2Linear_MSA 124 : ScaleRowDown2Box_MSA); 125 } 126 } 127#endif 128 129 if (filtering == kFilterLinear) { 130 src_stride = 0; 131 } 132 // TODO(fbarchard): Loop through source height to allow odd height. 133 for (y = 0; y < dst_height; ++y) { 134 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); 135 src_ptr += row_stride; 136 dst_ptr += dst_stride; 137 } 138} 139 140static void ScalePlaneDown2_16(int src_width, 141 int src_height, 142 int dst_width, 143 int dst_height, 144 int src_stride, 145 int dst_stride, 146 const uint16* src_ptr, 147 uint16* dst_ptr, 148 enum FilterMode filtering) { 149 int y; 150 void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride, 151 uint16* dst_ptr, int dst_width) = 152 filtering == kFilterNone 153 ? ScaleRowDown2_16_C 154 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C 155 : ScaleRowDown2Box_16_C); 156 int row_stride = src_stride << 1; 157 (void)src_width; 158 (void)src_height; 159 if (!filtering) { 160 src_ptr += src_stride; // Point to odd rows. 161 src_stride = 0; 162 } 163 164#if defined(HAS_SCALEROWDOWN2_16_NEON) 165 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { 166 ScaleRowDown2 = 167 filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON; 168 } 169#endif 170#if defined(HAS_SCALEROWDOWN2_16_SSE2) 171 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { 172 ScaleRowDown2 = 173 filtering == kFilterNone 174 ? ScaleRowDown2_16_SSE2 175 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 176 : ScaleRowDown2Box_16_SSE2); 177 } 178#endif 179#if defined(HAS_SCALEROWDOWN2_16_DSPR2) 180 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && 181 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && 182 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 183 ScaleRowDown2 = 184 filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2; 185 } 186#endif 187 188 if (filtering == kFilterLinear) { 189 src_stride = 0; 190 } 191 // TODO(fbarchard): Loop through source height to allow odd height. 192 for (y = 0; y < dst_height; ++y) { 193 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); 194 src_ptr += row_stride; 195 dst_ptr += dst_stride; 196 } 197} 198 199// Scale plane, 1/4 200// This is an optimized version for scaling down a plane to 1/4 of 201// its original size. 202 203static void ScalePlaneDown4(int src_width, 204 int src_height, 205 int dst_width, 206 int dst_height, 207 int src_stride, 208 int dst_stride, 209 const uint8* src_ptr, 210 uint8* dst_ptr, 211 enum FilterMode filtering) { 212 int y; 213 void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride, 214 uint8* dst_ptr, int dst_width) = 215 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; 216 int row_stride = src_stride << 2; 217 (void)src_width; 218 (void)src_height; 219 if (!filtering) { 220 src_ptr += src_stride * 2; // Point to row 2. 221 src_stride = 0; 222 } 223#if defined(HAS_SCALEROWDOWN4_NEON) 224 if (TestCpuFlag(kCpuHasNEON)) { 225 ScaleRowDown4 = 226 filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; 227 if (IS_ALIGNED(dst_width, 8)) { 228 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; 229 } 230 } 231#endif 232#if defined(HAS_SCALEROWDOWN4_SSSE3) 233 if (TestCpuFlag(kCpuHasSSSE3)) { 234 ScaleRowDown4 = 235 filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3; 236 if (IS_ALIGNED(dst_width, 8)) { 237 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3; 238 } 239 } 240#endif 241#if defined(HAS_SCALEROWDOWN4_AVX2) 242 if (TestCpuFlag(kCpuHasAVX2)) { 243 ScaleRowDown4 = 244 filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2; 245 if (IS_ALIGNED(dst_width, 16)) { 246 ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2; 247 } 248 } 249#endif 250#if defined(HAS_SCALEROWDOWN4_DSPR2) 251 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && 252 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 253 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 254 ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2; 255 } 256#endif 257#if defined(HAS_SCALEROWDOWN4_MSA) 258 if (TestCpuFlag(kCpuHasMSA)) { 259 ScaleRowDown4 = 260 filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA; 261 if (IS_ALIGNED(dst_width, 16)) { 262 ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA; 263 } 264 } 265#endif 266 267 if (filtering == kFilterLinear) { 268 src_stride = 0; 269 } 270 for (y = 0; y < dst_height; ++y) { 271 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); 272 src_ptr += row_stride; 273 dst_ptr += dst_stride; 274 } 275} 276 277static void ScalePlaneDown4_16(int src_width, 278 int src_height, 279 int dst_width, 280 int dst_height, 281 int src_stride, 282 int dst_stride, 283 const uint16* src_ptr, 284 uint16* dst_ptr, 285 enum FilterMode filtering) { 286 int y; 287 void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride, 288 uint16* dst_ptr, int dst_width) = 289 filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; 290 int row_stride = src_stride << 2; 291 (void)src_width; 292 (void)src_height; 293 if (!filtering) { 294 src_ptr += src_stride * 2; // Point to row 2. 295 src_stride = 0; 296 } 297#if defined(HAS_SCALEROWDOWN4_16_NEON) 298 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { 299 ScaleRowDown4 = 300 filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON; 301 } 302#endif 303#if defined(HAS_SCALEROWDOWN4_16_SSE2) 304 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 305 ScaleRowDown4 = 306 filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; 307 } 308#endif 309#if defined(HAS_SCALEROWDOWN4_16_DSPR2) 310 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && 311 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 312 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 313 ScaleRowDown4 = 314 filtering ? ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2; 315 } 316#endif 317 318 if (filtering == kFilterLinear) { 319 src_stride = 0; 320 } 321 for (y = 0; y < dst_height; ++y) { 322 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); 323 src_ptr += row_stride; 324 dst_ptr += dst_stride; 325 } 326} 327 328// Scale plane down, 3/4 329static void ScalePlaneDown34(int src_width, 330 int src_height, 331 int dst_width, 332 int dst_height, 333 int src_stride, 334 int dst_stride, 335 const uint8* src_ptr, 336 uint8* dst_ptr, 337 enum FilterMode filtering) { 338 int y; 339 void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride, 340 uint8* dst_ptr, int dst_width); 341 void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride, 342 uint8* dst_ptr, int dst_width); 343 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 344 (void)src_width; 345 (void)src_height; 346 assert(dst_width % 3 == 0); 347 if (!filtering) { 348 ScaleRowDown34_0 = ScaleRowDown34_C; 349 ScaleRowDown34_1 = ScaleRowDown34_C; 350 } else { 351 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C; 352 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; 353 } 354#if defined(HAS_SCALEROWDOWN34_NEON) 355 if (TestCpuFlag(kCpuHasNEON)) { 356 if (!filtering) { 357 ScaleRowDown34_0 = ScaleRowDown34_Any_NEON; 358 ScaleRowDown34_1 = ScaleRowDown34_Any_NEON; 359 } else { 360 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON; 361 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON; 362 } 363 if (dst_width % 24 == 0) { 364 if (!filtering) { 365 ScaleRowDown34_0 = ScaleRowDown34_NEON; 366 ScaleRowDown34_1 = ScaleRowDown34_NEON; 367 } else { 368 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; 369 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; 370 } 371 } 372 } 373#endif 374#if defined(HAS_SCALEROWDOWN34_SSSE3) 375 if (TestCpuFlag(kCpuHasSSSE3)) { 376 if (!filtering) { 377 ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3; 378 ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3; 379 } else { 380 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3; 381 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3; 382 } 383 if (dst_width % 24 == 0) { 384 if (!filtering) { 385 ScaleRowDown34_0 = ScaleRowDown34_SSSE3; 386 ScaleRowDown34_1 = ScaleRowDown34_SSSE3; 387 } else { 388 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; 389 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; 390 } 391 } 392 } 393#endif 394#if defined(HAS_SCALEROWDOWN34_DSPR2) 395 if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && 396 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 397 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 398 if (!filtering) { 399 ScaleRowDown34_0 = ScaleRowDown34_DSPR2; 400 ScaleRowDown34_1 = ScaleRowDown34_DSPR2; 401 } else { 402 ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2; 403 ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2; 404 } 405 } 406#endif 407 408 for (y = 0; y < dst_height - 2; y += 3) { 409 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 410 src_ptr += src_stride; 411 dst_ptr += dst_stride; 412 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); 413 src_ptr += src_stride; 414 dst_ptr += dst_stride; 415 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); 416 src_ptr += src_stride * 2; 417 dst_ptr += dst_stride; 418 } 419 420 // Remainder 1 or 2 rows with last row vertically unfiltered 421 if ((dst_height % 3) == 2) { 422 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 423 src_ptr += src_stride; 424 dst_ptr += dst_stride; 425 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); 426 } else if ((dst_height % 3) == 1) { 427 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); 428 } 429} 430 431static void ScalePlaneDown34_16(int src_width, 432 int src_height, 433 int dst_width, 434 int dst_height, 435 int src_stride, 436 int dst_stride, 437 const uint16* src_ptr, 438 uint16* dst_ptr, 439 enum FilterMode filtering) { 440 int y; 441 void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride, 442 uint16* dst_ptr, int dst_width); 443 void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride, 444 uint16* dst_ptr, int dst_width); 445 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 446 (void)src_width; 447 (void)src_height; 448 assert(dst_width % 3 == 0); 449 if (!filtering) { 450 ScaleRowDown34_0 = ScaleRowDown34_16_C; 451 ScaleRowDown34_1 = ScaleRowDown34_16_C; 452 } else { 453 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C; 454 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C; 455 } 456#if defined(HAS_SCALEROWDOWN34_16_NEON) 457 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { 458 if (!filtering) { 459 ScaleRowDown34_0 = ScaleRowDown34_16_NEON; 460 ScaleRowDown34_1 = ScaleRowDown34_16_NEON; 461 } else { 462 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON; 463 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON; 464 } 465 } 466#endif 467#if defined(HAS_SCALEROWDOWN34_16_SSSE3) 468 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { 469 if (!filtering) { 470 ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; 471 ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; 472 } else { 473 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3; 474 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3; 475 } 476 } 477#endif 478#if defined(HAS_SCALEROWDOWN34_16_DSPR2) 479 if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && 480 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 481 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 482 if (!filtering) { 483 ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2; 484 ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2; 485 } else { 486 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2; 487 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2; 488 } 489 } 490#endif 491 492 for (y = 0; y < dst_height - 2; y += 3) { 493 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 494 src_ptr += src_stride; 495 dst_ptr += dst_stride; 496 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); 497 src_ptr += src_stride; 498 dst_ptr += dst_stride; 499 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); 500 src_ptr += src_stride * 2; 501 dst_ptr += dst_stride; 502 } 503 504 // Remainder 1 or 2 rows with last row vertically unfiltered 505 if ((dst_height % 3) == 2) { 506 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 507 src_ptr += src_stride; 508 dst_ptr += dst_stride; 509 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); 510 } else if ((dst_height % 3) == 1) { 511 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); 512 } 513} 514 515// Scale plane, 3/8 516// This is an optimized version for scaling down a plane to 3/8 517// of its original size. 518// 519// Uses box filter arranges like this 520// aaabbbcc -> abc 521// aaabbbcc def 522// aaabbbcc ghi 523// dddeeeff 524// dddeeeff 525// dddeeeff 526// ggghhhii 527// ggghhhii 528// Boxes are 3x3, 2x3, 3x2 and 2x2 529 530static void ScalePlaneDown38(int src_width, 531 int src_height, 532 int dst_width, 533 int dst_height, 534 int src_stride, 535 int dst_stride, 536 const uint8* src_ptr, 537 uint8* dst_ptr, 538 enum FilterMode filtering) { 539 int y; 540 void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride, 541 uint8* dst_ptr, int dst_width); 542 void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride, 543 uint8* dst_ptr, int dst_width); 544 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 545 assert(dst_width % 3 == 0); 546 (void)src_width; 547 (void)src_height; 548 if (!filtering) { 549 ScaleRowDown38_3 = ScaleRowDown38_C; 550 ScaleRowDown38_2 = ScaleRowDown38_C; 551 } else { 552 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; 553 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; 554 } 555 556#if defined(HAS_SCALEROWDOWN38_NEON) 557 if (TestCpuFlag(kCpuHasNEON)) { 558 if (!filtering) { 559 ScaleRowDown38_3 = ScaleRowDown38_Any_NEON; 560 ScaleRowDown38_2 = ScaleRowDown38_Any_NEON; 561 } else { 562 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON; 563 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON; 564 } 565 if (dst_width % 12 == 0) { 566 if (!filtering) { 567 ScaleRowDown38_3 = ScaleRowDown38_NEON; 568 ScaleRowDown38_2 = ScaleRowDown38_NEON; 569 } else { 570 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; 571 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; 572 } 573 } 574 } 575#endif 576#if defined(HAS_SCALEROWDOWN38_SSSE3) 577 if (TestCpuFlag(kCpuHasSSSE3)) { 578 if (!filtering) { 579 ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3; 580 ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3; 581 } else { 582 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3; 583 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3; 584 } 585 if (dst_width % 12 == 0 && !filtering) { 586 ScaleRowDown38_3 = ScaleRowDown38_SSSE3; 587 ScaleRowDown38_2 = ScaleRowDown38_SSSE3; 588 } 589 if (dst_width % 6 == 0 && filtering) { 590 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; 591 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; 592 } 593 } 594#endif 595#if defined(HAS_SCALEROWDOWN38_DSPR2) 596 if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && 597 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 598 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 599 if (!filtering) { 600 ScaleRowDown38_3 = ScaleRowDown38_DSPR2; 601 ScaleRowDown38_2 = ScaleRowDown38_DSPR2; 602 } else { 603 ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2; 604 ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2; 605 } 606 } 607#endif 608#if defined(HAS_SCALEROWDOWN38_MSA) 609 if (TestCpuFlag(kCpuHasMSA)) { 610 if (!filtering) { 611 ScaleRowDown38_3 = ScaleRowDown38_Any_MSA; 612 ScaleRowDown38_2 = ScaleRowDown38_Any_MSA; 613 } else { 614 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA; 615 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA; 616 } 617 if (dst_width % 12 == 0) { 618 if (!filtering) { 619 ScaleRowDown38_3 = ScaleRowDown38_MSA; 620 ScaleRowDown38_2 = ScaleRowDown38_MSA; 621 } else { 622 ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA; 623 ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA; 624 } 625 } 626 } 627#endif 628 629 for (y = 0; y < dst_height - 2; y += 3) { 630 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 631 src_ptr += src_stride * 3; 632 dst_ptr += dst_stride; 633 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 634 src_ptr += src_stride * 3; 635 dst_ptr += dst_stride; 636 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); 637 src_ptr += src_stride * 2; 638 dst_ptr += dst_stride; 639 } 640 641 // Remainder 1 or 2 rows with last row vertically unfiltered 642 if ((dst_height % 3) == 2) { 643 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 644 src_ptr += src_stride * 3; 645 dst_ptr += dst_stride; 646 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 647 } else if ((dst_height % 3) == 1) { 648 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 649 } 650} 651 652static void ScalePlaneDown38_16(int src_width, 653 int src_height, 654 int dst_width, 655 int dst_height, 656 int src_stride, 657 int dst_stride, 658 const uint16* src_ptr, 659 uint16* dst_ptr, 660 enum FilterMode filtering) { 661 int y; 662 void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride, 663 uint16* dst_ptr, int dst_width); 664 void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride, 665 uint16* dst_ptr, int dst_width); 666 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 667 (void)src_width; 668 (void)src_height; 669 assert(dst_width % 3 == 0); 670 if (!filtering) { 671 ScaleRowDown38_3 = ScaleRowDown38_16_C; 672 ScaleRowDown38_2 = ScaleRowDown38_16_C; 673 } else { 674 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C; 675 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C; 676 } 677#if defined(HAS_SCALEROWDOWN38_16_NEON) 678 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { 679 if (!filtering) { 680 ScaleRowDown38_3 = ScaleRowDown38_16_NEON; 681 ScaleRowDown38_2 = ScaleRowDown38_16_NEON; 682 } else { 683 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON; 684 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON; 685 } 686 } 687#endif 688#if defined(HAS_SCALEROWDOWN38_16_SSSE3) 689 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { 690 if (!filtering) { 691 ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; 692 ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; 693 } else { 694 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3; 695 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3; 696 } 697 } 698#endif 699#if defined(HAS_SCALEROWDOWN38_16_DSPR2) 700 if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && 701 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 702 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 703 if (!filtering) { 704 ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2; 705 ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2; 706 } else { 707 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2; 708 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2; 709 } 710 } 711#endif 712 713 for (y = 0; y < dst_height - 2; y += 3) { 714 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 715 src_ptr += src_stride * 3; 716 dst_ptr += dst_stride; 717 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 718 src_ptr += src_stride * 3; 719 dst_ptr += dst_stride; 720 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); 721 src_ptr += src_stride * 2; 722 dst_ptr += dst_stride; 723 } 724 725 // Remainder 1 or 2 rows with last row vertically unfiltered 726 if ((dst_height % 3) == 2) { 727 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 728 src_ptr += src_stride * 3; 729 dst_ptr += dst_stride; 730 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 731 } else if ((dst_height % 3) == 1) { 732 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 733 } 734} 735 736#define MIN1(x) ((x) < 1 ? 1 : (x)) 737 738static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { 739 uint32 sum = 0u; 740 int x; 741 assert(iboxwidth > 0); 742 for (x = 0; x < iboxwidth; ++x) { 743 sum += src_ptr[x]; 744 } 745 return sum; 746} 747 748static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { 749 uint32 sum = 0u; 750 int x; 751 assert(iboxwidth > 0); 752 for (x = 0; x < iboxwidth; ++x) { 753 sum += src_ptr[x]; 754 } 755 return sum; 756} 757 758static void ScaleAddCols2_C(int dst_width, 759 int boxheight, 760 int x, 761 int dx, 762 const uint16* src_ptr, 763 uint8* dst_ptr) { 764 int i; 765 int scaletbl[2]; 766 int minboxwidth = dx >> 16; 767 int boxwidth; 768 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); 769 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); 770 for (i = 0; i < dst_width; ++i) { 771 int ix = x >> 16; 772 x += dx; 773 boxwidth = MIN1((x >> 16) - ix); 774 *dst_ptr++ = 775 SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >> 776 16; 777 } 778} 779 780static void ScaleAddCols2_16_C(int dst_width, 781 int boxheight, 782 int x, 783 int dx, 784 const uint32* src_ptr, 785 uint16* dst_ptr) { 786 int i; 787 int scaletbl[2]; 788 int minboxwidth = dx >> 16; 789 int boxwidth; 790 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); 791 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); 792 for (i = 0; i < dst_width; ++i) { 793 int ix = x >> 16; 794 x += dx; 795 boxwidth = MIN1((x >> 16) - ix); 796 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * 797 scaletbl[boxwidth - minboxwidth] >> 798 16; 799 } 800} 801 802static void ScaleAddCols0_C(int dst_width, 803 int boxheight, 804 int x, 805 int, 806 const uint16* src_ptr, 807 uint8* dst_ptr) { 808 int scaleval = 65536 / boxheight; 809 int i; 810 src_ptr += (x >> 16); 811 for (i = 0; i < dst_width; ++i) { 812 *dst_ptr++ = src_ptr[i] * scaleval >> 16; 813 } 814} 815 816static void ScaleAddCols1_C(int dst_width, 817 int boxheight, 818 int x, 819 int dx, 820 const uint16* src_ptr, 821 uint8* dst_ptr) { 822 int boxwidth = MIN1(dx >> 16); 823 int scaleval = 65536 / (boxwidth * boxheight); 824 int i; 825 x >>= 16; 826 for (i = 0; i < dst_width; ++i) { 827 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; 828 x += boxwidth; 829 } 830} 831 832static void ScaleAddCols1_16_C(int dst_width, 833 int boxheight, 834 int x, 835 int dx, 836 const uint32* src_ptr, 837 uint16* dst_ptr) { 838 int boxwidth = MIN1(dx >> 16); 839 int scaleval = 65536 / (boxwidth * boxheight); 840 int i; 841 for (i = 0; i < dst_width; ++i) { 842 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; 843 x += boxwidth; 844 } 845} 846 847// Scale plane down to any dimensions, with interpolation. 848// (boxfilter). 849// 850// Same method as SimpleScale, which is fixed point, outputting 851// one pixel of destination using fixed point (16.16) to step 852// through source, sampling a box of pixel with simple 853// averaging. 854static void ScalePlaneBox(int src_width, 855 int src_height, 856 int dst_width, 857 int dst_height, 858 int src_stride, 859 int dst_stride, 860 const uint8* src_ptr, 861 uint8* dst_ptr) { 862 int j, k; 863 // Initial source x/y coordinate and step values as 16.16 fixed point. 864 int x = 0; 865 int y = 0; 866 int dx = 0; 867 int dy = 0; 868 const int max_y = (src_height << 16); 869 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, 870 &dx, &dy); 871 src_width = Abs(src_width); 872 { 873 // Allocate a row buffer of uint16. 874 align_buffer_64(row16, src_width * 2); 875 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, 876 const uint16* src_ptr, uint8* dst_ptr) = 877 (dx & 0xffff) ? ScaleAddCols2_C 878 : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); 879 void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) = 880 ScaleAddRow_C; 881#if defined(HAS_SCALEADDROW_SSE2) 882 if (TestCpuFlag(kCpuHasSSE2)) { 883 ScaleAddRow = ScaleAddRow_Any_SSE2; 884 if (IS_ALIGNED(src_width, 16)) { 885 ScaleAddRow = ScaleAddRow_SSE2; 886 } 887 } 888#endif 889#if defined(HAS_SCALEADDROW_AVX2) 890 if (TestCpuFlag(kCpuHasAVX2)) { 891 ScaleAddRow = ScaleAddRow_Any_AVX2; 892 if (IS_ALIGNED(src_width, 32)) { 893 ScaleAddRow = ScaleAddRow_AVX2; 894 } 895 } 896#endif 897#if defined(HAS_SCALEADDROW_NEON) 898 if (TestCpuFlag(kCpuHasNEON)) { 899 ScaleAddRow = ScaleAddRow_Any_NEON; 900 if (IS_ALIGNED(src_width, 16)) { 901 ScaleAddRow = ScaleAddRow_NEON; 902 } 903 } 904#endif 905#if defined(HAS_SCALEADDROW_MSA) 906 if (TestCpuFlag(kCpuHasMSA)) { 907 ScaleAddRow = ScaleAddRow_Any_MSA; 908 if (IS_ALIGNED(src_width, 16)) { 909 ScaleAddRow = ScaleAddRow_MSA; 910 } 911 } 912#endif 913#if defined(HAS_SCALEADDROW_DSPR2) 914 if (TestCpuFlag(kCpuHasDSPR2)) { 915 ScaleAddRow = ScaleAddRow_Any_DSPR2; 916 if (IS_ALIGNED(src_width, 16)) { 917 ScaleAddRow = ScaleAddRow_DSPR2; 918 } 919 } 920#endif 921 922 for (j = 0; j < dst_height; ++j) { 923 int boxheight; 924 int iy = y >> 16; 925 const uint8* src = src_ptr + iy * src_stride; 926 y += dy; 927 if (y > max_y) { 928 y = max_y; 929 } 930 boxheight = MIN1((y >> 16) - iy); 931 memset(row16, 0, src_width * 2); 932 for (k = 0; k < boxheight; ++k) { 933 ScaleAddRow(src, (uint16*)(row16), src_width); 934 src += src_stride; 935 } 936 ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr); 937 dst_ptr += dst_stride; 938 } 939 free_aligned_buffer_64(row16); 940 } 941} 942 943static void ScalePlaneBox_16(int src_width, 944 int src_height, 945 int dst_width, 946 int dst_height, 947 int src_stride, 948 int dst_stride, 949 const uint16* src_ptr, 950 uint16* dst_ptr) { 951 int j, k; 952 // Initial source x/y coordinate and step values as 16.16 fixed point. 953 int x = 0; 954 int y = 0; 955 int dx = 0; 956 int dy = 0; 957 const int max_y = (src_height << 16); 958 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, 959 &dx, &dy); 960 src_width = Abs(src_width); 961 { 962 // Allocate a row buffer of uint32. 963 align_buffer_64(row32, src_width * 4); 964 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, 965 const uint32* src_ptr, uint16* dst_ptr) = 966 (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; 967 void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) = 968 ScaleAddRow_16_C; 969 970#if defined(HAS_SCALEADDROW_16_SSE2) 971 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) { 972 ScaleAddRow = ScaleAddRow_16_SSE2; 973 } 974#endif 975 976 for (j = 0; j < dst_height; ++j) { 977 int boxheight; 978 int iy = y >> 16; 979 const uint16* src = src_ptr + iy * src_stride; 980 y += dy; 981 if (y > max_y) { 982 y = max_y; 983 } 984 boxheight = MIN1((y >> 16) - iy); 985 memset(row32, 0, src_width * 4); 986 for (k = 0; k < boxheight; ++k) { 987 ScaleAddRow(src, (uint32*)(row32), src_width); 988 src += src_stride; 989 } 990 ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr); 991 dst_ptr += dst_stride; 992 } 993 free_aligned_buffer_64(row32); 994 } 995} 996 997// Scale plane down with bilinear interpolation. 998void ScalePlaneBilinearDown(int src_width, 999 int src_height, 1000 int dst_width, 1001 int dst_height, 1002 int src_stride, 1003 int dst_stride, 1004 const uint8* src_ptr, 1005 uint8* dst_ptr, 1006 enum FilterMode filtering) { 1007 // Initial source x/y coordinate and step values as 16.16 fixed point. 1008 int x = 0; 1009 int y = 0; 1010 int dx = 0; 1011 int dy = 0; 1012 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 1013 // Allocate a row buffer. 1014 align_buffer_64(row, src_width); 1015 1016 const int max_y = (src_height - 1) << 16; 1017 int j; 1018 void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, 1019 int x, int dx) = 1020 (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; 1021 void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, 1022 ptrdiff_t src_stride, int dst_width, 1023 int source_y_fraction) = InterpolateRow_C; 1024 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, 1025 &dx, &dy); 1026 src_width = Abs(src_width); 1027 1028#if defined(HAS_INTERPOLATEROW_SSSE3) 1029 if (TestCpuFlag(kCpuHasSSSE3)) { 1030 InterpolateRow = InterpolateRow_Any_SSSE3; 1031 if (IS_ALIGNED(src_width, 16)) { 1032 InterpolateRow = InterpolateRow_SSSE3; 1033 } 1034 } 1035#endif 1036#if defined(HAS_INTERPOLATEROW_AVX2) 1037 if (TestCpuFlag(kCpuHasAVX2)) { 1038 InterpolateRow = InterpolateRow_Any_AVX2; 1039 if (IS_ALIGNED(src_width, 32)) { 1040 InterpolateRow = InterpolateRow_AVX2; 1041 } 1042 } 1043#endif 1044#if defined(HAS_INTERPOLATEROW_NEON) 1045 if (TestCpuFlag(kCpuHasNEON)) { 1046 InterpolateRow = InterpolateRow_Any_NEON; 1047 if (IS_ALIGNED(src_width, 16)) { 1048 InterpolateRow = InterpolateRow_NEON; 1049 } 1050 } 1051#endif 1052#if defined(HAS_INTERPOLATEROW_DSPR2) 1053 if (TestCpuFlag(kCpuHasDSPR2)) { 1054 InterpolateRow = InterpolateRow_Any_DSPR2; 1055 if (IS_ALIGNED(src_width, 4)) { 1056 InterpolateRow = InterpolateRow_DSPR2; 1057 } 1058 } 1059#endif 1060#if defined(HAS_INTERPOLATEROW_MSA) 1061 if (TestCpuFlag(kCpuHasMSA)) { 1062 InterpolateRow = InterpolateRow_Any_MSA; 1063 if (IS_ALIGNED(src_width, 32)) { 1064 InterpolateRow = InterpolateRow_MSA; 1065 } 1066 } 1067#endif 1068 1069#if defined(HAS_SCALEFILTERCOLS_SSSE3) 1070 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1071 ScaleFilterCols = ScaleFilterCols_SSSE3; 1072 } 1073#endif 1074#if defined(HAS_SCALEFILTERCOLS_NEON) 1075 if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) { 1076 ScaleFilterCols = ScaleFilterCols_Any_NEON; 1077 if (IS_ALIGNED(dst_width, 8)) { 1078 ScaleFilterCols = ScaleFilterCols_NEON; 1079 } 1080 } 1081#endif 1082 if (y > max_y) { 1083 y = max_y; 1084 } 1085 1086 for (j = 0; j < dst_height; ++j) { 1087 int yi = y >> 16; 1088 const uint8* src = src_ptr + yi * src_stride; 1089 if (filtering == kFilterLinear) { 1090 ScaleFilterCols(dst_ptr, src, dst_width, x, dx); 1091 } else { 1092 int yf = (y >> 8) & 255; 1093 InterpolateRow(row, src, src_stride, src_width, yf); 1094 ScaleFilterCols(dst_ptr, row, dst_width, x, dx); 1095 } 1096 dst_ptr += dst_stride; 1097 y += dy; 1098 if (y > max_y) { 1099 y = max_y; 1100 } 1101 } 1102 free_aligned_buffer_64(row); 1103} 1104 1105void ScalePlaneBilinearDown_16(int src_width, 1106 int src_height, 1107 int dst_width, 1108 int dst_height, 1109 int src_stride, 1110 int dst_stride, 1111 const uint16* src_ptr, 1112 uint16* dst_ptr, 1113 enum FilterMode filtering) { 1114 // Initial source x/y coordinate and step values as 16.16 fixed point. 1115 int x = 0; 1116 int y = 0; 1117 int dx = 0; 1118 int dy = 0; 1119 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 1120 // Allocate a row buffer. 1121 align_buffer_64(row, src_width * 2); 1122 1123 const int max_y = (src_height - 1) << 16; 1124 int j; 1125 void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr, 1126 int dst_width, int x, int dx) = 1127 (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; 1128 void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr, 1129 ptrdiff_t src_stride, int dst_width, 1130 int source_y_fraction) = InterpolateRow_16_C; 1131 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, 1132 &dx, &dy); 1133 src_width = Abs(src_width); 1134 1135#if defined(HAS_INTERPOLATEROW_16_SSE2) 1136 if (TestCpuFlag(kCpuHasSSE2)) { 1137 InterpolateRow = InterpolateRow_Any_16_SSE2; 1138 if (IS_ALIGNED(src_width, 16)) { 1139 InterpolateRow = InterpolateRow_16_SSE2; 1140 } 1141 } 1142#endif 1143#if defined(HAS_INTERPOLATEROW_16_SSSE3) 1144 if (TestCpuFlag(kCpuHasSSSE3)) { 1145 InterpolateRow = InterpolateRow_Any_16_SSSE3; 1146 if (IS_ALIGNED(src_width, 16)) { 1147 InterpolateRow = InterpolateRow_16_SSSE3; 1148 } 1149 } 1150#endif 1151#if defined(HAS_INTERPOLATEROW_16_AVX2) 1152 if (TestCpuFlag(kCpuHasAVX2)) { 1153 InterpolateRow = InterpolateRow_Any_16_AVX2; 1154 if (IS_ALIGNED(src_width, 32)) { 1155 InterpolateRow = InterpolateRow_16_AVX2; 1156 } 1157 } 1158#endif 1159#if defined(HAS_INTERPOLATEROW_16_NEON) 1160 if (TestCpuFlag(kCpuHasNEON)) { 1161 InterpolateRow = InterpolateRow_Any_16_NEON; 1162 if (IS_ALIGNED(src_width, 16)) { 1163 InterpolateRow = InterpolateRow_16_NEON; 1164 } 1165 } 1166#endif 1167#if defined(HAS_INTERPOLATEROW_16_DSPR2) 1168 if (TestCpuFlag(kCpuHasDSPR2)) { 1169 InterpolateRow = InterpolateRow_Any_16_DSPR2; 1170 if (IS_ALIGNED(src_width, 4)) { 1171 InterpolateRow = InterpolateRow_16_DSPR2; 1172 } 1173 } 1174#endif 1175 1176#if defined(HAS_SCALEFILTERCOLS_16_SSSE3) 1177 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1178 ScaleFilterCols = ScaleFilterCols_16_SSSE3; 1179 } 1180#endif 1181 if (y > max_y) { 1182 y = max_y; 1183 } 1184 1185 for (j = 0; j < dst_height; ++j) { 1186 int yi = y >> 16; 1187 const uint16* src = src_ptr + yi * src_stride; 1188 if (filtering == kFilterLinear) { 1189 ScaleFilterCols(dst_ptr, src, dst_width, x, dx); 1190 } else { 1191 int yf = (y >> 8) & 255; 1192 InterpolateRow((uint16*)row, src, src_stride, src_width, yf); 1193 ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx); 1194 } 1195 dst_ptr += dst_stride; 1196 y += dy; 1197 if (y > max_y) { 1198 y = max_y; 1199 } 1200 } 1201 free_aligned_buffer_64(row); 1202} 1203 1204// Scale up down with bilinear interpolation. 1205void ScalePlaneBilinearUp(int src_width, 1206 int src_height, 1207 int dst_width, 1208 int dst_height, 1209 int src_stride, 1210 int dst_stride, 1211 const uint8* src_ptr, 1212 uint8* dst_ptr, 1213 enum FilterMode filtering) { 1214 int j; 1215 // Initial source x/y coordinate and step values as 16.16 fixed point. 1216 int x = 0; 1217 int y = 0; 1218 int dx = 0; 1219 int dy = 0; 1220 const int max_y = (src_height - 1) << 16; 1221 void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, 1222 ptrdiff_t src_stride, int dst_width, 1223 int source_y_fraction) = InterpolateRow_C; 1224 void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, 1225 int x, int dx) = 1226 filtering ? ScaleFilterCols_C : ScaleCols_C; 1227 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, 1228 &dx, &dy); 1229 src_width = Abs(src_width); 1230 1231#if defined(HAS_INTERPOLATEROW_SSSE3) 1232 if (TestCpuFlag(kCpuHasSSSE3)) { 1233 InterpolateRow = InterpolateRow_Any_SSSE3; 1234 if (IS_ALIGNED(dst_width, 16)) { 1235 InterpolateRow = InterpolateRow_SSSE3; 1236 } 1237 } 1238#endif 1239#if defined(HAS_INTERPOLATEROW_AVX2) 1240 if (TestCpuFlag(kCpuHasAVX2)) { 1241 InterpolateRow = InterpolateRow_Any_AVX2; 1242 if (IS_ALIGNED(dst_width, 32)) { 1243 InterpolateRow = InterpolateRow_AVX2; 1244 } 1245 } 1246#endif 1247#if defined(HAS_INTERPOLATEROW_NEON) 1248 if (TestCpuFlag(kCpuHasNEON)) { 1249 InterpolateRow = InterpolateRow_Any_NEON; 1250 if (IS_ALIGNED(dst_width, 16)) { 1251 InterpolateRow = InterpolateRow_NEON; 1252 } 1253 } 1254#endif 1255#if defined(HAS_INTERPOLATEROW_DSPR2) 1256 if (TestCpuFlag(kCpuHasDSPR2)) { 1257 InterpolateRow = InterpolateRow_Any_DSPR2; 1258 if (IS_ALIGNED(dst_width, 4)) { 1259 InterpolateRow = InterpolateRow_DSPR2; 1260 } 1261 } 1262#endif 1263 1264 if (filtering && src_width >= 32768) { 1265 ScaleFilterCols = ScaleFilterCols64_C; 1266 } 1267#if defined(HAS_SCALEFILTERCOLS_SSSE3) 1268 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1269 ScaleFilterCols = ScaleFilterCols_SSSE3; 1270 } 1271#endif 1272#if defined(HAS_SCALEFILTERCOLS_NEON) 1273 if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) { 1274 ScaleFilterCols = ScaleFilterCols_Any_NEON; 1275 if (IS_ALIGNED(dst_width, 8)) { 1276 ScaleFilterCols = ScaleFilterCols_NEON; 1277 } 1278 } 1279#endif 1280 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1281 ScaleFilterCols = ScaleColsUp2_C; 1282#if defined(HAS_SCALECOLS_SSE2) 1283 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 1284 ScaleFilterCols = ScaleColsUp2_SSE2; 1285 } 1286#endif 1287 } 1288 1289 if (y > max_y) { 1290 y = max_y; 1291 } 1292 { 1293 int yi = y >> 16; 1294 const uint8* src = src_ptr + yi * src_stride; 1295 1296 // Allocate 2 row buffers. 1297 const int kRowSize = (dst_width + 31) & ~31; 1298 align_buffer_64(row, kRowSize * 2); 1299 1300 uint8* rowptr = row; 1301 int rowstride = kRowSize; 1302 int lasty = yi; 1303 1304 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1305 if (src_height > 1) { 1306 src += src_stride; 1307 } 1308 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); 1309 src += src_stride; 1310 1311 for (j = 0; j < dst_height; ++j) { 1312 yi = y >> 16; 1313 if (yi != lasty) { 1314 if (y > max_y) { 1315 y = max_y; 1316 yi = y >> 16; 1317 src = src_ptr + yi * src_stride; 1318 } 1319 if (yi != lasty) { 1320 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1321 rowptr += rowstride; 1322 rowstride = -rowstride; 1323 lasty = yi; 1324 src += src_stride; 1325 } 1326 } 1327 if (filtering == kFilterLinear) { 1328 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); 1329 } else { 1330 int yf = (y >> 8) & 255; 1331 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); 1332 } 1333 dst_ptr += dst_stride; 1334 y += dy; 1335 } 1336 free_aligned_buffer_64(row); 1337 } 1338} 1339 1340void ScalePlaneBilinearUp_16(int src_width, 1341 int src_height, 1342 int dst_width, 1343 int dst_height, 1344 int src_stride, 1345 int dst_stride, 1346 const uint16* src_ptr, 1347 uint16* dst_ptr, 1348 enum FilterMode filtering) { 1349 int j; 1350 // Initial source x/y coordinate and step values as 16.16 fixed point. 1351 int x = 0; 1352 int y = 0; 1353 int dx = 0; 1354 int dy = 0; 1355 const int max_y = (src_height - 1) << 16; 1356 void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr, 1357 ptrdiff_t src_stride, int dst_width, 1358 int source_y_fraction) = InterpolateRow_16_C; 1359 void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr, 1360 int dst_width, int x, int dx) = 1361 filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; 1362 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, 1363 &dx, &dy); 1364 src_width = Abs(src_width); 1365 1366#if defined(HAS_INTERPOLATEROW_16_SSE2) 1367 if (TestCpuFlag(kCpuHasSSE2)) { 1368 InterpolateRow = InterpolateRow_Any_16_SSE2; 1369 if (IS_ALIGNED(dst_width, 16)) { 1370 InterpolateRow = InterpolateRow_16_SSE2; 1371 } 1372 } 1373#endif 1374#if defined(HAS_INTERPOLATEROW_16_SSSE3) 1375 if (TestCpuFlag(kCpuHasSSSE3)) { 1376 InterpolateRow = InterpolateRow_Any_16_SSSE3; 1377 if (IS_ALIGNED(dst_width, 16)) { 1378 InterpolateRow = InterpolateRow_16_SSSE3; 1379 } 1380 } 1381#endif 1382#if defined(HAS_INTERPOLATEROW_16_AVX2) 1383 if (TestCpuFlag(kCpuHasAVX2)) { 1384 InterpolateRow = InterpolateRow_Any_16_AVX2; 1385 if (IS_ALIGNED(dst_width, 32)) { 1386 InterpolateRow = InterpolateRow_16_AVX2; 1387 } 1388 } 1389#endif 1390#if defined(HAS_INTERPOLATEROW_16_NEON) 1391 if (TestCpuFlag(kCpuHasNEON)) { 1392 InterpolateRow = InterpolateRow_Any_16_NEON; 1393 if (IS_ALIGNED(dst_width, 16)) { 1394 InterpolateRow = InterpolateRow_16_NEON; 1395 } 1396 } 1397#endif 1398#if defined(HAS_INTERPOLATEROW_16_DSPR2) 1399 if (TestCpuFlag(kCpuHasDSPR2)) { 1400 InterpolateRow = InterpolateRow_Any_16_DSPR2; 1401 if (IS_ALIGNED(dst_width, 4)) { 1402 InterpolateRow = InterpolateRow_16_DSPR2; 1403 } 1404 } 1405#endif 1406 1407 if (filtering && src_width >= 32768) { 1408 ScaleFilterCols = ScaleFilterCols64_16_C; 1409 } 1410#if defined(HAS_SCALEFILTERCOLS_16_SSSE3) 1411 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1412 ScaleFilterCols = ScaleFilterCols_16_SSSE3; 1413 } 1414#endif 1415 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1416 ScaleFilterCols = ScaleColsUp2_16_C; 1417#if defined(HAS_SCALECOLS_16_SSE2) 1418 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 1419 ScaleFilterCols = ScaleColsUp2_16_SSE2; 1420 } 1421#endif 1422 } 1423 1424 if (y > max_y) { 1425 y = max_y; 1426 } 1427 { 1428 int yi = y >> 16; 1429 const uint16* src = src_ptr + yi * src_stride; 1430 1431 // Allocate 2 row buffers. 1432 const int kRowSize = (dst_width + 31) & ~31; 1433 align_buffer_64(row, kRowSize * 4); 1434 1435 uint16* rowptr = (uint16*)row; 1436 int rowstride = kRowSize; 1437 int lasty = yi; 1438 1439 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1440 if (src_height > 1) { 1441 src += src_stride; 1442 } 1443 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); 1444 src += src_stride; 1445 1446 for (j = 0; j < dst_height; ++j) { 1447 yi = y >> 16; 1448 if (yi != lasty) { 1449 if (y > max_y) { 1450 y = max_y; 1451 yi = y >> 16; 1452 src = src_ptr + yi * src_stride; 1453 } 1454 if (yi != lasty) { 1455 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1456 rowptr += rowstride; 1457 rowstride = -rowstride; 1458 lasty = yi; 1459 src += src_stride; 1460 } 1461 } 1462 if (filtering == kFilterLinear) { 1463 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); 1464 } else { 1465 int yf = (y >> 8) & 255; 1466 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); 1467 } 1468 dst_ptr += dst_stride; 1469 y += dy; 1470 } 1471 free_aligned_buffer_64(row); 1472 } 1473} 1474 1475// Scale Plane to/from any dimensions, without interpolation. 1476// Fixed point math is used for performance: The upper 16 bits 1477// of x and dx is the integer part of the source position and 1478// the lower 16 bits are the fixed decimal part. 1479 1480static void ScalePlaneSimple(int src_width, 1481 int src_height, 1482 int dst_width, 1483 int dst_height, 1484 int src_stride, 1485 int dst_stride, 1486 const uint8* src_ptr, 1487 uint8* dst_ptr) { 1488 int i; 1489 void (*ScaleCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, int x, 1490 int dx) = ScaleCols_C; 1491 // Initial source x/y coordinate and step values as 16.16 fixed point. 1492 int x = 0; 1493 int y = 0; 1494 int dx = 0; 1495 int dy = 0; 1496 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, 1497 &dx, &dy); 1498 src_width = Abs(src_width); 1499 1500 if (src_width * 2 == dst_width && x < 0x8000) { 1501 ScaleCols = ScaleColsUp2_C; 1502#if defined(HAS_SCALECOLS_SSE2) 1503 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 1504 ScaleCols = ScaleColsUp2_SSE2; 1505 } 1506#endif 1507 } 1508 1509 for (i = 0; i < dst_height; ++i) { 1510 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); 1511 dst_ptr += dst_stride; 1512 y += dy; 1513 } 1514} 1515 1516static void ScalePlaneSimple_16(int src_width, 1517 int src_height, 1518 int dst_width, 1519 int dst_height, 1520 int src_stride, 1521 int dst_stride, 1522 const uint16* src_ptr, 1523 uint16* dst_ptr) { 1524 int i; 1525 void (*ScaleCols)(uint16 * dst_ptr, const uint16* src_ptr, int dst_width, 1526 int x, int dx) = ScaleCols_16_C; 1527 // Initial source x/y coordinate and step values as 16.16 fixed point. 1528 int x = 0; 1529 int y = 0; 1530 int dx = 0; 1531 int dy = 0; 1532 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, 1533 &dx, &dy); 1534 src_width = Abs(src_width); 1535 1536 if (src_width * 2 == dst_width && x < 0x8000) { 1537 ScaleCols = ScaleColsUp2_16_C; 1538#if defined(HAS_SCALECOLS_16_SSE2) 1539 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 1540 ScaleCols = ScaleColsUp2_16_SSE2; 1541 } 1542#endif 1543 } 1544 1545 for (i = 0; i < dst_height; ++i) { 1546 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); 1547 dst_ptr += dst_stride; 1548 y += dy; 1549 } 1550} 1551 1552// Scale a plane. 1553// This function dispatches to a specialized scaler based on scale factor. 1554 1555LIBYUV_API 1556void ScalePlane(const uint8* src, 1557 int src_stride, 1558 int src_width, 1559 int src_height, 1560 uint8* dst, 1561 int dst_stride, 1562 int dst_width, 1563 int dst_height, 1564 enum FilterMode filtering) { 1565 // Simplify filtering when possible. 1566 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, 1567 filtering); 1568 1569 // Negative height means invert the image. 1570 if (src_height < 0) { 1571 src_height = -src_height; 1572 src = src + (src_height - 1) * src_stride; 1573 src_stride = -src_stride; 1574 } 1575 1576 // Use specialized scales to improve performance for common resolutions. 1577 // For example, all the 1/2 scalings will use ScalePlaneDown2() 1578 if (dst_width == src_width && dst_height == src_height) { 1579 // Straight copy. 1580 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); 1581 return; 1582 } 1583 if (dst_width == src_width && filtering != kFilterBox) { 1584 int dy = FixedDiv(src_height, dst_height); 1585 // Arbitrary scale vertically, but unscaled horizontally. 1586 ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, 1587 dst_stride, src, dst, 0, 0, dy, 1, filtering); 1588 return; 1589 } 1590 if (dst_width <= Abs(src_width) && dst_height <= src_height) { 1591 // Scale down. 1592 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { 1593 // optimized, 3/4 1594 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, 1595 dst_stride, src, dst, filtering); 1596 return; 1597 } 1598 if (2 * dst_width == src_width && 2 * dst_height == src_height) { 1599 // optimized, 1/2 1600 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, 1601 dst_stride, src, dst, filtering); 1602 return; 1603 } 1604 // 3/8 rounded up for odd sized chroma height. 1605 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { 1606 // optimized, 3/8 1607 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, 1608 dst_stride, src, dst, filtering); 1609 return; 1610 } 1611 if (4 * dst_width == src_width && 4 * dst_height == src_height && 1612 (filtering == kFilterBox || filtering == kFilterNone)) { 1613 // optimized, 1/4 1614 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, 1615 dst_stride, src, dst, filtering); 1616 return; 1617 } 1618 } 1619 if (filtering == kFilterBox && dst_height * 2 < src_height) { 1620 ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, 1621 dst_stride, src, dst); 1622 return; 1623 } 1624 if (filtering && dst_height > src_height) { 1625 ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, 1626 src_stride, dst_stride, src, dst, filtering); 1627 return; 1628 } 1629 if (filtering) { 1630 ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, 1631 src_stride, dst_stride, src, dst, filtering); 1632 return; 1633 } 1634 ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, 1635 dst_stride, src, dst); 1636} 1637 1638LIBYUV_API 1639void ScalePlane_16(const uint16* src, 1640 int src_stride, 1641 int src_width, 1642 int src_height, 1643 uint16* dst, 1644 int dst_stride, 1645 int dst_width, 1646 int dst_height, 1647 enum FilterMode filtering) { 1648 // Simplify filtering when possible. 1649 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, 1650 filtering); 1651 1652 // Negative height means invert the image. 1653 if (src_height < 0) { 1654 src_height = -src_height; 1655 src = src + (src_height - 1) * src_stride; 1656 src_stride = -src_stride; 1657 } 1658 1659 // Use specialized scales to improve performance for common resolutions. 1660 // For example, all the 1/2 scalings will use ScalePlaneDown2() 1661 if (dst_width == src_width && dst_height == src_height) { 1662 // Straight copy. 1663 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); 1664 return; 1665 } 1666 if (dst_width == src_width) { 1667 int dy = FixedDiv(src_height, dst_height); 1668 // Arbitrary scale vertically, but unscaled vertically. 1669 ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, 1670 dst_stride, src, dst, 0, 0, dy, 1, filtering); 1671 return; 1672 } 1673 if (dst_width <= Abs(src_width) && dst_height <= src_height) { 1674 // Scale down. 1675 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { 1676 // optimized, 3/4 1677 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, 1678 src_stride, dst_stride, src, dst, filtering); 1679 return; 1680 } 1681 if (2 * dst_width == src_width && 2 * dst_height == src_height) { 1682 // optimized, 1/2 1683 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, 1684 src_stride, dst_stride, src, dst, filtering); 1685 return; 1686 } 1687 // 3/8 rounded up for odd sized chroma height. 1688 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { 1689 // optimized, 3/8 1690 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, 1691 src_stride, dst_stride, src, dst, filtering); 1692 return; 1693 } 1694 if (4 * dst_width == src_width && 4 * dst_height == src_height && 1695 filtering != kFilterBilinear) { 1696 // optimized, 1/4 1697 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, 1698 src_stride, dst_stride, src, dst, filtering); 1699 return; 1700 } 1701 } 1702 if (filtering == kFilterBox && dst_height * 2 < src_height) { 1703 ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride, 1704 dst_stride, src, dst); 1705 return; 1706 } 1707 if (filtering && dst_height > src_height) { 1708 ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, 1709 src_stride, dst_stride, src, dst, filtering); 1710 return; 1711 } 1712 if (filtering) { 1713 ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, 1714 src_stride, dst_stride, src, dst, filtering); 1715 return; 1716 } 1717 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, 1718 dst_stride, src, dst); 1719} 1720 1721// Scale an I420 image. 1722// This function in turn calls a scaling function for each plane. 1723 1724LIBYUV_API 1725int I420Scale(const uint8* src_y, 1726 int src_stride_y, 1727 const uint8* src_u, 1728 int src_stride_u, 1729 const uint8* src_v, 1730 int src_stride_v, 1731 int src_width, 1732 int src_height, 1733 uint8* dst_y, 1734 int dst_stride_y, 1735 uint8* dst_u, 1736 int dst_stride_u, 1737 uint8* dst_v, 1738 int dst_stride_v, 1739 int dst_width, 1740 int dst_height, 1741 enum FilterMode filtering) { 1742 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1743 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1744 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1745 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1746 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || 1747 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || 1748 dst_width <= 0 || dst_height <= 0) { 1749 return -1; 1750 } 1751 1752 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, 1753 dst_width, dst_height, filtering); 1754 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, 1755 dst_stride_u, dst_halfwidth, dst_halfheight, filtering); 1756 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, 1757 dst_stride_v, dst_halfwidth, dst_halfheight, filtering); 1758 return 0; 1759} 1760 1761LIBYUV_API 1762int I420Scale_16(const uint16* src_y, 1763 int src_stride_y, 1764 const uint16* src_u, 1765 int src_stride_u, 1766 const uint16* src_v, 1767 int src_stride_v, 1768 int src_width, 1769 int src_height, 1770 uint16* dst_y, 1771 int dst_stride_y, 1772 uint16* dst_u, 1773 int dst_stride_u, 1774 uint16* dst_v, 1775 int dst_stride_v, 1776 int dst_width, 1777 int dst_height, 1778 enum FilterMode filtering) { 1779 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1780 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1781 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1782 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1783 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || 1784 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || 1785 dst_width <= 0 || dst_height <= 0) { 1786 return -1; 1787 } 1788 1789 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, 1790 dst_width, dst_height, filtering); 1791 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, 1792 dst_stride_u, dst_halfwidth, dst_halfheight, filtering); 1793 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, 1794 dst_stride_v, dst_halfwidth, dst_halfheight, filtering); 1795 return 0; 1796} 1797 1798// Deprecated api 1799LIBYUV_API 1800int Scale(const uint8* src_y, 1801 const uint8* src_u, 1802 const uint8* src_v, 1803 int src_stride_y, 1804 int src_stride_u, 1805 int src_stride_v, 1806 int src_width, 1807 int src_height, 1808 uint8* dst_y, 1809 uint8* dst_u, 1810 uint8* dst_v, 1811 int dst_stride_y, 1812 int dst_stride_u, 1813 int dst_stride_v, 1814 int dst_width, 1815 int dst_height, 1816 LIBYUV_BOOL interpolate) { 1817 return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v, 1818 src_stride_v, src_width, src_height, dst_y, dst_stride_y, 1819 dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width, 1820 dst_height, interpolate ? kFilterBox : kFilterNone); 1821} 1822 1823// Deprecated api 1824LIBYUV_API 1825int ScaleOffset(const uint8* src, 1826 int src_width, 1827 int src_height, 1828 uint8* dst, 1829 int dst_width, 1830 int dst_height, 1831 int dst_yoffset, 1832 LIBYUV_BOOL interpolate) { 1833 // Chroma requires offset to multiple of 2. 1834 int dst_yoffset_even = dst_yoffset & ~1; 1835 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1836 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1837 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1838 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1839 int aheight = dst_height - dst_yoffset_even * 2; // actual output height 1840 const uint8* src_y = src; 1841 const uint8* src_u = src + src_width * src_height; 1842 const uint8* src_v = 1843 src + src_width * src_height + src_halfwidth * src_halfheight; 1844 uint8* dst_y = dst + dst_yoffset_even * dst_width; 1845 uint8* dst_u = 1846 dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth; 1847 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + 1848 (dst_yoffset_even >> 1) * dst_halfwidth; 1849 if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 || 1850 dst_height <= 0 || dst_yoffset_even < 0 || 1851 dst_yoffset_even >= dst_height) { 1852 return -1; 1853 } 1854 return I420Scale(src_y, src_width, src_u, src_halfwidth, src_v, src_halfwidth, 1855 src_width, src_height, dst_y, dst_width, dst_u, 1856 dst_halfwidth, dst_v, dst_halfwidth, dst_width, aheight, 1857 interpolate ? kFilterBox : kFilterNone); 1858} 1859 1860#ifdef __cplusplus 1861} // extern "C" 1862} // namespace libyuv 1863#endif 1864