1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/scale.h"
12
13#include <assert.h>
14#include <string.h>
15
16#include "libyuv/cpu_id.h"
17#include "libyuv/planar_functions.h"  // For CopyPlane
18#include "libyuv/row.h"
19#include "libyuv/scale_row.h"
20
21#ifdef __cplusplus
22namespace libyuv {
23extern "C" {
24#endif
25
// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
// SUBSAMPLE(v, a, s) computes (|v| + a) >> s and gives the result the sign
// of v, so negative values round the same way as their positive counterparts.
// Every argument and the whole expansion are parenthesized so the macro can be
// used inside larger expressions and with compound arguments.
// Note: v is evaluated more than once; do not pass expressions with side
// effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
31
32// Scale plane, 1/2
33// This is an optimized version for scaling down a plane to 1/2 of
34// its original size.
35
36static void ScalePlaneDown2(int src_width,
37                            int src_height,
38                            int dst_width,
39                            int dst_height,
40                            int src_stride,
41                            int dst_stride,
42                            const uint8* src_ptr,
43                            uint8* dst_ptr,
44                            enum FilterMode filtering) {
45  int y;
46  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
47                        uint8* dst_ptr, int dst_width) =
48      filtering == kFilterNone
49          ? ScaleRowDown2_C
50          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
51                                        : ScaleRowDown2Box_C);
52  int row_stride = src_stride << 1;
53  (void)src_width;
54  (void)src_height;
55  if (!filtering) {
56    src_ptr += src_stride;  // Point to odd rows.
57    src_stride = 0;
58  }
59
60#if defined(HAS_SCALEROWDOWN2_NEON)
61  if (TestCpuFlag(kCpuHasNEON)) {
62    ScaleRowDown2 =
63        filtering == kFilterNone
64            ? ScaleRowDown2_Any_NEON
65            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
66                                          : ScaleRowDown2Box_Any_NEON);
67    if (IS_ALIGNED(dst_width, 16)) {
68      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
69                                               : (filtering == kFilterLinear
70                                                      ? ScaleRowDown2Linear_NEON
71                                                      : ScaleRowDown2Box_NEON);
72    }
73  }
74#endif
75#if defined(HAS_SCALEROWDOWN2_SSSE3)
76  if (TestCpuFlag(kCpuHasSSSE3)) {
77    ScaleRowDown2 =
78        filtering == kFilterNone
79            ? ScaleRowDown2_Any_SSSE3
80            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
81                                          : ScaleRowDown2Box_Any_SSSE3);
82    if (IS_ALIGNED(dst_width, 16)) {
83      ScaleRowDown2 =
84          filtering == kFilterNone
85              ? ScaleRowDown2_SSSE3
86              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
87                                            : ScaleRowDown2Box_SSSE3);
88    }
89  }
90#endif
91#if defined(HAS_SCALEROWDOWN2_AVX2)
92  if (TestCpuFlag(kCpuHasAVX2)) {
93    ScaleRowDown2 =
94        filtering == kFilterNone
95            ? ScaleRowDown2_Any_AVX2
96            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
97                                          : ScaleRowDown2Box_Any_AVX2);
98    if (IS_ALIGNED(dst_width, 32)) {
99      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
100                                               : (filtering == kFilterLinear
101                                                      ? ScaleRowDown2Linear_AVX2
102                                                      : ScaleRowDown2Box_AVX2);
103    }
104  }
105#endif
106#if defined(HAS_SCALEROWDOWN2_DSPR2)
107  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
108      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
109      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
110    ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2;
111  }
112#endif
113#if defined(HAS_SCALEROWDOWN2_MSA)
114  if (TestCpuFlag(kCpuHasMSA)) {
115    ScaleRowDown2 =
116        filtering == kFilterNone
117            ? ScaleRowDown2_Any_MSA
118            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
119                                          : ScaleRowDown2Box_Any_MSA);
120    if (IS_ALIGNED(dst_width, 32)) {
121      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
122                                               : (filtering == kFilterLinear
123                                                      ? ScaleRowDown2Linear_MSA
124                                                      : ScaleRowDown2Box_MSA);
125    }
126  }
127#endif
128
129  if (filtering == kFilterLinear) {
130    src_stride = 0;
131  }
132  // TODO(fbarchard): Loop through source height to allow odd height.
133  for (y = 0; y < dst_height; ++y) {
134    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
135    src_ptr += row_stride;
136    dst_ptr += dst_stride;
137  }
138}
139
140static void ScalePlaneDown2_16(int src_width,
141                               int src_height,
142                               int dst_width,
143                               int dst_height,
144                               int src_stride,
145                               int dst_stride,
146                               const uint16* src_ptr,
147                               uint16* dst_ptr,
148                               enum FilterMode filtering) {
149  int y;
150  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
151                        uint16* dst_ptr, int dst_width) =
152      filtering == kFilterNone
153          ? ScaleRowDown2_16_C
154          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
155                                        : ScaleRowDown2Box_16_C);
156  int row_stride = src_stride << 1;
157  (void)src_width;
158  (void)src_height;
159  if (!filtering) {
160    src_ptr += src_stride;  // Point to odd rows.
161    src_stride = 0;
162  }
163
164#if defined(HAS_SCALEROWDOWN2_16_NEON)
165  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
166    ScaleRowDown2 =
167        filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
168  }
169#endif
170#if defined(HAS_SCALEROWDOWN2_16_SSE2)
171  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
172    ScaleRowDown2 =
173        filtering == kFilterNone
174            ? ScaleRowDown2_16_SSE2
175            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
176                                          : ScaleRowDown2Box_16_SSE2);
177  }
178#endif
179#if defined(HAS_SCALEROWDOWN2_16_DSPR2)
180  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
181      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
182      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
183    ScaleRowDown2 =
184        filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2;
185  }
186#endif
187
188  if (filtering == kFilterLinear) {
189    src_stride = 0;
190  }
191  // TODO(fbarchard): Loop through source height to allow odd height.
192  for (y = 0; y < dst_height; ++y) {
193    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
194    src_ptr += row_stride;
195    dst_ptr += dst_stride;
196  }
197}
198
199// Scale plane, 1/4
200// This is an optimized version for scaling down a plane to 1/4 of
201// its original size.
202
203static void ScalePlaneDown4(int src_width,
204                            int src_height,
205                            int dst_width,
206                            int dst_height,
207                            int src_stride,
208                            int dst_stride,
209                            const uint8* src_ptr,
210                            uint8* dst_ptr,
211                            enum FilterMode filtering) {
212  int y;
213  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
214                        uint8* dst_ptr, int dst_width) =
215      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
216  int row_stride = src_stride << 2;
217  (void)src_width;
218  (void)src_height;
219  if (!filtering) {
220    src_ptr += src_stride * 2;  // Point to row 2.
221    src_stride = 0;
222  }
223#if defined(HAS_SCALEROWDOWN4_NEON)
224  if (TestCpuFlag(kCpuHasNEON)) {
225    ScaleRowDown4 =
226        filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
227    if (IS_ALIGNED(dst_width, 8)) {
228      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
229    }
230  }
231#endif
232#if defined(HAS_SCALEROWDOWN4_SSSE3)
233  if (TestCpuFlag(kCpuHasSSSE3)) {
234    ScaleRowDown4 =
235        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
236    if (IS_ALIGNED(dst_width, 8)) {
237      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
238    }
239  }
240#endif
241#if defined(HAS_SCALEROWDOWN4_AVX2)
242  if (TestCpuFlag(kCpuHasAVX2)) {
243    ScaleRowDown4 =
244        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
245    if (IS_ALIGNED(dst_width, 16)) {
246      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
247    }
248  }
249#endif
250#if defined(HAS_SCALEROWDOWN4_DSPR2)
251  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
252      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
253      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
254    ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2;
255  }
256#endif
257#if defined(HAS_SCALEROWDOWN4_MSA)
258  if (TestCpuFlag(kCpuHasMSA)) {
259    ScaleRowDown4 =
260        filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
261    if (IS_ALIGNED(dst_width, 16)) {
262      ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
263    }
264  }
265#endif
266
267  if (filtering == kFilterLinear) {
268    src_stride = 0;
269  }
270  for (y = 0; y < dst_height; ++y) {
271    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
272    src_ptr += row_stride;
273    dst_ptr += dst_stride;
274  }
275}
276
277static void ScalePlaneDown4_16(int src_width,
278                               int src_height,
279                               int dst_width,
280                               int dst_height,
281                               int src_stride,
282                               int dst_stride,
283                               const uint16* src_ptr,
284                               uint16* dst_ptr,
285                               enum FilterMode filtering) {
286  int y;
287  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
288                        uint16* dst_ptr, int dst_width) =
289      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
290  int row_stride = src_stride << 2;
291  (void)src_width;
292  (void)src_height;
293  if (!filtering) {
294    src_ptr += src_stride * 2;  // Point to row 2.
295    src_stride = 0;
296  }
297#if defined(HAS_SCALEROWDOWN4_16_NEON)
298  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
299    ScaleRowDown4 =
300        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
301  }
302#endif
303#if defined(HAS_SCALEROWDOWN4_16_SSE2)
304  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
305    ScaleRowDown4 =
306        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
307  }
308#endif
309#if defined(HAS_SCALEROWDOWN4_16_DSPR2)
310  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
311      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
312      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
313    ScaleRowDown4 =
314        filtering ? ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2;
315  }
316#endif
317
318  if (filtering == kFilterLinear) {
319    src_stride = 0;
320  }
321  for (y = 0; y < dst_height; ++y) {
322    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
323    src_ptr += row_stride;
324    dst_ptr += dst_stride;
325  }
326}
327
328// Scale plane down, 3/4
329static void ScalePlaneDown34(int src_width,
330                             int src_height,
331                             int dst_width,
332                             int dst_height,
333                             int src_stride,
334                             int dst_stride,
335                             const uint8* src_ptr,
336                             uint8* dst_ptr,
337                             enum FilterMode filtering) {
338  int y;
339  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
340                           uint8* dst_ptr, int dst_width);
341  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
342                           uint8* dst_ptr, int dst_width);
343  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
344  (void)src_width;
345  (void)src_height;
346  assert(dst_width % 3 == 0);
347  if (!filtering) {
348    ScaleRowDown34_0 = ScaleRowDown34_C;
349    ScaleRowDown34_1 = ScaleRowDown34_C;
350  } else {
351    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
352    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
353  }
354#if defined(HAS_SCALEROWDOWN34_NEON)
355  if (TestCpuFlag(kCpuHasNEON)) {
356    if (!filtering) {
357      ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
358      ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
359    } else {
360      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
361      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
362    }
363    if (dst_width % 24 == 0) {
364      if (!filtering) {
365        ScaleRowDown34_0 = ScaleRowDown34_NEON;
366        ScaleRowDown34_1 = ScaleRowDown34_NEON;
367      } else {
368        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
369        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
370      }
371    }
372  }
373#endif
374#if defined(HAS_SCALEROWDOWN34_SSSE3)
375  if (TestCpuFlag(kCpuHasSSSE3)) {
376    if (!filtering) {
377      ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
378      ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
379    } else {
380      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
381      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
382    }
383    if (dst_width % 24 == 0) {
384      if (!filtering) {
385        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
386        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
387      } else {
388        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
389        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
390      }
391    }
392  }
393#endif
394#if defined(HAS_SCALEROWDOWN34_DSPR2)
395  if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
396      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
397      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
398    if (!filtering) {
399      ScaleRowDown34_0 = ScaleRowDown34_DSPR2;
400      ScaleRowDown34_1 = ScaleRowDown34_DSPR2;
401    } else {
402      ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2;
403      ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2;
404    }
405  }
406#endif
407
408  for (y = 0; y < dst_height - 2; y += 3) {
409    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
410    src_ptr += src_stride;
411    dst_ptr += dst_stride;
412    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
413    src_ptr += src_stride;
414    dst_ptr += dst_stride;
415    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
416    src_ptr += src_stride * 2;
417    dst_ptr += dst_stride;
418  }
419
420  // Remainder 1 or 2 rows with last row vertically unfiltered
421  if ((dst_height % 3) == 2) {
422    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
423    src_ptr += src_stride;
424    dst_ptr += dst_stride;
425    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
426  } else if ((dst_height % 3) == 1) {
427    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
428  }
429}
430
431static void ScalePlaneDown34_16(int src_width,
432                                int src_height,
433                                int dst_width,
434                                int dst_height,
435                                int src_stride,
436                                int dst_stride,
437                                const uint16* src_ptr,
438                                uint16* dst_ptr,
439                                enum FilterMode filtering) {
440  int y;
441  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
442                           uint16* dst_ptr, int dst_width);
443  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
444                           uint16* dst_ptr, int dst_width);
445  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
446  (void)src_width;
447  (void)src_height;
448  assert(dst_width % 3 == 0);
449  if (!filtering) {
450    ScaleRowDown34_0 = ScaleRowDown34_16_C;
451    ScaleRowDown34_1 = ScaleRowDown34_16_C;
452  } else {
453    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
454    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
455  }
456#if defined(HAS_SCALEROWDOWN34_16_NEON)
457  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
458    if (!filtering) {
459      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
460      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
461    } else {
462      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
463      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
464    }
465  }
466#endif
467#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
468  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
469    if (!filtering) {
470      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
471      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
472    } else {
473      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
474      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
475    }
476  }
477#endif
478#if defined(HAS_SCALEROWDOWN34_16_DSPR2)
479  if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
480      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
481      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
482    if (!filtering) {
483      ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2;
484      ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2;
485    } else {
486      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2;
487      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2;
488    }
489  }
490#endif
491
492  for (y = 0; y < dst_height - 2; y += 3) {
493    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
494    src_ptr += src_stride;
495    dst_ptr += dst_stride;
496    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
497    src_ptr += src_stride;
498    dst_ptr += dst_stride;
499    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
500    src_ptr += src_stride * 2;
501    dst_ptr += dst_stride;
502  }
503
504  // Remainder 1 or 2 rows with last row vertically unfiltered
505  if ((dst_height % 3) == 2) {
506    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
507    src_ptr += src_stride;
508    dst_ptr += dst_stride;
509    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
510  } else if ((dst_height % 3) == 1) {
511    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
512  }
513}
514
515// Scale plane, 3/8
516// This is an optimized version for scaling down a plane to 3/8
517// of its original size.
518//
// Uses a box filter arranged like this:
520// aaabbbcc -> abc
521// aaabbbcc    def
522// aaabbbcc    ghi
523// dddeeeff
524// dddeeeff
525// dddeeeff
526// ggghhhii
527// ggghhhii
528// Boxes are 3x3, 2x3, 3x2 and 2x2
529
530static void ScalePlaneDown38(int src_width,
531                             int src_height,
532                             int dst_width,
533                             int dst_height,
534                             int src_stride,
535                             int dst_stride,
536                             const uint8* src_ptr,
537                             uint8* dst_ptr,
538                             enum FilterMode filtering) {
539  int y;
540  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
541                           uint8* dst_ptr, int dst_width);
542  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
543                           uint8* dst_ptr, int dst_width);
544  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
545  assert(dst_width % 3 == 0);
546  (void)src_width;
547  (void)src_height;
548  if (!filtering) {
549    ScaleRowDown38_3 = ScaleRowDown38_C;
550    ScaleRowDown38_2 = ScaleRowDown38_C;
551  } else {
552    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
553    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
554  }
555
556#if defined(HAS_SCALEROWDOWN38_NEON)
557  if (TestCpuFlag(kCpuHasNEON)) {
558    if (!filtering) {
559      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
560      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
561    } else {
562      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
563      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
564    }
565    if (dst_width % 12 == 0) {
566      if (!filtering) {
567        ScaleRowDown38_3 = ScaleRowDown38_NEON;
568        ScaleRowDown38_2 = ScaleRowDown38_NEON;
569      } else {
570        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
571        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
572      }
573    }
574  }
575#endif
576#if defined(HAS_SCALEROWDOWN38_SSSE3)
577  if (TestCpuFlag(kCpuHasSSSE3)) {
578    if (!filtering) {
579      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
580      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
581    } else {
582      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
583      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
584    }
585    if (dst_width % 12 == 0 && !filtering) {
586      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
587      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
588    }
589    if (dst_width % 6 == 0 && filtering) {
590      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
591      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
592    }
593  }
594#endif
595#if defined(HAS_SCALEROWDOWN38_DSPR2)
596  if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
597      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
598      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
599    if (!filtering) {
600      ScaleRowDown38_3 = ScaleRowDown38_DSPR2;
601      ScaleRowDown38_2 = ScaleRowDown38_DSPR2;
602    } else {
603      ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2;
604      ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2;
605    }
606  }
607#endif
608#if defined(HAS_SCALEROWDOWN38_MSA)
609  if (TestCpuFlag(kCpuHasMSA)) {
610    if (!filtering) {
611      ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
612      ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
613    } else {
614      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
615      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
616    }
617    if (dst_width % 12 == 0) {
618      if (!filtering) {
619        ScaleRowDown38_3 = ScaleRowDown38_MSA;
620        ScaleRowDown38_2 = ScaleRowDown38_MSA;
621      } else {
622        ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
623        ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
624      }
625    }
626  }
627#endif
628
629  for (y = 0; y < dst_height - 2; y += 3) {
630    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
631    src_ptr += src_stride * 3;
632    dst_ptr += dst_stride;
633    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
634    src_ptr += src_stride * 3;
635    dst_ptr += dst_stride;
636    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
637    src_ptr += src_stride * 2;
638    dst_ptr += dst_stride;
639  }
640
641  // Remainder 1 or 2 rows with last row vertically unfiltered
642  if ((dst_height % 3) == 2) {
643    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
644    src_ptr += src_stride * 3;
645    dst_ptr += dst_stride;
646    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
647  } else if ((dst_height % 3) == 1) {
648    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
649  }
650}
651
652static void ScalePlaneDown38_16(int src_width,
653                                int src_height,
654                                int dst_width,
655                                int dst_height,
656                                int src_stride,
657                                int dst_stride,
658                                const uint16* src_ptr,
659                                uint16* dst_ptr,
660                                enum FilterMode filtering) {
661  int y;
662  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
663                           uint16* dst_ptr, int dst_width);
664  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
665                           uint16* dst_ptr, int dst_width);
666  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
667  (void)src_width;
668  (void)src_height;
669  assert(dst_width % 3 == 0);
670  if (!filtering) {
671    ScaleRowDown38_3 = ScaleRowDown38_16_C;
672    ScaleRowDown38_2 = ScaleRowDown38_16_C;
673  } else {
674    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
675    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
676  }
677#if defined(HAS_SCALEROWDOWN38_16_NEON)
678  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
679    if (!filtering) {
680      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
681      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
682    } else {
683      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
684      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
685    }
686  }
687#endif
688#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
689  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
690    if (!filtering) {
691      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
692      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
693    } else {
694      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
695      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
696    }
697  }
698#endif
699#if defined(HAS_SCALEROWDOWN38_16_DSPR2)
700  if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
701      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
702      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
703    if (!filtering) {
704      ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2;
705      ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2;
706    } else {
707      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2;
708      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2;
709    }
710  }
711#endif
712
713  for (y = 0; y < dst_height - 2; y += 3) {
714    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
715    src_ptr += src_stride * 3;
716    dst_ptr += dst_stride;
717    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
718    src_ptr += src_stride * 3;
719    dst_ptr += dst_stride;
720    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
721    src_ptr += src_stride * 2;
722    dst_ptr += dst_stride;
723  }
724
725  // Remainder 1 or 2 rows with last row vertically unfiltered
726  if ((dst_height % 3) == 2) {
727    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
728    src_ptr += src_stride * 3;
729    dst_ptr += dst_stride;
730    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
731  } else if ((dst_height % 3) == 1) {
732    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
733  }
734}
735
// Clamps x from below to 1. Note: x is evaluated twice.
#define MIN1(x) (((x) >= 1) ? (x) : 1)
737
738static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
739  uint32 sum = 0u;
740  int x;
741  assert(iboxwidth > 0);
742  for (x = 0; x < iboxwidth; ++x) {
743    sum += src_ptr[x];
744  }
745  return sum;
746}
747
748static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
749  uint32 sum = 0u;
750  int x;
751  assert(iboxwidth > 0);
752  for (x = 0; x < iboxwidth; ++x) {
753    sum += src_ptr[x];
754  }
755  return sum;
756}
757
758static void ScaleAddCols2_C(int dst_width,
759                            int boxheight,
760                            int x,
761                            int dx,
762                            const uint16* src_ptr,
763                            uint8* dst_ptr) {
764  int i;
765  int scaletbl[2];
766  int minboxwidth = dx >> 16;
767  int boxwidth;
768  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
769  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
770  for (i = 0; i < dst_width; ++i) {
771    int ix = x >> 16;
772    x += dx;
773    boxwidth = MIN1((x >> 16) - ix);
774    *dst_ptr++ =
775        SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
776        16;
777  }
778}
779
780static void ScaleAddCols2_16_C(int dst_width,
781                               int boxheight,
782                               int x,
783                               int dx,
784                               const uint32* src_ptr,
785                               uint16* dst_ptr) {
786  int i;
787  int scaletbl[2];
788  int minboxwidth = dx >> 16;
789  int boxwidth;
790  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
791  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
792  for (i = 0; i < dst_width; ++i) {
793    int ix = x >> 16;
794    x += dx;
795    boxwidth = MIN1((x >> 16) - ix);
796    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
797                     scaletbl[boxwidth - minboxwidth] >>
798                 16;
799  }
800}
801
802static void ScaleAddCols0_C(int dst_width,
803                            int boxheight,
804                            int x,
805                            int,
806                            const uint16* src_ptr,
807                            uint8* dst_ptr) {
808  int scaleval = 65536 / boxheight;
809  int i;
810  src_ptr += (x >> 16);
811  for (i = 0; i < dst_width; ++i) {
812    *dst_ptr++ = src_ptr[i] * scaleval >> 16;
813  }
814}
815
816static void ScaleAddCols1_C(int dst_width,
817                            int boxheight,
818                            int x,
819                            int dx,
820                            const uint16* src_ptr,
821                            uint8* dst_ptr) {
822  int boxwidth = MIN1(dx >> 16);
823  int scaleval = 65536 / (boxwidth * boxheight);
824  int i;
825  x >>= 16;
826  for (i = 0; i < dst_width; ++i) {
827    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
828    x += boxwidth;
829  }
830}
831
832static void ScaleAddCols1_16_C(int dst_width,
833                               int boxheight,
834                               int x,
835                               int dx,
836                               const uint32* src_ptr,
837                               uint16* dst_ptr) {
838  int boxwidth = MIN1(dx >> 16);
839  int scaleval = 65536 / (boxwidth * boxheight);
840  int i;
841  for (i = 0; i < dst_width; ++i) {
842    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
843    x += boxwidth;
844  }
845}
846
847// Scale plane down to any dimensions, with interpolation.
848// (boxfilter).
849//
// Same method as SimpleScale: steps through the source in 16.16 fixed
// point and outputs one destination pixel at a time by sampling a box
// of source pixels and taking their simple average.
854static void ScalePlaneBox(int src_width,
855                          int src_height,
856                          int dst_width,
857                          int dst_height,
858                          int src_stride,
859                          int dst_stride,
860                          const uint8* src_ptr,
861                          uint8* dst_ptr) {
862  int j, k;
863  // Initial source x/y coordinate and step values as 16.16 fixed point.
864  int x = 0;
865  int y = 0;
866  int dx = 0;
867  int dy = 0;
868  const int max_y = (src_height << 16);
869  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
870             &dx, &dy);
871  src_width = Abs(src_width);
872  {
873    // Allocate a row buffer of uint16.
874    align_buffer_64(row16, src_width * 2);
875    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
876                         const uint16* src_ptr, uint8* dst_ptr) =
877        (dx & 0xffff) ? ScaleAddCols2_C
878                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
879    void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
880        ScaleAddRow_C;
881#if defined(HAS_SCALEADDROW_SSE2)
882    if (TestCpuFlag(kCpuHasSSE2)) {
883      ScaleAddRow = ScaleAddRow_Any_SSE2;
884      if (IS_ALIGNED(src_width, 16)) {
885        ScaleAddRow = ScaleAddRow_SSE2;
886      }
887    }
888#endif
889#if defined(HAS_SCALEADDROW_AVX2)
890    if (TestCpuFlag(kCpuHasAVX2)) {
891      ScaleAddRow = ScaleAddRow_Any_AVX2;
892      if (IS_ALIGNED(src_width, 32)) {
893        ScaleAddRow = ScaleAddRow_AVX2;
894      }
895    }
896#endif
897#if defined(HAS_SCALEADDROW_NEON)
898    if (TestCpuFlag(kCpuHasNEON)) {
899      ScaleAddRow = ScaleAddRow_Any_NEON;
900      if (IS_ALIGNED(src_width, 16)) {
901        ScaleAddRow = ScaleAddRow_NEON;
902      }
903    }
904#endif
905#if defined(HAS_SCALEADDROW_MSA)
906    if (TestCpuFlag(kCpuHasMSA)) {
907      ScaleAddRow = ScaleAddRow_Any_MSA;
908      if (IS_ALIGNED(src_width, 16)) {
909        ScaleAddRow = ScaleAddRow_MSA;
910      }
911    }
912#endif
913#if defined(HAS_SCALEADDROW_DSPR2)
914    if (TestCpuFlag(kCpuHasDSPR2)) {
915      ScaleAddRow = ScaleAddRow_Any_DSPR2;
916      if (IS_ALIGNED(src_width, 16)) {
917        ScaleAddRow = ScaleAddRow_DSPR2;
918      }
919    }
920#endif
921
922    for (j = 0; j < dst_height; ++j) {
923      int boxheight;
924      int iy = y >> 16;
925      const uint8* src = src_ptr + iy * src_stride;
926      y += dy;
927      if (y > max_y) {
928        y = max_y;
929      }
930      boxheight = MIN1((y >> 16) - iy);
931      memset(row16, 0, src_width * 2);
932      for (k = 0; k < boxheight; ++k) {
933        ScaleAddRow(src, (uint16*)(row16), src_width);
934        src += src_stride;
935      }
936      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
937      dst_ptr += dst_stride;
938    }
939    free_aligned_buffer_64(row16);
940  }
941}
942
943static void ScalePlaneBox_16(int src_width,
944                             int src_height,
945                             int dst_width,
946                             int dst_height,
947                             int src_stride,
948                             int dst_stride,
949                             const uint16* src_ptr,
950                             uint16* dst_ptr) {
951  int j, k;
952  // Initial source x/y coordinate and step values as 16.16 fixed point.
953  int x = 0;
954  int y = 0;
955  int dx = 0;
956  int dy = 0;
957  const int max_y = (src_height << 16);
958  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
959             &dx, &dy);
960  src_width = Abs(src_width);
961  {
962    // Allocate a row buffer of uint32.
963    align_buffer_64(row32, src_width * 4);
964    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
965                         const uint32* src_ptr, uint16* dst_ptr) =
966        (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
967    void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
968        ScaleAddRow_16_C;
969
970#if defined(HAS_SCALEADDROW_16_SSE2)
971    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
972      ScaleAddRow = ScaleAddRow_16_SSE2;
973    }
974#endif
975
976    for (j = 0; j < dst_height; ++j) {
977      int boxheight;
978      int iy = y >> 16;
979      const uint16* src = src_ptr + iy * src_stride;
980      y += dy;
981      if (y > max_y) {
982        y = max_y;
983      }
984      boxheight = MIN1((y >> 16) - iy);
985      memset(row32, 0, src_width * 4);
986      for (k = 0; k < boxheight; ++k) {
987        ScaleAddRow(src, (uint32*)(row32), src_width);
988        src += src_stride;
989      }
990      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
991      dst_ptr += dst_stride;
992    }
993    free_aligned_buffer_64(row32);
994  }
995}
996
997// Scale plane down with bilinear interpolation.
998void ScalePlaneBilinearDown(int src_width,
999                            int src_height,
1000                            int dst_width,
1001                            int dst_height,
1002                            int src_stride,
1003                            int dst_stride,
1004                            const uint8* src_ptr,
1005                            uint8* dst_ptr,
1006                            enum FilterMode filtering) {
1007  // Initial source x/y coordinate and step values as 16.16 fixed point.
1008  int x = 0;
1009  int y = 0;
1010  int dx = 0;
1011  int dy = 0;
1012  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1013  // Allocate a row buffer.
1014  align_buffer_64(row, src_width);
1015
1016  const int max_y = (src_height - 1) << 16;
1017  int j;
1018  void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width,
1019                          int x, int dx) =
1020      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
1021  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
1022                         ptrdiff_t src_stride, int dst_width,
1023                         int source_y_fraction) = InterpolateRow_C;
1024  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1025             &dx, &dy);
1026  src_width = Abs(src_width);
1027
1028#if defined(HAS_INTERPOLATEROW_SSSE3)
1029  if (TestCpuFlag(kCpuHasSSSE3)) {
1030    InterpolateRow = InterpolateRow_Any_SSSE3;
1031    if (IS_ALIGNED(src_width, 16)) {
1032      InterpolateRow = InterpolateRow_SSSE3;
1033    }
1034  }
1035#endif
1036#if defined(HAS_INTERPOLATEROW_AVX2)
1037  if (TestCpuFlag(kCpuHasAVX2)) {
1038    InterpolateRow = InterpolateRow_Any_AVX2;
1039    if (IS_ALIGNED(src_width, 32)) {
1040      InterpolateRow = InterpolateRow_AVX2;
1041    }
1042  }
1043#endif
1044#if defined(HAS_INTERPOLATEROW_NEON)
1045  if (TestCpuFlag(kCpuHasNEON)) {
1046    InterpolateRow = InterpolateRow_Any_NEON;
1047    if (IS_ALIGNED(src_width, 16)) {
1048      InterpolateRow = InterpolateRow_NEON;
1049    }
1050  }
1051#endif
1052#if defined(HAS_INTERPOLATEROW_DSPR2)
1053  if (TestCpuFlag(kCpuHasDSPR2)) {
1054    InterpolateRow = InterpolateRow_Any_DSPR2;
1055    if (IS_ALIGNED(src_width, 4)) {
1056      InterpolateRow = InterpolateRow_DSPR2;
1057    }
1058  }
1059#endif
1060#if defined(HAS_INTERPOLATEROW_MSA)
1061  if (TestCpuFlag(kCpuHasMSA)) {
1062    InterpolateRow = InterpolateRow_Any_MSA;
1063    if (IS_ALIGNED(src_width, 32)) {
1064      InterpolateRow = InterpolateRow_MSA;
1065    }
1066  }
1067#endif
1068
1069#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1070  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1071    ScaleFilterCols = ScaleFilterCols_SSSE3;
1072  }
1073#endif
1074#if defined(HAS_SCALEFILTERCOLS_NEON)
1075  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1076    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1077    if (IS_ALIGNED(dst_width, 8)) {
1078      ScaleFilterCols = ScaleFilterCols_NEON;
1079    }
1080  }
1081#endif
1082  if (y > max_y) {
1083    y = max_y;
1084  }
1085
1086  for (j = 0; j < dst_height; ++j) {
1087    int yi = y >> 16;
1088    const uint8* src = src_ptr + yi * src_stride;
1089    if (filtering == kFilterLinear) {
1090      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1091    } else {
1092      int yf = (y >> 8) & 255;
1093      InterpolateRow(row, src, src_stride, src_width, yf);
1094      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1095    }
1096    dst_ptr += dst_stride;
1097    y += dy;
1098    if (y > max_y) {
1099      y = max_y;
1100    }
1101  }
1102  free_aligned_buffer_64(row);
1103}
1104
1105void ScalePlaneBilinearDown_16(int src_width,
1106                               int src_height,
1107                               int dst_width,
1108                               int dst_height,
1109                               int src_stride,
1110                               int dst_stride,
1111                               const uint16* src_ptr,
1112                               uint16* dst_ptr,
1113                               enum FilterMode filtering) {
1114  // Initial source x/y coordinate and step values as 16.16 fixed point.
1115  int x = 0;
1116  int y = 0;
1117  int dx = 0;
1118  int dy = 0;
1119  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1120  // Allocate a row buffer.
1121  align_buffer_64(row, src_width * 2);
1122
1123  const int max_y = (src_height - 1) << 16;
1124  int j;
1125  void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr,
1126                          int dst_width, int x, int dx) =
1127      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1128  void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr,
1129                         ptrdiff_t src_stride, int dst_width,
1130                         int source_y_fraction) = InterpolateRow_16_C;
1131  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1132             &dx, &dy);
1133  src_width = Abs(src_width);
1134
1135#if defined(HAS_INTERPOLATEROW_16_SSE2)
1136  if (TestCpuFlag(kCpuHasSSE2)) {
1137    InterpolateRow = InterpolateRow_Any_16_SSE2;
1138    if (IS_ALIGNED(src_width, 16)) {
1139      InterpolateRow = InterpolateRow_16_SSE2;
1140    }
1141  }
1142#endif
1143#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1144  if (TestCpuFlag(kCpuHasSSSE3)) {
1145    InterpolateRow = InterpolateRow_Any_16_SSSE3;
1146    if (IS_ALIGNED(src_width, 16)) {
1147      InterpolateRow = InterpolateRow_16_SSSE3;
1148    }
1149  }
1150#endif
1151#if defined(HAS_INTERPOLATEROW_16_AVX2)
1152  if (TestCpuFlag(kCpuHasAVX2)) {
1153    InterpolateRow = InterpolateRow_Any_16_AVX2;
1154    if (IS_ALIGNED(src_width, 32)) {
1155      InterpolateRow = InterpolateRow_16_AVX2;
1156    }
1157  }
1158#endif
1159#if defined(HAS_INTERPOLATEROW_16_NEON)
1160  if (TestCpuFlag(kCpuHasNEON)) {
1161    InterpolateRow = InterpolateRow_Any_16_NEON;
1162    if (IS_ALIGNED(src_width, 16)) {
1163      InterpolateRow = InterpolateRow_16_NEON;
1164    }
1165  }
1166#endif
1167#if defined(HAS_INTERPOLATEROW_16_DSPR2)
1168  if (TestCpuFlag(kCpuHasDSPR2)) {
1169    InterpolateRow = InterpolateRow_Any_16_DSPR2;
1170    if (IS_ALIGNED(src_width, 4)) {
1171      InterpolateRow = InterpolateRow_16_DSPR2;
1172    }
1173  }
1174#endif
1175
1176#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1177  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1178    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1179  }
1180#endif
1181  if (y > max_y) {
1182    y = max_y;
1183  }
1184
1185  for (j = 0; j < dst_height; ++j) {
1186    int yi = y >> 16;
1187    const uint16* src = src_ptr + yi * src_stride;
1188    if (filtering == kFilterLinear) {
1189      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1190    } else {
1191      int yf = (y >> 8) & 255;
1192      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
1193      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
1194    }
1195    dst_ptr += dst_stride;
1196    y += dy;
1197    if (y > max_y) {
1198      y = max_y;
1199    }
1200  }
1201  free_aligned_buffer_64(row);
1202}
1203
1204// Scale up down with bilinear interpolation.
1205void ScalePlaneBilinearUp(int src_width,
1206                          int src_height,
1207                          int dst_width,
1208                          int dst_height,
1209                          int src_stride,
1210                          int dst_stride,
1211                          const uint8* src_ptr,
1212                          uint8* dst_ptr,
1213                          enum FilterMode filtering) {
1214  int j;
1215  // Initial source x/y coordinate and step values as 16.16 fixed point.
1216  int x = 0;
1217  int y = 0;
1218  int dx = 0;
1219  int dy = 0;
1220  const int max_y = (src_height - 1) << 16;
1221  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
1222                         ptrdiff_t src_stride, int dst_width,
1223                         int source_y_fraction) = InterpolateRow_C;
1224  void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width,
1225                          int x, int dx) =
1226      filtering ? ScaleFilterCols_C : ScaleCols_C;
1227  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1228             &dx, &dy);
1229  src_width = Abs(src_width);
1230
1231#if defined(HAS_INTERPOLATEROW_SSSE3)
1232  if (TestCpuFlag(kCpuHasSSSE3)) {
1233    InterpolateRow = InterpolateRow_Any_SSSE3;
1234    if (IS_ALIGNED(dst_width, 16)) {
1235      InterpolateRow = InterpolateRow_SSSE3;
1236    }
1237  }
1238#endif
1239#if defined(HAS_INTERPOLATEROW_AVX2)
1240  if (TestCpuFlag(kCpuHasAVX2)) {
1241    InterpolateRow = InterpolateRow_Any_AVX2;
1242    if (IS_ALIGNED(dst_width, 32)) {
1243      InterpolateRow = InterpolateRow_AVX2;
1244    }
1245  }
1246#endif
1247#if defined(HAS_INTERPOLATEROW_NEON)
1248  if (TestCpuFlag(kCpuHasNEON)) {
1249    InterpolateRow = InterpolateRow_Any_NEON;
1250    if (IS_ALIGNED(dst_width, 16)) {
1251      InterpolateRow = InterpolateRow_NEON;
1252    }
1253  }
1254#endif
1255#if defined(HAS_INTERPOLATEROW_DSPR2)
1256  if (TestCpuFlag(kCpuHasDSPR2)) {
1257    InterpolateRow = InterpolateRow_Any_DSPR2;
1258    if (IS_ALIGNED(dst_width, 4)) {
1259      InterpolateRow = InterpolateRow_DSPR2;
1260    }
1261  }
1262#endif
1263
1264  if (filtering && src_width >= 32768) {
1265    ScaleFilterCols = ScaleFilterCols64_C;
1266  }
1267#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1268  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1269    ScaleFilterCols = ScaleFilterCols_SSSE3;
1270  }
1271#endif
1272#if defined(HAS_SCALEFILTERCOLS_NEON)
1273  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1274    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1275    if (IS_ALIGNED(dst_width, 8)) {
1276      ScaleFilterCols = ScaleFilterCols_NEON;
1277    }
1278  }
1279#endif
1280  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1281    ScaleFilterCols = ScaleColsUp2_C;
1282#if defined(HAS_SCALECOLS_SSE2)
1283    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1284      ScaleFilterCols = ScaleColsUp2_SSE2;
1285    }
1286#endif
1287  }
1288
1289  if (y > max_y) {
1290    y = max_y;
1291  }
1292  {
1293    int yi = y >> 16;
1294    const uint8* src = src_ptr + yi * src_stride;
1295
1296    // Allocate 2 row buffers.
1297    const int kRowSize = (dst_width + 31) & ~31;
1298    align_buffer_64(row, kRowSize * 2);
1299
1300    uint8* rowptr = row;
1301    int rowstride = kRowSize;
1302    int lasty = yi;
1303
1304    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1305    if (src_height > 1) {
1306      src += src_stride;
1307    }
1308    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1309    src += src_stride;
1310
1311    for (j = 0; j < dst_height; ++j) {
1312      yi = y >> 16;
1313      if (yi != lasty) {
1314        if (y > max_y) {
1315          y = max_y;
1316          yi = y >> 16;
1317          src = src_ptr + yi * src_stride;
1318        }
1319        if (yi != lasty) {
1320          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1321          rowptr += rowstride;
1322          rowstride = -rowstride;
1323          lasty = yi;
1324          src += src_stride;
1325        }
1326      }
1327      if (filtering == kFilterLinear) {
1328        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1329      } else {
1330        int yf = (y >> 8) & 255;
1331        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1332      }
1333      dst_ptr += dst_stride;
1334      y += dy;
1335    }
1336    free_aligned_buffer_64(row);
1337  }
1338}
1339
1340void ScalePlaneBilinearUp_16(int src_width,
1341                             int src_height,
1342                             int dst_width,
1343                             int dst_height,
1344                             int src_stride,
1345                             int dst_stride,
1346                             const uint16* src_ptr,
1347                             uint16* dst_ptr,
1348                             enum FilterMode filtering) {
1349  int j;
1350  // Initial source x/y coordinate and step values as 16.16 fixed point.
1351  int x = 0;
1352  int y = 0;
1353  int dx = 0;
1354  int dy = 0;
1355  const int max_y = (src_height - 1) << 16;
1356  void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr,
1357                         ptrdiff_t src_stride, int dst_width,
1358                         int source_y_fraction) = InterpolateRow_16_C;
1359  void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr,
1360                          int dst_width, int x, int dx) =
1361      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1362  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1363             &dx, &dy);
1364  src_width = Abs(src_width);
1365
1366#if defined(HAS_INTERPOLATEROW_16_SSE2)
1367  if (TestCpuFlag(kCpuHasSSE2)) {
1368    InterpolateRow = InterpolateRow_Any_16_SSE2;
1369    if (IS_ALIGNED(dst_width, 16)) {
1370      InterpolateRow = InterpolateRow_16_SSE2;
1371    }
1372  }
1373#endif
1374#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1375  if (TestCpuFlag(kCpuHasSSSE3)) {
1376    InterpolateRow = InterpolateRow_Any_16_SSSE3;
1377    if (IS_ALIGNED(dst_width, 16)) {
1378      InterpolateRow = InterpolateRow_16_SSSE3;
1379    }
1380  }
1381#endif
1382#if defined(HAS_INTERPOLATEROW_16_AVX2)
1383  if (TestCpuFlag(kCpuHasAVX2)) {
1384    InterpolateRow = InterpolateRow_Any_16_AVX2;
1385    if (IS_ALIGNED(dst_width, 32)) {
1386      InterpolateRow = InterpolateRow_16_AVX2;
1387    }
1388  }
1389#endif
1390#if defined(HAS_INTERPOLATEROW_16_NEON)
1391  if (TestCpuFlag(kCpuHasNEON)) {
1392    InterpolateRow = InterpolateRow_Any_16_NEON;
1393    if (IS_ALIGNED(dst_width, 16)) {
1394      InterpolateRow = InterpolateRow_16_NEON;
1395    }
1396  }
1397#endif
1398#if defined(HAS_INTERPOLATEROW_16_DSPR2)
1399  if (TestCpuFlag(kCpuHasDSPR2)) {
1400    InterpolateRow = InterpolateRow_Any_16_DSPR2;
1401    if (IS_ALIGNED(dst_width, 4)) {
1402      InterpolateRow = InterpolateRow_16_DSPR2;
1403    }
1404  }
1405#endif
1406
1407  if (filtering && src_width >= 32768) {
1408    ScaleFilterCols = ScaleFilterCols64_16_C;
1409  }
1410#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1411  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1412    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1413  }
1414#endif
1415  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1416    ScaleFilterCols = ScaleColsUp2_16_C;
1417#if defined(HAS_SCALECOLS_16_SSE2)
1418    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1419      ScaleFilterCols = ScaleColsUp2_16_SSE2;
1420    }
1421#endif
1422  }
1423
1424  if (y > max_y) {
1425    y = max_y;
1426  }
1427  {
1428    int yi = y >> 16;
1429    const uint16* src = src_ptr + yi * src_stride;
1430
1431    // Allocate 2 row buffers.
1432    const int kRowSize = (dst_width + 31) & ~31;
1433    align_buffer_64(row, kRowSize * 4);
1434
1435    uint16* rowptr = (uint16*)row;
1436    int rowstride = kRowSize;
1437    int lasty = yi;
1438
1439    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1440    if (src_height > 1) {
1441      src += src_stride;
1442    }
1443    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1444    src += src_stride;
1445
1446    for (j = 0; j < dst_height; ++j) {
1447      yi = y >> 16;
1448      if (yi != lasty) {
1449        if (y > max_y) {
1450          y = max_y;
1451          yi = y >> 16;
1452          src = src_ptr + yi * src_stride;
1453        }
1454        if (yi != lasty) {
1455          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1456          rowptr += rowstride;
1457          rowstride = -rowstride;
1458          lasty = yi;
1459          src += src_stride;
1460        }
1461      }
1462      if (filtering == kFilterLinear) {
1463        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1464      } else {
1465        int yf = (y >> 8) & 255;
1466        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1467      }
1468      dst_ptr += dst_stride;
1469      y += dy;
1470    }
1471    free_aligned_buffer_64(row);
1472  }
1473}
1474
1475// Scale Plane to/from any dimensions, without interpolation.
1476// Fixed point math is used for performance: The upper 16 bits
1477// of x and dx is the integer part of the source position and
1478// the lower 16 bits are the fixed decimal part.
1479
1480static void ScalePlaneSimple(int src_width,
1481                             int src_height,
1482                             int dst_width,
1483                             int dst_height,
1484                             int src_stride,
1485                             int dst_stride,
1486                             const uint8* src_ptr,
1487                             uint8* dst_ptr) {
1488  int i;
1489  void (*ScaleCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, int x,
1490                    int dx) = ScaleCols_C;
1491  // Initial source x/y coordinate and step values as 16.16 fixed point.
1492  int x = 0;
1493  int y = 0;
1494  int dx = 0;
1495  int dy = 0;
1496  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1497             &dx, &dy);
1498  src_width = Abs(src_width);
1499
1500  if (src_width * 2 == dst_width && x < 0x8000) {
1501    ScaleCols = ScaleColsUp2_C;
1502#if defined(HAS_SCALECOLS_SSE2)
1503    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1504      ScaleCols = ScaleColsUp2_SSE2;
1505    }
1506#endif
1507  }
1508
1509  for (i = 0; i < dst_height; ++i) {
1510    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1511    dst_ptr += dst_stride;
1512    y += dy;
1513  }
1514}
1515
1516static void ScalePlaneSimple_16(int src_width,
1517                                int src_height,
1518                                int dst_width,
1519                                int dst_height,
1520                                int src_stride,
1521                                int dst_stride,
1522                                const uint16* src_ptr,
1523                                uint16* dst_ptr) {
1524  int i;
1525  void (*ScaleCols)(uint16 * dst_ptr, const uint16* src_ptr, int dst_width,
1526                    int x, int dx) = ScaleCols_16_C;
1527  // Initial source x/y coordinate and step values as 16.16 fixed point.
1528  int x = 0;
1529  int y = 0;
1530  int dx = 0;
1531  int dy = 0;
1532  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1533             &dx, &dy);
1534  src_width = Abs(src_width);
1535
1536  if (src_width * 2 == dst_width && x < 0x8000) {
1537    ScaleCols = ScaleColsUp2_16_C;
1538#if defined(HAS_SCALECOLS_16_SSE2)
1539    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1540      ScaleCols = ScaleColsUp2_16_SSE2;
1541    }
1542#endif
1543  }
1544
1545  for (i = 0; i < dst_height; ++i) {
1546    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1547    dst_ptr += dst_stride;
1548    y += dy;
1549  }
1550}
1551
1552// Scale a plane.
1553// This function dispatches to a specialized scaler based on scale factor.
1554
1555LIBYUV_API
1556void ScalePlane(const uint8* src,
1557                int src_stride,
1558                int src_width,
1559                int src_height,
1560                uint8* dst,
1561                int dst_stride,
1562                int dst_width,
1563                int dst_height,
1564                enum FilterMode filtering) {
1565  // Simplify filtering when possible.
1566  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1567                                filtering);
1568
1569  // Negative height means invert the image.
1570  if (src_height < 0) {
1571    src_height = -src_height;
1572    src = src + (src_height - 1) * src_stride;
1573    src_stride = -src_stride;
1574  }
1575
1576  // Use specialized scales to improve performance for common resolutions.
1577  // For example, all the 1/2 scalings will use ScalePlaneDown2()
1578  if (dst_width == src_width && dst_height == src_height) {
1579    // Straight copy.
1580    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1581    return;
1582  }
1583  if (dst_width == src_width && filtering != kFilterBox) {
1584    int dy = FixedDiv(src_height, dst_height);
1585    // Arbitrary scale vertically, but unscaled horizontally.
1586    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
1587                       dst_stride, src, dst, 0, 0, dy, 1, filtering);
1588    return;
1589  }
1590  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1591    // Scale down.
1592    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1593      // optimized, 3/4
1594      ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
1595                       dst_stride, src, dst, filtering);
1596      return;
1597    }
1598    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1599      // optimized, 1/2
1600      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
1601                      dst_stride, src, dst, filtering);
1602      return;
1603    }
1604    // 3/8 rounded up for odd sized chroma height.
1605    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1606      // optimized, 3/8
1607      ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
1608                       dst_stride, src, dst, filtering);
1609      return;
1610    }
1611    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1612        (filtering == kFilterBox || filtering == kFilterNone)) {
1613      // optimized, 1/4
1614      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
1615                      dst_stride, src, dst, filtering);
1616      return;
1617    }
1618  }
1619  if (filtering == kFilterBox && dst_height * 2 < src_height) {
1620    ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
1621                  dst_stride, src, dst);
1622    return;
1623  }
1624  if (filtering && dst_height > src_height) {
1625    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
1626                         src_stride, dst_stride, src, dst, filtering);
1627    return;
1628  }
1629  if (filtering) {
1630    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
1631                           src_stride, dst_stride, src, dst, filtering);
1632    return;
1633  }
1634  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
1635                   dst_stride, src, dst);
1636}
1637
1638LIBYUV_API
1639void ScalePlane_16(const uint16* src,
1640                   int src_stride,
1641                   int src_width,
1642                   int src_height,
1643                   uint16* dst,
1644                   int dst_stride,
1645                   int dst_width,
1646                   int dst_height,
1647                   enum FilterMode filtering) {
1648  // Simplify filtering when possible.
1649  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1650                                filtering);
1651
1652  // Negative height means invert the image.
1653  if (src_height < 0) {
1654    src_height = -src_height;
1655    src = src + (src_height - 1) * src_stride;
1656    src_stride = -src_stride;
1657  }
1658
1659  // Use specialized scales to improve performance for common resolutions.
1660  // For example, all the 1/2 scalings will use ScalePlaneDown2()
1661  if (dst_width == src_width && dst_height == src_height) {
1662    // Straight copy.
1663    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
1664    return;
1665  }
1666  if (dst_width == src_width) {
1667    int dy = FixedDiv(src_height, dst_height);
1668    // Arbitrary scale vertically, but unscaled vertically.
1669    ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
1670                          dst_stride, src, dst, 0, 0, dy, 1, filtering);
1671    return;
1672  }
1673  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1674    // Scale down.
1675    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1676      // optimized, 3/4
1677      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
1678                          src_stride, dst_stride, src, dst, filtering);
1679      return;
1680    }
1681    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1682      // optimized, 1/2
1683      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
1684                         src_stride, dst_stride, src, dst, filtering);
1685      return;
1686    }
1687    // 3/8 rounded up for odd sized chroma height.
1688    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1689      // optimized, 3/8
1690      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
1691                          src_stride, dst_stride, src, dst, filtering);
1692      return;
1693    }
1694    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1695        filtering != kFilterBilinear) {
1696      // optimized, 1/4
1697      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
1698                         src_stride, dst_stride, src, dst, filtering);
1699      return;
1700    }
1701  }
1702  if (filtering == kFilterBox && dst_height * 2 < src_height) {
1703    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
1704                     dst_stride, src, dst);
1705    return;
1706  }
1707  if (filtering && dst_height > src_height) {
1708    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
1709                            src_stride, dst_stride, src, dst, filtering);
1710    return;
1711  }
1712  if (filtering) {
1713    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
1714                              src_stride, dst_stride, src, dst, filtering);
1715    return;
1716  }
1717  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
1718                      dst_stride, src, dst);
1719}
1720
1721// Scale an I420 image.
1722// This function in turn calls a scaling function for each plane.
1723
1724LIBYUV_API
1725int I420Scale(const uint8* src_y,
1726              int src_stride_y,
1727              const uint8* src_u,
1728              int src_stride_u,
1729              const uint8* src_v,
1730              int src_stride_v,
1731              int src_width,
1732              int src_height,
1733              uint8* dst_y,
1734              int dst_stride_y,
1735              uint8* dst_u,
1736              int dst_stride_u,
1737              uint8* dst_v,
1738              int dst_stride_v,
1739              int dst_width,
1740              int dst_height,
1741              enum FilterMode filtering) {
1742  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1743  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1744  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1745  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1746  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1747      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1748      dst_width <= 0 || dst_height <= 0) {
1749    return -1;
1750  }
1751
1752  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1753             dst_width, dst_height, filtering);
1754  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1755             dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1756  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1757             dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1758  return 0;
1759}
1760
1761LIBYUV_API
1762int I420Scale_16(const uint16* src_y,
1763                 int src_stride_y,
1764                 const uint16* src_u,
1765                 int src_stride_u,
1766                 const uint16* src_v,
1767                 int src_stride_v,
1768                 int src_width,
1769                 int src_height,
1770                 uint16* dst_y,
1771                 int dst_stride_y,
1772                 uint16* dst_u,
1773                 int dst_stride_u,
1774                 uint16* dst_v,
1775                 int dst_stride_v,
1776                 int dst_width,
1777                 int dst_height,
1778                 enum FilterMode filtering) {
1779  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1780  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1781  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1782  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1783  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1784      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1785      dst_width <= 0 || dst_height <= 0) {
1786    return -1;
1787  }
1788
1789  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1790                dst_width, dst_height, filtering);
1791  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1792                dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1793  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1794                dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1795  return 0;
1796}
1797
1798// Deprecated api
1799LIBYUV_API
1800int Scale(const uint8* src_y,
1801          const uint8* src_u,
1802          const uint8* src_v,
1803          int src_stride_y,
1804          int src_stride_u,
1805          int src_stride_v,
1806          int src_width,
1807          int src_height,
1808          uint8* dst_y,
1809          uint8* dst_u,
1810          uint8* dst_v,
1811          int dst_stride_y,
1812          int dst_stride_u,
1813          int dst_stride_v,
1814          int dst_width,
1815          int dst_height,
1816          LIBYUV_BOOL interpolate) {
1817  return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
1818                   src_stride_v, src_width, src_height, dst_y, dst_stride_y,
1819                   dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
1820                   dst_height, interpolate ? kFilterBox : kFilterNone);
1821}
1822
1823// Deprecated api
1824LIBYUV_API
1825int ScaleOffset(const uint8* src,
1826                int src_width,
1827                int src_height,
1828                uint8* dst,
1829                int dst_width,
1830                int dst_height,
1831                int dst_yoffset,
1832                LIBYUV_BOOL interpolate) {
1833  // Chroma requires offset to multiple of 2.
1834  int dst_yoffset_even = dst_yoffset & ~1;
1835  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1836  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1837  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1838  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1839  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
1840  const uint8* src_y = src;
1841  const uint8* src_u = src + src_width * src_height;
1842  const uint8* src_v =
1843      src + src_width * src_height + src_halfwidth * src_halfheight;
1844  uint8* dst_y = dst + dst_yoffset_even * dst_width;
1845  uint8* dst_u =
1846      dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth;
1847  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
1848                 (dst_yoffset_even >> 1) * dst_halfwidth;
1849  if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 ||
1850      dst_height <= 0 || dst_yoffset_even < 0 ||
1851      dst_yoffset_even >= dst_height) {
1852    return -1;
1853  }
1854  return I420Scale(src_y, src_width, src_u, src_halfwidth, src_v, src_halfwidth,
1855                   src_width, src_height, dst_y, dst_width, dst_u,
1856                   dst_halfwidth, dst_v, dst_halfwidth, dst_width, aheight,
1857                   interpolate ? kFilterBox : kFilterNone);
1858}
1859
1860#ifdef __cplusplus
1861}  // extern "C"
1862}  // namespace libyuv
1863#endif
1864