/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyPlane
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
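// For example, SUBSAMPLE(7, 1, 1) = (7 + 1) >> 1 = 4 and
// SUBSAMPLE(-7, 1, 1) = -((7 + 1) >> 1) = -4, so halving an odd dimension
// rounds up (away from zero), which is how the chroma plane sizes below are
// derived from the luma sizes.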

// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.

static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
        ScaleRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
          ScaleRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
        ScaleRowDown2Box_Any_SSE2);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
          ScaleRowDown2Box_SSE2);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
        ScaleRowDown2Box_Any_AVX2);
    if (IS_ALIGNED(dst_width, 32)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
          ScaleRowDown2Box_AVX2);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

static void ScalePlaneDown2_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
    filtering == kFilterNone ? ScaleRowDown2_16_C :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
        ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
        ScaleRowDown2_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
        ScaleRowDown2Box_16_SSE2);
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.

static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

static void ScalePlaneDown4_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
        ScaleRowDown4_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
        ScaleRowDown4_16_SSE2;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

// Scale plane down, 3/4

static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}

static void ScalePlaneDown34_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
    }
  }
#endif

  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}


// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses a box filter arranged like this:
// aaabbbcc -> abc
// aaabbbcc    def
// aaabbbcc    ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2.
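// The row loop below applies the same pattern vertically: two groups of
// three source rows go through ScaleRowDown38_3 and one group of two rows
// through ScaleRowDown38_2, producing 3 output rows for every 8 input rows.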

static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }

#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
    }
    if (dst_width % 12 == 0) {
      if (!filtering) {
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
      } else {
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
    }
    if (dst_width % 12 == 0 && !filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    }
    if (dst_width % 6 == 0 && filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}

static void ScalePlaneDown38_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
    }
  }
#endif

  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}

#define MIN1(x) ((x) < 1 ? 1 : (x))

static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
  uint32 sum = 0u;
  int x;
  assert(iboxwidth > 0);
  for (x = 0; x < iboxwidth; ++x) {
    sum += src_ptr[x];
  }
  return sum;
}

static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
  uint32 sum = 0u;
  int x;
  assert(iboxwidth > 0);
  for (x = 0; x < iboxwidth; ++x) {
    sum += src_ptr[x];
  }
  return sum;
}

static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
                            const uint16* src_ptr, uint8* dst_ptr) {
  int i;
  int scaletbl[2];
  int minboxwidth = dx >> 16;
  int* scaleptr = scaletbl - minboxwidth;
  int boxwidth;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);
    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
  }
}

static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
                               const uint32* src_ptr, uint16* dst_ptr) {
  int i;
  int scaletbl[2];
  int minboxwidth = dx >> 16;
  int* scaleptr = scaletbl - minboxwidth;
  int boxwidth;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);
    *dst_ptr++ =
        SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
  }
}

static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
                            const uint16* src_ptr, uint8* dst_ptr) {
  int scaleval = 65536 / boxheight;
  int i;
  src_ptr += (x >> 16);
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = src_ptr[i] * scaleval >> 16;
  }
}

static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
                            const uint16* src_ptr, uint8* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
                               const uint32* src_ptr, uint16* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  x >>= 16;  // Convert start position to whole pixels, as in ScaleAddCols1_C.
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}

// Scale plane down to any dimensions, with interpolation (box filter).
//
// Same method as SimpleScale, which is fixed point: each destination pixel
// is produced by stepping through the source with 16.16 fixed-point
// coordinates, sampling a box of source pixels and taking their simple
// average.
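// For example, reducing a row of 8 pixels to 3 steps x by dx, the 16.16
// ratio of source to destination width (8 / 3 here); each destination pixel
// sums the source pixels its box spans and normalizes the sum by
// 65536 / (boxwidth * boxheight).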
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint16.
    align_buffer_64(row16, src_width * 2);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint16* src_ptr, uint8* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C:
        ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
    void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
        ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
    if (TestCpuFlag(kCpuHasSSE2)) {
      ScaleAddRow = ScaleAddRow_Any_SSE2;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_SSE2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      ScaleAddRow = ScaleAddRow_Any_AVX2;
      if (IS_ALIGNED(src_width, 32)) {
        ScaleAddRow = ScaleAddRow_AVX2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleAddRow = ScaleAddRow_Any_NEON;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_NEON;
      }
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row16, 0, src_width * 2);
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint16 *)(row16), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}

static void ScalePlaneBox_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint32* src_ptr, uint16* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
        ScaleAddRow_16_C;

#if defined(HAS_SCALEADDROW_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_16_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row32, 0, src_width * 4);
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint32 *)(row32), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}

// Scale plane down with bilinear interpolation.
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width);

  const int max_y = (src_height - 1) << 16;
  int j;
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}

void ScalePlaneBilinearDown_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width * 2);

  const int max_y = (src_height - 1) << 16;
  int j;
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}

// Scale plane up with bilinear interpolation.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);

    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;
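    // The two buffers hold the horizontally scaled source rows bracketing y.
    // When y crosses into a new source row, only the newly needed row is
    // scaled (into the buffer holding the stale row), then rowptr and the
    // sign of rowstride are flipped so rowptr is the upper row and
    // rowptr + rowstride the lower row for InterpolateRow.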

    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}

void ScalePlaneBilinearUp_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif

  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 4);

    uint16* rowptr = (uint16*)row;
    int rowstride = kRowSize;
    int lasty = yi;

    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}

// Scale plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: the upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fractional part.
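// For example, x = 0x00038000 refers to source position 3.5: x >> 16 = 3 is
// the pixel index and the low 16 bits (0x8000) are the half-pixel fraction.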

static void ScalePlaneSimple(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr) {
  int i;
  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) = ScaleCols_C;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  for (i = 0; i < dst_height; ++i) {
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}

static void ScalePlaneSimple_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr) {
  int i;
  void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) = ScaleCols_16_C;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  for (i = 0; i < dst_height; ++i) {
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
              dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}

// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
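// Illustrative call (not part of the library): halving a 640x360 plane with
// box filtering, assuming caller-provided buffers with stride == width:
//   ScalePlane(src, 640, 640, 360, dst, 320, 320, 180, kFilterBox);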

LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height, filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}

LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
                  int src_width, int src_height,
                  uint16* dst, int dst_stride,
                  int dst_width, int dst_height,
                  enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height, filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical_16(src_height,
                          dst_width, dst_height,
                          src_stride, dst_stride, src, dst,
                          0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
               filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
                     src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst);
}

// Scale an I420 image.
// This function in turn calls a scaling function for each plane.
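// Illustrative call (not part of the library): scaling a 1280x720 I420 frame
// to 640x360, assuming tightly packed planes (stride == width for Y and
// stride == width / 2 for U and V):
//   I420Scale(src_y, 1280, src_u, 640, src_v, 640, 1280, 720,
//             dst_y, 640, dst_u, 320, dst_v, 320, 640, 360, kFilterBilinear);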

LIBYUV_API
int I420Scale(const uint8* src_y, int src_stride_y,
              const uint8* src_u, int src_stride_u,
              const uint8* src_v, int src_stride_v,
              int src_width, int src_height,
              uint8* dst_y, int dst_stride_y,
              uint8* dst_u, int dst_stride_u,
              uint8* dst_v, int dst_stride_v,
              int dst_width, int dst_height,
              enum FilterMode filtering) {
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
      src_width > 32768 || src_height > 32768 ||
      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  ScalePlane(src_y, src_stride_y, src_width, src_height,
             dst_y, dst_stride_y, dst_width, dst_height,
             filtering);
  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
             filtering);
  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
             filtering);
  return 0;
}

LIBYUV_API
int I420Scale_16(const uint16* src_y, int src_stride_y,
                 const uint16* src_u, int src_stride_u,
                 const uint16* src_v, int src_stride_v,
                 int src_width, int src_height,
                 uint16* dst_y, int dst_stride_y,
                 uint16* dst_u, int dst_stride_u,
                 uint16* dst_v, int dst_stride_v,
                 int dst_width, int dst_height,
                 enum FilterMode filtering) {
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
      src_width > 32768 || src_height > 32768 ||
      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  ScalePlane_16(src_y, src_stride_y, src_width, src_height,
                dst_y, dst_stride_y, dst_width, dst_height,
                filtering);
  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
                dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
                filtering);
  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
                dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
                filtering);
  return 0;
}

// Deprecated api
LIBYUV_API
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
          int src_stride_y, int src_stride_u, int src_stride_v,
          int src_width, int src_height,
          uint8* dst_y, uint8* dst_u, uint8* dst_v,
          int dst_stride_y, int dst_stride_u, int dst_stride_v,
          int dst_width, int dst_height,
          LIBYUV_BOOL interpolate) {
  return I420Scale(src_y, src_stride_y,
                   src_u, src_stride_u,
                   src_v, src_stride_v,
                   src_width, src_height,
                   dst_y, dst_stride_y,
                   dst_u, dst_stride_u,
                   dst_v, dst_stride_v,
                   dst_width, dst_height,
                   interpolate ? kFilterBox : kFilterNone);
}

// Deprecated api
LIBYUV_API
int ScaleOffset(const uint8* src, int src_width, int src_height,
                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
                LIBYUV_BOOL interpolate) {
  // Chroma requires offset to multiple of 2.
  int dst_yoffset_even = dst_yoffset & ~1;
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
  const uint8* src_y = src;
  const uint8* src_u = src + src_width * src_height;
  const uint8* src_v = src + src_width * src_height +
                             src_halfwidth * src_halfheight;
  uint8* dst_y = dst + dst_yoffset_even * dst_width;
  uint8* dst_u = dst + dst_width * dst_height +
                 (dst_yoffset_even >> 1) * dst_halfwidth;
  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
                 (dst_yoffset_even >> 1) * dst_halfwidth;
  if (!src || src_width <= 0 || src_height <= 0 ||
      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
      dst_yoffset_even >= dst_height) {
    return -1;
  }
  return I420Scale(src_y, src_width,
                   src_u, src_halfwidth,
                   src_v, src_halfwidth,
                   src_width, src_height,
                   dst_y, dst_width,
                   dst_u, dst_halfwidth,
                   dst_v, dst_halfwidth,
                   dst_width, aheight,
                   interpolate ? kFilterBox : kFilterNone);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif