1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/scale.h"
12
13#include <assert.h>
14#include <string.h>
15
16#include "libyuv/cpu_id.h"
17#include "libyuv/planar_functions.h"  // For CopyPlane
18#include "libyuv/row.h"
19#include "libyuv/scale_row.h"
20
21#ifdef __cplusplus
22namespace libyuv {
23extern "C" {
24#endif
25
// When defined, the SSE2 ScaleAddRows paths in this file are only selected
// if src_width is a multiple of 16 (see the #ifdef AVOID_OVERREAD guards).
// Remove this macro if OVERREAD is safe.
#define AVOID_OVERREAD 1
28
// Returns the magnitude of v: v itself when it is non-negative, otherwise -v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
32
// Computes (v + a) >> s on the magnitude of v and restores the sign:
// for v < 0 the result is -((-v + a) >> s), otherwise (v + a) >> s.
// Every argument and the whole expansion are parenthesized so the macro
// behaves as a single value inside larger expressions and accepts
// expression arguments such as SUBSAMPLE(w - 1, 1, 1).
// Note: v is evaluated more than once; do not pass an argument with side
// effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
34
35// Scale plane, 1/2
36// This is an optimized version for scaling down a plane to 1/2 of
37// its original size.
38
39static void ScalePlaneDown2(int src_width, int src_height,
40                            int dst_width, int dst_height,
41                            int src_stride, int dst_stride,
42                            const uint8* src_ptr, uint8* dst_ptr,
43                            enum FilterMode filtering) {
44  int y;
45  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
46                        uint8* dst_ptr, int dst_width) =
47    filtering == kFilterNone ? ScaleRowDown2_C :
48        (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
49        ScaleRowDown2Box_C);
50  int row_stride = src_stride << 1;
51  if (!filtering) {
52    src_ptr += src_stride;  // Point to odd rows.
53    src_stride = 0;
54  }
55
56#if defined(HAS_SCALEROWDOWN2_NEON)
57  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
58    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
59  }
60#elif defined(HAS_SCALEROWDOWN2_SSE2)
61  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
62    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
63        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
64        ScaleRowDown2Box_Unaligned_SSE2);
65    if (IS_ALIGNED(src_ptr, 16) &&
66        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
67        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
68      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
69          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
70          ScaleRowDown2Box_SSE2);
71    }
72  }
73#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
74  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
75      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
76      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
77    ScaleRowDown2 = filtering ?
78        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
79  }
80#endif
81
82  if (filtering == kFilterLinear) {
83    src_stride = 0;
84  }
85  // TODO(fbarchard): Loop through source height to allow odd height.
86  for (y = 0; y < dst_height; ++y) {
87    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
88    src_ptr += row_stride;
89    dst_ptr += dst_stride;
90  }
91}
92
93static void ScalePlaneDown2_16(int src_width, int src_height,
94                               int dst_width, int dst_height,
95                               int src_stride, int dst_stride,
96                               const uint16* src_ptr, uint16* dst_ptr,
97                               enum FilterMode filtering) {
98  int y;
99  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
100                        uint16* dst_ptr, int dst_width) =
101    filtering == kFilterNone ? ScaleRowDown2_16_C :
102        (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
103        ScaleRowDown2Box_16_C);
104  int row_stride = src_stride << 1;
105  if (!filtering) {
106    src_ptr += src_stride;  // Point to odd rows.
107    src_stride = 0;
108  }
109
110#if defined(HAS_SCALEROWDOWN2_16_NEON)
111  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
112    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
113        ScaleRowDown2_16_NEON;
114  }
115#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
116  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
117    ScaleRowDown2 = filtering == kFilterNone ?
118        ScaleRowDown2_Unaligned_16_SSE2 :
119        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
120        ScaleRowDown2Box_Unaligned_16_SSE2);
121    if (IS_ALIGNED(src_ptr, 16) &&
122        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
123        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
124      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
125          (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
126          ScaleRowDown2Box_16_SSE2);
127    }
128  }
129#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
130  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
131      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
132      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
133    ScaleRowDown2 = filtering ?
134        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
135  }
136#endif
137
138  if (filtering == kFilterLinear) {
139    src_stride = 0;
140  }
141  // TODO(fbarchard): Loop through source height to allow odd height.
142  for (y = 0; y < dst_height; ++y) {
143    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
144    src_ptr += row_stride;
145    dst_ptr += dst_stride;
146  }
147}
148
149// Scale plane, 1/4
150// This is an optimized version for scaling down a plane to 1/4 of
151// its original size.
152
153static void ScalePlaneDown4(int src_width, int src_height,
154                            int dst_width, int dst_height,
155                            int src_stride, int dst_stride,
156                            const uint8* src_ptr, uint8* dst_ptr,
157                            enum FilterMode filtering) {
158  int y;
159  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
160                        uint8* dst_ptr, int dst_width) =
161      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
162  int row_stride = src_stride << 2;
163  if (!filtering) {
164    src_ptr += src_stride * 2;  // Point to row 2.
165    src_stride = 0;
166  }
167#if defined(HAS_SCALEROWDOWN4_NEON)
168  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
169    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
170  }
171#elif defined(HAS_SCALEROWDOWN4_SSE2)
172  if (TestCpuFlag(kCpuHasSSE2) &&
173      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
174      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
175    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
176  }
177#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
178  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
179      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
180      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
181    ScaleRowDown4 = filtering ?
182        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
183  }
184#endif
185
186  if (filtering == kFilterLinear) {
187    src_stride = 0;
188  }
189  for (y = 0; y < dst_height; ++y) {
190    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
191    src_ptr += row_stride;
192    dst_ptr += dst_stride;
193  }
194}
195
196static void ScalePlaneDown4_16(int src_width, int src_height,
197                               int dst_width, int dst_height,
198                               int src_stride, int dst_stride,
199                               const uint16* src_ptr, uint16* dst_ptr,
200                               enum FilterMode filtering) {
201  int y;
202  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
203                        uint16* dst_ptr, int dst_width) =
204      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
205  int row_stride = src_stride << 2;
206  if (!filtering) {
207    src_ptr += src_stride * 2;  // Point to row 2.
208    src_stride = 0;
209  }
210#if defined(HAS_SCALEROWDOWN4_16_NEON)
211  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
212    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
213        ScaleRowDown4_16_NEON;
214  }
215#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
216  if (TestCpuFlag(kCpuHasSSE2) &&
217      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
218      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
219    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
220        ScaleRowDown4_16_SSE2;
221  }
222#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
223  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
224      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
225      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
226    ScaleRowDown4 = filtering ?
227        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
228  }
229#endif
230
231  if (filtering == kFilterLinear) {
232    src_stride = 0;
233  }
234  for (y = 0; y < dst_height; ++y) {
235    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
236    src_ptr += row_stride;
237    dst_ptr += dst_stride;
238  }
239}
240
241// Scale plane down, 3/4
242
243static void ScalePlaneDown34(int src_width, int src_height,
244                             int dst_width, int dst_height,
245                             int src_stride, int dst_stride,
246                             const uint8* src_ptr, uint8* dst_ptr,
247                             enum FilterMode filtering) {
248  int y;
249  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
250                           uint8* dst_ptr, int dst_width);
251  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
252                           uint8* dst_ptr, int dst_width);
253  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
254  assert(dst_width % 3 == 0);
255  if (!filtering) {
256    ScaleRowDown34_0 = ScaleRowDown34_C;
257    ScaleRowDown34_1 = ScaleRowDown34_C;
258  } else {
259    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
260    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
261  }
262#if defined(HAS_SCALEROWDOWN34_NEON)
263  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
264    if (!filtering) {
265      ScaleRowDown34_0 = ScaleRowDown34_NEON;
266      ScaleRowDown34_1 = ScaleRowDown34_NEON;
267    } else {
268      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
269      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
270    }
271  }
272#endif
273#if defined(HAS_SCALEROWDOWN34_SSSE3)
274  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
275      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
276    if (!filtering) {
277      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
278      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
279    } else {
280      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
281      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
282    }
283  }
284#endif
285#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
286  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
287      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
288      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
289    if (!filtering) {
290      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
291      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
292    } else {
293      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
294      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
295    }
296  }
297#endif
298
299  for (y = 0; y < dst_height - 2; y += 3) {
300    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
301    src_ptr += src_stride;
302    dst_ptr += dst_stride;
303    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
304    src_ptr += src_stride;
305    dst_ptr += dst_stride;
306    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
307                     dst_ptr, dst_width);
308    src_ptr += src_stride * 2;
309    dst_ptr += dst_stride;
310  }
311
312  // Remainder 1 or 2 rows with last row vertically unfiltered
313  if ((dst_height % 3) == 2) {
314    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
315    src_ptr += src_stride;
316    dst_ptr += dst_stride;
317    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
318  } else if ((dst_height % 3) == 1) {
319    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
320  }
321}
322
323static void ScalePlaneDown34_16(int src_width, int src_height,
324                                int dst_width, int dst_height,
325                                int src_stride, int dst_stride,
326                                const uint16* src_ptr, uint16* dst_ptr,
327                                enum FilterMode filtering) {
328  int y;
329  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
330                           uint16* dst_ptr, int dst_width);
331  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
332                           uint16* dst_ptr, int dst_width);
333  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
334  assert(dst_width % 3 == 0);
335  if (!filtering) {
336    ScaleRowDown34_0 = ScaleRowDown34_16_C;
337    ScaleRowDown34_1 = ScaleRowDown34_16_C;
338  } else {
339    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
340    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
341  }
342#if defined(HAS_SCALEROWDOWN34_16_NEON)
343  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
344    if (!filtering) {
345      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
346      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
347    } else {
348      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
349      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
350    }
351  }
352#endif
353#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
354  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
355      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
356    if (!filtering) {
357      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
358      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
359    } else {
360      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
361      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
362    }
363  }
364#endif
365#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
366  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
367      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
368      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
369    if (!filtering) {
370      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
371      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
372    } else {
373      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
374      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
375    }
376  }
377#endif
378
379  for (y = 0; y < dst_height - 2; y += 3) {
380    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
381    src_ptr += src_stride;
382    dst_ptr += dst_stride;
383    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
384    src_ptr += src_stride;
385    dst_ptr += dst_stride;
386    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
387                     dst_ptr, dst_width);
388    src_ptr += src_stride * 2;
389    dst_ptr += dst_stride;
390  }
391
392  // Remainder 1 or 2 rows with last row vertically unfiltered
393  if ((dst_height % 3) == 2) {
394    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
395    src_ptr += src_stride;
396    dst_ptr += dst_stride;
397    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
398  } else if ((dst_height % 3) == 1) {
399    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
400  }
401}
402
403
404// Scale plane, 3/8
405// This is an optimized version for scaling down a plane to 3/8
406// of its original size.
407//
408// Uses box filter arranges like this
409// aaabbbcc -> abc
410// aaabbbcc    def
411// aaabbbcc    ghi
412// dddeeeff
413// dddeeeff
414// dddeeeff
415// ggghhhii
416// ggghhhii
417// Boxes are 3x3, 2x3, 3x2 and 2x2
418
419static void ScalePlaneDown38(int src_width, int src_height,
420                             int dst_width, int dst_height,
421                             int src_stride, int dst_stride,
422                             const uint8* src_ptr, uint8* dst_ptr,
423                             enum FilterMode filtering) {
424  int y;
425  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
426                           uint8* dst_ptr, int dst_width);
427  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
428                           uint8* dst_ptr, int dst_width);
429  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
430  assert(dst_width % 3 == 0);
431  if (!filtering) {
432    ScaleRowDown38_3 = ScaleRowDown38_C;
433    ScaleRowDown38_2 = ScaleRowDown38_C;
434  } else {
435    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
436    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
437  }
438#if defined(HAS_SCALEROWDOWN38_NEON)
439  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
440    if (!filtering) {
441      ScaleRowDown38_3 = ScaleRowDown38_NEON;
442      ScaleRowDown38_2 = ScaleRowDown38_NEON;
443    } else {
444      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
445      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
446    }
447  }
448#elif defined(HAS_SCALEROWDOWN38_SSSE3)
449  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
450      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
451    if (!filtering) {
452      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
453      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
454    } else {
455      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
456      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
457    }
458  }
459#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
460  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
461      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
462      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
463    if (!filtering) {
464      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
465      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
466    } else {
467      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
468      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
469    }
470  }
471#endif
472
473  for (y = 0; y < dst_height - 2; y += 3) {
474    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
475    src_ptr += src_stride * 3;
476    dst_ptr += dst_stride;
477    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
478    src_ptr += src_stride * 3;
479    dst_ptr += dst_stride;
480    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
481    src_ptr += src_stride * 2;
482    dst_ptr += dst_stride;
483  }
484
485  // Remainder 1 or 2 rows with last row vertically unfiltered
486  if ((dst_height % 3) == 2) {
487    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
488    src_ptr += src_stride * 3;
489    dst_ptr += dst_stride;
490    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
491  } else if ((dst_height % 3) == 1) {
492    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
493  }
494}
495
496static void ScalePlaneDown38_16(int src_width, int src_height,
497                                int dst_width, int dst_height,
498                                int src_stride, int dst_stride,
499                                const uint16* src_ptr, uint16* dst_ptr,
500                                enum FilterMode filtering) {
501  int y;
502  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
503                           uint16* dst_ptr, int dst_width);
504  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
505                           uint16* dst_ptr, int dst_width);
506  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
507  assert(dst_width % 3 == 0);
508  if (!filtering) {
509    ScaleRowDown38_3 = ScaleRowDown38_16_C;
510    ScaleRowDown38_2 = ScaleRowDown38_16_C;
511  } else {
512    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
513    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
514  }
515#if defined(HAS_SCALEROWDOWN38_16_NEON)
516  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
517    if (!filtering) {
518      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
519      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
520    } else {
521      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
522      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
523    }
524  }
525#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
526  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
527      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
528    if (!filtering) {
529      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
530      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
531    } else {
532      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
533      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
534    }
535  }
536#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
537  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
538      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
539      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
540    if (!filtering) {
541      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
542      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
543    } else {
544      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
545      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
546    }
547  }
548#endif
549
550  for (y = 0; y < dst_height - 2; y += 3) {
551    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
552    src_ptr += src_stride * 3;
553    dst_ptr += dst_stride;
554    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
555    src_ptr += src_stride * 3;
556    dst_ptr += dst_stride;
557    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
558    src_ptr += src_stride * 2;
559    dst_ptr += dst_stride;
560  }
561
562  // Remainder 1 or 2 rows with last row vertically unfiltered
563  if ((dst_height % 3) == 2) {
564    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
565    src_ptr += src_stride * 3;
566    dst_ptr += dst_stride;
567    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
568  } else if ((dst_height % 3) == 1) {
569    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
570  }
571}
572
573static __inline uint32 SumBox(int iboxwidth, int iboxheight,
574                              ptrdiff_t src_stride, const uint8* src_ptr) {
575  uint32 sum = 0u;
576  int y;
577  assert(iboxwidth > 0);
578  assert(iboxheight > 0);
579  for (y = 0; y < iboxheight; ++y) {
580    int x;
581    for (x = 0; x < iboxwidth; ++x) {
582      sum += src_ptr[x];
583    }
584    src_ptr += src_stride;
585  }
586  return sum;
587}
588
589static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
590                                 ptrdiff_t src_stride, const uint16* src_ptr) {
591  uint32 sum = 0u;
592  int y;
593  assert(iboxwidth > 0);
594  assert(iboxheight > 0);
595  for (y = 0; y < iboxheight; ++y) {
596    int x;
597    for (x = 0; x < iboxwidth; ++x) {
598      sum += src_ptr[x];
599    }
600    src_ptr += src_stride;
601  }
602  return sum;
603}
604
605static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
606                               int x, int dx, ptrdiff_t src_stride,
607                               const uint8* src_ptr, uint8* dst_ptr) {
608  int i;
609  int boxwidth;
610  for (i = 0; i < dst_width; ++i) {
611    int ix = x >> 16;
612    x += dx;
613    boxwidth = (x >> 16) - ix;
614    *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
615        (boxwidth * boxheight);
616  }
617}
618
619static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
620                                  int x, int dx, ptrdiff_t src_stride,
621                                  const uint16* src_ptr, uint16* dst_ptr) {
622  int i;
623  int boxwidth;
624  for (i = 0; i < dst_width; ++i) {
625    int ix = x >> 16;
626    x += dx;
627    boxwidth = (x >> 16) - ix;
628    *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
629        (boxwidth * boxheight);
630  }
631}
632
633static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
634  uint32 sum = 0u;
635  int x;
636  assert(iboxwidth > 0);
637  for (x = 0; x < iboxwidth; ++x) {
638    sum += src_ptr[x];
639  }
640  return sum;
641}
642
643static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
644  uint32 sum = 0u;
645  int x;
646  assert(iboxwidth > 0);
647  for (x = 0; x < iboxwidth; ++x) {
648    sum += src_ptr[x];
649  }
650  return sum;
651}
652
653static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
654                            const uint16* src_ptr, uint8* dst_ptr) {
655  int i;
656  int scaletbl[2];
657  int minboxwidth = (dx >> 16);
658  int* scaleptr = scaletbl - minboxwidth;
659  int boxwidth;
660  scaletbl[0] = 65536 / (minboxwidth * boxheight);
661  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
662  for (i = 0; i < dst_width; ++i) {
663    int ix = x >> 16;
664    x += dx;
665    boxwidth = (x >> 16) - ix;
666    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
667  }
668}
669
670static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
671                               const uint32* src_ptr, uint16* dst_ptr) {
672  int i;
673  int scaletbl[2];
674  int minboxwidth = (dx >> 16);
675  int* scaleptr = scaletbl - minboxwidth;
676  int boxwidth;
677  scaletbl[0] = 65536 / (minboxwidth * boxheight);
678  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
679  for (i = 0; i < dst_width; ++i) {
680    int ix = x >> 16;
681    x += dx;
682    boxwidth = (x >> 16) - ix;
683    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
684        scaleptr[boxwidth] >> 16;
685  }
686}
687
688static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
689                            const uint16* src_ptr, uint8* dst_ptr) {
690  int boxwidth = (dx >> 16);
691  int scaleval = 65536 / (boxwidth * boxheight);
692  int i;
693  for (i = 0; i < dst_width; ++i) {
694    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
695    x += boxwidth;
696  }
697}
698
699static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
700                               const uint32* src_ptr, uint16* dst_ptr) {
701  int boxwidth = (dx >> 16);
702  int scaleval = 65536 / (boxwidth * boxheight);
703  int i;
704  for (i = 0; i < dst_width; ++i) {
705    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
706    x += boxwidth;
707  }
708}
709
710// Scale plane down to any dimensions, with interpolation.
711// (boxfilter).
712//
713// Same method as SimpleScale, which is fixed point, outputting
714// one pixel of destination using fixed point (16.16) to step
715// through source, sampling a box of pixel with simple
716// averaging.
717static void ScalePlaneBox(int src_width, int src_height,
718                          int dst_width, int dst_height,
719                          int src_stride, int dst_stride,
720                          const uint8* src_ptr, uint8* dst_ptr) {
721  int j;
722  // Initial source x/y coordinate and step values as 16.16 fixed point.
723  int x = 0;
724  int y = 0;
725  int dx = 0;
726  int dy = 0;
727  const int max_y = (src_height << 16);
728  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
729             &x, &y, &dx, &dy);
730  src_width = Abs(src_width);
731  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
732  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
733    uint8* dst = dst_ptr;
734    int j;
735    for (j = 0; j < dst_height; ++j) {
736      int boxheight;
737      int iy = y >> 16;
738      const uint8* src = src_ptr + iy * src_stride;
739      y += dy;
740      if (y > max_y) {
741        y = max_y;
742      }
743      boxheight = (y >> 16) - iy;
744      ScalePlaneBoxRow_C(dst_width, boxheight,
745                         x, dx, src_stride,
746                         src, dst);
747      dst += dst_stride;
748    }
749    return;
750  }
751  {
752    // Allocate a row buffer of uint16.
753    align_buffer_64(row16, src_width * 2);
754    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
755        const uint16* src_ptr, uint8* dst_ptr) =
756        (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
757    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
758        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
759
760#if defined(HAS_SCALEADDROWS_SSE2)
761    if (TestCpuFlag(kCpuHasSSE2) &&
762#ifdef AVOID_OVERREAD
763        IS_ALIGNED(src_width, 16) &&
764#endif
765        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
766      ScaleAddRows = ScaleAddRows_SSE2;
767    }
768#endif
769
770    for (j = 0; j < dst_height; ++j) {
771      int boxheight;
772      int iy = y >> 16;
773      const uint8* src = src_ptr + iy * src_stride;
774      y += dy;
775      if (y > (src_height << 16)) {
776        y = (src_height << 16);
777      }
778      boxheight = (y >> 16) - iy;
779      ScaleAddRows(src, src_stride, (uint16*)(row16),
780                 src_width, boxheight);
781      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
782                 dst_ptr);
783      dst_ptr += dst_stride;
784    }
785    free_aligned_buffer_64(row16);
786  }
787}
788
789static void ScalePlaneBox_16(int src_width, int src_height,
790                             int dst_width, int dst_height,
791                             int src_stride, int dst_stride,
792                             const uint16* src_ptr, uint16* dst_ptr) {
793  int j;
794  // Initial source x/y coordinate and step values as 16.16 fixed point.
795  int x = 0;
796  int y = 0;
797  int dx = 0;
798  int dy = 0;
799  const int max_y = (src_height << 16);
800  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
801             &x, &y, &dx, &dy);
802  src_width = Abs(src_width);
803  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
804  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
805    uint16* dst = dst_ptr;
806    int j;
807    for (j = 0; j < dst_height; ++j) {
808      int boxheight;
809      int iy = y >> 16;
810      const uint16* src = src_ptr + iy * src_stride;
811      y += dy;
812      if (y > max_y) {
813        y = max_y;
814      }
815      boxheight = (y >> 16) - iy;
816      ScalePlaneBoxRow_16_C(dst_width, boxheight,
817                            x, dx, src_stride,
818                            src, dst);
819      dst += dst_stride;
820    }
821    return;
822  }
823  {
824    // Allocate a row buffer of uint32.
825    align_buffer_64(row32, src_width * 4);
826    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
827        const uint32* src_ptr, uint16* dst_ptr) =
828        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
829    void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
830        uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
831
832#if defined(HAS_SCALEADDROWS_16_SSE2)
833    if (TestCpuFlag(kCpuHasSSE2) &&
834#ifdef AVOID_OVERREAD
835        IS_ALIGNED(src_width, 16) &&
836#endif
837        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
838      ScaleAddRows = ScaleAddRows_16_SSE2;
839    }
840#endif
841
842    for (j = 0; j < dst_height; ++j) {
843      int boxheight;
844      int iy = y >> 16;
845      const uint16* src = src_ptr + iy * src_stride;
846      y += dy;
847      if (y > (src_height << 16)) {
848        y = (src_height << 16);
849      }
850      boxheight = (y >> 16) - iy;
851      ScaleAddRows(src, src_stride, (uint32*)(row32),
852                 src_width, boxheight);
853      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
854                 dst_ptr);
855      dst_ptr += dst_stride;
856    }
857    free_aligned_buffer_64(row32);
858  }
859}
860
861// Scale plane down with bilinear interpolation.
862void ScalePlaneBilinearDown(int src_width, int src_height,
863                            int dst_width, int dst_height,
864                            int src_stride, int dst_stride,
865                            const uint8* src_ptr, uint8* dst_ptr,
866                            enum FilterMode filtering) {
867  // Initial source x/y coordinate and step values as 16.16 fixed point.
868  int x = 0;
869  int y = 0;
870  int dx = 0;
871  int dy = 0;
872  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
873  // Allocate a row buffer.
874  align_buffer_64(row, src_width);
875
876  const int max_y = (src_height - 1) << 16;
877  int j;
878  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
879      int dst_width, int x, int dx) =
880      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
881  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
882      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
883      InterpolateRow_C;
884  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
885             &x, &y, &dx, &dy);
886  src_width = Abs(src_width);
887
888#if defined(HAS_INTERPOLATEROW_SSE2)
889  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
890    InterpolateRow = InterpolateRow_Any_SSE2;
891    if (IS_ALIGNED(src_width, 16)) {
892      InterpolateRow = InterpolateRow_Unaligned_SSE2;
893      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
894        InterpolateRow = InterpolateRow_SSE2;
895      }
896    }
897  }
898#endif
899#if defined(HAS_INTERPOLATEROW_SSSE3)
900  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
901    InterpolateRow = InterpolateRow_Any_SSSE3;
902    if (IS_ALIGNED(src_width, 16)) {
903      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
904      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
905        InterpolateRow = InterpolateRow_SSSE3;
906      }
907    }
908  }
909#endif
910#if defined(HAS_INTERPOLATEROW_AVX2)
911  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
912    InterpolateRow = InterpolateRow_Any_AVX2;
913    if (IS_ALIGNED(src_width, 32)) {
914      InterpolateRow = InterpolateRow_AVX2;
915    }
916  }
917#endif
918#if defined(HAS_INTERPOLATEROW_NEON)
919  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
920    InterpolateRow = InterpolateRow_Any_NEON;
921    if (IS_ALIGNED(src_width, 16)) {
922      InterpolateRow = InterpolateRow_NEON;
923    }
924  }
925#endif
926#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
927  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
928    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
929    if (IS_ALIGNED(src_width, 4)) {
930      InterpolateRow = InterpolateRow_MIPS_DSPR2;
931    }
932  }
933#endif
934
935
936#if defined(HAS_SCALEFILTERCOLS_SSSE3)
937  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
938    ScaleFilterCols = ScaleFilterCols_SSSE3;
939  }
940#endif
941  if (y > max_y) {
942    y = max_y;
943  }
944
945  for (j = 0; j < dst_height; ++j) {
946    int yi = y >> 16;
947    const uint8* src = src_ptr + yi * src_stride;
948    if (filtering == kFilterLinear) {
949      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
950    } else {
951      int yf = (y >> 8) & 255;
952      InterpolateRow(row, src, src_stride, src_width, yf);
953      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
954    }
955    dst_ptr += dst_stride;
956    y += dy;
957    if (y > max_y) {
958      y = max_y;
959    }
960  }
961  free_aligned_buffer_64(row);
962}
963
964void ScalePlaneBilinearDown_16(int src_width, int src_height,
965                               int dst_width, int dst_height,
966                               int src_stride, int dst_stride,
967                               const uint16* src_ptr, uint16* dst_ptr,
968                               enum FilterMode filtering) {
969  // Initial source x/y coordinate and step values as 16.16 fixed point.
970  int x = 0;
971  int y = 0;
972  int dx = 0;
973  int dy = 0;
974  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
975  // Allocate a row buffer.
976  align_buffer_64(row, src_width * 2);
977
978  const int max_y = (src_height - 1) << 16;
979  int j;
980  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
981      int dst_width, int x, int dx) =
982      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
983  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
984      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
985      InterpolateRow_16_C;
986  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
987             &x, &y, &dx, &dy);
988  src_width = Abs(src_width);
989
990#if defined(HAS_INTERPOLATEROW_16_SSE2)
991  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
992    InterpolateRow = InterpolateRow_Any_16_SSE2;
993    if (IS_ALIGNED(src_width, 16)) {
994      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
995      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
996        InterpolateRow = InterpolateRow_16_SSE2;
997      }
998    }
999  }
1000#endif
1001#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1002  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
1003    InterpolateRow = InterpolateRow_Any_16_SSSE3;
1004    if (IS_ALIGNED(src_width, 16)) {
1005      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
1006      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
1007        InterpolateRow = InterpolateRow_16_SSSE3;
1008      }
1009    }
1010  }
1011#endif
1012#if defined(HAS_INTERPOLATEROW_16_AVX2)
1013  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
1014    InterpolateRow = InterpolateRow_Any_16_AVX2;
1015    if (IS_ALIGNED(src_width, 32)) {
1016      InterpolateRow = InterpolateRow_16_AVX2;
1017    }
1018  }
1019#endif
1020#if defined(HAS_INTERPOLATEROW_16_NEON)
1021  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
1022    InterpolateRow = InterpolateRow_Any_16_NEON;
1023    if (IS_ALIGNED(src_width, 16)) {
1024      InterpolateRow = InterpolateRow_16_NEON;
1025    }
1026  }
1027#endif
1028#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
1029  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
1030    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
1031    if (IS_ALIGNED(src_width, 4)) {
1032      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
1033    }
1034  }
1035#endif
1036
1037
1038#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1039  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1040    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1041  }
1042#endif
1043  if (y > max_y) {
1044    y = max_y;
1045  }
1046
1047  for (j = 0; j < dst_height; ++j) {
1048    int yi = y >> 16;
1049    const uint16* src = src_ptr + yi * src_stride;
1050    if (filtering == kFilterLinear) {
1051      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1052    } else {
1053      int yf = (y >> 8) & 255;
1054      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
1055      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
1056    }
1057    dst_ptr += dst_stride;
1058    y += dy;
1059    if (y > max_y) {
1060      y = max_y;
1061    }
1062  }
1063  free_aligned_buffer_64(row);
1064}
1065
1066// Scale up down with bilinear interpolation.
1067void ScalePlaneBilinearUp(int src_width, int src_height,
1068                          int dst_width, int dst_height,
1069                          int src_stride, int dst_stride,
1070                          const uint8* src_ptr, uint8* dst_ptr,
1071                          enum FilterMode filtering) {
1072  int j;
1073  // Initial source x/y coordinate and step values as 16.16 fixed point.
1074  int x = 0;
1075  int y = 0;
1076  int dx = 0;
1077  int dy = 0;
1078  const int max_y = (src_height - 1) << 16;
1079  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1080      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
1081      InterpolateRow_C;
1082  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
1083       int dst_width, int x, int dx) =
1084       filtering ? ScaleFilterCols_C : ScaleCols_C;
1085  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
1086             &x, &y, &dx, &dy);
1087  src_width = Abs(src_width);
1088
1089#if defined(HAS_INTERPOLATEROW_SSE2)
1090  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
1091    InterpolateRow = InterpolateRow_Any_SSE2;
1092    if (IS_ALIGNED(dst_width, 16)) {
1093      InterpolateRow = InterpolateRow_Unaligned_SSE2;
1094      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1095        InterpolateRow = InterpolateRow_SSE2;
1096      }
1097    }
1098  }
1099#endif
1100#if defined(HAS_INTERPOLATEROW_SSSE3)
1101  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
1102    InterpolateRow = InterpolateRow_Any_SSSE3;
1103    if (IS_ALIGNED(dst_width, 16)) {
1104      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
1105      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1106        InterpolateRow = InterpolateRow_SSSE3;
1107      }
1108    }
1109  }
1110#endif
1111#if defined(HAS_INTERPOLATEROW_AVX2)
1112  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
1113    InterpolateRow = InterpolateRow_Any_AVX2;
1114    if (IS_ALIGNED(dst_width, 32)) {
1115      InterpolateRow = InterpolateRow_AVX2;
1116    }
1117  }
1118#endif
1119#if defined(HAS_INTERPOLATEROW_NEON)
1120  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
1121    InterpolateRow = InterpolateRow_Any_NEON;
1122    if (IS_ALIGNED(dst_width, 16)) {
1123      InterpolateRow = InterpolateRow_NEON;
1124    }
1125  }
1126#endif
1127#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
1128  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
1129    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
1130    if (IS_ALIGNED(dst_width, 4)) {
1131      InterpolateRow = InterpolateRow_MIPS_DSPR2;
1132    }
1133  }
1134#endif
1135
1136  if (filtering && src_width >= 32768) {
1137    ScaleFilterCols = ScaleFilterCols64_C;
1138  }
1139#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1140  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1141    ScaleFilterCols = ScaleFilterCols_SSSE3;
1142  }
1143#endif
1144  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1145    ScaleFilterCols = ScaleColsUp2_C;
1146#if defined(HAS_SCALECOLS_SSE2)
1147    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
1148        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
1149        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1150      ScaleFilterCols = ScaleColsUp2_SSE2;
1151    }
1152#endif
1153  }
1154
1155  if (y > max_y) {
1156    y = max_y;
1157  }
1158  {
1159    int yi = y >> 16;
1160    const uint8* src = src_ptr + yi * src_stride;
1161
1162    // Allocate 2 row buffers.
1163    const int kRowSize = (dst_width + 15) & ~15;
1164    align_buffer_64(row, kRowSize * 2);
1165
1166    uint8* rowptr = row;
1167    int rowstride = kRowSize;
1168    int lasty = yi;
1169
1170    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1171    if (src_height > 1) {
1172      src += src_stride;
1173    }
1174    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1175    src += src_stride;
1176
1177    for (j = 0; j < dst_height; ++j) {
1178      yi = y >> 16;
1179      if (yi != lasty) {
1180        if (y > max_y) {
1181          y = max_y;
1182          yi = y >> 16;
1183          src = src_ptr + yi * src_stride;
1184        }
1185        if (yi != lasty) {
1186          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1187          rowptr += rowstride;
1188          rowstride = -rowstride;
1189          lasty = yi;
1190          src += src_stride;
1191        }
1192      }
1193      if (filtering == kFilterLinear) {
1194        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1195      } else {
1196        int yf = (y >> 8) & 255;
1197        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1198      }
1199      dst_ptr += dst_stride;
1200      y += dy;
1201    }
1202    free_aligned_buffer_64(row);
1203  }
1204}
1205
1206void ScalePlaneBilinearUp_16(int src_width, int src_height,
1207                             int dst_width, int dst_height,
1208                             int src_stride, int dst_stride,
1209                             const uint16* src_ptr, uint16* dst_ptr,
1210                             enum FilterMode filtering) {
1211  int j;
1212  // Initial source x/y coordinate and step values as 16.16 fixed point.
1213  int x = 0;
1214  int y = 0;
1215  int dx = 0;
1216  int dy = 0;
1217  const int max_y = (src_height - 1) << 16;
1218  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
1219      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
1220      InterpolateRow_16_C;
1221  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
1222       int dst_width, int x, int dx) =
1223       filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1224  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
1225             &x, &y, &dx, &dy);
1226  src_width = Abs(src_width);
1227
1228#if defined(HAS_INTERPOLATEROW_16_SSE2)
1229  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
1230    InterpolateRow = InterpolateRow_Any_16_SSE2;
1231    if (IS_ALIGNED(dst_width, 16)) {
1232      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
1233      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1234        InterpolateRow = InterpolateRow_16_SSE2;
1235      }
1236    }
1237  }
1238#endif
1239#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1240  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
1241    InterpolateRow = InterpolateRow_Any_16_SSSE3;
1242    if (IS_ALIGNED(dst_width, 16)) {
1243      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
1244      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1245        InterpolateRow = InterpolateRow_16_SSSE3;
1246      }
1247    }
1248  }
1249#endif
1250#if defined(HAS_INTERPOLATEROW_16_AVX2)
1251  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
1252    InterpolateRow = InterpolateRow_Any_16_AVX2;
1253    if (IS_ALIGNED(dst_width, 32)) {
1254      InterpolateRow = InterpolateRow_16_AVX2;
1255    }
1256  }
1257#endif
1258#if defined(HAS_INTERPOLATEROW_16_NEON)
1259  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
1260    InterpolateRow = InterpolateRow_Any_16_NEON;
1261    if (IS_ALIGNED(dst_width, 16)) {
1262      InterpolateRow = InterpolateRow_16_NEON;
1263    }
1264  }
1265#endif
1266#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
1267  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
1268    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
1269    if (IS_ALIGNED(dst_width, 4)) {
1270      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
1271    }
1272  }
1273#endif
1274
1275  if (filtering && src_width >= 32768) {
1276    ScaleFilterCols = ScaleFilterCols64_16_C;
1277  }
1278#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1279  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1280    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1281  }
1282#endif
1283  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1284    ScaleFilterCols = ScaleColsUp2_16_C;
1285#if defined(HAS_SCALECOLS_16_SSE2)
1286    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
1287        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
1288        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1289      ScaleFilterCols = ScaleColsUp2_16_SSE2;
1290    }
1291#endif
1292  }
1293
1294  if (y > max_y) {
1295    y = max_y;
1296  }
1297  {
1298    int yi = y >> 16;
1299    const uint16* src = src_ptr + yi * src_stride;
1300
1301    // Allocate 2 row buffers.
1302    const int kRowSize = (dst_width + 15) & ~15;
1303    align_buffer_64(row, kRowSize * 4);
1304
1305    uint16* rowptr = (uint16*)row;
1306    int rowstride = kRowSize;
1307    int lasty = yi;
1308
1309    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1310    if (src_height > 1) {
1311      src += src_stride;
1312    }
1313    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1314    src += src_stride;
1315
1316    for (j = 0; j < dst_height; ++j) {
1317      yi = y >> 16;
1318      if (yi != lasty) {
1319        if (y > max_y) {
1320          y = max_y;
1321          yi = y >> 16;
1322          src = src_ptr + yi * src_stride;
1323        }
1324        if (yi != lasty) {
1325          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1326          rowptr += rowstride;
1327          rowstride = -rowstride;
1328          lasty = yi;
1329          src += src_stride;
1330        }
1331      }
1332      if (filtering == kFilterLinear) {
1333        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1334      } else {
1335        int yf = (y >> 8) & 255;
1336        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1337      }
1338      dst_ptr += dst_stride;
1339      y += dy;
1340    }
1341    free_aligned_buffer_64(row);
1342  }
1343}
1344
1345// Scale Plane to/from any dimensions, without interpolation.
1346// Fixed point math is used for performance: The upper 16 bits
1347// of x and dx is the integer part of the source position and
1348// the lower 16 bits are the fixed decimal part.
1349
1350static void ScalePlaneSimple(int src_width, int src_height,
1351                             int dst_width, int dst_height,
1352                             int src_stride, int dst_stride,
1353                             const uint8* src_ptr, uint8* dst_ptr) {
1354  int i;
1355  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
1356      int dst_width, int x, int dx) = ScaleCols_C;
1357  // Initial source x/y coordinate and step values as 16.16 fixed point.
1358  int x = 0;
1359  int y = 0;
1360  int dx = 0;
1361  int dy = 0;
1362  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
1363             &x, &y, &dx, &dy);
1364  src_width = Abs(src_width);
1365
1366  if (src_width * 2 == dst_width && x < 0x8000) {
1367    ScaleCols = ScaleColsUp2_C;
1368#if defined(HAS_SCALECOLS_SSE2)
1369    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
1370        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
1371        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1372      ScaleCols = ScaleColsUp2_SSE2;
1373    }
1374#endif
1375  }
1376
1377  for (i = 0; i < dst_height; ++i) {
1378    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
1379              dst_width, x, dx);
1380    dst_ptr += dst_stride;
1381    y += dy;
1382  }
1383}
1384
1385static void ScalePlaneSimple_16(int src_width, int src_height,
1386                                int dst_width, int dst_height,
1387                                int src_stride, int dst_stride,
1388                                const uint16* src_ptr, uint16* dst_ptr) {
1389  int i;
1390  void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
1391      int dst_width, int x, int dx) = ScaleCols_16_C;
1392  // Initial source x/y coordinate and step values as 16.16 fixed point.
1393  int x = 0;
1394  int y = 0;
1395  int dx = 0;
1396  int dy = 0;
1397  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
1398             &x, &y, &dx, &dy);
1399  src_width = Abs(src_width);
1400
1401  if (src_width * 2 == dst_width && x < 0x8000) {
1402    ScaleCols = ScaleColsUp2_16_C;
1403#if defined(HAS_SCALECOLS_16_SSE2)
1404    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
1405        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
1406        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1407      ScaleCols = ScaleColsUp2_16_SSE2;
1408    }
1409#endif
1410  }
1411
1412  for (i = 0; i < dst_height; ++i) {
1413    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
1414              dst_width, x, dx);
1415    dst_ptr += dst_stride;
1416    y += dy;
1417  }
1418}
1419
1420// Scale a plane.
1421// This function dispatches to a specialized scaler based on scale factor.
1422
1423LIBYUV_API
1424void ScalePlane(const uint8* src, int src_stride,
1425                int src_width, int src_height,
1426                uint8* dst, int dst_stride,
1427                int dst_width, int dst_height,
1428                enum FilterMode filtering) {
1429  // Simplify filtering when possible.
1430  filtering = ScaleFilterReduce(src_width, src_height,
1431                                dst_width, dst_height,
1432                                filtering);
1433
1434  // Negative height means invert the image.
1435  if (src_height < 0) {
1436    src_height = -src_height;
1437    src = src + (src_height - 1) * src_stride;
1438    src_stride = -src_stride;
1439  }
1440
1441  // Use specialized scales to improve performance for common resolutions.
1442  // For example, all the 1/2 scalings will use ScalePlaneDown2()
1443  if (dst_width == src_width && dst_height == src_height) {
1444    // Straight copy.
1445    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1446    return;
1447  }
1448  if (dst_width == src_width) {
1449    int dy = FixedDiv(src_height, dst_height);
1450    // Arbitrary scale vertically, but unscaled vertically.
1451    ScalePlaneVertical(src_height,
1452                       dst_width, dst_height,
1453                       src_stride, dst_stride, src, dst,
1454                       0, 0, dy, 1, filtering);
1455    return;
1456  }
1457  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1458    // Scale down.
1459    if (4 * dst_width == 3 * src_width &&
1460        4 * dst_height == 3 * src_height) {
1461      // optimized, 3/4
1462      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
1463                       src_stride, dst_stride, src, dst, filtering);
1464      return;
1465    }
1466    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1467      // optimized, 1/2
1468      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
1469                      src_stride, dst_stride, src, dst, filtering);
1470      return;
1471    }
1472    // 3/8 rounded up for odd sized chroma height.
1473    if (8 * dst_width == 3 * src_width &&
1474        dst_height == ((src_height * 3 + 7) / 8)) {
1475      // optimized, 3/8
1476      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
1477                       src_stride, dst_stride, src, dst, filtering);
1478      return;
1479    }
1480    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1481               filtering != kFilterBilinear) {
1482      // optimized, 1/4
1483      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
1484                      src_stride, dst_stride, src, dst, filtering);
1485      return;
1486    }
1487  }
1488  if (filtering == kFilterBox && dst_height * 2 < src_height) {
1489    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
1490                  src_stride, dst_stride, src, dst);
1491    return;
1492  }
1493  if (filtering && dst_height > src_height) {
1494    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
1495                         src_stride, dst_stride, src, dst, filtering);
1496    return;
1497  }
1498  if (filtering) {
1499    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
1500                           src_stride, dst_stride, src, dst, filtering);
1501    return;
1502  }
1503  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
1504                   src_stride, dst_stride, src, dst);
1505}
1506
1507LIBYUV_API
1508void ScalePlane_16(const uint16* src, int src_stride,
1509                  int src_width, int src_height,
1510                  uint16* dst, int dst_stride,
1511                  int dst_width, int dst_height,
1512                  enum FilterMode filtering) {
1513  // Simplify filtering when possible.
1514  filtering = ScaleFilterReduce(src_width, src_height,
1515                                dst_width, dst_height,
1516                                filtering);
1517
1518  // Negative height means invert the image.
1519  if (src_height < 0) {
1520    src_height = -src_height;
1521    src = src + (src_height - 1) * src_stride;
1522    src_stride = -src_stride;
1523  }
1524
1525  // Use specialized scales to improve performance for common resolutions.
1526  // For example, all the 1/2 scalings will use ScalePlaneDown2()
1527  if (dst_width == src_width && dst_height == src_height) {
1528    // Straight copy.
1529    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
1530    return;
1531  }
1532  if (dst_width == src_width) {
1533    int dy = FixedDiv(src_height, dst_height);
1534    // Arbitrary scale vertically, but unscaled vertically.
1535    ScalePlaneVertical_16(src_height,
1536                          dst_width, dst_height,
1537                          src_stride, dst_stride, src, dst,
1538                          0, 0, dy, 1, filtering);
1539    return;
1540  }
1541  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1542    // Scale down.
1543    if (4 * dst_width == 3 * src_width &&
1544        4 * dst_height == 3 * src_height) {
1545      // optimized, 3/4
1546      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
1547                          src_stride, dst_stride, src, dst, filtering);
1548      return;
1549    }
1550    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1551      // optimized, 1/2
1552      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
1553                         src_stride, dst_stride, src, dst, filtering);
1554      return;
1555    }
1556    // 3/8 rounded up for odd sized chroma height.
1557    if (8 * dst_width == 3 * src_width &&
1558        dst_height == ((src_height * 3 + 7) / 8)) {
1559      // optimized, 3/8
1560      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
1561                          src_stride, dst_stride, src, dst, filtering);
1562      return;
1563    }
1564    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1565               filtering != kFilterBilinear) {
1566      // optimized, 1/4
1567      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
1568                         src_stride, dst_stride, src, dst, filtering);
1569      return;
1570    }
1571  }
1572  if (filtering == kFilterBox && dst_height * 2 < src_height) {
1573    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
1574                     src_stride, dst_stride, src, dst);
1575    return;
1576  }
1577  if (filtering && dst_height > src_height) {
1578    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
1579                            src_stride, dst_stride, src, dst, filtering);
1580    return;
1581  }
1582  if (filtering) {
1583    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
1584                              src_stride, dst_stride, src, dst, filtering);
1585    return;
1586  }
1587  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
1588                      src_stride, dst_stride, src, dst);
1589}
1590
1591// Scale an I420 image.
1592// This function in turn calls a scaling function for each plane.
1593
1594LIBYUV_API
1595int I420Scale(const uint8* src_y, int src_stride_y,
1596              const uint8* src_u, int src_stride_u,
1597              const uint8* src_v, int src_stride_v,
1598              int src_width, int src_height,
1599              uint8* dst_y, int dst_stride_y,
1600              uint8* dst_u, int dst_stride_u,
1601              uint8* dst_v, int dst_stride_v,
1602              int dst_width, int dst_height,
1603              enum FilterMode filtering) {
1604  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1605  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1606  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1607  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1608  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1609      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1610    return -1;
1611  }
1612
1613  ScalePlane(src_y, src_stride_y, src_width, src_height,
1614             dst_y, dst_stride_y, dst_width, dst_height,
1615             filtering);
1616  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
1617             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1618             filtering);
1619  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
1620             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
1621             filtering);
1622  return 0;
1623}
1624
1625LIBYUV_API
1626int I420Scale_16(const uint16* src_y, int src_stride_y,
1627                 const uint16* src_u, int src_stride_u,
1628                 const uint16* src_v, int src_stride_v,
1629                 int src_width, int src_height,
1630                 uint16* dst_y, int dst_stride_y,
1631                 uint16* dst_u, int dst_stride_u,
1632                 uint16* dst_v, int dst_stride_v,
1633                 int dst_width, int dst_height,
1634                 enum FilterMode filtering) {
1635  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1636  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1637  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1638  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1639  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1640      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1641    return -1;
1642  }
1643
1644  ScalePlane_16(src_y, src_stride_y, src_width, src_height,
1645                dst_y, dst_stride_y, dst_width, dst_height,
1646                filtering);
1647  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
1648                dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1649                filtering);
1650  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
1651                dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
1652                filtering);
1653  return 0;
1654}
1655
1656// Deprecated api
1657LIBYUV_API
1658int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
1659          int src_stride_y, int src_stride_u, int src_stride_v,
1660          int src_width, int src_height,
1661          uint8* dst_y, uint8* dst_u, uint8* dst_v,
1662          int dst_stride_y, int dst_stride_u, int dst_stride_v,
1663          int dst_width, int dst_height,
1664          LIBYUV_BOOL interpolate) {
1665  return I420Scale(src_y, src_stride_y,
1666                   src_u, src_stride_u,
1667                   src_v, src_stride_v,
1668                   src_width, src_height,
1669                   dst_y, dst_stride_y,
1670                   dst_u, dst_stride_u,
1671                   dst_v, dst_stride_v,
1672                   dst_width, dst_height,
1673                   interpolate ? kFilterBox : kFilterNone);
1674}
1675
1676// Deprecated api
1677LIBYUV_API
1678int ScaleOffset(const uint8* src, int src_width, int src_height,
1679                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
1680                LIBYUV_BOOL interpolate) {
1681  // Chroma requires offset to multiple of 2.
1682  int dst_yoffset_even = dst_yoffset & ~1;
1683  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1684  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1685  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1686  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1687  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
1688  const uint8* src_y = src;
1689  const uint8* src_u = src + src_width * src_height;
1690  const uint8* src_v = src + src_width * src_height +
1691                             src_halfwidth * src_halfheight;
1692  uint8* dst_y = dst + dst_yoffset_even * dst_width;
1693  uint8* dst_u = dst + dst_width * dst_height +
1694                 (dst_yoffset_even >> 1) * dst_halfwidth;
1695  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
1696                 (dst_yoffset_even >> 1) * dst_halfwidth;
1697  if (!src || src_width <= 0 || src_height <= 0 ||
1698      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
1699      dst_yoffset_even >= dst_height) {
1700    return -1;
1701  }
1702  return I420Scale(src_y, src_width,
1703                   src_u, src_halfwidth,
1704                   src_v, src_halfwidth,
1705                   src_width, src_height,
1706                   dst_y, dst_width,
1707                   dst_u, dst_halfwidth,
1708                   dst_v, dst_halfwidth,
1709                   dst_width, aheight,
1710                   interpolate ? kFilterBox : kFilterNone);
1711}
1712
1713#ifdef __cplusplus
1714}  // extern "C"
1715}  // namespace libyuv
1716#endif
1717