1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/scale.h"
12
13#include <assert.h>
14#include <string.h>
15
16#include "libyuv/cpu_id.h"
17#include "libyuv/planar_functions.h"  // For CopyARGB
18#include "libyuv/row.h"
19#include "libyuv/scale_row.h"
20
21#ifdef __cplusplus
22namespace libyuv {
23extern "C" {
24#endif
25
// Returns the magnitude of v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30// ScaleARGB ARGB, 1/2
31// This is an optimized version for scaling down a ARGB to 1/2 of
32// its original size.
33static void ScaleARGBDown2(int src_width, int src_height,
34                           int dst_width, int dst_height,
35                           int src_stride, int dst_stride,
36                           const uint8* src_argb, uint8* dst_argb,
37                           int x, int dx, int y, int dy,
38                           enum FilterMode filtering) {
39  int j;
40  int row_stride = src_stride * (dy >> 16);
41  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
42                            uint8* dst_argb, int dst_width) =
43    filtering == kFilterNone ? ScaleARGBRowDown2_C :
44        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
45        ScaleARGBRowDown2Box_C);
46  assert(dx == 65536 * 2);  // Test scale factor of 2.
47  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
48  // Advance to odd row, even column.
49  if (filtering == kFilterBilinear) {
50    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
51  } else {
52    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
53  }
54
55#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
56  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
57      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
58      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
59    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
60        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
61        ScaleARGBRowDown2Box_SSE2);
62  }
63#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
64  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
65      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
66    ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
67        ScaleARGBRowDown2_NEON;
68  }
69#endif
70
71  if (filtering == kFilterLinear) {
72    src_stride = 0;
73  }
74  for (j = 0; j < dst_height; ++j) {
75    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
76    src_argb += row_stride;
77    dst_argb += dst_stride;
78  }
79}
80
81// ScaleARGB ARGB, 1/4
82// This is an optimized version for scaling down a ARGB to 1/4 of
83// its original size.
84static void ScaleARGBDown4Box(int src_width, int src_height,
85                              int dst_width, int dst_height,
86                              int src_stride, int dst_stride,
87                              const uint8* src_argb, uint8* dst_argb,
88                              int x, int dx, int y, int dy) {
89  int j;
90  // Allocate 2 rows of ARGB.
91  const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
92  align_buffer_64(row, kRowSize * 2);
93  int row_stride = src_stride * (dy >> 16);
94  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
95    uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
96  // Advance to odd row, even column.
97  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
98  assert(dx == 65536 * 4);  // Test scale factor of 4.
99  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
100#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
101  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
102      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
103      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
104    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
105  }
106#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
107  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
108      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
109    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
110  }
111#endif
112  for (j = 0; j < dst_height; ++j) {
113    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
114    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
115                      row + kRowSize, dst_width * 2);
116    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
117    src_argb += row_stride;
118    dst_argb += dst_stride;
119  }
120  free_aligned_buffer_64(row);
121}
122
123// ScaleARGB ARGB Even
124// This is an optimized version for scaling down a ARGB to even
125// multiple of its original size.
126static void ScaleARGBDownEven(int src_width, int src_height,
127                              int dst_width, int dst_height,
128                              int src_stride, int dst_stride,
129                              const uint8* src_argb, uint8* dst_argb,
130                              int x, int dx, int y, int dy,
131                              enum FilterMode filtering) {
132  int j;
133  int col_step = dx >> 16;
134  int row_stride = (dy >> 16) * src_stride;
135  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
136                               int src_step, uint8* dst_argb, int dst_width) =
137      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
138  assert(IS_ALIGNED(src_width, 2));
139  assert(IS_ALIGNED(src_height, 2));
140  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
141#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
142  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
143      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
144    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
145        ScaleARGBRowDownEven_SSE2;
146  }
147#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
148  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
149      IS_ALIGNED(src_argb, 4)) {
150    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
151        ScaleARGBRowDownEven_NEON;
152  }
153#endif
154
155  if (filtering == kFilterLinear) {
156    src_stride = 0;
157  }
158  for (j = 0; j < dst_height; ++j) {
159    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
160    src_argb += row_stride;
161    dst_argb += dst_stride;
162  }
163}
164
165// Scale ARGB down with bilinear interpolation.
166static void ScaleARGBBilinearDown(int src_width, int src_height,
167                                  int dst_width, int dst_height,
168                                  int src_stride, int dst_stride,
169                                  const uint8* src_argb, uint8* dst_argb,
170                                  int x, int dx, int y, int dy,
171                                  enum FilterMode filtering) {
172  int j;
173  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
174      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
175      InterpolateRow_C;
176  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
177      int dst_width, int x, int dx) =
178      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
179  int64 xlast = x + (int64)(dst_width - 1) * dx;
180  int64 xl = (dx >= 0) ? x : xlast;
181  int64 xr = (dx >= 0) ? xlast : x;
182  int clip_src_width;
183  xl = (xl >> 16) & ~3;  // Left edge aligned.
184  xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
185  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
186  if (xr > src_width) {
187    xr = src_width;
188  }
189  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
190  src_argb += xl * 4;
191  x -= (int)(xl << 16);
192#if defined(HAS_INTERPOLATEROW_SSE2)
193  if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
194    InterpolateRow = InterpolateRow_Any_SSE2;
195    if (IS_ALIGNED(clip_src_width, 16)) {
196      InterpolateRow = InterpolateRow_Unaligned_SSE2;
197      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
198        InterpolateRow = InterpolateRow_SSE2;
199      }
200    }
201  }
202#endif
203#if defined(HAS_INTERPOLATEROW_SSSE3)
204  if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
205    InterpolateRow = InterpolateRow_Any_SSSE3;
206    if (IS_ALIGNED(clip_src_width, 16)) {
207      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
208      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
209        InterpolateRow = InterpolateRow_SSSE3;
210      }
211    }
212  }
213#endif
214#if defined(HAS_INTERPOLATEROW_AVX2)
215  if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
216    InterpolateRow = InterpolateRow_Any_AVX2;
217    if (IS_ALIGNED(clip_src_width, 32)) {
218      InterpolateRow = InterpolateRow_AVX2;
219    }
220  }
221#endif
222#if defined(HAS_INTERPOLATEROW_NEON)
223  if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
224    InterpolateRow = InterpolateRow_Any_NEON;
225    if (IS_ALIGNED(clip_src_width, 16)) {
226      InterpolateRow = InterpolateRow_NEON;
227    }
228  }
229#endif
230#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
231  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
232      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
233    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
234    if (IS_ALIGNED(clip_src_width, 4)) {
235      InterpolateRow = InterpolateRow_MIPS_DSPR2;
236    }
237  }
238#endif
239#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
240  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
241    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
242  }
243#endif
244  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
245  // Allocate a row of ARGB.
246  {
247    align_buffer_64(row, clip_src_width * 4);
248
249    const int max_y = (src_height - 1) << 16;
250    if (y > max_y) {
251      y = max_y;
252    }
253    for (j = 0; j < dst_height; ++j) {
254      int yi = y >> 16;
255      const uint8* src = src_argb + yi * src_stride;
256      if (filtering == kFilterLinear) {
257        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
258      } else {
259        int yf = (y >> 8) & 255;
260        InterpolateRow(row, src, src_stride, clip_src_width, yf);
261        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
262      }
263      dst_argb += dst_stride;
264      y += dy;
265      if (y > max_y) {
266        y = max_y;
267      }
268    }
269    free_aligned_buffer_64(row);
270  }
271}
272
273// Scale ARGB up with bilinear interpolation.
274static void ScaleARGBBilinearUp(int src_width, int src_height,
275                                int dst_width, int dst_height,
276                                int src_stride, int dst_stride,
277                                const uint8* src_argb, uint8* dst_argb,
278                                int x, int dx, int y, int dy,
279                                enum FilterMode filtering) {
280  int j;
281  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
282      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
283      InterpolateRow_C;
284  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
285      int dst_width, int x, int dx) =
286      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
287  const int max_y = (src_height - 1) << 16;
288#if defined(HAS_INTERPOLATEROW_SSE2)
289  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
290    InterpolateRow = InterpolateRow_Any_SSE2;
291    if (IS_ALIGNED(dst_width, 4)) {
292      InterpolateRow = InterpolateRow_Unaligned_SSE2;
293      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
294        InterpolateRow = InterpolateRow_SSE2;
295      }
296    }
297  }
298#endif
299#if defined(HAS_INTERPOLATEROW_SSSE3)
300  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
301    InterpolateRow = InterpolateRow_Any_SSSE3;
302    if (IS_ALIGNED(dst_width, 4)) {
303      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
304      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
305        InterpolateRow = InterpolateRow_SSSE3;
306      }
307    }
308  }
309#endif
310#if defined(HAS_INTERPOLATEROW_AVX2)
311  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
312    InterpolateRow = InterpolateRow_Any_AVX2;
313    if (IS_ALIGNED(dst_width, 8)) {
314      InterpolateRow = InterpolateRow_AVX2;
315    }
316  }
317#endif
318#if defined(HAS_INTERPOLATEROW_NEON)
319  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
320    InterpolateRow = InterpolateRow_Any_NEON;
321    if (IS_ALIGNED(dst_width, 4)) {
322      InterpolateRow = InterpolateRow_NEON;
323    }
324  }
325#endif
326#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
327  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
328      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
329    InterpolateRow = InterpolateRow_MIPS_DSPR2;
330  }
331#endif
332  if (src_width >= 32768) {
333    ScaleARGBFilterCols = filtering ?
334        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
335  }
336#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
337  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
338    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
339  }
340#endif
341#if defined(HAS_SCALEARGBCOLS_SSE2)
342  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
343    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
344  }
345#endif
346  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
347    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
348#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
349    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
350        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
351        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
352      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
353    }
354#endif
355  }
356
357  if (y > max_y) {
358    y = max_y;
359  }
360
361  {
362    int yi = y >> 16;
363    const uint8* src = src_argb + yi * src_stride;
364
365    // Allocate 2 rows of ARGB.
366    const int kRowSize = (dst_width * 4 + 15) & ~15;
367    align_buffer_64(row, kRowSize * 2);
368
369    uint8* rowptr = row;
370    int rowstride = kRowSize;
371    int lasty = yi;
372
373    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
374    if (src_height > 1) {
375      src += src_stride;
376    }
377    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
378    src += src_stride;
379
380    for (j = 0; j < dst_height; ++j) {
381      yi = y >> 16;
382      if (yi != lasty) {
383        if (y > max_y) {
384          y = max_y;
385          yi = y >> 16;
386          src = src_argb + yi * src_stride;
387        }
388        if (yi != lasty) {
389          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
390          rowptr += rowstride;
391          rowstride = -rowstride;
392          lasty = yi;
393          src += src_stride;
394        }
395      }
396      if (filtering == kFilterLinear) {
397        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
398      } else {
399        int yf = (y >> 8) & 255;
400        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
401      }
402      dst_argb += dst_stride;
403      y += dy;
404    }
405    free_aligned_buffer_64(row);
406  }
407}
408
409#ifdef YUVSCALEUP
410// Scale YUV to ARGB up with bilinear interpolation.
411static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
412                                     int dst_width, int dst_height,
413                                     int src_stride_y,
414                                     int src_stride_u,
415                                     int src_stride_v,
416                                     int dst_stride_argb,
417                                     const uint8* src_y,
418                                     const uint8* src_u,
419                                     const uint8* src_v,
420                                     uint8* dst_argb,
421                                     int x, int dx, int y, int dy,
422                                     enum FilterMode filtering) {
423  int j;
424  void (*I422ToARGBRow)(const uint8* y_buf,
425                        const uint8* u_buf,
426                        const uint8* v_buf,
427                        uint8* rgb_buf,
428                        int width) = I422ToARGBRow_C;
429#if defined(HAS_I422TOARGBROW_SSSE3)
430  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
431    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
432    if (IS_ALIGNED(src_width, 8)) {
433      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
434      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
435        I422ToARGBRow = I422ToARGBRow_SSSE3;
436      }
437    }
438  }
439#endif
440#if defined(HAS_I422TOARGBROW_AVX2)
441  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
442    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
443    if (IS_ALIGNED(src_width, 16)) {
444      I422ToARGBRow = I422ToARGBRow_AVX2;
445    }
446  }
447#endif
448#if defined(HAS_I422TOARGBROW_NEON)
449  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
450    I422ToARGBRow = I422ToARGBRow_Any_NEON;
451    if (IS_ALIGNED(src_width, 8)) {
452      I422ToARGBRow = I422ToARGBRow_NEON;
453    }
454  }
455#endif
456#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
457  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
458      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
459      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
460      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
461      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
462    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
463  }
464#endif
465
466  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
467      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
468      InterpolateRow_C;
469#if defined(HAS_INTERPOLATEROW_SSE2)
470  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
471    InterpolateRow = InterpolateRow_Any_SSE2;
472    if (IS_ALIGNED(dst_width, 4)) {
473      InterpolateRow = InterpolateRow_Unaligned_SSE2;
474      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
475        InterpolateRow = InterpolateRow_SSE2;
476      }
477    }
478  }
479#endif
480#if defined(HAS_INTERPOLATEROW_SSSE3)
481  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
482    InterpolateRow = InterpolateRow_Any_SSSE3;
483    if (IS_ALIGNED(dst_width, 4)) {
484      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
485      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
486        InterpolateRow = InterpolateRow_SSSE3;
487      }
488    }
489  }
490#endif
491#if defined(HAS_INTERPOLATEROW_AVX2)
492  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
493    InterpolateRow = InterpolateRow_Any_AVX2;
494    if (IS_ALIGNED(dst_width, 8)) {
495      InterpolateRow = InterpolateRow_AVX2;
496    }
497  }
498#endif
499#if defined(HAS_INTERPOLATEROW_NEON)
500  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
501    InterpolateRow = InterpolateRow_Any_NEON;
502    if (IS_ALIGNED(dst_width, 4)) {
503      InterpolateRow = InterpolateRow_NEON;
504    }
505  }
506#endif
507#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
508  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
509      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
510    InterpolateRow = InterpolateRow_MIPS_DSPR2;
511  }
512#endif
513
514  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
515      int dst_width, int x, int dx) =
516      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
517  if (src_width >= 32768) {
518    ScaleARGBFilterCols = filtering ?
519        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
520  }
521#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
522  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
523    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
524  }
525#endif
526#if defined(HAS_SCALEARGBCOLS_SSE2)
527  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
528    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
529  }
530#endif
531  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
532    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
533#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
534    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
535        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
536        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
537      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
538    }
539#endif
540  }
541
542  const int max_y = (src_height - 1) << 16;
543  if (y > max_y) {
544    y = max_y;
545  }
546  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
547  int yi = y >> 16;
548  int uv_yi = yi >> kYShift;
549  const uint8* src_row_y = src_y + yi * src_stride_y;
550  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
551  const uint8* src_row_v = src_v + uv_yi * src_stride_v;
552
553  // Allocate 2 rows of ARGB.
554  const int kRowSize = (dst_width * 4 + 15) & ~15;
555  align_buffer_64(row, kRowSize * 2);
556
557  // Allocate 1 row of ARGB for source conversion.
558  align_buffer_64(argb_row, src_width * 4);
559
560  uint8* rowptr = row;
561  int rowstride = kRowSize;
562  int lasty = yi;
563
564  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
565  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
566  if (src_height > 1) {
567    src_row_y += src_stride_y;
568    if (yi & 1) {
569      src_row_u += src_stride_u;
570      src_row_v += src_stride_v;
571    }
572  }
573  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
574  if (src_height > 2) {
575    src_row_y += src_stride_y;
576    if (!(yi & 1)) {
577      src_row_u += src_stride_u;
578      src_row_v += src_stride_v;
579    }
580  }
581
582  for (j = 0; j < dst_height; ++j) {
583    yi = y >> 16;
584    if (yi != lasty) {
585      if (y > max_y) {
586        y = max_y;
587        yi = y >> 16;
588        uv_yi = yi >> kYShift;
589        src_row_y = src_y + yi * src_stride_y;
590        src_row_u = src_u + uv_yi * src_stride_u;
591        src_row_v = src_v + uv_yi * src_stride_v;
592      }
593      if (yi != lasty) {
594        // TODO(fbarchard): Convert the clipped region of row.
595        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
596        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
597        rowptr += rowstride;
598        rowstride = -rowstride;
599        lasty = yi;
600        src_row_y += src_stride_y;
601        if (yi & 1) {
602          src_row_u += src_stride_u;
603          src_row_v += src_stride_v;
604        }
605      }
606    }
607    if (filtering == kFilterLinear) {
608      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
609    } else {
610      int yf = (y >> 8) & 255;
611      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
612    }
613    dst_argb += dst_stride_argb;
614    y += dy;
615  }
616  free_aligned_buffer_64(row);
617  free_aligned_buffer_64(row_argb);
618}
619#endif
620
621// Scale ARGB to/from any dimensions, without interpolation.
622// Fixed point math is used for performance: The upper 16 bits
623// of x and dx is the integer part of the source position and
624// the lower 16 bits are the fixed decimal part.
625
626static void ScaleARGBSimple(int src_width, int src_height,
627                            int dst_width, int dst_height,
628                            int src_stride, int dst_stride,
629                            const uint8* src_argb, uint8* dst_argb,
630                            int x, int dx, int y, int dy) {
631  int j;
632  void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
633      int dst_width, int x, int dx) =
634      (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
635#if defined(HAS_SCALEARGBCOLS_SSE2)
636  if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
637    ScaleARGBCols = ScaleARGBCols_SSE2;
638  }
639#endif
640  if (src_width * 2 == dst_width && x < 0x8000) {
641    ScaleARGBCols = ScaleARGBColsUp2_C;
642#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
643    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
644        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
645        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
646      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
647    }
648#endif
649  }
650
651  for (j = 0; j < dst_height; ++j) {
652    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
653                  dst_width, x, dx);
654    dst_argb += dst_stride;
655    y += dy;
656  }
657}
658
659// ScaleARGB a ARGB.
660// This function in turn calls a scaling function
661// suitable for handling the desired resolutions.
662static void ScaleARGB(const uint8* src, int src_stride,
663                      int src_width, int src_height,
664                      uint8* dst, int dst_stride,
665                      int dst_width, int dst_height,
666                      int clip_x, int clip_y, int clip_width, int clip_height,
667                      enum FilterMode filtering) {
668  // Initial source x/y coordinate and step values as 16.16 fixed point.
669  int x = 0;
670  int y = 0;
671  int dx = 0;
672  int dy = 0;
673  // ARGB does not support box filter yet, but allow the user to pass it.
674  // Simplify filtering when possible.
675  filtering = ScaleFilterReduce(src_width, src_height,
676                                dst_width, dst_height,
677                                filtering);
678
679  // Negative src_height means invert the image.
680  if (src_height < 0) {
681    src_height = -src_height;
682    src = src + (src_height - 1) * src_stride;
683    src_stride = -src_stride;
684  }
685  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
686             &x, &y, &dx, &dy);
687  src_width = Abs(src_width);
688  if (clip_x) {
689    int64 clipf = (int64)(clip_x) * dx;
690    x += (clipf & 0xffff);
691    src += (clipf >> 16) * 4;
692    dst += clip_x * 4;
693  }
694  if (clip_y) {
695    int64 clipf = (int64)(clip_y) * dy;
696    y += (clipf & 0xffff);
697    src += (clipf >> 16) * src_stride;
698    dst += clip_y * dst_stride;
699  }
700
701  // Special case for integer step values.
702  if (((dx | dy) & 0xffff) == 0) {
703    if (!dx || !dy) {  // 1 pixel wide and/or tall.
704      filtering = kFilterNone;
705    } else {
706      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
707      if (!(dx & 0x10000) && !(dy & 0x10000)) {
708        if (dx == 0x20000) {
709          // Optimized 1/2 downsample.
710          ScaleARGBDown2(src_width, src_height,
711                         clip_width, clip_height,
712                         src_stride, dst_stride, src, dst,
713                         x, dx, y, dy, filtering);
714          return;
715        }
716        if (dx == 0x40000 && filtering == kFilterBox) {
717          // Optimized 1/4 box downsample.
718          ScaleARGBDown4Box(src_width, src_height,
719                            clip_width, clip_height,
720                            src_stride, dst_stride, src, dst,
721                            x, dx, y, dy);
722          return;
723        }
724        ScaleARGBDownEven(src_width, src_height,
725                          clip_width, clip_height,
726                          src_stride, dst_stride, src, dst,
727                          x, dx, y, dy, filtering);
728        return;
729      }
730      // Optimized odd scale down. ie 3, 5, 7, 9x.
731      if ((dx & 0x10000) && (dy & 0x10000)) {
732        filtering = kFilterNone;
733        if (dx == 0x10000 && dy == 0x10000) {
734          // Straight copy.
735          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
736                   dst, dst_stride, clip_width, clip_height);
737          return;
738        }
739      }
740    }
741  }
742  if (dx == 0x10000 && (x & 0xffff) == 0) {
743    // Arbitrary scale vertically, but unscaled vertically.
744    ScalePlaneVertical(src_height,
745                       clip_width, clip_height,
746                       src_stride, dst_stride, src, dst,
747                       x, y, dy, 4, filtering);
748    return;
749  }
750  if (filtering && dy < 65536) {
751    ScaleARGBBilinearUp(src_width, src_height,
752                        clip_width, clip_height,
753                        src_stride, dst_stride, src, dst,
754                        x, dx, y, dy, filtering);
755    return;
756  }
757  if (filtering) {
758    ScaleARGBBilinearDown(src_width, src_height,
759                          clip_width, clip_height,
760                          src_stride, dst_stride, src, dst,
761                          x, dx, y, dy, filtering);
762    return;
763  }
764  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
765                  src_stride, dst_stride, src, dst,
766                  x, dx, y, dy);
767}
768
769LIBYUV_API
770int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
771                  int src_width, int src_height,
772                  uint8* dst_argb, int dst_stride_argb,
773                  int dst_width, int dst_height,
774                  int clip_x, int clip_y, int clip_width, int clip_height,
775                  enum FilterMode filtering) {
776  if (!src_argb || src_width == 0 || src_height == 0 ||
777      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
778      clip_x < 0 || clip_y < 0 ||
779      (clip_x + clip_width) > dst_width ||
780      (clip_y + clip_height) > dst_height) {
781    return -1;
782  }
783  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
784            dst_argb, dst_stride_argb, dst_width, dst_height,
785            clip_x, clip_y, clip_width, clip_height, filtering);
786  return 0;
787}
788
789// Scale an ARGB image.
790LIBYUV_API
791int ARGBScale(const uint8* src_argb, int src_stride_argb,
792              int src_width, int src_height,
793              uint8* dst_argb, int dst_stride_argb,
794              int dst_width, int dst_height,
795              enum FilterMode filtering) {
796  if (!src_argb || src_width == 0 || src_height == 0 ||
797      !dst_argb || dst_width <= 0 || dst_height <= 0) {
798    return -1;
799  }
800  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
801            dst_argb, dst_stride_argb, dst_width, dst_height,
802            0, 0, dst_width, dst_height, filtering);
803  return 0;
804}
805
806#ifdef __cplusplus
807}  // extern "C"
808}  // namespace libyuv
809#endif
810