1/*
2 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/scale.h"
12
13#include <assert.h>
14#include <string.h>
15
16#include "libyuv/cpu_id.h"
17#include "libyuv/planar_functions.h"  // For CopyARGB
18#include "libyuv/row.h"
19#include "libyuv/scale_row.h"
20
21#ifdef __cplusplus
22namespace libyuv {
23extern "C" {
24#endif
25
// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30// CPU agnostic row functions
// Point samples a row down to half width: output i takes the second sample
// of source pair i (src_ptr[2 * i + 1]). src_stride is not used.
void ScaleRowDown2_C(const uint8* src_ptr,
                     ptrdiff_t src_stride,
                     uint8* dst,
                     int dst_width) {
  int i;
  (void)src_stride;
  // Two outputs per iteration.
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst[i] = src_ptr[2 * i + 1];
    dst[i + 1] = src_ptr[2 * i + 3];
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    dst[i] = src_ptr[2 * i + 1];
  }
}
47
// 16 bit variant of the half width point sampler: every output is the second
// sample of a source pair. src_stride is not used.
void ScaleRowDown2_16_C(const uint16* src_ptr,
                        ptrdiff_t src_stride,
                        uint16* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  // Emit outputs two at a time while at least two remain.
  while (remaining > 1) {
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
    remaining -= 2;
  }
  // Odd width: one trailing output.
  if (remaining & 1) {
    *dst = src_ptr[1];
  }
}
64
// Halves a row horizontally: each output is the rounded average of two
// adjacent source samples. src_stride is not used.
void ScaleRowDown2Linear_C(const uint8* src_ptr,
                           ptrdiff_t src_stride,
                           uint8* dst,
                           int dst_width) {
  int i;
  (void)src_stride;
  for (i = 0; i + 1 < dst_width; i += 2) {
    const uint8* pair = src_ptr + 2 * i;
    dst[i] = (pair[0] + pair[1] + 1) >> 1;
    dst[i + 1] = (pair[2] + pair[3] + 1) >> 1;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    const uint8* pair = src_ptr + 2 * i;
    dst[i] = (pair[0] + pair[1] + 1) >> 1;
  }
}
82
// 16 bit variant: each output is the rounded average of two adjacent source
// samples. src_stride is not used.
void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
                              ptrdiff_t src_stride,
                              uint16* dst,
                              int dst_width) {
  const uint16* in = src_ptr;
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    dst[0] = (in[0] + in[1] + 1) >> 1;
    dst[1] = (in[2] + in[3] + 1) >> 1;
    dst += 2;
    in += 4;
    remaining -= 2;
  }
  // Odd width: one trailing output.
  if (remaining & 1) {
    dst[0] = (in[0] + in[1] + 1) >> 1;
  }
}
100
// Halves a row with a 2x2 box filter: each output is the rounded average of
// two adjacent samples from the row at src_ptr and the two samples below
// them in the row at src_ptr + src_stride.
void ScaleRowDown2Box_C(const uint8* src_ptr,
                        ptrdiff_t src_stride,
                        uint8* dst,
                        int dst_width) {
  const uint8* top = src_ptr;
  const uint8* bottom = src_ptr + src_stride;
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    const int col = 2 * i;
    dst[i] = (top[col] + top[col + 1] + bottom[col] + bottom[col + 1] + 2) >> 2;
    dst[i + 1] = (top[col + 2] + top[col + 3] + bottom[col + 2] +
                  bottom[col + 3] + 2) >>
                 2;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    const int col = 2 * i;
    dst[i] = (top[col] + top[col + 1] + bottom[col] + bottom[col + 1] + 2) >> 2;
  }
}
119
// 2x2 box filter for a row whose last output has only one source column:
// the first dst_width - 1 outputs are rounded 2x2 averages, and the final
// output is the rounded average of a single sample from each of the two rows.
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* dst,
                            int dst_width) {
  const uint8* top = src_ptr;
  const uint8* bottom = src_ptr + src_stride;
  const int full = dst_width - 1;  // Outputs built from complete 2x2 boxes.
  int i;
  for (i = 0; i + 1 < full; i += 2) {
    const int col = 2 * i;
    dst[i] = (top[col] + top[col + 1] + bottom[col] + bottom[col + 1] + 2) >> 2;
    dst[i + 1] = (top[col + 2] + top[col + 3] + bottom[col + 2] +
                  bottom[col + 3] + 2) >>
                 2;
  }
  // Leftover complete box when the number of full boxes is odd.
  if (full & 1) {
    const int col = 2 * i;
    dst[i] = (top[col] + top[col + 1] + bottom[col] + bottom[col + 1] + 2) >> 2;
    ++i;
  }
  // Final output: one column wide, so average only vertically.
  dst[i] = (top[2 * i] + bottom[2 * i] + 1) >> 1;
}
143
// 16 bit 2x2 box filter: each output is the rounded average of two adjacent
// samples from the row at src_ptr and the two samples below them in the row
// src_stride elements further on.
void ScaleRowDown2Box_16_C(const uint16* src_ptr,
                           ptrdiff_t src_stride,
                           uint16* dst,
                           int dst_width) {
  const uint16* top = src_ptr;
  const uint16* bottom = src_ptr + src_stride;
  int remaining = dst_width;
  while (remaining > 1) {
    dst[0] = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
    dst[1] = (top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2;
    dst += 2;
    top += 4;
    bottom += 4;
    remaining -= 2;
  }
  // Odd width: one trailing output.
  if (remaining & 1) {
    dst[0] = (top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2;
  }
}
162
// Point samples a row down to quarter width: output i takes the third sample
// of each group of four (src_ptr[4 * i + 2]). src_stride is not used.
void ScaleRowDown4_C(const uint8* src_ptr,
                     ptrdiff_t src_stride,
                     uint8* dst,
                     int dst_width) {
  int i;
  (void)src_stride;
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst[i] = src_ptr[4 * i + 2];
    dst[i + 1] = src_ptr[4 * i + 6];
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    dst[i] = src_ptr[4 * i + 2];
  }
}
179
// 16 bit quarter width point sampler: each output is the third sample of a
// group of four source samples. src_stride is not used.
void ScaleRowDown4_16_C(const uint16* src_ptr,
                        ptrdiff_t src_stride,
                        uint16* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    *dst++ = src_ptr[2];
    *dst++ = src_ptr[6];
    src_ptr += 8;
    remaining -= 2;
  }
  // Odd width: one trailing output.
  if (remaining & 1) {
    *dst = src_ptr[2];
  }
}
196
// Rounded mean of the 4x4 block whose top-left sample is p[0]; consecutive
// rows are `stride` elements apart.
static __inline uint8 ScaleBox4x4Mean_8(const uint8* p, intptr_t stride) {
  int total = 8;  // Rounding bias: half of the 16 sample divisor.
  int row;
  int col;
  for (row = 0; row < 4; ++row) {
    for (col = 0; col < 4; ++col) {
      total += p[stride * row + col];
    }
  }
  return (uint8)(total >> 4);
}

// Scales a row down to quarter width with a 4x4 box filter: each output is
// the rounded average of 16 samples taken from four consecutive rows starting
// at src_ptr, rows being src_stride elements apart.
void ScaleRowDown4Box_C(const uint8* src_ptr,
                        ptrdiff_t src_stride,
                        uint8* dst,
                        int dst_width) {
  intptr_t stride = src_stride;
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst[i] = ScaleBox4x4Mean_8(src_ptr + 4 * i, stride);
    dst[i + 1] = ScaleBox4x4Mean_8(src_ptr + 4 * i + 4, stride);
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    dst[i] = ScaleBox4x4Mean_8(src_ptr + 4 * i, stride);
  }
}
234
// Rounded mean of the 4x4 block of 16 bit samples whose top-left sample is
// p[0]; consecutive rows are `stride` elements apart.
static __inline uint16 ScaleBox4x4Mean_16(const uint16* p, intptr_t stride) {
  int total = 8;  // Rounding bias: half of the 16 sample divisor.
  int row;
  int col;
  for (row = 0; row < 4; ++row) {
    for (col = 0; col < 4; ++col) {
      total += p[stride * row + col];
    }
  }
  return (uint16)(total >> 4);
}

// 16 bit variant of the quarter width 4x4 box filter: each output is the
// rounded average of 16 samples taken from four consecutive rows starting at
// src_ptr, rows being src_stride elements apart.
void ScaleRowDown4Box_16_C(const uint16* src_ptr,
                           ptrdiff_t src_stride,
                           uint16* dst,
                           int dst_width) {
  intptr_t stride = src_stride;
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst[i] = ScaleBox4x4Mean_16(src_ptr + 4 * i, stride);
    dst[i + 1] = ScaleBox4x4Mean_16(src_ptr + 4 * i + 4, stride);
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    dst[i] = ScaleBox4x4Mean_16(src_ptr + 4 * i, stride);
  }
}
272
// Point samples a row to 3/4 width: from every group of four source samples
// the samples at offsets 0, 1 and 3 are kept. dst_width must be a positive
// multiple of 3 (asserted). src_stride is not used.
void ScaleRowDown34_C(const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      uint8* dst,
                      int dst_width) {
  int written = 0;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  while (written < dst_width) {
    dst[written] = src_ptr[0];
    dst[written + 1] = src_ptr[1];
    dst[written + 2] = src_ptr[3];
    src_ptr += 4;
    written += 3;
  }
}
288
// 16 bit 3/4 width point sampler: from every group of four source samples the
// samples at offsets 0, 1 and 3 are kept. dst_width must be a positive
// multiple of 3 (asserted). src_stride is not used.
void ScaleRowDown34_16_C(const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         uint16* dst,
                         int dst_width) {
  int written = 0;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  while (written < dst_width) {
    dst[written] = src_ptr[0];
    dst[written + 1] = src_ptr[1];
    dst[written + 2] = src_ptr[3];
    src_ptr += 4;
    written += 3;
  }
}
304
// Filter rows 0 and 1 together, 3 : 1.
// Each group of four source columns becomes three outputs. Both rows are
// first filtered horizontally with weights 3:1, 1:1 and 1:3, then the two
// results are blended vertically with weight 3 on the row at src_ptr and 1 on
// the row at src_ptr + src_stride. dst_width must be a positive multiple of 3
// (asserted).
void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* d,
                            int dst_width) {
  const uint8* row0 = src_ptr;
  const uint8* row1 = src_ptr + src_stride;
  int n;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (n = 0; n < dst_width; n += 3) {
    uint8 top[3];
    uint8 bot[3];
    int k;
    // Horizontal pass on each row.
    top[0] = (row0[0] * 3 + row0[1] + 2) >> 2;
    top[1] = (row0[1] + row0[2] + 1) >> 1;
    top[2] = (row0[2] + row0[3] * 3 + 2) >> 2;
    bot[0] = (row1[0] * 3 + row1[1] + 2) >> 2;
    bot[1] = (row1[1] + row1[2] + 1) >> 1;
    bot[2] = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 3 : 1 blend.
    for (k = 0; k < 3; ++k) {
      d[n + k] = (top[k] * 3 + bot[k] + 2) >> 2;
    }
    row0 += 4;
    row1 += 4;
  }
}
329
// 16 bit variant of the 3/4 width filter that blends two rows 3 : 1.
// Each group of four source columns becomes three outputs: both rows are
// filtered horizontally with weights 3:1, 1:1 and 1:3, then blended
// vertically with weight 3 on the row at src_ptr and 1 on the row src_stride
// elements further on. dst_width must be a positive multiple of 3 (asserted).
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
                               ptrdiff_t src_stride,
                               uint16* d,
                               int dst_width) {
  const uint16* row0 = src_ptr;
  const uint16* row1 = src_ptr + src_stride;
  int n;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (n = 0; n < dst_width; n += 3) {
    uint16 top[3];
    uint16 bot[3];
    int k;
    // Horizontal pass on each row.
    top[0] = (row0[0] * 3 + row0[1] + 2) >> 2;
    top[1] = (row0[1] + row0[2] + 1) >> 1;
    top[2] = (row0[2] + row0[3] * 3 + 2) >> 2;
    bot[0] = (row1[0] * 3 + row1[1] + 2) >> 2;
    bot[1] = (row1[1] + row1[2] + 1) >> 1;
    bot[2] = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 3 : 1 blend.
    for (k = 0; k < 3; ++k) {
      d[n + k] = (top[k] * 3 + bot[k] + 2) >> 2;
    }
    row0 += 4;
    row1 += 4;
  }
}
353
// Filter rows 1 and 2 together, 1 : 1.
// Each group of four source columns becomes three outputs. Both rows are
// first filtered horizontally with weights 3:1, 1:1 and 1:3, then the two
// results are averaged (with rounding). dst_width must be a positive multiple
// of 3 (asserted).
void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* d,
                            int dst_width) {
  const uint8* row0 = src_ptr;
  const uint8* row1 = src_ptr + src_stride;
  int n;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (n = 0; n < dst_width; n += 3) {
    uint8 top[3];
    uint8 bot[3];
    int k;
    // Horizontal pass on each row.
    top[0] = (row0[0] * 3 + row0[1] + 2) >> 2;
    top[1] = (row0[1] + row0[2] + 1) >> 1;
    top[2] = (row0[2] + row0[3] * 3 + 2) >> 2;
    bot[0] = (row1[0] * 3 + row1[1] + 2) >> 2;
    bot[1] = (row1[1] + row1[2] + 1) >> 1;
    bot[2] = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 1 : 1 blend.
    for (k = 0; k < 3; ++k) {
      d[n + k] = (top[k] + bot[k] + 1) >> 1;
    }
    row0 += 4;
    row1 += 4;
  }
}
378
// 16 bit variant of the 3/4 width filter that blends two rows 1 : 1.
// Each group of four source columns becomes three outputs: both rows are
// filtered horizontally with weights 3:1, 1:1 and 1:3, then averaged with
// rounding. dst_width must be a positive multiple of 3 (asserted).
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
                               ptrdiff_t src_stride,
                               uint16* d,
                               int dst_width) {
  const uint16* row0 = src_ptr;
  const uint16* row1 = src_ptr + src_stride;
  int n;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (n = 0; n < dst_width; n += 3) {
    uint16 top[3];
    uint16 bot[3];
    int k;
    // Horizontal pass on each row.
    top[0] = (row0[0] * 3 + row0[1] + 2) >> 2;
    top[1] = (row0[1] + row0[2] + 1) >> 1;
    top[2] = (row0[2] + row0[3] * 3 + 2) >> 2;
    bot[0] = (row1[0] * 3 + row1[1] + 2) >> 2;
    bot[1] = (row1[1] + row1[2] + 1) >> 1;
    bot[2] = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 1 : 1 blend.
    for (k = 0; k < 3; ++k) {
      d[n + k] = (top[k] + bot[k] + 1) >> 1;
    }
    row0 += 4;
    row1 += 4;
  }
}
402
// Scales a single row of pixels using point sampling.
// x is the starting source position and dx the per-output step, both in
// 16.16 fixed point; each output copies src_ptr[position >> 16].
void ScaleCols_C(uint8* dst_ptr,
                 const uint8* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  int pos = x;
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst_ptr[i] = src_ptr[pos >> 16];
    pos += dx;
    dst_ptr[i + 1] = src_ptr[pos >> 16];
    pos += dx;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    dst_ptr[i] = src_ptr[pos >> 16];
  }
}
421
// 16 bit point sampling column scaler. x is the starting source position and
// dx the per-output step, both in 16.16 fixed point; each output copies
// src_ptr[position >> 16].
void ScaleCols_16_C(uint16* dst_ptr,
                    const uint16* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining = dst_width;
  while (remaining > 1) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Odd width: one trailing output.
  if (remaining & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}
439
// Scales a single row of pixels up by 2x using point sampling: every source
// sample is written to two consecutive outputs. x and dx are not used.
void ScaleColsUp2_C(uint8* dst_ptr,
                    const uint8* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int i;
  (void)x;
  (void)dx;
  for (i = 0; i + 1 < dst_width; i += 2) {
    const uint8 sample = src_ptr[i >> 1];
    dst_ptr[i] = sample;
    dst_ptr[i + 1] = sample;
  }
  // Odd width: the last source sample is written once.
  if (dst_width & 1) {
    dst_ptr[i] = src_ptr[i >> 1];
  }
}
458
// 16 bit 2x point sampling upscaler: every source sample is written to two
// consecutive outputs. x and dx are not used.
void ScaleColsUp2_16_C(uint16* dst_ptr,
                       const uint16* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining = dst_width;
  (void)x;
  (void)dx;
  while (remaining > 1) {
    const uint16 sample = *src_ptr++;
    dst_ptr[0] = sample;
    dst_ptr[1] = sample;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Odd width: the last source sample is written once.
  if (remaining & 1) {
    *dst_ptr = *src_ptr;
  }
}
476
// Linear blend of two neighbouring samples a and b by the 16 bit fraction f.
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
// ARM uses the whole 16 bit fraction with rounding.
#define BLENDER(a, b, f) \
  (uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif

// Scales a row horizontally with linear filtering. x is the starting source
// position and dx the per-output step, both in 16.16 fixed point. Each output
// blends src_ptr[x >> 16] with the sample after it, so one sample beyond the
// integer position is always read.
void ScaleFilterCols_C(uint8* dst_ptr,
                       const uint8* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    int idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, x & 0xffff);
    x += dx;
    idx = x >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    dst_ptr[i + 1] = BLENDER(left, right, x & 0xffff);
    x += dx;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    int idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, x & 0xffff);
  }
}

// Same filter as ScaleFilterCols_C, but the running source position is kept
// in 64 bits (seeded from the 32 bit x32).
void ScaleFilterCols64_C(uint8* dst_ptr,
                         const uint8* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  int64 pos = (int64)(x32);
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    int64 idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    idx = pos >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    dst_ptr[i + 1] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    int64 idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, pos & 0xffff);
  }
}
#undef BLENDER
542
// 16 bit blend of two neighbouring samples: a + f * (b - a), with f a 16 bit
// fraction and rounding. Same formula as the 8 bit arm blender, but the
// product is formed in 64 bits because f * (b - a) can approach +/-2^32 for
// full range 16 bit samples, which would overflow a 32 bit int (undefined
// behaviour). The result is cast to uint16.
#define BLENDER(a, b, f) \
  (uint16)((int)(a) +    \
           (int)((((int64)(f) * ((int64)(b) - (int64)(a))) + 0x8000) >> 16))

// Scales a row of 16 bit samples horizontally with linear filtering. x is the
// starting source position and dx the per-output step, both in 16.16 fixed
// point. Each output blends src_ptr[x >> 16] with the sample after it, so one
// sample beyond the integer position is always read.
void ScaleFilterCols_16_C(uint16* dst_ptr,
                          const uint16* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    int idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, x & 0xffff);
    x += dx;
    idx = x >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    dst_ptr[i + 1] = BLENDER(left, right, x & 0xffff);
    x += dx;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    int idx = x >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, x & 0xffff);
  }
}

// Same filter as ScaleFilterCols_16_C, but the running source position is
// kept in 64 bits (seeded from the 32 bit x32).
void ScaleFilterCols64_16_C(uint16* dst_ptr,
                            const uint16* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  int64 pos = (int64)(x32);
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    int64 idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    idx = pos >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    dst_ptr[i + 1] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
  }
  // Odd width: one trailing output.
  if (dst_width & 1) {
    int64 idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    dst_ptr[i] = BLENDER(left, right, pos & 0xffff);
  }
}
#undef BLENDER
602
// Point samples a row to 3/8 width: from every group of eight source samples
// the samples at offsets 0, 3 and 6 are kept. dst_width must be a multiple of
// 3 (asserted). src_stride is not used.
void ScaleRowDown38_C(const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      uint8* dst,
                      int dst_width) {
  int written = 0;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  while (written < dst_width) {
    dst[written] = src_ptr[0];
    dst[written + 1] = src_ptr[3];
    dst[written + 2] = src_ptr[6];
    src_ptr += 8;
    written += 3;
  }
}
618
// 16 bit 3/8 width point sampler: from every group of eight source samples
// the samples at offsets 0, 3 and 6 are kept. dst_width must be a multiple of
// 3 (asserted). src_stride is not used.
void ScaleRowDown38_16_C(const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         uint16* dst,
                         int dst_width) {
  int written = 0;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  while (written < dst_width) {
    dst[written] = src_ptr[0];
    dst[written + 1] = src_ptr[3];
    dst[written + 2] = src_ptr[6];
    src_ptr += 8;
    written += 3;
  }
}
634
// 8x3 -> 3x1
// Box filters three rows down to 3/8 width. Every group of eight columns is
// split into boxes of 3, 3 and 2 columns; each box is summed over three rows
// and divided by its sample count (9, 9, 6) using a 16.16 fixed point
// reciprocal multiply. dst_width must be a positive multiple of 3 (asserted).
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* dst_ptr,
                            int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    int left = 0;   // Columns 0..2.
    int mid = 0;    // Columns 3..5.
    int right = 0;  // Columns 6..7.
    int r;
    for (r = 0; r < 3; ++r) {
      const uint8* row = src_ptr + stride * r;
      left += row[0] + row[1] + row[2];
      mid += row[3] + row[4] + row[5];
      right += row[6] + row[7];
    }
    dst_ptr[0] = (left * (65536 / 9)) >> 16;
    dst_ptr[1] = (mid * (65536 / 9)) >> 16;
    dst_ptr[2] = (right * (65536 / 6)) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
665
// 16 bit 8x3 -> 3x1 box filter. Every group of eight columns is split into
// boxes of 3, 3 and 2 columns; each box is summed over three rows and divided
// by its sample count (9, 9, 6) using a 16.16 fixed point reciprocal
// multiply. dst_width must be a positive multiple of 3 (asserted).
//
// The sums and products are kept in uint32: with full range 16 bit samples
// sum * (65536 / 9) exceeds INT_MAX, so doing it in signed int would be
// undefined behaviour, while the largest possible products
// (9 * 65535 * 7281 and 6 * 65535 * 10922) still fit in 32 unsigned bits.
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
                               ptrdiff_t src_stride,
                               uint16* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    uint32 left = 0;   // Columns 0..2.
    uint32 mid = 0;    // Columns 3..5.
    uint32 right = 0;  // Columns 6..7.
    int r;
    for (r = 0; r < 3; ++r) {
      const uint16* row = src_ptr + stride * r;
      left += (uint32)row[0] + row[1] + row[2];
      mid += (uint32)row[3] + row[4] + row[5];
      right += (uint32)row[6] + row[7];
    }
    dst_ptr[0] = (uint16)((left * (65536u / 9u)) >> 16);
    dst_ptr[1] = (uint16)((mid * (65536u / 9u)) >> 16);
    dst_ptr[2] = (uint16)((right * (65536u / 6u)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
695
// 8x2 -> 3x1
// Box filters two rows down to 3/8 width. Every group of eight columns is
// split into boxes of 3, 3 and 2 columns; each box is summed over two rows
// and divided by its sample count (6, 6, 4) using a 16.16 fixed point
// reciprocal multiply. dst_width must be a positive multiple of 3 (asserted).
void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* dst_ptr,
                            int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    int left = 0;   // Columns 0..2.
    int mid = 0;    // Columns 3..5.
    int right = 0;  // Columns 6..7.
    int r;
    for (r = 0; r < 2; ++r) {
      const uint8* row = src_ptr + stride * r;
      left += row[0] + row[1] + row[2];
      mid += row[3] + row[4] + row[5];
      right += row[6] + row[7];
    }
    dst_ptr[0] = (left * (65536 / 6)) >> 16;
    dst_ptr[1] = (mid * (65536 / 6)) >> 16;
    dst_ptr[2] = (right * (65536 / 4)) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
721
// 16 bit 8x2 -> 3x1 box filter. Every group of eight columns is split into
// boxes of 3, 3 and 2 columns; each box is summed over two rows and divided
// by its sample count (6, 6, 4) using a 16.16 fixed point reciprocal
// multiply. dst_width must be a positive multiple of 3 (asserted).
//
// The sums and products are kept in uint32: with full range 16 bit samples
// sum * (65536 / 6) and sum * (65536 / 4) exceed INT_MAX, so doing them in
// signed int would be undefined behaviour, while the largest possible
// products (6 * 65535 * 10922 and 4 * 65535 * 16384) still fit in 32 unsigned
// bits.
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
                               ptrdiff_t src_stride,
                               uint16* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    uint32 left = 0;   // Columns 0..2.
    uint32 mid = 0;    // Columns 3..5.
    uint32 right = 0;  // Columns 6..7.
    int r;
    for (r = 0; r < 2; ++r) {
      const uint16* row = src_ptr + stride * r;
      left += (uint32)row[0] + row[1] + row[2];
      mid += (uint32)row[3] + row[4] + row[5];
      right += (uint32)row[6] + row[7];
    }
    dst_ptr[0] = (uint16)((left * (65536u / 6u)) >> 16);
    dst_ptr[1] = (uint16)((mid * (65536u / 6u)) >> 16);
    dst_ptr[2] = (uint16)((right * (65536u / 4u)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
746
// Accumulates a row of 8 bit samples into a row of 16 bit sums:
// dst_ptr[i] += src_ptr[i] for src_width samples. src_width must be positive
// (asserted).
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  int i;
  assert(src_width > 0);
  for (i = 0; i + 1 < src_width; i += 2) {
    dst_ptr[i] += src_ptr[i];
    dst_ptr[i + 1] += src_ptr[i + 1];
  }
  // Odd width: one trailing sample.
  if (src_width & 1) {
    dst_ptr[i] += src_ptr[i];
  }
}
760
// Accumulates a row of 16 bit samples into a row of 32 bit sums:
// dst_ptr[i] += src_ptr[i] for src_width samples. src_width must be positive
// (asserted).
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
  int remaining = src_width;
  assert(src_width > 0);
  while (remaining > 1) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Odd width: one trailing sample.
  if (remaining & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}
774
// Point samples a row of 4 byte pixels to half width: output pixel i is the
// second pixel of source pair i. Pixels are moved as whole 32 bit words.
// src_stride is not used.
void ScaleARGBRowDown2_C(const uint8* src_argb,
                         ptrdiff_t src_stride,
                         uint8* dst_argb,
                         int dst_width) {
  const uint32* src_px = (const uint32*)(src_argb);
  uint32* dst_px = (uint32*)(dst_argb);
  int i;
  (void)src_stride;
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst_px[i] = src_px[2 * i + 1];
    dst_px[i + 1] = src_px[2 * i + 3];
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    dst_px[i] = src_px[2 * i + 1];
  }
}
793
// Halves a row of 4 byte pixels horizontally: each output channel is the
// rounded average of the same channel in two adjacent source pixels.
// src_stride is not used.
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               uint8* dst_argb,
                               int dst_width) {
  int px;
  (void)src_stride;
  for (px = 0; px < dst_width; ++px) {
    const uint8* in = src_argb + 8 * px;
    uint8* out = dst_argb + 4 * px;
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      out[ch] = (in[ch] + in[ch + 4] + 1) >> 1;
    }
  }
}
809
// Halves a row of 4 byte pixels with a 2x2 box filter: each output channel is
// the rounded average of that channel in two adjacent pixels of the row at
// src_argb and of the row src_stride bytes further on.
void ScaleARGBRowDown2Box_C(const uint8* src_argb,
                            ptrdiff_t src_stride,
                            uint8* dst_argb,
                            int dst_width) {
  int px;
  for (px = 0; px < dst_width; ++px) {
    const uint8* top = src_argb + 8 * px;
    const uint8* bottom = top + src_stride;
    uint8* out = dst_argb + 4 * px;
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      out[ch] = (top[ch] + top[ch + 4] + bottom[ch] + bottom[ch + 4] + 2) >> 2;
    }
  }
}
832
// Point samples a row of 4 byte pixels, taking every src_stepx-th source
// pixel starting with the first. Pixels are moved as whole 32 bit words.
// src_stride is not used.
void ScaleARGBRowDownEven_C(const uint8* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8* dst_argb,
                            int dst_width) {
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  // All declarations come before the first statement, as everywhere else in
  // this file, so the function also builds with C89 compilers.
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[0];
    dst[1] = src[src_stepx];
    src += src_stepx * 2;
    dst += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}
852
// Scales a row of 4 byte pixels down with a 2x2 box filter, advancing
// src_stepx pixels in the source per output pixel. Each output channel is the
// rounded average of that channel in two adjacent pixels of the row at
// src_argb and of the row src_stride bytes further on.
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8* dst_argb,
                               int dst_width) {
  int px;
  for (px = 0; px < dst_width; ++px) {
    const uint8* top = src_argb;
    const uint8* bottom = src_argb + src_stride;
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] =
          (top[ch] + top[ch + 4] + bottom[ch] + bottom[ch + 4] + 2) >> 2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
876
// Scales a single row of pixels using point sampling.
// Pixels are 4 bytes and are copied as whole 32 bit words. x is the starting
// source position and dx the per-output step, both in 16.16 fixed point.
void ScaleARGBCols_C(uint8* dst_argb,
                     const uint8* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32* src_px = (const uint32*)(src_argb);
  uint32* dst_px = (uint32*)(dst_argb);
  int pos = x;
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    dst_px[i] = src_px[pos >> 16];
    pos += dx;
    dst_px[i + 1] = src_px[pos >> 16];
    pos += dx;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    dst_px[i] = src_px[pos >> 16];
  }
}
897
// Point sampling scaler for 4 byte pixels that keeps the running source
// position in 64 bits (seeded from the 32 bit x32). The position and dx are
// 16.16 fixed point; pixels are copied as whole 32 bit words.
void ScaleARGBCols64_C(uint8* dst_argb,
                       const uint8* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  int64 pos = (int64)(x32);
  const uint32* src_px = (const uint32*)(src_argb);
  uint32* dst_px = (uint32*)(dst_argb);
  int remaining = dst_width;
  while (remaining > 1) {
    dst_px[0] = src_px[pos >> 16];
    pos += dx;
    dst_px[1] = src_px[pos >> 16];
    pos += dx;
    dst_px += 2;
    remaining -= 2;
  }
  // Odd width: one trailing pixel.
  if (remaining & 1) {
    dst_px[0] = src_px[pos >> 16];
  }
}
918
// Scales a single row of pixels up by 2x using point sampling: every 4 byte
// source pixel is written to two consecutive output pixels. x and dx are not
// used.
void ScaleARGBColsUp2_C(uint8* dst_argb,
                        const uint8* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32* src_px = (const uint32*)(src_argb);
  uint32* dst_px = (uint32*)(dst_argb);
  int i;
  (void)x;
  (void)dx;
  for (i = 0; i + 1 < dst_width; i += 2) {
    const uint32 pixel = src_px[i >> 1];
    dst_px[i] = pixel;
    dst_px[i + 1] = pixel;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    dst_px[i] = src_px[i >> 1];
  }
}
939
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends one channel with a 7 bit fraction f, BLENDERC applies it to
// the 8 bit channel at bit offset s of two packed pixels, and BLENDER
// combines the four channels of a 32 bit pixel. Every macro argument and
// every expansion is parenthesized so the macros stay correct whatever
// expression they are given or embedded in.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  (uint32)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f)                                                  \
  (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
   BLENDERC(a, b, f, 0))

// Scales a row of 4 byte pixels horizontally with linear filtering. x is the
// starting source position and dx the per-output step, both in 16.16 fixed
// point; bits 9..15 of the position form the 7 bit blend fraction. Each
// output blends the pixel at x >> 16 with the pixel after it, so one pixel
// beyond the integer position is always read.
void ScaleARGBFilterCols_C(uint8* dst_argb,
                           const uint8* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    int idx = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32 left = src[idx];
    uint32 right = src[idx + 1];
    dst[i] = BLENDER(left, right, frac);
    x += dx;
    idx = x >> 16;
    frac = (x >> 9) & 0x7f;
    left = src[idx];
    right = src[idx + 1];
    dst[i + 1] = BLENDER(left, right, frac);
    x += dx;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    int idx = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32 left = src[idx];
    uint32 right = src[idx + 1];
    dst[i] = BLENDER(left, right, frac);
  }
}

// Same filter as ScaleARGBFilterCols_C, but the running source position is
// kept in 64 bits (seeded from the 32 bit x32).
void ScaleARGBFilterCols64_C(uint8* dst_argb,
                             const uint8* src_argb,
                             int dst_width,
                             int x32,
                             int dx) {
  int64 pos = (int64)(x32);
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int i;
  for (i = 0; i + 1 < dst_width; i += 2) {
    int64 idx = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint32 left = src[idx];
    uint32 right = src[idx + 1];
    dst[i] = BLENDER(left, right, frac);
    pos += dx;
    idx = pos >> 16;
    frac = (pos >> 9) & 0x7f;
    left = src[idx];
    right = src[idx + 1];
    dst[i + 1] = BLENDER(left, right, frac);
    pos += dx;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    int64 idx = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint32 left = src[idx];
    uint32 right = src[idx + 1];
    dst[i] = BLENDER(left, right, frac);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER
1016
1017// Scale plane vertically with bilinear interpolation.
1018void ScalePlaneVertical(int src_height,
1019                        int dst_width,
1020                        int dst_height,
1021                        int src_stride,
1022                        int dst_stride,
1023                        const uint8* src_argb,
1024                        uint8* dst_argb,
1025                        int x,
1026                        int y,
1027                        int dy,
1028                        int bpp,
1029                        enum FilterMode filtering) {
1030  // TODO(fbarchard): Allow higher bpp.
1031  int dst_width_bytes = dst_width * bpp;
1032  void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
1033                         ptrdiff_t src_stride, int dst_width,
1034                         int source_y_fraction) = InterpolateRow_C;
1035  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1036  int j;
1037  assert(bpp >= 1 && bpp <= 4);
1038  assert(src_height != 0);
1039  assert(dst_width > 0);
1040  assert(dst_height > 0);
1041  src_argb += (x >> 16) * bpp;
1042#if defined(HAS_INTERPOLATEROW_SSSE3)
1043  if (TestCpuFlag(kCpuHasSSSE3)) {
1044    InterpolateRow = InterpolateRow_Any_SSSE3;
1045    if (IS_ALIGNED(dst_width_bytes, 16)) {
1046      InterpolateRow = InterpolateRow_SSSE3;
1047    }
1048  }
1049#endif
1050#if defined(HAS_INTERPOLATEROW_AVX2)
1051  if (TestCpuFlag(kCpuHasAVX2)) {
1052    InterpolateRow = InterpolateRow_Any_AVX2;
1053    if (IS_ALIGNED(dst_width_bytes, 32)) {
1054      InterpolateRow = InterpolateRow_AVX2;
1055    }
1056  }
1057#endif
1058#if defined(HAS_INTERPOLATEROW_NEON)
1059  if (TestCpuFlag(kCpuHasNEON)) {
1060    InterpolateRow = InterpolateRow_Any_NEON;
1061    if (IS_ALIGNED(dst_width_bytes, 16)) {
1062      InterpolateRow = InterpolateRow_NEON;
1063    }
1064  }
1065#endif
1066#if defined(HAS_INTERPOLATEROW_DSPR2)
1067  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
1068      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
1069      IS_ALIGNED(dst_stride, 4)) {
1070    InterpolateRow = InterpolateRow_Any_DSPR2;
1071    if (IS_ALIGNED(dst_width_bytes, 4)) {
1072      InterpolateRow = InterpolateRow_DSPR2;
1073    }
1074  }
1075#endif
1076#if defined(HAS_INTERPOLATEROW_MSA)
1077  if (TestCpuFlag(kCpuHasMSA)) {
1078    InterpolateRow = InterpolateRow_Any_MSA;
1079    if (IS_ALIGNED(dst_width_bytes, 32)) {
1080      InterpolateRow = InterpolateRow_MSA;
1081    }
1082  }
1083#endif
1084  for (j = 0; j < dst_height; ++j) {
1085    int yi;
1086    int yf;
1087    if (y > max_y) {
1088      y = max_y;
1089    }
1090    yi = y >> 16;
1091    yf = filtering ? ((y >> 8) & 255) : 0;
1092    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1093                   dst_width_bytes, yf);
1094    dst_argb += dst_stride;
1095    y += dy;
1096  }
1097}
1098void ScalePlaneVertical_16(int src_height,
1099                           int dst_width,
1100                           int dst_height,
1101                           int src_stride,
1102                           int dst_stride,
1103                           const uint16* src_argb,
1104                           uint16* dst_argb,
1105                           int x,
1106                           int y,
1107                           int dy,
1108                           int wpp,
1109                           enum FilterMode filtering) {
1110  // TODO(fbarchard): Allow higher wpp.
1111  int dst_width_words = dst_width * wpp;
1112  void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb,
1113                         ptrdiff_t src_stride, int dst_width,
1114                         int source_y_fraction) = InterpolateRow_16_C;
1115  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1116  int j;
1117  assert(wpp >= 1 && wpp <= 2);
1118  assert(src_height != 0);
1119  assert(dst_width > 0);
1120  assert(dst_height > 0);
1121  src_argb += (x >> 16) * wpp;
1122#if defined(HAS_INTERPOLATEROW_16_SSE2)
1123  if (TestCpuFlag(kCpuHasSSE2)) {
1124    InterpolateRow = InterpolateRow_Any_16_SSE2;
1125    if (IS_ALIGNED(dst_width_bytes, 16)) {
1126      InterpolateRow = InterpolateRow_16_SSE2;
1127    }
1128  }
1129#endif
1130#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1131  if (TestCpuFlag(kCpuHasSSSE3)) {
1132    InterpolateRow = InterpolateRow_Any_16_SSSE3;
1133    if (IS_ALIGNED(dst_width_bytes, 16)) {
1134      InterpolateRow = InterpolateRow_16_SSSE3;
1135    }
1136  }
1137#endif
1138#if defined(HAS_INTERPOLATEROW_16_AVX2)
1139  if (TestCpuFlag(kCpuHasAVX2)) {
1140    InterpolateRow = InterpolateRow_Any_16_AVX2;
1141    if (IS_ALIGNED(dst_width_bytes, 32)) {
1142      InterpolateRow = InterpolateRow_16_AVX2;
1143    }
1144  }
1145#endif
1146#if defined(HAS_INTERPOLATEROW_16_NEON)
1147  if (TestCpuFlag(kCpuHasNEON)) {
1148    InterpolateRow = InterpolateRow_Any_16_NEON;
1149    if (IS_ALIGNED(dst_width_bytes, 16)) {
1150      InterpolateRow = InterpolateRow_16_NEON;
1151    }
1152  }
1153#endif
1154#if defined(HAS_INTERPOLATEROW_16_DSPR2)
1155  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
1156      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
1157      IS_ALIGNED(dst_stride, 4)) {
1158    InterpolateRow = InterpolateRow_Any_16_DSPR2;
1159    if (IS_ALIGNED(dst_width_bytes, 4)) {
1160      InterpolateRow = InterpolateRow_16_DSPR2;
1161    }
1162  }
1163#endif
1164  for (j = 0; j < dst_height; ++j) {
1165    int yi;
1166    int yf;
1167    if (y > max_y) {
1168      y = max_y;
1169    }
1170    yi = y >> 16;
1171    yf = filtering ? ((y >> 8) & 255) : 0;
1172    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1173                   dst_width_words, yf);
1174    dst_argb += dst_stride;
1175    y += dy;
1176  }
1177}
1178
1179// Simplify the filtering based on scale factors.
1180enum FilterMode ScaleFilterReduce(int src_width,
1181                                  int src_height,
1182                                  int dst_width,
1183                                  int dst_height,
1184                                  enum FilterMode filtering) {
1185  if (src_width < 0) {
1186    src_width = -src_width;
1187  }
1188  if (src_height < 0) {
1189    src_height = -src_height;
1190  }
1191  if (filtering == kFilterBox) {
1192    // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1193    if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1194      filtering = kFilterBilinear;
1195    }
1196  }
1197  if (filtering == kFilterBilinear) {
1198    if (src_height == 1) {
1199      filtering = kFilterLinear;
1200    }
1201    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1202    if (dst_height == src_height || dst_height * 3 == src_height) {
1203      filtering = kFilterLinear;
1204    }
1205    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1206    // avoid reading 2 pixels horizontally that causes memory exception.
1207    if (src_width == 1) {
1208      filtering = kFilterNone;
1209    }
1210  }
1211  if (filtering == kFilterLinear) {
1212    if (src_width == 1) {
1213      filtering = kFilterNone;
1214    }
1215    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1216    if (dst_width == src_width || dst_width * 3 == src_width) {
1217      filtering = kFilterNone;
1218    }
1219  }
1220  return filtering;
1221}
1222
1223// Divide num by div and return as 16.16 fixed point result.
1224int FixedDiv_C(int num, int div) {
1225  return (int)(((int64)(num) << 16) / div);
1226}
1227
1228// Divide num by div and return as 16.16 fixed point result.
1229int FixedDiv1_C(int num, int div) {
1230  return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1));
1231}
1232
// Start position for a step of dx: half the step magnitude plus offset s,
// carrying the sign of dx. Arguments are evaluated more than once; pass only
// side-effect free expressions.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -(((-(dx)) >> 1) + (s)) : (((dx) >> 1) + (s)))
1234
1235// Compute slope values for stepping.
1236void ScaleSlope(int src_width,
1237                int src_height,
1238                int dst_width,
1239                int dst_height,
1240                enum FilterMode filtering,
1241                int* x,
1242                int* y,
1243                int* dx,
1244                int* dy) {
1245  assert(x != NULL);
1246  assert(y != NULL);
1247  assert(dx != NULL);
1248  assert(dy != NULL);
1249  assert(src_width != 0);
1250  assert(src_height != 0);
1251  assert(dst_width > 0);
1252  assert(dst_height > 0);
1253  // Check for 1 pixel and avoid FixedDiv overflow.
1254  if (dst_width == 1 && src_width >= 32768) {
1255    dst_width = src_width;
1256  }
1257  if (dst_height == 1 && src_height >= 32768) {
1258    dst_height = src_height;
1259  }
1260  if (filtering == kFilterBox) {
1261    // Scale step for point sampling duplicates all pixels equally.
1262    *dx = FixedDiv(Abs(src_width), dst_width);
1263    *dy = FixedDiv(src_height, dst_height);
1264    *x = 0;
1265    *y = 0;
1266  } else if (filtering == kFilterBilinear) {
1267    // Scale step for bilinear sampling renders last pixel once for upsample.
1268    if (dst_width <= Abs(src_width)) {
1269      *dx = FixedDiv(Abs(src_width), dst_width);
1270      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1271    } else if (dst_width > 1) {
1272      *dx = FixedDiv1(Abs(src_width), dst_width);
1273      *x = 0;
1274    }
1275    if (dst_height <= src_height) {
1276      *dy = FixedDiv(src_height, dst_height);
1277      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
1278    } else if (dst_height > 1) {
1279      *dy = FixedDiv1(src_height, dst_height);
1280      *y = 0;
1281    }
1282  } else if (filtering == kFilterLinear) {
1283    // Scale step for bilinear sampling renders last pixel once for upsample.
1284    if (dst_width <= Abs(src_width)) {
1285      *dx = FixedDiv(Abs(src_width), dst_width);
1286      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1287    } else if (dst_width > 1) {
1288      *dx = FixedDiv1(Abs(src_width), dst_width);
1289      *x = 0;
1290    }
1291    *dy = FixedDiv(src_height, dst_height);
1292    *y = *dy >> 1;
1293  } else {
1294    // Scale step for point sampling duplicates all pixels equally.
1295    *dx = FixedDiv(Abs(src_width), dst_width);
1296    *dy = FixedDiv(src_height, dst_height);
1297    *x = CENTERSTART(*dx, 0);
1298    *y = CENTERSTART(*dy, 0);
1299  }
1300  // Negative src_width means horizontally mirror.
1301  if (src_width < 0) {
1302    *x += (dst_width - 1) * *dx;
1303    *dx = -*dx;
1304    // src_width = -src_width;   // Caller must do this.
1305  }
1306}
1307#undef CENTERSTART
1308
1309#ifdef __cplusplus
1310}  // extern "C"
1311}  // namespace libyuv
1312#endif
1313