/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/planar_functions.h"

#include <string.h>  // for memset()

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Copy a plane of data
LIBYUV_API
void CopyPlane(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) {
    CopyRow = CopyRow_NEON;
  }
#endif
#if defined(HAS_COPYROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    CopyRow = CopyRow_X86;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    CopyRow = CopyRow_SSE2;
  }
#endif

  // Copy plane
  for (int y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
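
// Illustrative usage sketch (not part of the libyuv API): copy one 8 bit
// plane into another buffer with CopyPlane. The buffer size and strides are
// assumptions chosen for the example; real frames often have padded strides.
static void ExampleCopyPlane() {
  enum { kWidth = 64, kHeight = 48 };
  uint8 src[kWidth * kHeight];
  uint8 dst[kWidth * kHeight];
  memset(src, 128, sizeof(src));  // Fill the source with mid gray.
  // Stride equals width here because the rows are tightly packed.
  CopyPlane(src, kWidth, dst, kWidth, kWidth, kHeight);
}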

// Convert I420 to I400.
LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y,
               uint8*, int,  // src_u
               uint8*, int,  // src_v
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}
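
// Illustrative sketch (assumed buffers, not part of the libyuv API): passing
// a negative height asks for a vertically flipped copy. As in the block
// above, the source pointer is moved to the last row and the stride negated,
// so rows are read bottom to top.
static void ExampleI420ToI400Flipped() {
  enum { kWidth = 32, kHeight = 16 };
  uint8 src_y[kWidth * kHeight];
  uint8 dst_y[kWidth * kHeight];
  memset(src_y, 0, sizeof(src_y));
  src_y[0] = 255;  // Mark the top-left pixel of the source.
  // Negative height flips the image; the marked pixel lands on the last row.
  I420ToI400(src_y, kWidth, NULL, 0, NULL, 0, dst_y, kWidth, kWidth, -kHeight);
}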

// Mirror a plane of data
void MirrorPlane(const uint8* src_y, int src_stride_y,
                 uint8* dst_y, int dst_stride_y,
                 int width, int height) {
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_SSE2;
#if defined(HAS_MIRRORROW_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) &&
        IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
      MirrorRow = MirrorRow_SSSE3;
    }
#endif
  }
#endif

  // Mirror plane
  for (int y = 0; y < height; ++y) {
    MirrorRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Convert YUY2 to I422.
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
                      uint8* dst_u, uint8* dst_v, int pix);
  void (*YUY2ToYRow)(const uint8* src_yuy2,
                     uint8* dst_y, int pix);
  YUY2ToYRow = YUY2ToYRow_C;
  YUY2ToUV422Row = YUY2ToUV422Row_C;
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    if (width > 16) {
      YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
      YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    }
    if (IS_ALIGNED(width, 16)) {
      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          YUY2ToYRow = YUY2ToYRow_SSE2;
        }
      }
    }
  }
#elif defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (width > 8) {
      YUY2ToYRow = YUY2ToYRow_Any_NEON;
      if (width > 16) {
        YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
      }
    }
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
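
// Illustrative scalar sketch of the YUY2 layout handled above (example code,
// not the row functions from row.h): YUY2 packs two pixels into four bytes
// as Y0 U Y1 V, so one pair of chroma samples is shared by two luma samples.
static void ExampleUnpackYUY2Pair(const uint8 yuy2[4],
                                  uint8* y0, uint8* y1, uint8* u, uint8* v) {
  *y0 = yuy2[0];  // Luma of the left pixel.
  *u = yuy2[1];   // U chroma shared by both pixels.
  *y1 = yuy2[2];  // Luma of the right pixel.
  *v = yuy2[3];   // V chroma shared by both pixels.
}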

// Convert UYVY to I422.
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  void (*UYVYToUV422Row)(const uint8* src_uyvy,
                      uint8* dst_u, uint8* dst_v, int pix);
  void (*UYVYToYRow)(const uint8* src_uyvy,
                     uint8* dst_y, int pix);
  UYVYToYRow = UYVYToYRow_C;
  UYVYToUV422Row = UYVYToUV422Row_C;
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    if (width > 16) {
      UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
      UYVYToYRow = UYVYToYRow_Any_SSE2;
    }
    if (IS_ALIGNED(width, 16)) {
      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
        UYVYToUV422Row = UYVYToUV422Row_SSE2;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          UYVYToYRow = UYVYToYRow_SSE2;
        }
      }
    }
  }
#elif defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (width > 8) {
      UYVYToYRow = UYVYToYRow_Any_NEON;
      if (width > 16) {
        UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
      }
    }
    if (IS_ALIGNED(width, 16)) {
      UYVYToYRow = UYVYToYRow_NEON;
      UYVYToUV422Row = UYVYToUV422Row_NEON;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}

// Mirror I420 with optional flipping
LIBYUV_API
int I420Mirror(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    int halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
  return 0;
}

// ARGB mirror.
LIBYUV_API
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
      ARGBMirrorRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
  }
#endif

  // Mirror plane
  for (int y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Get a blender that is optimized for the CPU, alignment and pixel count.
// As there are 6 blenders to choose from, the caller should try to use
// the same blend function for all pixels if possible.
LIBYUV_API
ARGBBlendRow GetARGBBlend() {
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = ARGBBlendRow_C;
#if defined(HAS_ARGBBLENDROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBBlendRow = ARGBBlendRow_SSSE3;
    return ARGBBlendRow;
  }
#endif
#if defined(HAS_ARGBBLENDROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBBlendRow = ARGBBlendRow_SSE2;
  }
#endif
  return ARGBBlendRow;
}
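
// Illustrative usage sketch (assumed buffers, not part of the libyuv API):
// fetch the blend row function once and reuse it for every row, as the
// comment above recommends. Tightly packed rows are assumed, so the stride
// is width * 4 bytes.
static void ExampleBlendRows(const uint8* src0, const uint8* src1,
                             uint8* dst, int width, int height) {
  ARGBBlendRow blend_row = GetARGBBlend();
  for (int y = 0; y < height; ++y) {
    blend_row(src0 + y * width * 4, src1 + y * width * 4,
              dst + y * width * 4, width);
  }
}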

// Alpha Blend 2 ARGB images and store to destination.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
              const uint8* src_argb1, int src_stride_argb1,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = GetARGBBlend();

  for (int y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert ARGB to I400.
LIBYUV_API
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  if (!src_argb || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    ARGBToYRow = ARGBToYRow_SSSE3;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToYRow(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
  }
  return 0;
}

// ARGB little endian (bgra in memory) to I422.
// Same as I420 except the UV planes are full height.
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (width > 16) {
      ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
      ARGBToYRow = ARGBToYRow_Any_SSSE3;
    }
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
        ARGBToUVRow = ARGBToUVRow_SSSE3;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          ARGBToYRow = ARGBToYRow_SSSE3;
        }
      }
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
    ARGBToYRow(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}

// Convert I422 to BGRA.
LIBYUV_API
int I422ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  if (!src_y || !src_u || !src_v ||
      !dst_bgra ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }
  void (*I422ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToBGRARow_C;
#if defined(HAS_I422TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToBGRARow = I422ToBGRARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToBGRARow = I422ToBGRARow_NEON;
    }
  }
#elif defined(HAS_I422TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
        I422ToBGRARow = I422ToBGRARow_SSSE3;
      }
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToABGRRow_C;
#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
#elif defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
        I422ToABGRRow = I422ToABGRRow_SSSE3;
      }
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert I422 to RGBA.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_rgba, int dst_stride_rgba,
               int width, int height) {
  if (!src_y || !src_u || !src_v ||
      !dst_rgba ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
  void (*I422ToRGBARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToRGBARow_C;
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#elif defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
        I422ToRGBARow = I422ToRGBARow_SSSE3;
      }
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert ARGB to RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
               uint8* dst_rgba, int dst_stride_rgba,
               int width, int height) {
  if (!src_argb || !dst_rgba ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToRGBARow)(const uint8* src_argb, uint8* dst_rgba, int pix) =
      ARGBToRGBARow_C;
#if defined(HAS_ARGBTORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
    ARGBToRGBARow = ARGBToRGBARow_SSSE3;
  }
#endif
#if defined(HAS_ARGBTORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBToRGBARow = ARGBToRGBARow_NEON;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToRGBARow(src_argb, dst_rgba, width);
    src_argb += src_stride_argb;
    dst_rgba += dst_stride_rgba;
  }
  return 0;
}

// Convert ARGB To RGB24.
LIBYUV_API
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
                uint8* dst_rgb24, int dst_stride_rgb24,
                int width, int height) {
  if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToRGB24Row_C;
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    if (width * 3 <= kMaxStride) {
      ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
    }
    if (IS_ALIGNED(width, 16) &&
        IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
      ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (width * 3 <= kMaxStride) {
      ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 8)) {
      ARGBToRGB24Row = ARGBToRGB24Row_NEON;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToRGB24Row(src_argb, dst_rgb24, width);
    src_argb += src_stride_argb;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}

// Convert ARGB To RAW.
LIBYUV_API
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
              uint8* dst_raw, int dst_stride_raw,
              int width, int height) {
  if (!src_argb || !dst_raw || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToRAWRow_C;
#if defined(HAS_ARGBTORAWROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    if (width * 3 <= kMaxStride) {
      ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
    }
    if (IS_ALIGNED(width, 16) &&
        IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
      ARGBToRAWRow = ARGBToRAWRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTORAWROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (width * 3 <= kMaxStride) {
      ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
    }
    if (IS_ALIGNED(width, 8)) {
      ARGBToRAWRow = ARGBToRAWRow_NEON;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToRAWRow(src_argb, dst_raw, width);
    src_argb += src_stride_argb;
    dst_raw += dst_stride_raw;
  }
  return 0;
}

// Convert ARGB To RGB565.
LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    if (width * 2 <= kMaxStride) {
      ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
    }
    if (IS_ALIGNED(width, 4)) {
      ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToRGB565Row(src_argb, dst_rgb565, width);
    src_argb += src_stride_argb;
    dst_rgb565 += dst_stride_rgb565;
  }
  return 0;
}

// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
                   uint8* dst_argb1555, int dst_stride_argb1555,
                   int width, int height) {
  if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToARGB1555Row_C;
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    if (width * 2 <= kMaxStride) {
      ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
    }
    if (IS_ALIGNED(width, 4)) {
      ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToARGB1555Row(src_argb, dst_argb1555, width);
    src_argb += src_stride_argb;
    dst_argb1555 += dst_stride_argb1555;
  }
  return 0;
}

// Convert ARGB To ARGB4444.
LIBYUV_API
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
                   uint8* dst_argb4444, int dst_stride_argb4444,
                   int width, int height) {
  if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToARGB4444Row_C;
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    if (width * 2 <= kMaxStride) {
      ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
    }
    if (IS_ALIGNED(width, 4)) {
      ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBToARGB4444Row(src_argb, dst_argb4444, width);
    src_argb += src_stride_argb;
    dst_argb4444 += dst_stride_argb4444;
  }
  return 0;
}

// Convert NV12 to RGB565.
// TODO(fbarchard): (Re) Optimize for Neon.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_uv, int src_stride_uv,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
  void (*NV12ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
                        int width) = NV12ToARGBRow_C;
#if defined(HAS_NV12TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width * 4 <= kMaxStride) {
    NV12ToARGBRow = NV12ToARGBRow_SSSE3;
  }
#endif
#if defined(HAS_NV12TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width * 4 <= kMaxStride) {
    NV12ToARGBRow = NV12ToARGBRow_NEON;
  }
#endif

  SIMD_ALIGNED(uint8 row[kMaxStride]);
  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
  }
#endif

  for (int y = 0; y < height; ++y) {
    NV12ToARGBRow(src_y, src_uv, row, width);
    ARGBToRGB565Row(row, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}

// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_vu, int src_stride_vu,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
  void (*NV21ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
                        int width) = NV21ToARGBRow_C;
#if defined(HAS_NV21TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width * 4 <= kMaxStride) {
    NV21ToARGBRow = NV21ToARGBRow_SSSE3;
  }
#endif

  SIMD_ALIGNED(uint8 row[kMaxStride]);
  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
      ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
  }
#endif

  for (int y = 0; y < height; ++y) {
    NV21ToARGBRow(src_y, src_vu, row, width);
    ARGBToRGB565Row(row, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
      src_vu += src_stride_vu;
    }
  }
  return 0;
}

LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
              int width, int height,
              uint32 value) {
  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C;
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    SetRow = SetRow8_NEON;
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    SetRow = SetRow8_X86;
  }
#endif

  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
  // Set plane
  for (int y = 0; y < height; ++y) {
    SetRow(dst_y, v32, width);
    dst_y += dst_stride_y;
  }
}

// Draw a rectangle into I420
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
             uint8* dst_u, int dst_stride_u,
             uint8* dst_v, int dst_stride_v,
             int x, int y,
             int width, int height,
             int value_y, int value_u, int value_v) {
  if (!dst_y || !dst_u || !dst_v ||
      width <= 0 || height <= 0 ||
      x < 0 || y < 0 ||
      value_y < 0 || value_y > 255 ||
      value_u < 0 || value_u > 255 ||
      value_v < 0 || value_v > 255) {
    return -1;
  }
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  uint8* start_y = dst_y + y * dst_stride_y + x;
  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);

  SetPlane(start_y, dst_stride_y, width, height, value_y);
  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
  return 0;
}
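
// Illustrative usage sketch (assumed buffers, not part of the libyuv API):
// fill a 16x16 square at (8, 8) of a 64x64 I420 frame with 50% gray
// (Y = 128, U = 128, V = 128). The frame size and strides are example values.
static void ExampleI420Rect() {
  enum { kWidth = 64, kHeight = 64 };
  uint8 y_plane[kWidth * kHeight];
  uint8 u_plane[(kWidth / 2) * (kHeight / 2)];
  uint8 v_plane[(kWidth / 2) * (kHeight / 2)];
  memset(y_plane, 0, sizeof(y_plane));
  memset(u_plane, 128, sizeof(u_plane));
  memset(v_plane, 128, sizeof(v_plane));
  I420Rect(y_plane, kWidth,
           u_plane, kWidth / 2,
           v_plane, kWidth / 2,
           8, 8, 16, 16,
           128, 128, 128);
}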

// Draw a rectangle into ARGB
LIBYUV_API
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
             int dst_x, int dst_y,
             int width, int height,
             uint32 value) {
  if (!dst_argb ||
      width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    SetRows32_NEON(dst, value, width, dst_stride_argb, height);
    return 0;
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    SetRows32_X86(dst, value, width, dst_stride_argb, height);
    return 0;
  }
#endif
  SetRows32_C(dst, value, width, dst_stride_argb, height);
  return 0;
}

// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha

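// Illustrative scalar sketch of the two formulas above (example code, not
// the library's row implementation). Alpha is in the 0..255 range and the
// divide by 255 is written out directly for clarity.
static uint8 ExampleBlendUnattenuated(uint8 fg, uint8 bg, uint8 alpha) {
  // p = a * f + (1 - a) * b
  return (uint8)((fg * alpha + bg * (255 - alpha)) / 255);
}
static uint8 ExampleBlendPreattenuated(uint8 fg_premultiplied, uint8 bg,
                                       uint8 alpha) {
  // p = f + (1 - a) * b, where f is already multiplied by its alpha.
  return (uint8)(fg_premultiplied + bg * (255 - alpha) / 255);
}
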
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
                  uint8* dst_argb, int dst_stride_argb,
                  int width, int height) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
                           int width) = ARGBAttenuateRow_C;
#if defined(HAS_ARGBATTENUATE_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert preattenuated ARGB to unattenuated ARGB.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert ARGB to Grayed ARGB.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
                      int width) = ARGBGrayRow_C;
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Make a rectangle of ARGB gray scale.
LIBYUV_API
int ARGBGray(uint8* dst_argb, int dst_stride_argb,
             int dst_x, int dst_y,
             int width, int height) {
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
                      int width) = ARGBGrayRow_C;
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  for (int y = 0; y < height; ++y) {
    ARGBGrayRow(dst, dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// Make a rectangle of ARGB Sepia tone.
LIBYUV_API
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
              int dst_x, int dst_y, int width, int height) {
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
#if defined(HAS_ARGBSEPIAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
  }
#endif
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  for (int y = 0; y < height; ++y) {
    ARGBSepiaRow(dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// Apply a 4x3 color matrix to each ARGB pixel.
LIBYUV_API
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
                    const int8* matrix_argb,
                    int dst_x, int dst_y, int width, int height) {
  if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb,
                             int width) = ARGBColorMatrixRow_C;
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  for (int y = 0; y < height; ++y) {
    ARGBColorMatrixRow(dst, matrix_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
LIBYUV_API
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
                   const uint8* table_argb,
                   int dst_x, int dst_y, int width, int height) {
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
                            int width) = ARGBColorTableRow_C;
#if defined(HAS_ARGBCOLORTABLEROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBColorTableRow = ARGBColorTableRow_X86;
  }
#endif
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  for (int y = 0; y < height; ++y) {
    ARGBColorTableRow(dst, table_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// ARGBQuantize is used to posterize art.
// e.g. rgb / qvalue * qvalue + qvalue / 2
// But the low levels implement it efficiently with 3 parameters, and could
// be used for other high level operations.
// The divide is replaced with a fixed point multiply by the reciprocal.
// Caveat - although SSE2 saturates, the C function does not and should be
// used with care if doing anything but quantization.
LIBYUV_API
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
                 int scale, int interval_size, int interval_offset,
                 int dst_x, int dst_y, int width, int height) {
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
      interval_size < 1 || interval_size > 255) {
    return -1;
  }
  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
                          int interval_offset, int width) = ARGBQuantizeRow_C;
#if defined(HAS_ARGBQUANTIZEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
  }
#endif
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  for (int y = 0; y < height; ++y) {
    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
    dst += dst_stride_argb;
  }
  return 0;
}
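
// Illustrative scalar sketch of the quantize math described above (example
// code, not the library's row implementation). A scale of 65536 /
// interval_size makes the fixed point multiply behave like the divide in
// rgb / interval_size * interval_size + interval_offset. As noted above,
// this sketch does not saturate.
static uint8 ExampleQuantizeChannel(uint8 color, int scale,
                                    int interval_size, int interval_offset) {
  // Multiply by the 16.16 fixed point reciprocal instead of dividing.
  int bucket = (color * scale) >> 16;
  return (uint8)(bucket * interval_size + interval_offset);
}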

// Computes a table of cumulative sums for an image, where each entry is the
// sum of all values above and to the left of it. Used by ARGBBlur.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
                             int32* dst_cumsum, int dst_stride32_cumsum,
                             int width, int height) {
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
    return -1;
  }
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
  }
#endif
  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
  int32* previous_cumsum = dst_cumsum;
  for (int y = 0; y < height; ++y) {
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
    previous_cumsum = dst_cumsum;
    dst_cumsum += dst_stride32_cumsum;
    src_argb += src_stride_argb;
  }
  return 0;
}
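
// Illustrative sketch of how a cumulative sum table is used (example code,
// not the library's row implementation): the sum over any box comes from
// four table lookups, which is what makes a constant time box blur possible.
// For simplicity this assumes one int32 per pixel rather than four.
static int32 ExampleBoxSum(const int32* cumsum, int stride,
                           int left, int top, int right, int bot) {
  return cumsum[bot * stride + right] - cumsum[bot * stride + left] -
         cumsum[top * stride + right] + cumsum[top * stride + left];
}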

// Blur ARGB image.
// The caller should allocate a CumulativeSum table of width * height * 16
// bytes, aligned to a 16 byte boundary. Its height can be radius * 2 + 2
// rows to save memory, as the buffer is treated as circular.
LIBYUV_API
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int32* dst_cumsum, int dst_stride32_cumsum,
             int width, int height, int radius) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
      int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverage = CumulativeSumToAverage_SSE2;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
                           dst_cumsum, dst_stride32_cumsum,
                           width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  int32* cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  const int32* max_cumsum_bot_row =
      &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  const int32* cumsum_top_row = &dst_cumsum[0];

  for (int y = 0; y < height; ++y) {
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped.
    int boxwidth = radius * 4;
    int x;
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
                              boxwidth, area, &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped.
    int n = (width - 1) - radius - x + 1;
    CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
                           boxwidth, area, &dst_argb[x * 4], n);

    // Right clipped.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverage(cumsum_top_row + (x - radius - 1) * 4,
                             cumsum_bot_row + (x - radius - 1) * 4,
                             boxwidth, area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
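
// Illustrative usage sketch (assumed buffers, not part of the libyuv API):
// blur a 32x32 ARGB image with a radius of 4. The cumulative sum buffer uses
// the radius * 2 + 2 row trick described above and SIMD_ALIGNED to satisfy
// the 16 byte alignment requirement.
static void ExampleARGBBlur() {
  enum { kWidth = 32, kHeight = 32, kRadius = 4 };
  SIMD_ALIGNED(uint8 src[kWidth * kHeight * 4]);
  SIMD_ALIGNED(uint8 dst[kWidth * kHeight * 4]);
  SIMD_ALIGNED(int32 cumsum[kWidth * 4 * (kRadius * 2 + 2)]);
  memset(src, 255, sizeof(src));
  ARGBBlur(src, kWidth * 4, dst, kWidth * 4,
           cumsum, kWidth * 4,  // The cumsum stride is in int32s, 4 per pixel.
           kWidth, kHeight, kRadius);
}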

// Multiply ARGB image by a specified ARGB value.
LIBYUV_API
int ARGBShade(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height, uint32 value) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
                       int width, uint32 value) = ARGBShadeRow_C;
#if defined(HAS_ARGBSHADE_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBShadeRow = ARGBShadeRow_SSE2;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBShadeRow(src_argb, dst_argb, width, value);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Interpolate 2 ARGB images by specified amount (0 to 255).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
                    const uint8* src_argb1, int src_stride_argb1,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height, int interpolation) {
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  void (*ARGBInterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
                              ptrdiff_t src_stride, int dst_width,
                              int source_y_fraction) = ARGBInterpolateRow_C;
#if defined(HAS_ARGBINTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
      IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBInterpolateRow = ARGBInterpolateRow_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    ARGBInterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
                       width, interpolation);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
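
// Illustrative scalar sketch of the interpolation above (example code, not
// the library's row implementation): interpolation 0 returns src0, 255 is
// nearly src1, and 128 is an even blend of the two frames.
static uint8 ExampleInterpolateChannel(uint8 src0, uint8 src1,
                                       int interpolation) {
  return (uint8)(src0 + ((src1 - src0) * interpolation) / 256);
}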

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
