1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/planar_functions.h"
12
13#include <string.h>  // for memset()
14
15#include "libyuv/cpu_id.h"
16#ifdef HAVE_JPEG
17#include "libyuv/mjpeg_decoder.h"
18#endif
19#include "libyuv/row.h"
20
21#ifdef __cplusplus
22namespace libyuv {
23extern "C" {
24#endif
25
26// Copy a plane of data
27LIBYUV_API
28void CopyPlane(const uint8* src_y, int src_stride_y,
29               uint8* dst_y, int dst_stride_y,
30               int width, int height) {
31  int y;
32  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
33  // Coalesce rows.
34  if (src_stride_y == width &&
35      dst_stride_y == width) {
36    width *= height;
37    height = 1;
38    src_stride_y = dst_stride_y = 0;
39  }
40  // Nothing to do.
41  if (src_y == dst_y && src_stride_y == dst_stride_y) {
42    return;
43  }
44#if defined(HAS_COPYROW_X86)
45  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
46    CopyRow = CopyRow_X86;
47  }
48#endif
49#if defined(HAS_COPYROW_SSE2)
50  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
51      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
52      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
53    CopyRow = CopyRow_SSE2;
54  }
55#endif
56#if defined(HAS_COPYROW_ERMS)
57  if (TestCpuFlag(kCpuHasERMS)) {
58    CopyRow = CopyRow_ERMS;
59  }
60#endif
61#if defined(HAS_COPYROW_NEON)
62  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
63    CopyRow = CopyRow_NEON;
64  }
65#endif
66#if defined(HAS_COPYROW_MIPS)
67  if (TestCpuFlag(kCpuHasMIPS)) {
68    CopyRow = CopyRow_MIPS;
69  }
70#endif
71
72  // Copy plane
73  for (y = 0; y < height; ++y) {
74    CopyRow(src_y, dst_y, width);
75    src_y += src_stride_y;
76    dst_y += dst_stride_y;
77  }
78}
79
80LIBYUV_API
81void CopyPlane_16(const uint16* src_y, int src_stride_y,
82                  uint16* dst_y, int dst_stride_y,
83                  int width, int height) {
84  int y;
85  void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
86  // Coalesce rows.
87  if (src_stride_y == width &&
88      dst_stride_y == width) {
89    width *= height;
90    height = 1;
91    src_stride_y = dst_stride_y = 0;
92  }
93#if defined(HAS_COPYROW_16_X86)
94  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
95    CopyRow = CopyRow_16_X86;
96  }
97#endif
98#if defined(HAS_COPYROW_16_SSE2)
99  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
100      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
101      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
102    CopyRow = CopyRow_16_SSE2;
103  }
104#endif
105#if defined(HAS_COPYROW_16_ERMS)
106  if (TestCpuFlag(kCpuHasERMS)) {
107    CopyRow = CopyRow_16_ERMS;
108  }
109#endif
110#if defined(HAS_COPYROW_16_NEON)
111  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
112    CopyRow = CopyRow_16_NEON;
113  }
114#endif
115#if defined(HAS_COPYROW_16_MIPS)
116  if (TestCpuFlag(kCpuHasMIPS)) {
117    CopyRow = CopyRow_16_MIPS;
118  }
119#endif
120
121  // Copy plane
122  for (y = 0; y < height; ++y) {
123    CopyRow(src_y, dst_y, width);
124    src_y += src_stride_y;
125    dst_y += dst_stride_y;
126  }
127}
128
129// Copy I422.
130LIBYUV_API
131int I422Copy(const uint8* src_y, int src_stride_y,
132             const uint8* src_u, int src_stride_u,
133             const uint8* src_v, int src_stride_v,
134             uint8* dst_y, int dst_stride_y,
135             uint8* dst_u, int dst_stride_u,
136             uint8* dst_v, int dst_stride_v,
137             int width, int height) {
138  int halfwidth = (width + 1) >> 1;
139  if (!src_y || !src_u || !src_v ||
140      !dst_y || !dst_u || !dst_v ||
141      width <= 0 || height == 0) {
142    return -1;
143  }
144  // Negative height means invert the image.
145  if (height < 0) {
146    height = -height;
147    src_y = src_y + (height - 1) * src_stride_y;
148    src_u = src_u + (height - 1) * src_stride_u;
149    src_v = src_v + (height - 1) * src_stride_v;
150    src_stride_y = -src_stride_y;
151    src_stride_u = -src_stride_u;
152    src_stride_v = -src_stride_v;
153  }
154  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
155  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
156  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
157  return 0;
158}
159
160// Copy I444.
161LIBYUV_API
162int I444Copy(const uint8* src_y, int src_stride_y,
163             const uint8* src_u, int src_stride_u,
164             const uint8* src_v, int src_stride_v,
165             uint8* dst_y, int dst_stride_y,
166             uint8* dst_u, int dst_stride_u,
167             uint8* dst_v, int dst_stride_v,
168             int width, int height) {
169  if (!src_y || !src_u || !src_v ||
170      !dst_y || !dst_u || !dst_v ||
171      width <= 0 || height == 0) {
172    return -1;
173  }
174  // Negative height means invert the image.
175  if (height < 0) {
176    height = -height;
177    src_y = src_y + (height - 1) * src_stride_y;
178    src_u = src_u + (height - 1) * src_stride_u;
179    src_v = src_v + (height - 1) * src_stride_v;
180    src_stride_y = -src_stride_y;
181    src_stride_u = -src_stride_u;
182    src_stride_v = -src_stride_v;
183  }
184
185  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
186  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
187  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
188  return 0;
189}
190
191// Copy I400.
192LIBYUV_API
193int I400ToI400(const uint8* src_y, int src_stride_y,
194               uint8* dst_y, int dst_stride_y,
195               int width, int height) {
196  if (!src_y || !dst_y || width <= 0 || height == 0) {
197    return -1;
198  }
199  // Negative height means invert the image.
200  if (height < 0) {
201    height = -height;
202    src_y = src_y + (height - 1) * src_stride_y;
203    src_stride_y = -src_stride_y;
204  }
205  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
206  return 0;
207}
208
209// Convert I420 to I400.
210LIBYUV_API
211int I420ToI400(const uint8* src_y, int src_stride_y,
212               const uint8* src_u, int src_stride_u,
213               const uint8* src_v, int src_stride_v,
214               uint8* dst_y, int dst_stride_y,
215               int width, int height) {
216  if (!src_y || !dst_y || width <= 0 || height == 0) {
217    return -1;
218  }
219  // Negative height means invert the image.
220  if (height < 0) {
221    height = -height;
222    src_y = src_y + (height - 1) * src_stride_y;
223    src_stride_y = -src_stride_y;
224  }
225  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
226  return 0;
227}
228
229// Mirror a plane of data.
230void MirrorPlane(const uint8* src_y, int src_stride_y,
231                 uint8* dst_y, int dst_stride_y,
232                 int width, int height) {
233  int y;
234  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
235  // Negative height means invert the image.
236  if (height < 0) {
237    height = -height;
238    src_y = src_y + (height - 1) * src_stride_y;
239    src_stride_y = -src_stride_y;
240  }
241#if defined(HAS_MIRRORROW_NEON)
242  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
243    MirrorRow = MirrorRow_NEON;
244  }
245#endif
246#if defined(HAS_MIRRORROW_SSE2)
247  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
248    MirrorRow = MirrorRow_SSE2;
249  }
250#endif
251#if defined(HAS_MIRRORROW_SSSE3)
252  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
253      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
254      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
255    MirrorRow = MirrorRow_SSSE3;
256  }
257#endif
258#if defined(HAS_MIRRORROW_AVX2)
259  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
260    MirrorRow = MirrorRow_AVX2;
261  }
262#endif
263
264  // Mirror plane
265  for (y = 0; y < height; ++y) {
266    MirrorRow(src_y, dst_y, width);
267    src_y += src_stride_y;
268    dst_y += dst_stride_y;
269  }
270}
271
272// Convert YUY2 to I422.
273LIBYUV_API
274int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
275               uint8* dst_y, int dst_stride_y,
276               uint8* dst_u, int dst_stride_u,
277               uint8* dst_v, int dst_stride_v,
278               int width, int height) {
279  int y;
280  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
281                         uint8* dst_u, uint8* dst_v, int pix) =
282      YUY2ToUV422Row_C;
283  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
284      YUY2ToYRow_C;
285  // Negative height means invert the image.
286  if (height < 0) {
287    height = -height;
288    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
289    src_stride_yuy2 = -src_stride_yuy2;
290  }
291  // Coalesce rows.
292  if (src_stride_yuy2 == width * 2 &&
293      dst_stride_y == width &&
294      dst_stride_u * 2 == width &&
295      dst_stride_v * 2 == width) {
296    width *= height;
297    height = 1;
298    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
299  }
300#if defined(HAS_YUY2TOYROW_SSE2)
301  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
302    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
303    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
304    if (IS_ALIGNED(width, 16)) {
305      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
306      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
307      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
308        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
309        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
310          YUY2ToYRow = YUY2ToYRow_SSE2;
311        }
312      }
313    }
314  }
315#endif
316#if defined(HAS_YUY2TOYROW_AVX2)
317  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
318    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
319    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
320    if (IS_ALIGNED(width, 32)) {
321      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
322      YUY2ToYRow = YUY2ToYRow_AVX2;
323    }
324  }
325#endif
326#if defined(HAS_YUY2TOYROW_NEON)
327  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
328    YUY2ToYRow = YUY2ToYRow_Any_NEON;
329    if (width >= 16) {
330      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
331    }
332    if (IS_ALIGNED(width, 16)) {
333      YUY2ToYRow = YUY2ToYRow_NEON;
334      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
335    }
336  }
337#endif
338
339  for (y = 0; y < height; ++y) {
340    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
341    YUY2ToYRow(src_yuy2, dst_y, width);
342    src_yuy2 += src_stride_yuy2;
343    dst_y += dst_stride_y;
344    dst_u += dst_stride_u;
345    dst_v += dst_stride_v;
346  }
347  return 0;
348}
349
350// Convert UYVY to I422.
351LIBYUV_API
352int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
353               uint8* dst_y, int dst_stride_y,
354               uint8* dst_u, int dst_stride_u,
355               uint8* dst_v, int dst_stride_v,
356               int width, int height) {
357  int y;
358  void (*UYVYToUV422Row)(const uint8* src_uyvy,
359                         uint8* dst_u, uint8* dst_v, int pix) =
360      UYVYToUV422Row_C;
361  void (*UYVYToYRow)(const uint8* src_uyvy,
362                     uint8* dst_y, int pix) = UYVYToYRow_C;
363  // Negative height means invert the image.
364  if (height < 0) {
365    height = -height;
366    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
367    src_stride_uyvy = -src_stride_uyvy;
368  }
369  // Coalesce rows.
370  if (src_stride_uyvy == width * 2 &&
371      dst_stride_y == width &&
372      dst_stride_u * 2 == width &&
373      dst_stride_v * 2 == width) {
374    width *= height;
375    height = 1;
376    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
377  }
378#if defined(HAS_UYVYTOYROW_SSE2)
379  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
380    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
381    UYVYToYRow = UYVYToYRow_Any_SSE2;
382    if (IS_ALIGNED(width, 16)) {
383      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
384      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
385      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
386        UYVYToUV422Row = UYVYToUV422Row_SSE2;
387        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
388          UYVYToYRow = UYVYToYRow_SSE2;
389        }
390      }
391    }
392  }
393#endif
394#if defined(HAS_UYVYTOYROW_AVX2)
395  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
396    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
397    UYVYToYRow = UYVYToYRow_Any_AVX2;
398    if (IS_ALIGNED(width, 32)) {
399      UYVYToUV422Row = UYVYToUV422Row_AVX2;
400      UYVYToYRow = UYVYToYRow_AVX2;
401    }
402  }
403#endif
404#if defined(HAS_UYVYTOYROW_NEON)
405  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
406    UYVYToYRow = UYVYToYRow_Any_NEON;
407    if (width >= 16) {
408      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
409    }
410    if (IS_ALIGNED(width, 16)) {
411      UYVYToYRow = UYVYToYRow_NEON;
412      UYVYToUV422Row = UYVYToUV422Row_NEON;
413    }
414  }
415#endif
416
417  for (y = 0; y < height; ++y) {
418    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
419    UYVYToYRow(src_uyvy, dst_y, width);
420    src_uyvy += src_stride_uyvy;
421    dst_y += dst_stride_y;
422    dst_u += dst_stride_u;
423    dst_v += dst_stride_v;
424  }
425  return 0;
426}
427
428// Mirror I400 with optional flipping
429LIBYUV_API
430int I400Mirror(const uint8* src_y, int src_stride_y,
431               uint8* dst_y, int dst_stride_y,
432               int width, int height) {
433  if (!src_y || !dst_y ||
434      width <= 0 || height == 0) {
435    return -1;
436  }
437  // Negative height means invert the image.
438  if (height < 0) {
439    height = -height;
440    src_y = src_y + (height - 1) * src_stride_y;
441    src_stride_y = -src_stride_y;
442  }
443
444  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
445  return 0;
446}
447
448// Mirror I420 with optional flipping
449LIBYUV_API
450int I420Mirror(const uint8* src_y, int src_stride_y,
451               const uint8* src_u, int src_stride_u,
452               const uint8* src_v, int src_stride_v,
453               uint8* dst_y, int dst_stride_y,
454               uint8* dst_u, int dst_stride_u,
455               uint8* dst_v, int dst_stride_v,
456               int width, int height) {
457  int halfwidth = (width + 1) >> 1;
458  int halfheight = (height + 1) >> 1;
459  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
460      width <= 0 || height == 0) {
461    return -1;
462  }
463  // Negative height means invert the image.
464  if (height < 0) {
465    height = -height;
466    halfheight = (height + 1) >> 1;
467    src_y = src_y + (height - 1) * src_stride_y;
468    src_u = src_u + (halfheight - 1) * src_stride_u;
469    src_v = src_v + (halfheight - 1) * src_stride_v;
470    src_stride_y = -src_stride_y;
471    src_stride_u = -src_stride_u;
472    src_stride_v = -src_stride_v;
473  }
474
475  if (dst_y) {
476    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
477  }
478  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
479  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
480  return 0;
481}
482
483// ARGB mirror.
484LIBYUV_API
485int ARGBMirror(const uint8* src_argb, int src_stride_argb,
486               uint8* dst_argb, int dst_stride_argb,
487               int width, int height) {
488  int y;
489  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
490      ARGBMirrorRow_C;
491  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
492    return -1;
493  }
494  // Negative height means invert the image.
495  if (height < 0) {
496    height = -height;
497    src_argb = src_argb + (height - 1) * src_stride_argb;
498    src_stride_argb = -src_stride_argb;
499  }
500
501#if defined(HAS_ARGBMIRRORROW_SSSE3)
502  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
503      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
504      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
505    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
506  }
507#endif
508#if defined(HAS_ARGBMIRRORROW_AVX2)
509  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
510    ARGBMirrorRow = ARGBMirrorRow_AVX2;
511  }
512#endif
513#if defined(HAS_ARGBMIRRORROW_NEON)
514  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
515    ARGBMirrorRow = ARGBMirrorRow_NEON;
516  }
517#endif
518
519  // Mirror plane
520  for (y = 0; y < height; ++y) {
521    ARGBMirrorRow(src_argb, dst_argb, width);
522    src_argb += src_stride_argb;
523    dst_argb += dst_stride_argb;
524  }
525  return 0;
526}
527
528// Get a blender that optimized for the CPU, alignment and pixel count.
529// As there are 6 blenders to choose from, the caller should try to use
530// the same blend function for all pixels if possible.
531LIBYUV_API
532ARGBBlendRow GetARGBBlend() {
533  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
534                       uint8* dst_argb, int width) = ARGBBlendRow_C;
535#if defined(HAS_ARGBBLENDROW_SSSE3)
536  if (TestCpuFlag(kCpuHasSSSE3)) {
537    ARGBBlendRow = ARGBBlendRow_SSSE3;
538    return ARGBBlendRow;
539  }
540#endif
541#if defined(HAS_ARGBBLENDROW_SSE2)
542  if (TestCpuFlag(kCpuHasSSE2)) {
543    ARGBBlendRow = ARGBBlendRow_SSE2;
544  }
545#endif
546#if defined(HAS_ARGBBLENDROW_NEON)
547  if (TestCpuFlag(kCpuHasNEON)) {
548    ARGBBlendRow = ARGBBlendRow_NEON;
549  }
550#endif
551  return ARGBBlendRow;
552}
553
554// Alpha Blend 2 ARGB images and store to destination.
555LIBYUV_API
556int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
557              const uint8* src_argb1, int src_stride_argb1,
558              uint8* dst_argb, int dst_stride_argb,
559              int width, int height) {
560  int y;
561  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
562                       uint8* dst_argb, int width) = GetARGBBlend();
563  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
564    return -1;
565  }
566  // Negative height means invert the image.
567  if (height < 0) {
568    height = -height;
569    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
570    dst_stride_argb = -dst_stride_argb;
571  }
572  // Coalesce rows.
573  if (src_stride_argb0 == width * 4 &&
574      src_stride_argb1 == width * 4 &&
575      dst_stride_argb == width * 4) {
576    width *= height;
577    height = 1;
578    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
579  }
580
581  for (y = 0; y < height; ++y) {
582    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
583    src_argb0 += src_stride_argb0;
584    src_argb1 += src_stride_argb1;
585    dst_argb += dst_stride_argb;
586  }
587  return 0;
588}
589
590// Multiply 2 ARGB images and store to destination.
591LIBYUV_API
592int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
593                 const uint8* src_argb1, int src_stride_argb1,
594                 uint8* dst_argb, int dst_stride_argb,
595                 int width, int height) {
596  int y;
597  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
598                          int width) = ARGBMultiplyRow_C;
599  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
600    return -1;
601  }
602  // Negative height means invert the image.
603  if (height < 0) {
604    height = -height;
605    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
606    dst_stride_argb = -dst_stride_argb;
607  }
608  // Coalesce rows.
609  if (src_stride_argb0 == width * 4 &&
610      src_stride_argb1 == width * 4 &&
611      dst_stride_argb == width * 4) {
612    width *= height;
613    height = 1;
614    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
615  }
616#if defined(HAS_ARGBMULTIPLYROW_SSE2)
617  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
618    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
619    if (IS_ALIGNED(width, 4)) {
620      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
621    }
622  }
623#endif
624#if defined(HAS_ARGBMULTIPLYROW_AVX2)
625  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
626    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
627    if (IS_ALIGNED(width, 8)) {
628      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
629    }
630  }
631#endif
632#if defined(HAS_ARGBMULTIPLYROW_NEON)
633  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
634    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
635    if (IS_ALIGNED(width, 8)) {
636      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
637    }
638  }
639#endif
640
641  // Multiply plane
642  for (y = 0; y < height; ++y) {
643    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
644    src_argb0 += src_stride_argb0;
645    src_argb1 += src_stride_argb1;
646    dst_argb += dst_stride_argb;
647  }
648  return 0;
649}
650
651// Add 2 ARGB images and store to destination.
652LIBYUV_API
653int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
654            const uint8* src_argb1, int src_stride_argb1,
655            uint8* dst_argb, int dst_stride_argb,
656            int width, int height) {
657  int y;
658  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
659                     int width) = ARGBAddRow_C;
660  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
661    return -1;
662  }
663  // Negative height means invert the image.
664  if (height < 0) {
665    height = -height;
666    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
667    dst_stride_argb = -dst_stride_argb;
668  }
669  // Coalesce rows.
670  if (src_stride_argb0 == width * 4 &&
671      src_stride_argb1 == width * 4 &&
672      dst_stride_argb == width * 4) {
673    width *= height;
674    height = 1;
675    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
676  }
677#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
678  if (TestCpuFlag(kCpuHasSSE2)) {
679    ARGBAddRow = ARGBAddRow_SSE2;
680  }
681#endif
682#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
683  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
684    ARGBAddRow = ARGBAddRow_Any_SSE2;
685    if (IS_ALIGNED(width, 4)) {
686      ARGBAddRow = ARGBAddRow_SSE2;
687    }
688  }
689#endif
690#if defined(HAS_ARGBADDROW_AVX2)
691  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
692    ARGBAddRow = ARGBAddRow_Any_AVX2;
693    if (IS_ALIGNED(width, 8)) {
694      ARGBAddRow = ARGBAddRow_AVX2;
695    }
696  }
697#endif
698#if defined(HAS_ARGBADDROW_NEON)
699  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
700    ARGBAddRow = ARGBAddRow_Any_NEON;
701    if (IS_ALIGNED(width, 8)) {
702      ARGBAddRow = ARGBAddRow_NEON;
703    }
704  }
705#endif
706
707  // Add plane
708  for (y = 0; y < height; ++y) {
709    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
710    src_argb0 += src_stride_argb0;
711    src_argb1 += src_stride_argb1;
712    dst_argb += dst_stride_argb;
713  }
714  return 0;
715}
716
717// Subtract 2 ARGB images and store to destination.
718LIBYUV_API
719int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
720                 const uint8* src_argb1, int src_stride_argb1,
721                 uint8* dst_argb, int dst_stride_argb,
722                 int width, int height) {
723  int y;
724  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
725                          int width) = ARGBSubtractRow_C;
726  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
727    return -1;
728  }
729  // Negative height means invert the image.
730  if (height < 0) {
731    height = -height;
732    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
733    dst_stride_argb = -dst_stride_argb;
734  }
735  // Coalesce rows.
736  if (src_stride_argb0 == width * 4 &&
737      src_stride_argb1 == width * 4 &&
738      dst_stride_argb == width * 4) {
739    width *= height;
740    height = 1;
741    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
742  }
743#if defined(HAS_ARGBSUBTRACTROW_SSE2)
744  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
745    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
746    if (IS_ALIGNED(width, 4)) {
747      ARGBSubtractRow = ARGBSubtractRow_SSE2;
748    }
749  }
750#endif
751#if defined(HAS_ARGBSUBTRACTROW_AVX2)
752  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
753    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
754    if (IS_ALIGNED(width, 8)) {
755      ARGBSubtractRow = ARGBSubtractRow_AVX2;
756    }
757  }
758#endif
759#if defined(HAS_ARGBSUBTRACTROW_NEON)
760  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
761    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
762    if (IS_ALIGNED(width, 8)) {
763      ARGBSubtractRow = ARGBSubtractRow_NEON;
764    }
765  }
766#endif
767
768  // Subtract plane
769  for (y = 0; y < height; ++y) {
770    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
771    src_argb0 += src_stride_argb0;
772    src_argb1 += src_stride_argb1;
773    dst_argb += dst_stride_argb;
774  }
775  return 0;
776}
777
778// Convert I422 to BGRA.
779LIBYUV_API
780int I422ToBGRA(const uint8* src_y, int src_stride_y,
781               const uint8* src_u, int src_stride_u,
782               const uint8* src_v, int src_stride_v,
783               uint8* dst_bgra, int dst_stride_bgra,
784               int width, int height) {
785  int y;
786  void (*I422ToBGRARow)(const uint8* y_buf,
787                        const uint8* u_buf,
788                        const uint8* v_buf,
789                        uint8* rgb_buf,
790                        int width) = I422ToBGRARow_C;
791  if (!src_y || !src_u || !src_v ||
792      !dst_bgra ||
793      width <= 0 || height == 0) {
794    return -1;
795  }
796  // Negative height means invert the image.
797  if (height < 0) {
798    height = -height;
799    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
800    dst_stride_bgra = -dst_stride_bgra;
801  }
802  // Coalesce rows.
803  if (src_stride_y == width &&
804      src_stride_u * 2 == width &&
805      src_stride_v * 2 == width &&
806      dst_stride_bgra == width * 4) {
807    width *= height;
808    height = 1;
809    src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
810  }
811#if defined(HAS_I422TOBGRAROW_NEON)
812  if (TestCpuFlag(kCpuHasNEON)) {
813    I422ToBGRARow = I422ToBGRARow_Any_NEON;
814    if (IS_ALIGNED(width, 16)) {
815      I422ToBGRARow = I422ToBGRARow_NEON;
816    }
817  }
818#elif defined(HAS_I422TOBGRAROW_SSSE3)
819  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
820    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
821    if (IS_ALIGNED(width, 8)) {
822      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
823      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
824        I422ToBGRARow = I422ToBGRARow_SSSE3;
825      }
826    }
827  }
828#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
829  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
830      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
831      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
832      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
833      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
834    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
835  }
836#endif
837
838  for (y = 0; y < height; ++y) {
839    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
840    dst_bgra += dst_stride_bgra;
841    src_y += src_stride_y;
842    src_u += src_stride_u;
843    src_v += src_stride_v;
844  }
845  return 0;
846}
847
848// Convert I422 to ABGR.
849LIBYUV_API
850int I422ToABGR(const uint8* src_y, int src_stride_y,
851               const uint8* src_u, int src_stride_u,
852               const uint8* src_v, int src_stride_v,
853               uint8* dst_abgr, int dst_stride_abgr,
854               int width, int height) {
855  int y;
856  void (*I422ToABGRRow)(const uint8* y_buf,
857                        const uint8* u_buf,
858                        const uint8* v_buf,
859                        uint8* rgb_buf,
860                        int width) = I422ToABGRRow_C;
861  if (!src_y || !src_u || !src_v ||
862      !dst_abgr ||
863      width <= 0 || height == 0) {
864    return -1;
865  }
866  // Negative height means invert the image.
867  if (height < 0) {
868    height = -height;
869    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
870    dst_stride_abgr = -dst_stride_abgr;
871  }
872  // Coalesce rows.
873  if (src_stride_y == width &&
874      src_stride_u * 2 == width &&
875      src_stride_v * 2 == width &&
876      dst_stride_abgr == width * 4) {
877    width *= height;
878    height = 1;
879    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
880  }
881#if defined(HAS_I422TOABGRROW_NEON)
882  if (TestCpuFlag(kCpuHasNEON)) {
883    I422ToABGRRow = I422ToABGRRow_Any_NEON;
884    if (IS_ALIGNED(width, 16)) {
885      I422ToABGRRow = I422ToABGRRow_NEON;
886    }
887  }
888#elif defined(HAS_I422TOABGRROW_SSSE3)
889  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
890    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
891    if (IS_ALIGNED(width, 8)) {
892      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
893      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
894        I422ToABGRRow = I422ToABGRRow_SSSE3;
895      }
896    }
897  }
898#endif
899
900  for (y = 0; y < height; ++y) {
901    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
902    dst_abgr += dst_stride_abgr;
903    src_y += src_stride_y;
904    src_u += src_stride_u;
905    src_v += src_stride_v;
906  }
907  return 0;
908}
909
910// Convert I422 to RGBA.
911LIBYUV_API
912int I422ToRGBA(const uint8* src_y, int src_stride_y,
913               const uint8* src_u, int src_stride_u,
914               const uint8* src_v, int src_stride_v,
915               uint8* dst_rgba, int dst_stride_rgba,
916               int width, int height) {
917  int y;
918  void (*I422ToRGBARow)(const uint8* y_buf,
919                        const uint8* u_buf,
920                        const uint8* v_buf,
921                        uint8* rgb_buf,
922                        int width) = I422ToRGBARow_C;
923  if (!src_y || !src_u || !src_v ||
924      !dst_rgba ||
925      width <= 0 || height == 0) {
926    return -1;
927  }
928  // Negative height means invert the image.
929  if (height < 0) {
930    height = -height;
931    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
932    dst_stride_rgba = -dst_stride_rgba;
933  }
934  // Coalesce rows.
935  if (src_stride_y == width &&
936      src_stride_u * 2 == width &&
937      src_stride_v * 2 == width &&
938      dst_stride_rgba == width * 4) {
939    width *= height;
940    height = 1;
941    src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
942  }
943#if defined(HAS_I422TORGBAROW_NEON)
944  if (TestCpuFlag(kCpuHasNEON)) {
945    I422ToRGBARow = I422ToRGBARow_Any_NEON;
946    if (IS_ALIGNED(width, 16)) {
947      I422ToRGBARow = I422ToRGBARow_NEON;
948    }
949  }
950#elif defined(HAS_I422TORGBAROW_SSSE3)
951  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
952    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
953    if (IS_ALIGNED(width, 8)) {
954      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
955      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
956        I422ToRGBARow = I422ToRGBARow_SSSE3;
957      }
958    }
959  }
960#endif
961
962  for (y = 0; y < height; ++y) {
963    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
964    dst_rgba += dst_stride_rgba;
965    src_y += src_stride_y;
966    src_u += src_stride_u;
967    src_v += src_stride_v;
968  }
969  return 0;
970}
971
972// Convert NV12 to RGB565.
973LIBYUV_API
974int NV12ToRGB565(const uint8* src_y, int src_stride_y,
975                 const uint8* src_uv, int src_stride_uv,
976                 uint8* dst_rgb565, int dst_stride_rgb565,
977                 int width, int height) {
978  int y;
979  void (*NV12ToRGB565Row)(const uint8* y_buf,
980                          const uint8* uv_buf,
981                          uint8* rgb_buf,
982                          int width) = NV12ToRGB565Row_C;
983  if (!src_y || !src_uv || !dst_rgb565 ||
984      width <= 0 || height == 0) {
985    return -1;
986  }
987  // Negative height means invert the image.
988  if (height < 0) {
989    height = -height;
990    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
991    dst_stride_rgb565 = -dst_stride_rgb565;
992  }
993#if defined(HAS_NV12TORGB565ROW_SSSE3)
994  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
995    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
996    if (IS_ALIGNED(width, 8)) {
997      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
998    }
999  }
1000#elif defined(HAS_NV12TORGB565ROW_NEON)
1001  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1002    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
1003    if (IS_ALIGNED(width, 8)) {
1004      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
1005    }
1006  }
1007#endif
1008
1009  for (y = 0; y < height; ++y) {
1010    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
1011    dst_rgb565 += dst_stride_rgb565;
1012    src_y += src_stride_y;
1013    if (y & 1) {
1014      src_uv += src_stride_uv;
1015    }
1016  }
1017  return 0;
1018}
1019
1020// Convert NV21 to RGB565.
1021LIBYUV_API
1022int NV21ToRGB565(const uint8* src_y, int src_stride_y,
1023                 const uint8* src_vu, int src_stride_vu,
1024                 uint8* dst_rgb565, int dst_stride_rgb565,
1025                 int width, int height) {
1026  int y;
1027  void (*NV21ToRGB565Row)(const uint8* y_buf,
1028                          const uint8* src_vu,
1029                          uint8* rgb_buf,
1030                          int width) = NV21ToRGB565Row_C;
1031  if (!src_y || !src_vu || !dst_rgb565 ||
1032      width <= 0 || height == 0) {
1033    return -1;
1034  }
1035  // Negative height means invert the image.
1036  if (height < 0) {
1037    height = -height;
1038    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
1039    dst_stride_rgb565 = -dst_stride_rgb565;
1040  }
1041#if defined(HAS_NV21TORGB565ROW_SSSE3)
1042  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1043    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
1044    if (IS_ALIGNED(width, 8)) {
1045      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
1046    }
1047  }
1048#elif defined(HAS_NV21TORGB565ROW_NEON)
1049  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1050    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
1051    if (IS_ALIGNED(width, 8)) {
1052      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
1053    }
1054  }
1055#endif
1056
1057  for (y = 0; y < height; ++y) {
1058    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
1059    dst_rgb565 += dst_stride_rgb565;
1060    src_y += src_stride_y;
1061    if (y & 1) {
1062      src_vu += src_stride_vu;
1063    }
1064  }
1065  return 0;
1066}
1067
1068LIBYUV_API
1069void SetPlane(uint8* dst_y, int dst_stride_y,
1070              int width, int height,
1071              uint32 value) {
1072  int y;
1073  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
1074  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
1075  // Coalesce rows.
1076  if (dst_stride_y == width) {
1077    width *= height;
1078    height = 1;
1079    dst_stride_y = 0;
1080  }
1081#if defined(HAS_SETROW_NEON)
1082  if (TestCpuFlag(kCpuHasNEON) &&
1083      IS_ALIGNED(width, 16) &&
1084      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
1085    SetRow = SetRow_NEON;
1086  }
1087#endif
1088#if defined(HAS_SETROW_X86)
1089  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
1090    SetRow = SetRow_X86;
1091  }
1092#endif
1093
1094  // Set plane
1095  for (y = 0; y < height; ++y) {
1096    SetRow(dst_y, v32, width);
1097    dst_y += dst_stride_y;
1098  }
1099}
1100
1101// Draw a rectangle into I420
1102LIBYUV_API
1103int I420Rect(uint8* dst_y, int dst_stride_y,
1104             uint8* dst_u, int dst_stride_u,
1105             uint8* dst_v, int dst_stride_v,
1106             int x, int y,
1107             int width, int height,
1108             int value_y, int value_u, int value_v) {
1109  int halfwidth = (width + 1) >> 1;
1110  int halfheight = (height + 1) >> 1;
1111  uint8* start_y = dst_y + y * dst_stride_y + x;
1112  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1113  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1114  if (!dst_y || !dst_u || !dst_v ||
1115      width <= 0 || height <= 0 ||
1116      x < 0 || y < 0 ||
1117      value_y < 0 || value_y > 255 ||
1118      value_u < 0 || value_u > 255 ||
1119      value_v < 0 || value_v > 255) {
1120    return -1;
1121  }
1122
1123  SetPlane(start_y, dst_stride_y, width, height, value_y);
1124  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1125  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1126  return 0;
1127}
1128
1129// Draw a rectangle into ARGB
1130LIBYUV_API
1131int ARGBRect(uint8* dst_argb, int dst_stride_argb,
1132             int dst_x, int dst_y,
1133             int width, int height,
1134             uint32 value) {
1135  if (!dst_argb ||
1136      width <= 0 || height <= 0 ||
1137      dst_x < 0 || dst_y < 0) {
1138    return -1;
1139  }
1140  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
1141  // Coalesce rows.
1142  if (dst_stride_argb == width * 4) {
1143    width *= height;
1144    height = 1;
1145    dst_stride_argb = 0;
1146  }
1147#if defined(HAS_SETROW_NEON)
1148  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
1149      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1150    ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
1151    return 0;
1152  }
1153#endif
1154#if defined(HAS_SETROW_X86)
1155  if (TestCpuFlag(kCpuHasX86)) {
1156    ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
1157    return 0;
1158  }
1159#endif
1160  ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
1161  return 0;
1162}
1163
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
1176
1177LIBYUV_API
1178int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
1179                  uint8* dst_argb, int dst_stride_argb,
1180                  int width, int height) {
1181  int y;
1182  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
1183                           int width) = ARGBAttenuateRow_C;
1184  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1185    return -1;
1186  }
1187  if (height < 0) {
1188    height = -height;
1189    src_argb = src_argb + (height - 1) * src_stride_argb;
1190    src_stride_argb = -src_stride_argb;
1191  }
1192  // Coalesce rows.
1193  if (src_stride_argb == width * 4 &&
1194      dst_stride_argb == width * 4) {
1195    width *= height;
1196    height = 1;
1197    src_stride_argb = dst_stride_argb = 0;
1198  }
1199#if defined(HAS_ARGBATTENUATEROW_SSE2)
1200  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
1201      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1202      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1203    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
1204    if (IS_ALIGNED(width, 4)) {
1205      ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
1206    }
1207  }
1208#endif
1209#if defined(HAS_ARGBATTENUATEROW_SSSE3)
1210  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1211    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
1212    if (IS_ALIGNED(width, 4)) {
1213      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
1214    }
1215  }
1216#endif
1217#if defined(HAS_ARGBATTENUATEROW_AVX2)
1218  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1219    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
1220    if (IS_ALIGNED(width, 8)) {
1221      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
1222    }
1223  }
1224#endif
1225#if defined(HAS_ARGBATTENUATEROW_NEON)
1226  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1227    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
1228    if (IS_ALIGNED(width, 8)) {
1229      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
1230    }
1231  }
1232#endif
1233
1234  for (y = 0; y < height; ++y) {
1235    ARGBAttenuateRow(src_argb, dst_argb, width);
1236    src_argb += src_stride_argb;
1237    dst_argb += dst_stride_argb;
1238  }
1239  return 0;
1240}
1241
1242// Convert preattentuated ARGB to unattenuated ARGB.
1243LIBYUV_API
1244int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
1245                    uint8* dst_argb, int dst_stride_argb,
1246                    int width, int height) {
1247  int y;
1248  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
1249                             int width) = ARGBUnattenuateRow_C;
1250  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1251    return -1;
1252  }
1253  if (height < 0) {
1254    height = -height;
1255    src_argb = src_argb + (height - 1) * src_stride_argb;
1256    src_stride_argb = -src_stride_argb;
1257  }
1258  // Coalesce rows.
1259  if (src_stride_argb == width * 4 &&
1260      dst_stride_argb == width * 4) {
1261    width *= height;
1262    height = 1;
1263    src_stride_argb = dst_stride_argb = 0;
1264  }
1265#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
1266  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1267    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
1268    if (IS_ALIGNED(width, 4)) {
1269      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
1270    }
1271  }
1272#endif
1273#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
1274  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1275    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
1276    if (IS_ALIGNED(width, 8)) {
1277      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
1278    }
1279  }
1280#endif
1281// TODO(fbarchard): Neon version.
1282
1283  for (y = 0; y < height; ++y) {
1284    ARGBUnattenuateRow(src_argb, dst_argb, width);
1285    src_argb += src_stride_argb;
1286    dst_argb += dst_stride_argb;
1287  }
1288  return 0;
1289}
1290
1291// Convert ARGB to Grayed ARGB.
1292LIBYUV_API
1293int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
1294               uint8* dst_argb, int dst_stride_argb,
1295               int width, int height) {
1296  int y;
1297  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1298                      int width) = ARGBGrayRow_C;
1299  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1300    return -1;
1301  }
1302  if (height < 0) {
1303    height = -height;
1304    src_argb = src_argb + (height - 1) * src_stride_argb;
1305    src_stride_argb = -src_stride_argb;
1306  }
1307  // Coalesce rows.
1308  if (src_stride_argb == width * 4 &&
1309      dst_stride_argb == width * 4) {
1310    width *= height;
1311    height = 1;
1312    src_stride_argb = dst_stride_argb = 0;
1313  }
1314#if defined(HAS_ARGBGRAYROW_SSSE3)
1315  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1316      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1317      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1318    ARGBGrayRow = ARGBGrayRow_SSSE3;
1319  }
1320#elif defined(HAS_ARGBGRAYROW_NEON)
1321  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1322    ARGBGrayRow = ARGBGrayRow_NEON;
1323  }
1324#endif
1325
1326  for (y = 0; y < height; ++y) {
1327    ARGBGrayRow(src_argb, dst_argb, width);
1328    src_argb += src_stride_argb;
1329    dst_argb += dst_stride_argb;
1330  }
1331  return 0;
1332}
1333
1334// Make a rectangle of ARGB gray scale.
1335LIBYUV_API
1336int ARGBGray(uint8* dst_argb, int dst_stride_argb,
1337             int dst_x, int dst_y,
1338             int width, int height) {
1339  int y;
1340  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1341                      int width) = ARGBGrayRow_C;
1342  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1343  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1344    return -1;
1345  }
1346  // Coalesce rows.
1347  if (dst_stride_argb == width * 4) {
1348    width *= height;
1349    height = 1;
1350    dst_stride_argb = 0;
1351  }
1352#if defined(HAS_ARGBGRAYROW_SSSE3)
1353  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1354      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1355    ARGBGrayRow = ARGBGrayRow_SSSE3;
1356  }
1357#elif defined(HAS_ARGBGRAYROW_NEON)
1358  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1359    ARGBGrayRow = ARGBGrayRow_NEON;
1360  }
1361#endif
1362  for (y = 0; y < height; ++y) {
1363    ARGBGrayRow(dst, dst, width);
1364    dst += dst_stride_argb;
1365  }
1366  return 0;
1367}
1368
1369// Make a rectangle of ARGB Sepia tone.
1370LIBYUV_API
1371int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
1372              int dst_x, int dst_y, int width, int height) {
1373  int y;
1374  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
1375  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1376  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1377    return -1;
1378  }
1379  // Coalesce rows.
1380  if (dst_stride_argb == width * 4) {
1381    width *= height;
1382    height = 1;
1383    dst_stride_argb = 0;
1384  }
1385#if defined(HAS_ARGBSEPIAROW_SSSE3)
1386  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1387      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1388    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1389  }
1390#elif defined(HAS_ARGBSEPIAROW_NEON)
1391  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1392    ARGBSepiaRow = ARGBSepiaRow_NEON;
1393  }
1394#endif
1395  for (y = 0; y < height; ++y) {
1396    ARGBSepiaRow(dst, width);
1397    dst += dst_stride_argb;
1398  }
1399  return 0;
1400}
1401
1402// Apply a 4x4 matrix to each ARGB pixel.
1403// Note: Normally for shading, but can be used to swizzle or invert.
1404LIBYUV_API
1405int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
1406                    uint8* dst_argb, int dst_stride_argb,
1407                    const int8* matrix_argb,
1408                    int width, int height) {
1409  int y;
1410  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
1411      const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
1412  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
1413    return -1;
1414  }
1415  if (height < 0) {
1416    height = -height;
1417    src_argb = src_argb + (height - 1) * src_stride_argb;
1418    src_stride_argb = -src_stride_argb;
1419  }
1420  // Coalesce rows.
1421  if (src_stride_argb == width * 4 &&
1422      dst_stride_argb == width * 4) {
1423    width *= height;
1424    height = 1;
1425    src_stride_argb = dst_stride_argb = 0;
1426  }
1427#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
1428  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1429      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1430    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
1431  }
1432#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
1433  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1434    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
1435  }
1436#endif
1437  for (y = 0; y < height; ++y) {
1438    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
1439    src_argb += src_stride_argb;
1440    dst_argb += dst_stride_argb;
1441  }
1442  return 0;
1443}
1444
1445// Apply a 4x3 matrix to each ARGB pixel.
1446// Deprecated.
1447LIBYUV_API
1448int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
1449                   const int8* matrix_rgb,
1450                   int dst_x, int dst_y, int width, int height) {
1451  SIMD_ALIGNED(int8 matrix_argb[16]);
1452  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1453  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
1454      dst_x < 0 || dst_y < 0) {
1455    return -1;
1456  }
1457
1458  // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
1459  matrix_argb[0] = matrix_rgb[0] / 2;
1460  matrix_argb[1] = matrix_rgb[1] / 2;
1461  matrix_argb[2] = matrix_rgb[2] / 2;
1462  matrix_argb[3] = matrix_rgb[3] / 2;
1463  matrix_argb[4] = matrix_rgb[4] / 2;
1464  matrix_argb[5] = matrix_rgb[5] / 2;
1465  matrix_argb[6] = matrix_rgb[6] / 2;
1466  matrix_argb[7] = matrix_rgb[7] / 2;
1467  matrix_argb[8] = matrix_rgb[8] / 2;
1468  matrix_argb[9] = matrix_rgb[9] / 2;
1469  matrix_argb[10] = matrix_rgb[10] / 2;
1470  matrix_argb[11] = matrix_rgb[11] / 2;
1471  matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
1472  matrix_argb[15] = 64;  // 1.0
1473
1474  return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
1475                         dst, dst_stride_argb,
1476                         &matrix_argb[0], width, height);
1477}
1478
1479// Apply a color table each ARGB pixel.
1480// Table contains 256 ARGB values.
1481LIBYUV_API
1482int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
1483                   const uint8* table_argb,
1484                   int dst_x, int dst_y, int width, int height) {
1485  int y;
1486  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1487                            int width) = ARGBColorTableRow_C;
1488  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1489  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1490      dst_x < 0 || dst_y < 0) {
1491    return -1;
1492  }
1493  // Coalesce rows.
1494  if (dst_stride_argb == width * 4) {
1495    width *= height;
1496    height = 1;
1497    dst_stride_argb = 0;
1498  }
1499#if defined(HAS_ARGBCOLORTABLEROW_X86)
1500  if (TestCpuFlag(kCpuHasX86)) {
1501    ARGBColorTableRow = ARGBColorTableRow_X86;
1502  }
1503#endif
1504  for (y = 0; y < height; ++y) {
1505    ARGBColorTableRow(dst, table_argb, width);
1506    dst += dst_stride_argb;
1507  }
1508  return 0;
1509}
1510
1511// Apply a color table each ARGB pixel but preserve destination alpha.
1512// Table contains 256 ARGB values.
1513LIBYUV_API
1514int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
1515                  const uint8* table_argb,
1516                  int dst_x, int dst_y, int width, int height) {
1517  int y;
1518  void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1519                           int width) = RGBColorTableRow_C;
1520  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1521  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1522      dst_x < 0 || dst_y < 0) {
1523    return -1;
1524  }
1525  // Coalesce rows.
1526  if (dst_stride_argb == width * 4) {
1527    width *= height;
1528    height = 1;
1529    dst_stride_argb = 0;
1530  }
1531#if defined(HAS_RGBCOLORTABLEROW_X86)
1532  if (TestCpuFlag(kCpuHasX86)) {
1533    RGBColorTableRow = RGBColorTableRow_X86;
1534  }
1535#endif
1536  for (y = 0; y < height; ++y) {
1537    RGBColorTableRow(dst, table_argb, width);
1538    dst += dst_stride_argb;
1539  }
1540  return 0;
1541}
1542
1543// ARGBQuantize is used to posterize art.
1544// e.g. rgb / qvalue * qvalue + qvalue / 2
1545// But the low levels implement efficiently with 3 parameters, and could be
1546// used for other high level operations.
1547// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1548// where scale is 1 / interval_size as a fixed point value.
1549// The divide is replaces with a multiply by reciprocal fixed point multiply.
1550// Caveat - although SSE2 saturates, the C function does not and should be used
1551// with care if doing anything but quantization.
1552LIBYUV_API
1553int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
1554                 int scale, int interval_size, int interval_offset,
1555                 int dst_x, int dst_y, int width, int height) {
1556  int y;
1557  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
1558                          int interval_offset, int width) = ARGBQuantizeRow_C;
1559  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1560  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
1561      interval_size < 1 || interval_size > 255) {
1562    return -1;
1563  }
1564  // Coalesce rows.
1565  if (dst_stride_argb == width * 4) {
1566    width *= height;
1567    height = 1;
1568    dst_stride_argb = 0;
1569  }
1570#if defined(HAS_ARGBQUANTIZEROW_SSE2)
1571  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1572      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1573    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
1574  }
1575#elif defined(HAS_ARGBQUANTIZEROW_NEON)
1576  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1577    ARGBQuantizeRow = ARGBQuantizeRow_NEON;
1578  }
1579#endif
1580  for (y = 0; y < height; ++y) {
1581    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
1582    dst += dst_stride_argb;
1583  }
1584  return 0;
1585}
1586
1587// Computes table of cumulative sum for image where the value is the sum
1588// of all values above and to the left of the entry. Used by ARGBBlur.
1589LIBYUV_API
1590int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
1591                             int32* dst_cumsum, int dst_stride32_cumsum,
1592                             int width, int height) {
1593  int y;
1594  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
1595      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1596  int32* previous_cumsum = dst_cumsum;
1597  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
1598    return -1;
1599  }
1600#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1601  if (TestCpuFlag(kCpuHasSSE2)) {
1602    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1603  }
1604#endif
1605  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
1606  for (y = 0; y < height; ++y) {
1607    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
1608    previous_cumsum = dst_cumsum;
1609    dst_cumsum += dst_stride32_cumsum;
1610    src_argb += src_stride_argb;
1611  }
1612  return 0;
1613}
1614
1615// Blur ARGB image.
1616// Caller should allocate CumulativeSum table of width * height * 16 bytes
1617// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
1618// as the buffer is treated as circular.
1619LIBYUV_API
1620int ARGBBlur(const uint8* src_argb, int src_stride_argb,
1621             uint8* dst_argb, int dst_stride_argb,
1622             int32* dst_cumsum, int dst_stride32_cumsum,
1623             int width, int height, int radius) {
1624  int y;
1625  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
1626      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1627  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
1628      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
1629  int32* cumsum_bot_row;
1630  int32* max_cumsum_bot_row;
1631  int32* cumsum_top_row;
1632
1633  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1634    return -1;
1635  }
1636  if (height < 0) {
1637    height = -height;
1638    src_argb = src_argb + (height - 1) * src_stride_argb;
1639    src_stride_argb = -src_stride_argb;
1640  }
1641  if (radius > height) {
1642    radius = height;
1643  }
1644  if (radius > (width / 2 - 1)) {
1645    radius = width / 2 - 1;
1646  }
1647  if (radius <= 0) {
1648    return -1;
1649  }
1650#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1651  if (TestCpuFlag(kCpuHasSSE2)) {
1652    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1653    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
1654  }
1655#endif
1656  // Compute enough CumulativeSum for first row to be blurred. After this
1657  // one row of CumulativeSum is updated at a time.
1658  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
1659                           dst_cumsum, dst_stride32_cumsum,
1660                           width, radius);
1661
1662  src_argb = src_argb + radius * src_stride_argb;
1663  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
1664
1665  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
1666  cumsum_top_row = &dst_cumsum[0];
1667
1668  for (y = 0; y < height; ++y) {
1669    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
1670    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
1671    int area = radius * (bot_y - top_y);
1672    int boxwidth = radius * 4;
1673    int x;
1674    int n;
1675
1676    // Increment cumsum_top_row pointer with circular buffer wrap around.
1677    if (top_y) {
1678      cumsum_top_row += dst_stride32_cumsum;
1679      if (cumsum_top_row >= max_cumsum_bot_row) {
1680        cumsum_top_row = dst_cumsum;
1681      }
1682    }
1683    // Increment cumsum_bot_row pointer with circular buffer wrap around and
1684    // then fill in a row of CumulativeSum.
1685    if ((y + radius) < height) {
1686      const int32* prev_cumsum_bot_row = cumsum_bot_row;
1687      cumsum_bot_row += dst_stride32_cumsum;
1688      if (cumsum_bot_row >= max_cumsum_bot_row) {
1689        cumsum_bot_row = dst_cumsum;
1690      }
1691      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
1692                              width);
1693      src_argb += src_stride_argb;
1694    }
1695
1696    // Left clipped.
1697    for (x = 0; x < radius + 1; ++x) {
1698      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1699                                boxwidth, area, &dst_argb[x * 4], 1);
1700      area += (bot_y - top_y);
1701      boxwidth += 4;
1702    }
1703
1704    // Middle unclipped.
1705    n = (width - 1) - radius - x + 1;
1706    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1707                              boxwidth, area, &dst_argb[x * 4], n);
1708
1709    // Right clipped.
1710    for (x += n; x <= width - 1; ++x) {
1711      area -= (bot_y - top_y);
1712      boxwidth -= 4;
1713      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
1714                                cumsum_bot_row + (x - radius - 1) * 4,
1715                                boxwidth, area, &dst_argb[x * 4], 1);
1716    }
1717    dst_argb += dst_stride_argb;
1718  }
1719  return 0;
1720}
1721
1722// Multiply ARGB image by a specified ARGB value.
1723LIBYUV_API
1724int ARGBShade(const uint8* src_argb, int src_stride_argb,
1725              uint8* dst_argb, int dst_stride_argb,
1726              int width, int height, uint32 value) {
1727  int y;
1728  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
1729                       int width, uint32 value) = ARGBShadeRow_C;
1730  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
1731    return -1;
1732  }
1733  if (height < 0) {
1734    height = -height;
1735    src_argb = src_argb + (height - 1) * src_stride_argb;
1736    src_stride_argb = -src_stride_argb;
1737  }
1738  // Coalesce rows.
1739  if (src_stride_argb == width * 4 &&
1740      dst_stride_argb == width * 4) {
1741    width *= height;
1742    height = 1;
1743    src_stride_argb = dst_stride_argb = 0;
1744  }
1745#if defined(HAS_ARGBSHADEROW_SSE2)
1746  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1747      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1748      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1749    ARGBShadeRow = ARGBShadeRow_SSE2;
1750  }
1751#elif defined(HAS_ARGBSHADEROW_NEON)
1752  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1753    ARGBShadeRow = ARGBShadeRow_NEON;
1754  }
1755#endif
1756
1757  for (y = 0; y < height; ++y) {
1758    ARGBShadeRow(src_argb, dst_argb, width, value);
1759    src_argb += src_stride_argb;
1760    dst_argb += dst_stride_argb;
1761  }
1762  return 0;
1763}
1764
1765// Interpolate 2 ARGB images by specified amount (0 to 255).
1766LIBYUV_API
1767int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
1768                    const uint8* src_argb1, int src_stride_argb1,
1769                    uint8* dst_argb, int dst_stride_argb,
1770                    int width, int height, int interpolation) {
1771  int y;
1772  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1773                         ptrdiff_t src_stride, int dst_width,
1774                         int source_y_fraction) = InterpolateRow_C;
1775  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
1776    return -1;
1777  }
1778  // Negative height means invert the image.
1779  if (height < 0) {
1780    height = -height;
1781    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1782    dst_stride_argb = -dst_stride_argb;
1783  }
1784  // Coalesce rows.
1785  if (src_stride_argb0 == width * 4 &&
1786      src_stride_argb1 == width * 4 &&
1787      dst_stride_argb == width * 4) {
1788    width *= height;
1789    height = 1;
1790    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
1791  }
1792#if defined(HAS_INTERPOLATEROW_SSE2)
1793  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1794    InterpolateRow = InterpolateRow_Any_SSE2;
1795    if (IS_ALIGNED(width, 4)) {
1796      InterpolateRow = InterpolateRow_Unaligned_SSE2;
1797      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1798          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1799          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1800        InterpolateRow = InterpolateRow_SSE2;
1801      }
1802    }
1803  }
1804#endif
1805#if defined(HAS_INTERPOLATEROW_SSSE3)
1806  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1807    InterpolateRow = InterpolateRow_Any_SSSE3;
1808    if (IS_ALIGNED(width, 4)) {
1809      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
1810      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1811          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1812          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1813        InterpolateRow = InterpolateRow_SSSE3;
1814      }
1815    }
1816  }
1817#endif
1818#if defined(HAS_INTERPOLATEROW_AVX2)
1819  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1820    InterpolateRow = InterpolateRow_Any_AVX2;
1821    if (IS_ALIGNED(width, 8)) {
1822      InterpolateRow = InterpolateRow_AVX2;
1823    }
1824  }
1825#endif
1826#if defined(HAS_INTERPOLATEROW_NEON)
1827  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1828    InterpolateRow = InterpolateRow_Any_NEON;
1829    if (IS_ALIGNED(width, 4)) {
1830      InterpolateRow = InterpolateRow_NEON;
1831    }
1832  }
1833#endif
1834#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
1835  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
1836      IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
1837      IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
1838      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
1839    ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
1840  }
1841#endif
1842
1843  for (y = 0; y < height; ++y) {
1844    InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
1845                   width * 4, interpolation);
1846    src_argb0 += src_stride_argb0;
1847    src_argb1 += src_stride_argb1;
1848    dst_argb += dst_stride_argb;
1849  }
1850  return 0;
1851}
1852
1853// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
1854LIBYUV_API
1855int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
1856                uint8* dst_argb, int dst_stride_argb,
1857                const uint8* shuffler, int width, int height) {
1858  int y;
1859  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
1860                         const uint8* shuffler, int pix) = ARGBShuffleRow_C;
1861  if (!src_bgra || !dst_argb ||
1862      width <= 0 || height == 0) {
1863    return -1;
1864  }
1865  // Negative height means invert the image.
1866  if (height < 0) {
1867    height = -height;
1868    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
1869    src_stride_bgra = -src_stride_bgra;
1870  }
1871  // Coalesce rows.
1872  if (src_stride_bgra == width * 4 &&
1873      dst_stride_argb == width * 4) {
1874    width *= height;
1875    height = 1;
1876    src_stride_bgra = dst_stride_argb = 0;
1877  }
1878#if defined(HAS_ARGBSHUFFLEROW_SSE2)
1879  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1880    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
1881    if (IS_ALIGNED(width, 4)) {
1882      ARGBShuffleRow = ARGBShuffleRow_SSE2;
1883    }
1884  }
1885#endif
1886#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
1887  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1888    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
1889    if (IS_ALIGNED(width, 8)) {
1890      ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
1891      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
1892          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1893        ARGBShuffleRow = ARGBShuffleRow_SSSE3;
1894      }
1895    }
1896  }
1897#endif
1898#if defined(HAS_ARGBSHUFFLEROW_AVX2)
1899  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
1900    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
1901    if (IS_ALIGNED(width, 16)) {
1902      ARGBShuffleRow = ARGBShuffleRow_AVX2;
1903    }
1904  }
1905#endif
1906#if defined(HAS_ARGBSHUFFLEROW_NEON)
1907  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1908    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
1909    if (IS_ALIGNED(width, 4)) {
1910      ARGBShuffleRow = ARGBShuffleRow_NEON;
1911    }
1912  }
1913#endif
1914
1915  for (y = 0; y < height; ++y) {
1916    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
1917    src_bgra += src_stride_bgra;
1918    dst_argb += dst_stride_argb;
1919  }
1920  return 0;
1921}
1922
1923// Sobel ARGB effect.
1924static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
1925                        uint8* dst_argb, int dst_stride_argb,
1926                        int width, int height,
1927                        void (*SobelRow)(const uint8* src_sobelx,
1928                                         const uint8* src_sobely,
1929                                         uint8* dst, int width)) {
1930  int y;
1931  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
1932                         uint32 selector, int pix) = ARGBToBayerGGRow_C;
1933  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
1934                    uint8* dst_sobely, int width) = SobelYRow_C;
1935  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
1936                    const uint8* src_y2, uint8* dst_sobely, int width) =
1937      SobelXRow_C;
1938  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
1939  if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
1940    return -1;
1941  }
1942  // Negative height means invert the image.
1943  if (height < 0) {
1944    height = -height;
1945    src_argb  = src_argb  + (height - 1) * src_stride_argb;
1946    src_stride_argb = -src_stride_argb;
1947  }
1948  // ARGBToBayer used to select G channel from ARGB.
1949#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
1950  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
1951      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
1952    ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
1953    if (IS_ALIGNED(width, 8)) {
1954      ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
1955    }
1956  }
1957#endif
1958#if defined(HAS_ARGBTOBAYERROW_SSSE3)
1959  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
1960      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
1961    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
1962    if (IS_ALIGNED(width, 8)) {
1963      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
1964    }
1965  }
1966#endif
1967#if defined(HAS_ARGBTOBAYERGGROW_NEON)
1968  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1969    ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
1970    if (IS_ALIGNED(width, 8)) {
1971      ARGBToBayerRow = ARGBToBayerGGRow_NEON;
1972    }
1973  }
1974#endif
1975#if defined(HAS_SOBELYROW_SSE2)
1976  if (TestCpuFlag(kCpuHasSSE2)) {
1977    SobelYRow = SobelYRow_SSE2;
1978  }
1979#endif
1980#if defined(HAS_SOBELYROW_NEON)
1981  if (TestCpuFlag(kCpuHasNEON)) {
1982    SobelYRow = SobelYRow_NEON;
1983  }
1984#endif
1985#if defined(HAS_SOBELXROW_SSE2)
1986  if (TestCpuFlag(kCpuHasSSE2)) {
1987    SobelXRow = SobelXRow_SSE2;
1988  }
1989#endif
1990#if defined(HAS_SOBELXROW_NEON)
1991  if (TestCpuFlag(kCpuHasNEON)) {
1992    SobelXRow = SobelXRow_NEON;
1993  }
1994#endif
1995  {
1996    // 3 rows with edges before/after.
1997    const int kRowSize = (width + kEdge + 15) & ~15;
1998    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
1999    uint8* row_sobelx = rows;
2000    uint8* row_sobely = rows + kRowSize;
2001    uint8* row_y = rows + kRowSize * 2;
2002
2003    // Convert first row.
2004    uint8* row_y0 = row_y + kEdge;
2005    uint8* row_y1 = row_y0 + kRowSize;
2006    uint8* row_y2 = row_y1 + kRowSize;
2007    ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
2008    row_y0[-1] = row_y0[0];
2009    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
2010    ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
2011    row_y1[-1] = row_y1[0];
2012    memset(row_y1 + width, row_y1[width - 1], 16);
2013    memset(row_y2 + width, 0, 16);
2014
2015    for (y = 0; y < height; ++y) {
2016      // Convert next row of ARGB to Y.
2017      if (y < (height - 1)) {
2018        src_argb += src_stride_argb;
2019      }
2020      ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
2021      row_y2[-1] = row_y2[0];
2022      row_y2[width] = row_y2[width - 1];
2023
2024      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
2025      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
2026      SobelRow(row_sobelx, row_sobely, dst_argb, width);
2027
2028      // Cycle thru circular queue of 3 row_y buffers.
2029      {
2030        uint8* row_yt = row_y0;
2031        row_y0 = row_y1;
2032        row_y1 = row_y2;
2033        row_y2 = row_yt;
2034      }
2035
2036      dst_argb += dst_stride_argb;
2037    }
2038    free_aligned_buffer_64(rows);
2039  }
2040  return 0;
2041}
2042
2043// Sobel ARGB effect.
2044LIBYUV_API
2045int ARGBSobel(const uint8* src_argb, int src_stride_argb,
2046              uint8* dst_argb, int dst_stride_argb,
2047              int width, int height) {
2048  void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
2049                   uint8* dst_argb, int width) = SobelRow_C;
2050#if defined(HAS_SOBELROW_SSE2)
2051  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2052      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2053    SobelRow = SobelRow_SSE2;
2054  }
2055#endif
2056#if defined(HAS_SOBELROW_NEON)
2057  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2058    SobelRow = SobelRow_NEON;
2059  }
2060#endif
2061  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2062                      width, height, SobelRow);
2063}
2064
2065// Sobel ARGB effect with planar output.
2066LIBYUV_API
2067int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
2068                     uint8* dst_y, int dst_stride_y,
2069                     int width, int height) {
2070  void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2071                          uint8* dst_, int width) = SobelToPlaneRow_C;
2072#if defined(HAS_SOBELTOPLANEROW_SSE2)
2073  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2074      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
2075    SobelToPlaneRow = SobelToPlaneRow_SSE2;
2076  }
2077#endif
2078#if defined(HAS_SOBELTOPLANEROW_NEON)
2079  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
2080    SobelToPlaneRow = SobelToPlaneRow_NEON;
2081  }
2082#endif
2083  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
2084                      width, height, SobelToPlaneRow);
2085}
2086
2087// SobelXY ARGB effect.
2088// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
2089LIBYUV_API
2090int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
2091                uint8* dst_argb, int dst_stride_argb,
2092                int width, int height) {
2093  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2094                     uint8* dst_argb, int width) = SobelXYRow_C;
2095#if defined(HAS_SOBELXYROW_SSE2)
2096  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2097      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2098    SobelXYRow = SobelXYRow_SSE2;
2099  }
2100#endif
2101#if defined(HAS_SOBELXYROW_NEON)
2102  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2103    SobelXYRow = SobelXYRow_NEON;
2104  }
2105#endif
2106  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2107                      width, height, SobelXYRow);
2108}
2109
2110// Apply a 4x4 polynomial to each ARGB pixel.
2111LIBYUV_API
2112int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
2113                   uint8* dst_argb, int dst_stride_argb,
2114                   const float* poly,
2115                   int width, int height) {
2116  int y;
2117  void (*ARGBPolynomialRow)(const uint8* src_argb,
2118                            uint8* dst_argb, const float* poly,
2119                            int width) = ARGBPolynomialRow_C;
2120  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2121    return -1;
2122  }
2123  // Negative height means invert the image.
2124  if (height < 0) {
2125    height = -height;
2126    src_argb  = src_argb  + (height - 1) * src_stride_argb;
2127    src_stride_argb = -src_stride_argb;
2128  }
2129  // Coalesce rows.
2130  if (src_stride_argb == width * 4 &&
2131      dst_stride_argb == width * 4) {
2132    width *= height;
2133    height = 1;
2134    src_stride_argb = dst_stride_argb = 0;
2135  }
2136#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2137  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2138    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2139  }
2140#endif
2141#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2142  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2143      IS_ALIGNED(width, 2)) {
2144    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2145  }
2146#endif
2147
2148  for (y = 0; y < height; ++y) {
2149    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2150    src_argb += src_stride_argb;
2151    dst_argb += dst_stride_argb;
2152  }
2153  return 0;
2154}
2155
2156// Apply a lumacolortable to each ARGB pixel.
2157LIBYUV_API
2158int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
2159                       uint8* dst_argb, int dst_stride_argb,
2160                       const uint8* luma,
2161                       int width, int height) {
2162  int y;
2163  void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
2164      int width, const uint8* luma, const uint32 lumacoeff) =
2165      ARGBLumaColorTableRow_C;
2166  if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2167    return -1;
2168  }
2169  // Negative height means invert the image.
2170  if (height < 0) {
2171    height = -height;
2172    src_argb  = src_argb  + (height - 1) * src_stride_argb;
2173    src_stride_argb = -src_stride_argb;
2174  }
2175  // Coalesce rows.
2176  if (src_stride_argb == width * 4 &&
2177      dst_stride_argb == width * 4) {
2178    width *= height;
2179    height = 1;
2180    src_stride_argb = dst_stride_argb = 0;
2181  }
2182#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2183  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2184    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2185  }
2186#endif
2187
2188  for (y = 0; y < height; ++y) {
2189    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2190    src_argb += src_stride_argb;
2191    dst_argb += dst_stride_argb;
2192  }
2193  return 0;
2194}
2195
2196// Copy Alpha from one ARGB image to another.
2197LIBYUV_API
2198int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
2199                  uint8* dst_argb, int dst_stride_argb,
2200                  int width, int height) {
2201  int y;
2202  void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2203      ARGBCopyAlphaRow_C;
2204  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2205    return -1;
2206  }
2207  // Negative height means invert the image.
2208  if (height < 0) {
2209    height = -height;
2210    src_argb = src_argb + (height - 1) * src_stride_argb;
2211    src_stride_argb = -src_stride_argb;
2212  }
2213  // Coalesce rows.
2214  if (src_stride_argb == width * 4 &&
2215      dst_stride_argb == width * 4) {
2216    width *= height;
2217    height = 1;
2218    src_stride_argb = dst_stride_argb = 0;
2219  }
2220#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2221  if (TestCpuFlag(kCpuHasSSE2) &&
2222      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
2223      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2224      IS_ALIGNED(width, 8)) {
2225    ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2226  }
2227#endif
2228#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2229  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2230    ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2231  }
2232#endif
2233
2234  for (y = 0; y < height; ++y) {
2235    ARGBCopyAlphaRow(src_argb, dst_argb, width);
2236    src_argb += src_stride_argb;
2237    dst_argb += dst_stride_argb;
2238  }
2239  return 0;
2240}
2241
2242// Copy a planar Y channel to the alpha channel of a destination ARGB image.
2243LIBYUV_API
2244int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
2245                     uint8* dst_argb, int dst_stride_argb,
2246                     int width, int height) {
2247  int y;
2248  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
2249      ARGBCopyYToAlphaRow_C;
2250  if (!src_y || !dst_argb || width <= 0 || height == 0) {
2251    return -1;
2252  }
2253  // Negative height means invert the image.
2254  if (height < 0) {
2255    height = -height;
2256    src_y = src_y + (height - 1) * src_stride_y;
2257    src_stride_y = -src_stride_y;
2258  }
2259  // Coalesce rows.
2260  if (src_stride_y == width &&
2261      dst_stride_argb == width * 4) {
2262    width *= height;
2263    height = 1;
2264    src_stride_y = dst_stride_argb = 0;
2265  }
2266#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
2267  if (TestCpuFlag(kCpuHasSSE2) &&
2268      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
2269      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2270      IS_ALIGNED(width, 8)) {
2271    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
2272  }
2273#endif
2274#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
2275  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2276    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
2277  }
2278#endif
2279
2280  for (y = 0; y < height; ++y) {
2281    ARGBCopyYToAlphaRow(src_y, dst_argb, width);
2282    src_y += src_stride_y;
2283    dst_argb += dst_stride_argb;
2284  }
2285  return 0;
2286}
2287
2288#ifdef __cplusplus
2289}  // extern "C"
2290}  // namespace libyuv
2291#endif
2292