1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
12#define INCLUDE_LIBYUV_ROW_H_
13
14#include <stdlib.h>  // For malloc.
15
16#include "libyuv/basic_types.h"
17
18#ifdef __cplusplus
19namespace libyuv {
20extern "C" {
21#endif
22
// True when p (pointer or integer) has none of the low bits selected by
// (a - 1) set, i.e. p is a multiple of a. The mask trick needs a to be a
// power of two.
#define IS_ALIGNED(p, a) ((((uintptr_t)(p)) & ((a) - 1)) == 0)
24
// align_buffer_64(var, size) declares two locals in the current scope:
//   var##_mem - the raw malloc() result for (size + 63) bytes, and
//   var       - that address rounded up to the next multiple of 64.
// The malloc() result is not checked by the macro.
#ifndef __cplusplus
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
  uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63)       /* NOLINT */
#else
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
  uint8* var = reinterpret_cast<uint8*>(                                       \
      (reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
#endif
35
// Frees the allocation held in var##_mem (as declared by align_buffer_64)
// and resets var to 0.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement: it then behaves correctly under an unbraced if/else or loop.
// Callers keep terminating the invocation with ';' as before.
#define free_aligned_buffer_64(var) \
  do {                              \
    free(var##_mem);                \
    var = 0;                        \
  } while (0)
39
// Switch every x86 SIMD path off when any of the macros below is defined.
// NOTE(review): TARGET_IPHONE_SIMULATOR is only tested with defined(), so a
// definition with value 0 would also disable x86 - confirm that is intended.
#if defined(__pnacl__) || defined(__CLR_VER) || \
    defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR)
#define LIBYUV_DISABLE_X86
#endif
// LIBYUV_SSSE3_ONLY is defined when the compiler is already targeting SSSE3
// (so SSSE3 is a build requirement rather than a runtime option).
// NOTE(review): MSVC documents _M_IX86_FP values 0..2 only, so the ">= 3"
// test presumably never fires there - confirm.
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) || defined(__SSSE3__)
#define LIBYUV_SSSE3_ONLY
#endif
48
49// Enable for NaCL pepper 33 for bundle and AVX2 support.
50//  #define NEW_BINUTILS
51
// The following are available on all x86 platforms.
// Each HAS_<ROW>_<ISA> macro is an empty marker; it is defined only when x86
// code is not disabled and the target is 32-bit MSVC x86 or gcc/clang
// x86/x64. (HAS_I422TOARGB1555ROW_SSSE3 used to be listed twice; the
// duplicate entry has been removed.)
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2

// Conversions:
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSSE3
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTOBAYERGGROW_SSE2
#define HAS_ARGBTOBAYERROW_SSSE3
#define HAS_ARGBTORAWROW_SSSE3
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86
#define HAS_HALFROW_SSE2
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TORAWROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3
#define HAS_I422TORGBAROW_SSSE3
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_MERGEUVROW_SSE2
#define HAS_MIRRORROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROW_UV_SSSE3
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_NV21TORGB565ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
#define HAS_UYVYTOARGBROW_SSSE3
#define HAS_UYVYTOUV422ROW_SSE2
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YTOARGBROW_SSE2
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
#endif
154
// AVX2 needs gcc 4.7.0 or newer; only checked for x86 / x64 targets.
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
    ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)))
#define GCC_HAS_AVX2 1
#endif  // gcc >= 4.7 on x86
161
// AVX2 needs clang 3.4.0 or newer; only checked for x86 / x64 targets.
#if defined(__clang__) && (defined(__i386__) || defined(__x86_64__)) && \
    ((__clang_major__ > 3) || \
     ((__clang_major__ == 3) && (__clang_minor__ >= 4)))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4 on x86
168
// AVX2 needs Visual C 2012 (_MSC_VER 1700) or newer; 32-bit x86 builds only.
#if defined(_MSC_VER) && (_MSC_VER >= 1700) && defined(_M_IX86)
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012
173
// AVX2 rows enabled whenever one of the compiler checks (VS2012, clang 3.4
// or gcc 4.7) has set its *_HAS_AVX2 macro and x86 code is not disabled.
// The code supports NaCL but requires a new compiler and validator.
#if (defined(GCC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
     defined(VISUALC_HAS_AVX2)) && !defined(LIBYUV_DISABLE_X86)
// Effects:
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#endif
185
// The following require VS2012 (VISUALC_HAS_AVX2).
// TODO(fbarchard): Port to gcc.
#if defined(VISUALC_HAS_AVX2) && !defined(LIBYUV_DISABLE_X86)
// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2

// Conversions:
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_HALFROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2
#endif  // defined(VISUALC_HAS_AVX2)
213
// The following are Yasm x86 only: they need HAVE_YASM, an x86/x64 target
// (MSVC or gcc/clang) and x86 code not disabled.
// The first line of the condition must end in "&& \" so that the
// architecture test below is part of the #if expression; without it those
// lines are ordinary source text whenever HAVE_YASM is defined.
// TODO(fbarchard): Port AVX2 to inline.
#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) && \
    (defined(_M_IX86) || defined(_M_X64) || \
    defined(__x86_64__) || defined(__i386__))
#define HAS_MERGEUVROW_AVX2
#define HAS_MERGEUVROW_MMX
#define HAS_SPLITUVROW_AVX2
#define HAS_SPLITUVROW_MMX
#define HAS_UYVYTOYROW_AVX2
#define HAS_UYVYTOYROW_MMX
#define HAS_YUY2TOYROW_AVX2
#define HAS_YUY2TOYROW_MMX
#endif
228
// SSE2 rows that are dropped when SSSE3 is a build requirement
// (LIBYUV_SSSE3_ONLY defined). Same x86 target test as the main x86 list.
#if !defined(LIBYUV_SSSE3_ONLY) && !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
#define HAS_ARGBATTENUATEROW_SSE2
#define HAS_ARGBBLENDROW_SSE2
#define HAS_MIRRORROW_SSE2
#endif
237
// NEON rows: enabled when NEON is requested (__ARM_NEON__ or LIBYUV_NEON),
// not disabled via LIBYUV_DISABLE_NEON, and the build is not Native Client.
#if (defined(LIBYUV_NEON) || defined(__ARM_NEON__)) && \
    !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__)
// Effects:
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
// TODO(fbarchard): Investigate neon unittest failure.
// #define HAS_ARGBCOLORMATRIXROW_NEON

// Conversions:
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTOBAYERGGROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
#define HAS_HALFROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#endif
332
// MIPS rows: need a __mips__ target and LIBYUV_DISABLE_MIPS unset.
#if defined(__mips__) && !defined(LIBYUV_DISABLE_MIPS)
#define HAS_COPYROW_MIPS
// The DSPR2 rows additionally need __mips_dsp with __mips_dsp_rev >= 2.
#if (__mips_dsp_rev >= 2) && defined(__mips_dsp)
#define HAS_I422TOABGRROW_MIPS_DSPR2
#define HAS_I422TOARGBROW_MIPS_DSPR2
#define HAS_I422TOBGRAROW_MIPS_DSPR2
#define HAS_INTERPOLATEROWS_MIPS_DSPR2
#define HAS_MIRRORROW_MIPS_DSPR2
#define HAS_MIRRORUVROW_MIPS_DSPR2
#define HAS_SPLITUVROW_MIPS_DSPR2
#endif  // __mips_dsp_rev >= 2
#endif  // __mips__
346
347#if defined(_MSC_VER) && !defined(__CLR_VER)
348#define SIMD_ALIGNED(var) __declspec(align(16)) var
349typedef __declspec(align(16)) int16 vec16[8];
350typedef __declspec(align(16)) int32 vec32[4];
351typedef __declspec(align(16)) int8 vec8[16];
352typedef __declspec(align(16)) uint16 uvec16[8];
353typedef __declspec(align(16)) uint32 uvec32[4];
354typedef __declspec(align(16)) uint8 uvec8[16];
355typedef __declspec(align(32)) int16 lvec16[16];
356typedef __declspec(align(32)) int32 lvec32[8];
357typedef __declspec(align(32)) int8 lvec8[32];
358typedef __declspec(align(32)) uint16 ulvec16[16];
359typedef __declspec(align(32)) uint32 ulvec32[8];
360typedef __declspec(align(32)) uint8 ulvec8[32];
361
362#elif defined(__GNUC__)
363// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
364#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
365typedef int16 __attribute__((vector_size(16))) vec16;
366typedef int32 __attribute__((vector_size(16))) vec32;
367typedef int8 __attribute__((vector_size(16))) vec8;
368typedef uint16 __attribute__((vector_size(16))) uvec16;
369typedef uint32 __attribute__((vector_size(16))) uvec32;
370typedef uint8 __attribute__((vector_size(16))) uvec8;
371#else
372#define SIMD_ALIGNED(var) var
373typedef int16 vec16[8];
374typedef int32 vec32[4];
375typedef int8 vec8[16];
376typedef uint16 uvec16[8];
377typedef uint32 uvec32[4];
378typedef uint8 uvec8[16];
379#endif
380
// OMITFP expands to an "omit-frame-pointer" optimize attribute, except on
// Apple, x86_64 and llvm-based compilers where it expands to nothing.
#if !defined(__APPLE__) && !defined(__x86_64__) && !defined(__llvm__)
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#else
#define OMITFP
#endif
386
// NaCL macros for GCC x86 and x64.

// LABELALIGN is the alignment directive emitted before asm labels:
// ".p2align 5" under Native Client, ".p2align 2" everywhere else.
// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
// NEW_BINUTILS and remove all BUNDLEALIGN occurrences.
#ifndef __native_client__
#define LABELALIGN ".p2align 2\n"
#else
#define LABELALIGN ".p2align 5\n"
#endif
// String-building helpers for inline-asm memory operands. Each macro
// stringizes its arguments into an operand string; two flavours exist:
//   - generic: plain "offset(%base,%index,scale)" addressing;
//   - Native Client x86_64: accesses are written relative to %r15, indexed
//     forms first compute the address into %r14d with "lea", and the pair
//     is wrapped in BUNDLELOCK / BUNDLEUNLOCK.
#if !defined(__native_client__) || !defined(__x86_64__)
// Generic flavour.
#define BUNDLEALIGN "\n"
#define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
  #offset "(%" #base ",%" #index "," #scale ")"
// The two string-instruction helpers expand to nothing here.
#define MEMMOVESTRING(s, d)
#define MEMSTORESTRING(reg, d)
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
  #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
  #opcode " %%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
  #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#else
// Native Client x86_64 flavour.
#if !defined(NEW_BINUTILS)
// Old binutils: no bundle lock directives, explicit bundle alignment.
#define BUNDLELOCK "\n"
#define BUNDLEUNLOCK "\n"
#define BUNDLEALIGN ".p2align 5\n"
#else
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
#define BUNDLEALIGN "\n"
#endif
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%q" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
  #offset "(%q" #base ",%q" #index "," #scale ")"
#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
#define MEMOPREG(opcode, offset, base, index, scale, reg)           \
  BUNDLELOCK                                                        \
  "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"   \
  #opcode " (%%r15,%%r14),%%" #reg "\n"                             \
  BUNDLEUNLOCK
#define MEMOPMEM(opcode, reg, offset, base, index, scale)           \
  BUNDLELOCK                                                        \
  "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"   \
  #opcode " %%" #reg ",(%%r15,%%r14)\n"                             \
  BUNDLEUNLOCK
#define MEMOPARG(opcode, offset, base, index, scale, arg)           \
  BUNDLELOCK                                                        \
  "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"   \
  #opcode " (%%r15,%%r14),%" #arg "\n"                              \
  BUNDLEUNLOCK
#endif
448
449void I444ToARGBRow_NEON(const uint8* src_y,
450                        const uint8* src_u,
451                        const uint8* src_v,
452                        uint8* dst_argb,
453                        int width);
454void I422ToARGBRow_NEON(const uint8* src_y,
455                        const uint8* src_u,
456                        const uint8* src_v,
457                        uint8* dst_argb,
458                        int width);
459void I411ToARGBRow_NEON(const uint8* src_y,
460                        const uint8* src_u,
461                        const uint8* src_v,
462                        uint8* dst_argb,
463                        int width);
464void I422ToBGRARow_NEON(const uint8* src_y,
465                        const uint8* src_u,
466                        const uint8* src_v,
467                        uint8* dst_bgra,
468                        int width);
469void I422ToABGRRow_NEON(const uint8* src_y,
470                        const uint8* src_u,
471                        const uint8* src_v,
472                        uint8* dst_abgr,
473                        int width);
474void I422ToRGBARow_NEON(const uint8* src_y,
475                        const uint8* src_u,
476                        const uint8* src_v,
477                        uint8* dst_rgba,
478                        int width);
479void I422ToRGB24Row_NEON(const uint8* src_y,
480                         const uint8* src_u,
481                         const uint8* src_v,
482                         uint8* dst_rgb24,
483                         int width);
484void I422ToRAWRow_NEON(const uint8* src_y,
485                       const uint8* src_u,
486                       const uint8* src_v,
487                       uint8* dst_raw,
488                       int width);
489void I422ToRGB565Row_NEON(const uint8* src_y,
490                          const uint8* src_u,
491                          const uint8* src_v,
492                          uint8* dst_rgb565,
493                          int width);
494void I422ToARGB1555Row_NEON(const uint8* src_y,
495                            const uint8* src_u,
496                            const uint8* src_v,
497                            uint8* dst_argb1555,
498                            int width);
499void I422ToARGB4444Row_NEON(const uint8* src_y,
500                            const uint8* src_u,
501                            const uint8* src_v,
502                            uint8* dst_argb4444,
503                            int width);
504void NV12ToARGBRow_NEON(const uint8* src_y,
505                        const uint8* src_uv,
506                        uint8* dst_argb,
507                        int width);
508void NV21ToARGBRow_NEON(const uint8* src_y,
509                        const uint8* src_vu,
510                        uint8* dst_argb,
511                        int width);
512void NV12ToRGB565Row_NEON(const uint8* src_y,
513                          const uint8* src_uv,
514                          uint8* dst_rgb565,
515                          int width);
516void NV21ToRGB565Row_NEON(const uint8* src_y,
517                          const uint8* src_vu,
518                          uint8* dst_rgb565,
519                          int width);
520void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
521                        uint8* dst_argb,
522                        int width);
523void UYVYToARGBRow_NEON(const uint8* src_uyvy,
524                        uint8* dst_argb,
525                        int width);
526
527void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
528void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
529void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
530void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
531void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
532void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
533void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
534void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
535void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
536void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
537void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
538void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
539void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
540void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
541void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
542void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
543void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
544void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
545void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
546void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
547void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
548                         int pix);
549void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
550                         int pix);
551void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
552                         int pix);
553void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
554                      uint8* dst_u, uint8* dst_v, int pix);
555void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
556                       uint8* dst_u, uint8* dst_v, int pix);
557void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
558                      uint8* dst_u, uint8* dst_v, int pix);
559void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
560                      uint8* dst_u, uint8* dst_v, int pix);
561void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
562                      uint8* dst_u, uint8* dst_v, int pix);
563void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
564                       uint8* dst_u, uint8* dst_v, int pix);
565void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
566                     uint8* dst_u, uint8* dst_v, int pix);
567void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
568                        uint8* dst_u, uint8* dst_v, int pix);
569void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
570                          uint8* dst_u, uint8* dst_v, int pix);
571void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
572                          uint8* dst_u, uint8* dst_v, int pix);
573void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
574void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
575void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
576void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
577void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
578void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
579void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
580void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
581void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
582void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
583void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
584void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
585void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
586void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
587void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
588void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
589void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
590void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
591void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
592void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
593void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
594void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
595void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
596void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
597void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
598void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
599void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
600void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
601void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
602void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
603void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
604void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
605void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
606void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
607void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
608
609void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
610                      uint8* dst_u, uint8* dst_v, int width);
611void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
612                          uint8* dst_u, uint8* dst_v, int width);
613void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
614                       uint8* dst_u, uint8* dst_v, int width);
615void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
616                        uint8* dst_u, uint8* dst_v, int width);
617void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
618                       uint8* dst_u, uint8* dst_v, int width);
619void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
620                       uint8* dst_u, uint8* dst_v, int width);
621void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
622                       uint8* dst_u, uint8* dst_v, int width);
623void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
624                                 uint8* dst_u, uint8* dst_v, int width);
625void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
626                                  uint8* dst_u, uint8* dst_v, int width);
627void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
628                                 uint8* dst_u, uint8* dst_v, int width);
629void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
630                                 uint8* dst_u, uint8* dst_v, int width);
631void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
632                                 uint8* dst_u, uint8* dst_v, int width);
633void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
634                           uint8* dst_u, uint8* dst_v, int width);
635void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
636                            uint8* dst_u, uint8* dst_v, int width);
637void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
638                           uint8* dst_u, uint8* dst_v, int width);
639void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
640                           uint8* dst_u, uint8* dst_v, int width);
641void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
642                           uint8* dst_u, uint8* dst_v, int width);
643void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
644                             int pix);
645void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
646                             int pix);
647void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
648                             int pix);
649void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
650                          uint8* dst_u, uint8* dst_v, int pix);
651void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
652                           uint8* dst_u, uint8* dst_v, int pix);
653void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
654                          uint8* dst_u, uint8* dst_v, int pix);
655void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
656                          uint8* dst_u, uint8* dst_v, int pix);
657void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
658                          uint8* dst_u, uint8* dst_v, int pix);
659void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
660                           uint8* dst_u, uint8* dst_v, int pix);
661void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
662                         uint8* dst_u, uint8* dst_v, int pix);
663void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
664                            uint8* dst_u, uint8* dst_v, int pix);
665void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
666                              int src_stride_argb1555,
667                              uint8* dst_u, uint8* dst_v, int pix);
668void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
669                              int src_stride_argb4444,
670                              uint8* dst_u, uint8* dst_v, int pix);
671void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
672                   uint8* dst_u, uint8* dst_v, int width);
673void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
674                    uint8* dst_u, uint8* dst_v, int width);
675void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
676                   uint8* dst_u, uint8* dst_v, int width);
677void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
678                   uint8* dst_u, uint8* dst_v, int width);
679void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
680                   uint8* dst_u, uint8* dst_v, int width);
681void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
682                    uint8* dst_u, uint8* dst_v, int width);
683void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
684                  uint8* dst_u, uint8* dst_v, int width);
685void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
686                     uint8* dst_u, uint8* dst_v, int width);
687void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
688                       uint8* dst_u, uint8* dst_v, int width);
689void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
690                       uint8* dst_u, uint8* dst_v, int width);
691
692void ARGBToUV444Row_SSSE3(const uint8* src_argb,
693                          uint8* dst_u, uint8* dst_v, int width);
694void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
695                                    uint8* dst_u, uint8* dst_v, int width);
696void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
697                              uint8* dst_u, uint8* dst_v, int width);
698
699void ARGBToUV422Row_SSSE3(const uint8* src_argb,
700                          uint8* dst_u, uint8* dst_v, int width);
701void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
702                                    uint8* dst_u, uint8* dst_v, int width);
703void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
704                              uint8* dst_u, uint8* dst_v, int width);
705
706void ARGBToUV444Row_C(const uint8* src_argb,
707                      uint8* dst_u, uint8* dst_v, int width);
708void ARGBToUV422Row_C(const uint8* src_argb,
709                      uint8* dst_u, uint8* dst_v, int width);
710void ARGBToUV411Row_C(const uint8* src_argb,
711                      uint8* dst_u, uint8* dst_v, int width);
712
713void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
714void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
715void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
716void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
717void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
718void MirrorRow_C(const uint8* src, uint8* dst, int width);
719
720void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
721                       int width);
722void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
723                      int width);
724void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
725                            int width);
726void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
727                   int width);
728
729void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
730void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
731void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
732void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
733
734void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
735void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
736void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
737void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
738void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
739                           int pix);
740void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
741                               int pix);
742void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
743                                     uint8* dst_v, int pix);
744void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
745                         int pix);
746void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
747                         int pix);
748void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
749                         int pix);
750void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
751                               int pix);
752
753void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
754                  int width);
755void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
756                     int width);
757void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
758                     int width);
759void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
760                     int width);
761void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
762                               uint8* dst_uv, int width);
763void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
764                         int width);
765void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
766                         int width);
767void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
768                         int width);
769
770void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
771void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
772void CopyRow_X86(const uint8* src, uint8* dst, int count);
773void CopyRow_NEON(const uint8* src, uint8* dst, int count);
774void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
775void CopyRow_C(const uint8* src, uint8* dst, int count);
776
777void CopyRow_16_C(const uint16* src, uint16* dst, int count);
778
779void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
780void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
781void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
782
783void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
784void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
785void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
786
787void SetRow_X86(uint8* dst, uint32 v32, int count);
788void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
789                     int dst_stride, int height);
790void SetRow_NEON(uint8* dst, uint32 v32, int count);
791void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
792                      int dst_stride, int height);
793void SetRow_C(uint8* dst, uint32 v32, int count);
794void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
795                   int height);
796
797// ARGBShufflers for BGRAToARGB etc.
798void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
799                      const uint8* shuffler, int pix);
800void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
801                         const uint8* shuffler, int pix);
802void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
803                          const uint8* shuffler, int pix);
804void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
805                         const uint8* shuffler, int pix);
806void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
807                         const uint8* shuffler, int pix);
808void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
809                                    const uint8* shuffler, int pix);
810void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
811                             const uint8* shuffler, int pix);
812void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
813                              const uint8* shuffler, int pix);
814void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
815                             const uint8* shuffler, int pix);
816void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
817                             const uint8* shuffler, int pix);
818
819void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
820void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
821void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
822void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
823                            int pix);
824void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
825                            int pix);
826
827void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
828void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
829void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
830void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
831                            int pix);
832void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
833                            int pix);
834void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
835void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
836void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
837void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
838void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
839void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
840void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
841void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
842                              int pix);
843void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
844                                int pix);
845void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
846                                int pix);
847void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
848void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
849void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
850                              int pix);
851void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
852                                int pix);
853void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
854                                int pix);
855
856void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
857void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
858void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
859void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
860void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
861
862void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
863void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
864void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
865void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
866void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
867
868void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
869void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
870void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
871void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
872void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
873void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
874
875void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
876void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
877void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
878void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
879void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
880void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
881
882void I444ToARGBRow_C(const uint8* src_y,
883                     const uint8* src_u,
884                     const uint8* src_v,
885                     uint8* dst_argb,
886                     int width);
887void I422ToARGBRow_C(const uint8* src_y,
888                     const uint8* src_u,
889                     const uint8* src_v,
890                     uint8* dst_argb,
891                     int width);
892void I411ToARGBRow_C(const uint8* src_y,
893                     const uint8* src_u,
894                     const uint8* src_v,
895                     uint8* dst_argb,
896                     int width);
897void NV12ToARGBRow_C(const uint8* src_y,
898                     const uint8* src_uv,
899                     uint8* dst_argb,
900                     int width);
901void NV21ToRGB565Row_C(const uint8* src_y,
902                       const uint8* src_vu,
903                       uint8* dst_argb,
904                       int width);
905void NV12ToRGB565Row_C(const uint8* src_y,
906                       const uint8* src_uv,
907                       uint8* dst_argb,
908                       int width);
909void NV21ToARGBRow_C(const uint8* src_y,
910                     const uint8* src_vu,
911                     uint8* dst_argb,
912                     int width);
913void YUY2ToARGBRow_C(const uint8* src_yuy2,
914                     uint8* dst_argb,
915                     int width);
916void UYVYToARGBRow_C(const uint8* src_uyvy,
917                     uint8* dst_argb,
918                     int width);
919void I422ToBGRARow_C(const uint8* src_y,
920                     const uint8* src_u,
921                     const uint8* src_v,
922                     uint8* dst_bgra,
923                     int width);
924void I422ToABGRRow_C(const uint8* src_y,
925                     const uint8* src_u,
926                     const uint8* src_v,
927                     uint8* dst_abgr,
928                     int width);
929void I422ToRGBARow_C(const uint8* src_y,
930                     const uint8* src_u,
931                     const uint8* src_v,
932                     uint8* dst_rgba,
933                     int width);
934void I422ToRGB24Row_C(const uint8* src_y,
935                      const uint8* src_u,
936                      const uint8* src_v,
937                      uint8* dst_rgb24,
938                      int width);
939void I422ToRAWRow_C(const uint8* src_y,
940                    const uint8* src_u,
941                    const uint8* src_v,
942                    uint8* dst_raw,
943                    int width);
944void I422ToARGB4444Row_C(const uint8* src_y,
945                         const uint8* src_u,
946                         const uint8* src_v,
947                         uint8* dst_argb4444,
948                         int width);
949void I422ToARGB1555Row_C(const uint8* src_y,
950                         const uint8* src_u,
951                         const uint8* src_v,
952                         uint8* dst_argb4444,
953                         int width);
954void I422ToRGB565Row_C(const uint8* src_y,
955                       const uint8* src_u,
956                       const uint8* src_v,
957                       uint8* dst_rgb565,
958                       int width);
959void YToARGBRow_C(const uint8* src_y,
960                  uint8* dst_argb,
961                  int width);
962void I422ToARGBRow_AVX2(const uint8* src_y,
963                        const uint8* src_u,
964                        const uint8* src_v,
965                        uint8* dst_argb,
966                        int width);
967void I444ToARGBRow_SSSE3(const uint8* src_y,
968                         const uint8* src_u,
969                         const uint8* src_v,
970                         uint8* dst_argb,
971                         int width);
972void I422ToARGBRow_SSSE3(const uint8* src_y,
973                         const uint8* src_u,
974                         const uint8* src_v,
975                         uint8* dst_argb,
976                         int width);
977void I411ToARGBRow_SSSE3(const uint8* src_y,
978                         const uint8* src_u,
979                         const uint8* src_v,
980                         uint8* dst_argb,
981                         int width);
982void NV12ToARGBRow_SSSE3(const uint8* src_y,
983                         const uint8* src_uv,
984                         uint8* dst_argb,
985                         int width);
986void NV21ToARGBRow_SSSE3(const uint8* src_y,
987                         const uint8* src_vu,
988                         uint8* dst_argb,
989                         int width);
990void NV12ToRGB565Row_SSSE3(const uint8* src_y,
991                           const uint8* src_uv,
992                           uint8* dst_argb,
993                           int width);
994void NV21ToRGB565Row_SSSE3(const uint8* src_y,
995                           const uint8* src_vu,
996                           uint8* dst_argb,
997                           int width);
998void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
999                         uint8* dst_argb,
1000                         int width);
1001void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
1002                         uint8* dst_argb,
1003                         int width);
1004void I422ToBGRARow_SSSE3(const uint8* src_y,
1005                         const uint8* src_u,
1006                         const uint8* src_v,
1007                         uint8* dst_bgra,
1008                         int width);
1009void I422ToABGRRow_SSSE3(const uint8* src_y,
1010                         const uint8* src_u,
1011                         const uint8* src_v,
1012                         uint8* dst_abgr,
1013                         int width);
1014void I422ToRGBARow_SSSE3(const uint8* src_y,
1015                         const uint8* src_u,
1016                         const uint8* src_v,
1017                         uint8* dst_rgba,
1018                         int width);
1019void I422ToARGB4444Row_SSSE3(const uint8* src_y,
1020                             const uint8* src_u,
1021                             const uint8* src_v,
1022                             uint8* dst_argb,
1023                             int width);
1024void I422ToARGB1555Row_SSSE3(const uint8* src_y,
1025                             const uint8* src_u,
1026                             const uint8* src_v,
1027                             uint8* dst_argb,
1028                             int width);
1029void I422ToRGB565Row_SSSE3(const uint8* src_y,
1030                           const uint8* src_u,
1031                           const uint8* src_v,
1032                           uint8* dst_argb,
1033                           int width);
1034// RGB24/RAW are unaligned.
1035void I422ToRGB24Row_SSSE3(const uint8* src_y,
1036                          const uint8* src_u,
1037                          const uint8* src_v,
1038                          uint8* dst_rgb24,
1039                          int width);
1040void I422ToRAWRow_SSSE3(const uint8* src_y,
1041                        const uint8* src_u,
1042                        const uint8* src_v,
1043                        uint8* dst_raw,
1044                        int width);
1045
1046void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1047                                   const uint8* src_u,
1048                                   const uint8* src_v,
1049                                   uint8* dst_argb,
1050                                   int width);
1051void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1052                                   const uint8* src_u,
1053                                   const uint8* src_v,
1054                                   uint8* dst_argb,
1055                                   int width);
1056void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1057                                   const uint8* src_u,
1058                                   const uint8* src_v,
1059                                   uint8* dst_argb,
1060                                   int width);
1061void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1062                                   const uint8* src_uv,
1063                                   uint8* dst_argb,
1064                                   int width);
1065void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1066                                   const uint8* src_vu,
1067                                   uint8* dst_argb,
1068                                   int width);
1069void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
1070                                   uint8* dst_argb,
1071                                   int width);
1072void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
1073                                   uint8* dst_argb,
1074                                   int width);
1075void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
1076                                   const uint8* src_u,
1077                                   const uint8* src_v,
1078                                   uint8* dst_bgra,
1079                                   int width);
1080void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
1081                                   const uint8* src_u,
1082                                   const uint8* src_v,
1083                                   uint8* dst_abgr,
1084                                   int width);
1085void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
1086                                   const uint8* src_u,
1087                                   const uint8* src_v,
1088                                   uint8* dst_rgba,
1089                                   int width);
1090void I422ToARGBRow_Any_AVX2(const uint8* src_y,
1091                            const uint8* src_u,
1092                            const uint8* src_v,
1093                            uint8* dst_argb,
1094                            int width);
1095void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
1096                             const uint8* src_u,
1097                             const uint8* src_v,
1098                             uint8* dst_argb,
1099                             int width);
1100void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
1101                             const uint8* src_u,
1102                             const uint8* src_v,
1103                             uint8* dst_argb,
1104                             int width);
1105void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
1106                             const uint8* src_u,
1107                             const uint8* src_v,
1108                             uint8* dst_argb,
1109                             int width);
1110void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
1111                             const uint8* src_uv,
1112                             uint8* dst_argb,
1113                             int width);
1114void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
1115                             const uint8* src_vu,
1116                             uint8* dst_argb,
1117                             int width);
1118void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
1119                               const uint8* src_uv,
1120                               uint8* dst_argb,
1121                               int width);
1122void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
1123                               const uint8* src_vu,
1124                               uint8* dst_argb,
1125                               int width);
1126void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
1127                             uint8* dst_argb,
1128                             int width);
1129void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
1130                             uint8* dst_argb,
1131                             int width);
1132void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
1133                             const uint8* src_u,
1134                             const uint8* src_v,
1135                             uint8* dst_bgra,
1136                             int width);
1137void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
1138                             const uint8* src_u,
1139                             const uint8* src_v,
1140                             uint8* dst_abgr,
1141                             int width);
1142void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
1143                             const uint8* src_u,
1144                             const uint8* src_v,
1145                             uint8* dst_rgba,
1146                             int width);
1147void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
1148                                 const uint8* src_u,
1149                                 const uint8* src_v,
1150                                 uint8* dst_rgba,
1151                                 int width);
1152void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
1153                                 const uint8* src_u,
1154                                 const uint8* src_v,
1155                                 uint8* dst_rgba,
1156                                 int width);
1157void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
1158                               const uint8* src_u,
1159                               const uint8* src_v,
1160                               uint8* dst_rgba,
1161                               int width);
1162// RGB24/RAW are unaligned.
1163void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
1164                              const uint8* src_u,
1165                              const uint8* src_v,
1166                              uint8* dst_argb,
1167                              int width);
1168void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
1169                            const uint8* src_u,
1170                            const uint8* src_v,
1171                            uint8* dst_argb,
1172                            int width);
1173void YToARGBRow_SSE2(const uint8* src_y,
1174                     uint8* dst_argb,
1175                     int width);
1176void YToARGBRow_NEON(const uint8* src_y,
1177                     uint8* dst_argb,
1178                     int width);
1179void YToARGBRow_Any_SSE2(const uint8* src_y,
1180                         uint8* dst_argb,
1181                         int width);
1182void YToARGBRow_Any_NEON(const uint8* src_y,
1183                         uint8* dst_argb,
1184                         int width);
1185
1186// ARGB preattenuated alpha blend.
1187void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
1188                        uint8* dst_argb, int width);
1189void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1190                       uint8* dst_argb, int width);
1191void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1192                       uint8* dst_argb, int width);
1193void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
1194                    uint8* dst_argb, int width);
1195
1196// ARGB multiply images. Same API as Blend, but these require
1197// pointer and width alignment for SSE2.
1198void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
1199                       uint8* dst_argb, int width);
1200void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1201                          uint8* dst_argb, int width);
1202void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1203                              uint8* dst_argb, int width);
1204void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1205                          uint8* dst_argb, int width);
1206void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1207                              uint8* dst_argb, int width);
1208void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1209                          uint8* dst_argb, int width);
1210void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1211                              uint8* dst_argb, int width);
1212
1213// ARGB add images.
1214void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
1215                  uint8* dst_argb, int width);
1216void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1217                     uint8* dst_argb, int width);
1218void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1219                         uint8* dst_argb, int width);
1220void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1221                     uint8* dst_argb, int width);
1222void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1223                         uint8* dst_argb, int width);
1224void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1225                     uint8* dst_argb, int width);
1226void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1227                         uint8* dst_argb, int width);
1228
1229// ARGB subtract images. Same API as Blend, but these require
1230// pointer and width alignment for SSE2.
1231void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
1232                       uint8* dst_argb, int width);
1233void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1234                          uint8* dst_argb, int width);
1235void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1236                              uint8* dst_argb, int width);
1237void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1238                          uint8* dst_argb, int width);
1239void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1240                              uint8* dst_argb, int width);
1241void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1242                          uint8* dst_argb, int width);
1243void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1244                              uint8* dst_argb, int width);
1245
1246void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
1247void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
1248void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1249void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1250void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1251
1252void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1253void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1254void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1255void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1256void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1257
1258void I444ToARGBRow_Any_NEON(const uint8* src_y,
1259                            const uint8* src_u,
1260                            const uint8* src_v,
1261                            uint8* dst_argb,
1262                            int width);
1263void I422ToARGBRow_Any_NEON(const uint8* src_y,
1264                            const uint8* src_u,
1265                            const uint8* src_v,
1266                            uint8* dst_argb,
1267                            int width);
1268void I411ToARGBRow_Any_NEON(const uint8* src_y,
1269                            const uint8* src_u,
1270                            const uint8* src_v,
1271                            uint8* dst_argb,
1272                            int width);
1273void I422ToBGRARow_Any_NEON(const uint8* src_y,
1274                            const uint8* src_u,
1275                            const uint8* src_v,
1276                            uint8* dst_argb,
1277                            int width);
1278void I422ToABGRRow_Any_NEON(const uint8* src_y,
1279                            const uint8* src_u,
1280                            const uint8* src_v,
1281                            uint8* dst_argb,
1282                            int width);
1283void I422ToRGBARow_Any_NEON(const uint8* src_y,
1284                            const uint8* src_u,
1285                            const uint8* src_v,
1286                            uint8* dst_argb,
1287                            int width);
1288void I422ToRGB24Row_Any_NEON(const uint8* src_y,
1289                             const uint8* src_u,
1290                             const uint8* src_v,
1291                             uint8* dst_argb,
1292                             int width);
1293void I422ToRAWRow_Any_NEON(const uint8* src_y,
1294                           const uint8* src_u,
1295                           const uint8* src_v,
1296                           uint8* dst_argb,
1297                           int width);
1298void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
1299                                const uint8* src_u,
1300                                const uint8* src_v,
1301                                uint8* dst_argb,
1302                                int width);
1303void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
1304                                const uint8* src_u,
1305                                const uint8* src_v,
1306                                uint8* dst_argb,
1307                                int width);
1308void I422ToRGB565Row_Any_NEON(const uint8* src_y,
1309                              const uint8* src_u,
1310                              const uint8* src_v,
1311                              uint8* dst_argb,
1312                              int width);
1313void NV12ToARGBRow_Any_NEON(const uint8* src_y,
1314                            const uint8* src_uv,
1315                            uint8* dst_argb,
1316                            int width);
1317void NV21ToARGBRow_Any_NEON(const uint8* src_y,
1318                            const uint8* src_uv,
1319                            uint8* dst_argb,
1320                            int width);
1321void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
1322                              const uint8* src_uv,
1323                              uint8* dst_argb,
1324                              int width);
1325void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
1326                              const uint8* src_uv,
1327                              uint8* dst_argb,
1328                              int width);
1329void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
1330                            uint8* dst_argb,
1331                            int width);
1332void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
1333                            uint8* dst_argb,
1334                            int width);
1335void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
1336                              const uint8* src_u,
1337                              const uint8* src_v,
1338                              uint8* dst_argb,
1339                              int width);
1340void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
1341                              const uint8* src_u,
1342                              const uint8* src_v,
1343                              uint8* dst_argb,
1344                              int width);
1345void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
1346                              const uint8* src_u,
1347                              const uint8* src_v,
1348                              uint8* dst_argb,
1349                              int width);
1350void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
1351                              const uint8* src_u,
1352                              const uint8* src_v,
1353                              uint8* dst_argb,
1354                              int width);
1355void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
1356                              const uint8* src_u,
1357                              const uint8* src_v,
1358                              uint8* dst_argb,
1359                              int width);
1360void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
1361                              const uint8* src_u,
1362                              const uint8* src_v,
1363                              uint8* dst_argb,
1364                              int width);
1365
1366void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
1367void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
1368                      uint8* dst_u, uint8* dst_v, int pix);
1369void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
1370                         uint8* dst_u, uint8* dst_v, int pix);
1371void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
1372void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
1373                      uint8* dst_u, uint8* dst_v, int pix);
1374void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
1375                         uint8* dst_u, uint8* dst_v, int pix);
1376void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
1377                               uint8* dst_y, int pix);
1378void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
1379                                uint8* dst_u, uint8* dst_v, int pix);
1380void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
1381                                   uint8* dst_u, uint8* dst_v, int pix);
1382void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
1383void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
1384                      uint8* dst_u, uint8* dst_v, int pix);
1385void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
1386                         uint8* dst_u, uint8* dst_v, int pix);
1387void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
1388void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
1389                   uint8* dst_u, uint8* dst_v, int pix);
1390void YUY2ToUV422Row_C(const uint8* src_yuy2,
1391                      uint8* dst_u, uint8* dst_v, int pix);
1392void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
1393void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
1394                          uint8* dst_u, uint8* dst_v, int pix);
1395void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
1396                             uint8* dst_u, uint8* dst_v, int pix);
1397void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
1398void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
1399                          uint8* dst_u, uint8* dst_v, int pix);
1400void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
1401                             uint8* dst_u, uint8* dst_v, int pix);
1402void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
1403void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
1404                          uint8* dst_u, uint8* dst_v, int pix);
1405void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
1406                             uint8* dst_u, uint8* dst_v, int pix);
1407void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1408void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1409                      uint8* dst_u, uint8* dst_v, int pix);
1410void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1411                         uint8* dst_u, uint8* dst_v, int pix);
1412void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
1413void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
1414                      uint8* dst_u, uint8* dst_v, int pix);
1415void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
1416                         uint8* dst_u, uint8* dst_v, int pix);
1417void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
1418                               uint8* dst_y, int pix);
1419void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
1420                                uint8* dst_u, uint8* dst_v, int pix);
1421void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
1422                                   uint8* dst_u, uint8* dst_v, int pix);
1423void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1424void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1425                      uint8* dst_u, uint8* dst_v, int pix);
1426void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1427                         uint8* dst_u, uint8* dst_v, int pix);
1428void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
1429void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
1430                      uint8* dst_u, uint8* dst_v, int pix);
1431void UYVYToUV422Row_NEON(const uint8* src_uyvy,
1432                         uint8* dst_u, uint8* dst_v, int pix);
1433
1434void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
1435void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
1436                   uint8* dst_u, uint8* dst_v, int pix);
1437void UYVYToUV422Row_C(const uint8* src_uyvy,
1438                      uint8* dst_u, uint8* dst_v, int pix);
1439void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1440void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
1441                          uint8* dst_u, uint8* dst_v, int pix);
1442void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
1443                             uint8* dst_u, uint8* dst_v, int pix);
1444void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
1445void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
1446                          uint8* dst_u, uint8* dst_v, int pix);
1447void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
1448                             uint8* dst_u, uint8* dst_v, int pix);
1449void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
1450void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
1451                          uint8* dst_u, uint8* dst_v, int pix);
1452void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
1453                             uint8* dst_u, uint8* dst_v, int pix);
1454
1455void HalfRow_C(const uint8* src_uv, int src_uv_stride,
1456               uint8* dst_uv, int pix);
1457void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
1458                  uint8* dst_uv, int pix);
1459void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
1460                  uint8* dst_uv, int pix);
1461void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
1462                  uint8* dst_uv, int pix);
1463
1464void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
1465                  uint16* dst_uv, int pix);
1466
1467void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
1468                      uint32 selector, int pix);
1469void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
1470                          uint32 selector, int pix);
1471void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
1472                         uint32 selector, int pix);
1473void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
1474                              uint32 selector, int pix);
1475void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
1476                             uint32 selector, int pix);
1477void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
1478                        uint32 /* selector */, int pix);
1479void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
1480                           uint32 /* selector */, int pix);
1481void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
1482                           uint32 /* selector */, int pix);
1483void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
1484                               uint32 /* selector */, int pix);
1485void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
1486                               uint32 /* selector */, int pix);
1487
1488void I422ToYUY2Row_C(const uint8* src_y,
1489                     const uint8* src_u,
1490                     const uint8* src_v,
1491                     uint8* dst_yuy2, int width);
1492void I422ToUYVYRow_C(const uint8* src_y,
1493                     const uint8* src_u,
1494                     const uint8* src_v,
1495                     uint8* dst_uyvy, int width);
1496void I422ToYUY2Row_SSE2(const uint8* src_y,
1497                        const uint8* src_u,
1498                        const uint8* src_v,
1499                        uint8* dst_yuy2, int width);
1500void I422ToUYVYRow_SSE2(const uint8* src_y,
1501                        const uint8* src_u,
1502                        const uint8* src_v,
1503                        uint8* dst_uyvy, int width);
1504void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
1505                            const uint8* src_u,
1506                            const uint8* src_v,
1507                            uint8* dst_yuy2, int width);
1508void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
1509                            const uint8* src_u,
1510                            const uint8* src_v,
1511                            uint8* dst_uyvy, int width);
1512void I422ToYUY2Row_NEON(const uint8* src_y,
1513                        const uint8* src_u,
1514                        const uint8* src_v,
1515                        uint8* dst_yuy2, int width);
1516void I422ToUYVYRow_NEON(const uint8* src_y,
1517                        const uint8* src_u,
1518                        const uint8* src_v,
1519                        uint8* dst_uyvy, int width);
1520void I422ToYUY2Row_Any_NEON(const uint8* src_y,
1521                            const uint8* src_u,
1522                            const uint8* src_v,
1523                            uint8* dst_yuy2, int width);
1524void I422ToUYVYRow_Any_NEON(const uint8* src_y,
1525                            const uint8* src_u,
1526                            const uint8* src_v,
1527                            uint8* dst_uyvy, int width);
1528
1529// Effects related row functions.
1530void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1531void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1532void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1533void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1534void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1535void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1536                               int width);
1537void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
1538                                int width);
1539void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1540                               int width);
1541void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
1542                               int width);
1543
1544// Inverse table for unattenuate, shared by C and SSE2.
1545extern const uint32 fixed_invtbl8[256];
1546void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1547void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1548void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1549void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1550                                 int width);
1551void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1552                                 int width);
1553
1554void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1555void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1556void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1557
1558void ARGBSepiaRow_C(uint8* dst_argb, int width);
1559void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
1560void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
1561
1562void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
1563                          const int8* matrix_argb, int width);
1564void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1565                              const int8* matrix_argb, int width);
1566void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
1567                             const int8* matrix_argb, int width);
1568
1569void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1570void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1571
1572void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1573void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1574
1575void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
1576                       int interval_offset, int width);
1577void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
1578                          int interval_offset, int width);
1579void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
1580                          int interval_offset, int width);
1581
1582void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1583                    uint32 value);
1584void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
1585                       uint32 value);
1586void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
1587                       uint32 value);
1588
1589// Used for blur.
1590void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
1591                                    int width, int area, uint8* dst, int count);
1592void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
1593                                  const int32* previous_cumsum, int width);
1594
1595void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
1596                                 int width, int area, uint8* dst, int count);
1597void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1598                               const int32* previous_cumsum, int width);
1599
1600LIBYUV_API
1601void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1602                     uint8* dst_argb, const float* uv_dudv, int width);
1603LIBYUV_API
1604void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
1605                        uint8* dst_argb, const float* uv_dudv, int width);
1606
1607// Used for I420Scale, ARGBScale, and ARGBInterpolate.
1608void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1609                      ptrdiff_t src_stride_ptr,
1610                      int width, int source_y_fraction);
1611void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1612                         ptrdiff_t src_stride_ptr, int width,
1613                         int source_y_fraction);
1614void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1615                          ptrdiff_t src_stride_ptr, int width,
1616                          int source_y_fraction);
1617void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1618                         ptrdiff_t src_stride_ptr, int width,
1619                         int source_y_fraction);
1620void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
1621                         ptrdiff_t src_stride_ptr, int width,
1622                         int source_y_fraction);
1623void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1624                                ptrdiff_t src_stride_ptr, int width,
1625                                int source_y_fraction);
1626void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1627                                   ptrdiff_t src_stride_ptr, int width,
1628                                   int source_y_fraction);
1629void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1630                                    ptrdiff_t src_stride_ptr, int width,
1631                                    int source_y_fraction);
1632void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
1633                             ptrdiff_t src_stride_ptr, int width,
1634                             int source_y_fraction);
1635void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1636                             ptrdiff_t src_stride_ptr, int width,
1637                             int source_y_fraction);
1638void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1639                              ptrdiff_t src_stride_ptr, int width,
1640                              int source_y_fraction);
1641void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1642                             ptrdiff_t src_stride_ptr, int width,
1643                             int source_y_fraction);
1644void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1645                                    ptrdiff_t src_stride_ptr, int width,
1646                                    int source_y_fraction);
1647
1648void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
1649                         ptrdiff_t src_stride_ptr,
1650                         int width, int source_y_fraction);
1651
1652// Sobel images.
1653void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
1654                 uint8* dst_sobelx, int width);
1655void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1656                    const uint8* src_y2, uint8* dst_sobelx, int width);
1657void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
1658                    const uint8* src_y2, uint8* dst_sobelx, int width);
1659void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
1660                 uint8* dst_sobely, int width);
1661void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1662                    uint8* dst_sobely, int width);
1663void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
1664                    uint8* dst_sobely, int width);
1665void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1666                uint8* dst_argb, int width);
1667void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1668                   uint8* dst_argb, int width);
1669void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1670                   uint8* dst_argb, int width);
1671void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1672                       uint8* dst_y, int width);
1673void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1674                          uint8* dst_y, int width);
1675void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1676                          uint8* dst_y, int width);
1677void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1678                  uint8* dst_argb, int width);
1679void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1680                     uint8* dst_argb, int width);
1681void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1682                     uint8* dst_argb, int width);
1683
1684void ARGBPolynomialRow_C(const uint8* src_argb,
1685                         uint8* dst_argb, const float* poly,
1686                         int width);
1687void ARGBPolynomialRow_SSE2(const uint8* src_argb,
1688                            uint8* dst_argb, const float* poly,
1689                            int width);
1690void ARGBPolynomialRow_AVX2(const uint8* src_argb,
1691                            uint8* dst_argb, const float* poly,
1692                            int width);
1693
1694void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1695                             const uint8* luma, uint32 lumacoeff);
1696void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1697                                 int width,
1698                                 const uint8* luma, uint32 lumacoeff);
1699
1700#ifdef __cplusplus
1701}  // extern "C"
1702}  // namespace libyuv
1703#endif
1704
1705#endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
1706