1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
12#define INCLUDE_LIBYUV_ROW_H_
13
14#include <stdlib.h>  // For malloc.
15
16#include "libyuv/basic_types.h"
17
18#if defined(__native_client__)
19#include "ppapi/c/pp_macros.h"  // For PPAPI_RELEASE
20#endif
21
22#ifdef __cplusplus
23namespace libyuv {
24extern "C" {
25#endif
26
// True when |p| (a pointer or integer) is a multiple of |a|. The test masks
// the low bits with (a - 1), so |a| is expected to be a power of two.
#define IS_ALIGNED(p, a) ((((uintptr_t)(p)) & ((a) - 1)) == 0)
28
// align_buffer_64(var, size) declares two local pointers:
//   var##_mem - the raw malloc() result; this is what must be freed
//               (see free_aligned_buffer_64).
//   var       - the first 64-byte aligned address inside that allocation.
// (size) + 63 bytes are requested so that rounding the start up to the next
// multiple of 64 still leaves |size| usable bytes. The result of malloc() is
// not checked here. Address arithmetic is done in uintptr_t (as IS_ALIGNED
// does) so that adding 63 can never overflow a signed integer.
#ifdef __cplusplus
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
  uint8* var = reinterpret_cast<uint8*>(                                       \
      (reinterpret_cast<uintptr_t>(var##_mem) + 63) &                          \
      ~static_cast<uintptr_t>(63))
#else
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
  uint8* var = (uint8*)(((uintptr_t)(var##_mem) + 63) &           /* NOLINT */ \
                        ~(uintptr_t)63)                           /* NOLINT */
#endif
39
// Frees the allocation behind an align_buffer_64(var, ...) buffer by calling
// free() on var##_mem, then sets |var| to 0. var##_mem itself is not reset.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement; without it, `if (c) free_aligned_buffer_64(v);` would guard only
// the free() and always execute the assignment.
#define free_aligned_buffer_64(var) \
  do {                              \
    free(var##_mem);                \
    var = 0;                        \
  } while (0)
43
// LIBYUV_DISABLE_X86 is set when any of the following is detected: clang in
// Visual C mode, the iPhone simulator target macro, a coverage build,
// C++/CLI (__CLR_VER) or PNaCl.
#if (defined(__clang__) && defined(_MSC_VER)) || \
    defined(TARGET_IPHONE_SIMULATOR) || defined(COVERAGE_ENABLED) || \
    defined(__CLR_VER) || defined(__pnacl__)
#define LIBYUV_DISABLE_X86
#endif

// LIBYUV_SSSE3_ONLY: the build itself already requires SSSE3.
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) || defined(__SSSE3__)
#define LIBYUV_SSSE3_ONLY
#endif

// Native Client settings, keyed on the pepper release (PPAPI_RELEASE).
#if defined(__native_client__)
// Pepper 33 and later: enable bundle and AVX2 support.
#if PPAPI_RELEASE >= 33
#define NEW_BINUTILS
#endif
// ARM before pepper 37: Neon is turned off.
#if defined(__arm__) && PPAPI_RELEASE < 37
#define LIBYUV_DISABLE_NEON
#endif
#endif  // defined(__native_client__)
61
// The following are available on all x86 platforms (32-bit Visual C, or
// gcc/clang for i386 and x86-64) unless LIBYUV_DISABLE_X86 is defined.
// Each HAS_<ROWFUNCTION>_<ISA> flag is an empty macro meant to be tested with
// #if defined(). Both lists are kept in alphabetical order with exactly one
// definition per flag.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2

// Conversions:
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSSE3
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTOBAYERGGROW_SSE2
#define HAS_ARGBTOBAYERROW_SSSE3
#define HAS_ARGBTORAWROW_SSSE3
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86
#define HAS_HALFROW_SSE2
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TORAWROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3
#define HAS_I422TORGBAROW_SSSE3
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_MERGEUVROW_SSE2
#define HAS_MIRRORROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROW_UV_SSSE3
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_NV21TORGB565ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
#define HAS_UYVYTOARGBROW_SSSE3
#define HAS_UYVYTOUV422ROW_SSE2
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YTOARGBROW_SSE2
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
#endif
164
// 64-bit Visual C (_M_X64) gets this single SSSE3 row function, unless x86
// code is disabled.
#if defined(_M_X64) && !defined(LIBYUV_DISABLE_X86)
#define HAS_I422TOARGBROW_SSSE3
#endif  // defined(_M_X64)
169
// Compiler probes for AVX2 support. Each macro below is defined to 1 only
// when the named compiler is at least the stated version.

// GCC 4.7.0 or later, targeting i386 or x86-64.
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
    ((__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)))
#define GCC_HAS_AVX2 1
#endif  // GCC >= 4.7 on x86

// clang 3.4.0 or later, targeting i386 or x86-64.
#if defined(__clang__) && (defined(__i386__) || defined(__x86_64__)) && \
    ((__clang_major__ > 3) || (__clang_major__ == 3 && __clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4 on x86

// Visual C 2012 (_MSC_VER 1700) or later, 32-bit x86 target only.
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_IX86)
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012
188
// AVX2 row functions shared by every AVX2-capable compiler: gcc 4.7,
// clang 3.4 or VS2012 (see the *_HAS_AVX2 probes).
// The code supports NaCL but requires a new compiler and validator.
#if (defined(GCC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
     defined(VISUALC_HAS_AVX2)) && !defined(LIBYUV_DISABLE_X86)
// Effects:
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#endif  // any compiler with AVX2
200
// The following require VS2012 (VISUALC_HAS_AVX2).
// TODO(fbarchard): Port to gcc.
#if defined(VISUALC_HAS_AVX2) && !defined(LIBYUV_DISABLE_X86)
// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2

// Conversions:
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_HALFROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2
#endif  // defined(VISUALC_HAS_AVX2)
228
// The following are Yasm x86 only:
// TODO(fbarchard): Port AVX2 to inline.
// Three conditions must all hold: x86 code is not disabled, the build
// defines HAVE_YASM, and the target is 32- or 64-bit x86 (Visual C or
// gcc/clang spelling). The target test is joined to the #if with "&& \";
// left as separate lines it is not part of the directive and becomes stray
// tokens in the translation unit whenever the #if is true.
#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) && \
    (defined(_M_IX86) || defined(_M_X64) || \
    defined(__x86_64__) || defined(__i386__))
#define HAS_MERGEUVROW_AVX2
#define HAS_MERGEUVROW_MMX
#define HAS_SPLITUVROW_AVX2
#define HAS_SPLITUVROW_MMX
#define HAS_UYVYTOYROW_AVX2
#define HAS_UYVYTOYROW_MMX
#define HAS_YUY2TOYROW_AVX2
#define HAS_YUY2TOYROW_MMX
#endif
243
// SSE2 row functions that are left out when the build already requires
// SSSE3 (LIBYUV_SSSE3_ONLY).
// NOTE(review): HAS_MIRRORROW_SSE2 is also defined in the general x86
// conversion list earlier in this header, so this guard does not remove it
// on those targets - confirm which behaviour is intended.
#if !defined(LIBYUV_SSSE3_ONLY) && !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBATTENUATEROW_SSE2
#define HAS_ARGBBLENDROW_SSE2
#define HAS_MIRRORROW_SSE2
#endif  // !defined(LIBYUV_SSSE3_ONLY)
252
253// The following are available on arm64 platforms:
254#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
255#endif
256
// Neon row functions. Enabled when the compiler targets Neon (__ARM_NEON__)
// or the build defines LIBYUV_NEON, and LIBYUV_DISABLE_NEON is not defined.
#if (defined(LIBYUV_NEON) || defined(__ARM_NEON__)) && \
    !defined(LIBYUV_DISABLE_NEON)
// Effects:
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
// TODO(fbarchard): Investigate neon unittest failure.
// #define HAS_ARGBCOLORMATRIXROW_NEON

// Conversions:
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTOBAYERGGROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
#define HAS_HALFROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#endif  // Neon
350
// Mips row functions. Requires a __mips__ target whose _MIPS_SIM equals
// _MIPS_SIM_ABI32, and LIBYUV_DISABLE_MIPS not defined.
#if defined(__mips__) && !defined(LIBYUV_DISABLE_MIPS) && \
    (_MIPS_SIM == _MIPS_SIM_ABI32)
#define HAS_COPYROW_MIPS
// DSPR2 variants additionally need __mips_dsp with revision 2 or higher.
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_I422TOABGRROW_MIPS_DSPR2
#define HAS_I422TOARGBROW_MIPS_DSPR2
#define HAS_I422TOBGRAROW_MIPS_DSPR2
#define HAS_INTERPOLATEROWS_MIPS_DSPR2
#define HAS_MIRRORROW_MIPS_DSPR2
#define HAS_MIRRORUVROW_MIPS_DSPR2
#define HAS_SPLITUVROW_MIPS_DSPR2
#endif  // __mips_dsp_rev >= 2
#endif  // Mips
365
366#if defined(_MSC_VER) && !defined(__CLR_VER)
367#define SIMD_ALIGNED(var) __declspec(align(16)) var
368typedef __declspec(align(16)) int16 vec16[8];
369typedef __declspec(align(16)) int32 vec32[4];
370typedef __declspec(align(16)) int8 vec8[16];
371typedef __declspec(align(16)) uint16 uvec16[8];
372typedef __declspec(align(16)) uint32 uvec32[4];
373typedef __declspec(align(16)) uint8 uvec8[16];
374typedef __declspec(align(32)) int16 lvec16[16];
375typedef __declspec(align(32)) int32 lvec32[8];
376typedef __declspec(align(32)) int8 lvec8[32];
377typedef __declspec(align(32)) uint16 ulvec16[16];
378typedef __declspec(align(32)) uint32 ulvec32[8];
379typedef __declspec(align(32)) uint8 ulvec8[32];
380
381#elif defined(__GNUC__)
382// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
383#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
384typedef int16 __attribute__((vector_size(16))) vec16;
385typedef int32 __attribute__((vector_size(16))) vec32;
386typedef int8 __attribute__((vector_size(16))) vec8;
387typedef uint16 __attribute__((vector_size(16))) uvec16;
388typedef uint32 __attribute__((vector_size(16))) uvec32;
389typedef uint8 __attribute__((vector_size(16))) uvec8;
390#else
391#define SIMD_ALIGNED(var) var
392typedef int16 vec16[8];
393typedef int32 vec32[4];
394typedef int8 vec8[16];
395typedef uint16 uvec16[8];
396typedef uint32 uvec32[4];
397typedef uint8 uvec8[16];
398#endif
399
// OMITFP: function attribute requesting "omit-frame-pointer" optimization.
// It expands to nothing on Apple, x86-64 and LLVM-based builds.
#if !defined(__APPLE__) && !defined(__x86_64__) && !defined(__llvm__)
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#else
#define OMITFP
#endif
405
406// NaCL macros for GCC x86 and x64.
407
// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
// NEW_BINUTILS and remove all BUNDLEALIGN occurrences.
// Assembler alignment directive emitted before labels: .p2align 2 normally,
// .p2align 5 when building for Native Client.
#if !defined(__native_client__)
#define LABELALIGN ".p2align 2\n"
#else
#define LABELALIGN ".p2align 5\n"
#endif
// String-building macros for inline-assembly memory operands. Each macro
// stringizes its arguments and concatenates them with literal fragments into
// one operand or instruction string. Two flavours exist: the plain x86 form,
// and the Native Client x86-64 form, which routes accesses through %r15 (and
// %r14 for indexed forms) and brackets multi-instruction sequences with
// BUNDLELOCK / BUNDLEUNLOCK.
#if !(defined(__native_client__) && defined(__x86_64__))
// Plain form. MEMMOVESTRING and MEMSTORESTRING expand to nothing here, and
// BUNDLELOCK / BUNDLEUNLOCK are not defined.
#define BUNDLEALIGN "\n"
#define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
    #offset "(%" #base ",%" #index "," #scale ")"
#define MEMMOVESTRING(s, d)
#define MEMSTORESTRING(reg, d)
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    #opcode " %%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#else  // defined(__native_client__) && defined(__x86_64__)
// Native Client x86-64 form.
#if !defined(NEW_BINUTILS)
// Older binutils: no bundle lock directives; align bundles explicitly.
#define BUNDLELOCK "\n"
#define BUNDLEUNLOCK "\n"
#define BUNDLEALIGN ".p2align 5\n"
#else
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
#define BUNDLEALIGN "\n"
#endif
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
#define MEMLEA3(offset, index, scale) #offset "(,%q" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
    #offset "(%q" #base ",%q" #index "," #scale ")"
#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
// The indexed forms first compute the address into %r14d with lea, then
// access (%r15,%r14); lock/unlock keeps the pair together.
#define MEMOPREG(opcode, offset, base, index, scale, reg)             \
    BUNDLELOCK                                                        \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"   \
    #opcode " (%%r15,%%r14),%%" #reg "\n"                             \
    BUNDLEUNLOCK
#define MEMOPMEM(opcode, reg, offset, base, index, scale)             \
    BUNDLELOCK                                                        \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"   \
    #opcode " %%" #reg ",(%%r15,%%r14)\n"                             \
    BUNDLEUNLOCK
#define MEMOPARG(opcode, offset, base, index, scale, arg)             \
    BUNDLELOCK                                                        \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n"   \
    #opcode " (%%r15,%%r14),%" #arg "\n"                              \
    BUNDLEUNLOCK
#endif  // !(defined(__native_client__) && defined(__x86_64__))
467
// On 32-bit ARM the x86 MEMACCESS definition is replaced: Native Client
// builds emit an alignment directive plus a bic that clears the top two
// address bits of |base|; other ARM builds emit just a newline.
#if defined(__arm__) && defined(__native_client__)
#undef MEMACCESS
#define MEMACCESS(base) ".p2align   3\nbic %" #base ", #0xc0000000\n"
#elif defined(__arm__)
#undef MEMACCESS
#define MEMACCESS(base) "\n"
#endif
476
477void I444ToARGBRow_NEON(const uint8* src_y,
478                        const uint8* src_u,
479                        const uint8* src_v,
480                        uint8* dst_argb,
481                        int width);
482void I422ToARGBRow_NEON(const uint8* src_y,
483                        const uint8* src_u,
484                        const uint8* src_v,
485                        uint8* dst_argb,
486                        int width);
487void I411ToARGBRow_NEON(const uint8* src_y,
488                        const uint8* src_u,
489                        const uint8* src_v,
490                        uint8* dst_argb,
491                        int width);
492void I422ToBGRARow_NEON(const uint8* src_y,
493                        const uint8* src_u,
494                        const uint8* src_v,
495                        uint8* dst_bgra,
496                        int width);
497void I422ToABGRRow_NEON(const uint8* src_y,
498                        const uint8* src_u,
499                        const uint8* src_v,
500                        uint8* dst_abgr,
501                        int width);
502void I422ToRGBARow_NEON(const uint8* src_y,
503                        const uint8* src_u,
504                        const uint8* src_v,
505                        uint8* dst_rgba,
506                        int width);
507void I422ToRGB24Row_NEON(const uint8* src_y,
508                         const uint8* src_u,
509                         const uint8* src_v,
510                         uint8* dst_rgb24,
511                         int width);
512void I422ToRAWRow_NEON(const uint8* src_y,
513                       const uint8* src_u,
514                       const uint8* src_v,
515                       uint8* dst_raw,
516                       int width);
517void I422ToRGB565Row_NEON(const uint8* src_y,
518                          const uint8* src_u,
519                          const uint8* src_v,
520                          uint8* dst_rgb565,
521                          int width);
522void I422ToARGB1555Row_NEON(const uint8* src_y,
523                            const uint8* src_u,
524                            const uint8* src_v,
525                            uint8* dst_argb1555,
526                            int width);
527void I422ToARGB4444Row_NEON(const uint8* src_y,
528                            const uint8* src_u,
529                            const uint8* src_v,
530                            uint8* dst_argb4444,
531                            int width);
532void NV12ToARGBRow_NEON(const uint8* src_y,
533                        const uint8* src_uv,
534                        uint8* dst_argb,
535                        int width);
536void NV21ToARGBRow_NEON(const uint8* src_y,
537                        const uint8* src_vu,
538                        uint8* dst_argb,
539                        int width);
540void NV12ToRGB565Row_NEON(const uint8* src_y,
541                          const uint8* src_uv,
542                          uint8* dst_rgb565,
543                          int width);
544void NV21ToRGB565Row_NEON(const uint8* src_y,
545                          const uint8* src_vu,
546                          uint8* dst_rgb565,
547                          int width);
548void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
549                        uint8* dst_argb,
550                        int width);
551void UYVYToARGBRow_NEON(const uint8* src_uyvy,
552                        uint8* dst_argb,
553                        int width);
554
555void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
556void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
557void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
558void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
559void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
560void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
561void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
562void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
563void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
564void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
565void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
566void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
567void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
568void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
569void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
570void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
571void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
572void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
573void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
574void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
575void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
576                         int pix);
577void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
578                         int pix);
579void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
580                         int pix);
581void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
582                      uint8* dst_u, uint8* dst_v, int pix);
583void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
584                       uint8* dst_u, uint8* dst_v, int pix);
585void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
586                      uint8* dst_u, uint8* dst_v, int pix);
587void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
588                      uint8* dst_u, uint8* dst_v, int pix);
589void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
590                      uint8* dst_u, uint8* dst_v, int pix);
591void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
592                       uint8* dst_u, uint8* dst_v, int pix);
593void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
594                     uint8* dst_u, uint8* dst_v, int pix);
595void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
596                        uint8* dst_u, uint8* dst_v, int pix);
597void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
598                          uint8* dst_u, uint8* dst_v, int pix);
599void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
600                          uint8* dst_u, uint8* dst_v, int pix);
601void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
602void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
603void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
604void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
605void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
606void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
607void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
608void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
609void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
610void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
611void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
612void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
613void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
614void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
615void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
616void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
617void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
618void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
619void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
620void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
621void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
622void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
623void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
624void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
625void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
626void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
627void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
628void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
629void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
630void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
631void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
632void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
633void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
634void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
635void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
636
637void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
638                      uint8* dst_u, uint8* dst_v, int width);
639void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
640                          uint8* dst_u, uint8* dst_v, int width);
641void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
642                       uint8* dst_u, uint8* dst_v, int width);
643void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
644                        uint8* dst_u, uint8* dst_v, int width);
645void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
646                       uint8* dst_u, uint8* dst_v, int width);
647void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
648                       uint8* dst_u, uint8* dst_v, int width);
649void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
650                       uint8* dst_u, uint8* dst_v, int width);
651void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
652                                 uint8* dst_u, uint8* dst_v, int width);
653void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
654                                  uint8* dst_u, uint8* dst_v, int width);
655void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
656                                 uint8* dst_u, uint8* dst_v, int width);
657void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
658                                 uint8* dst_u, uint8* dst_v, int width);
659void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
660                                 uint8* dst_u, uint8* dst_v, int width);
661void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
662                           uint8* dst_u, uint8* dst_v, int width);
663void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
664                            uint8* dst_u, uint8* dst_v, int width);
665void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
666                           uint8* dst_u, uint8* dst_v, int width);
667void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
668                           uint8* dst_u, uint8* dst_v, int width);
669void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
670                           uint8* dst_u, uint8* dst_v, int width);
671void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
672                             int pix);
673void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
674                             int pix);
675void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
676                             int pix);
677void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
678                          uint8* dst_u, uint8* dst_v, int pix);
679void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
680                           uint8* dst_u, uint8* dst_v, int pix);
681void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
682                          uint8* dst_u, uint8* dst_v, int pix);
683void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
684                          uint8* dst_u, uint8* dst_v, int pix);
685void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
686                          uint8* dst_u, uint8* dst_v, int pix);
// <Format>ToUVRow prototypes. Each takes a const source row pointer plus a
// source stride, separate dst_u and dst_v output pointers, and a pixel count
// (named pix in the _Any_NEON prototypes, width in the _C prototypes).
// NOTE(review): the stride argument suggests an adjacent source row is also
// read - confirm against the implementations.
687void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
688                           uint8* dst_u, uint8* dst_v, int pix);
689void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
690                         uint8* dst_u, uint8* dst_v, int pix);
691void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
692                            uint8* dst_u, uint8* dst_v, int pix);
693void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
694                              int src_stride_argb1555,
695                              uint8* dst_u, uint8* dst_v, int pix);
696void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
697                              int src_stride_argb4444,
698                              uint8* dst_u, uint8* dst_v, int pix);
// _C prototypes, one per source pixel format.
699void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
700                   uint8* dst_u, uint8* dst_v, int width);
701void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
702                    uint8* dst_u, uint8* dst_v, int width);
703void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
704                   uint8* dst_u, uint8* dst_v, int width);
705void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
706                   uint8* dst_u, uint8* dst_v, int width);
707void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
708                   uint8* dst_u, uint8* dst_v, int width);
709void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
710                    uint8* dst_u, uint8* dst_v, int width);
711void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
712                  uint8* dst_u, uint8* dst_v, int width);
713void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
714                     uint8* dst_u, uint8* dst_v, int width);
715void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
716                       uint8* dst_u, uint8* dst_v, int width);
717void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
718                       uint8* dst_u, uint8* dst_v, int width);
719
// ARGBToUV444Row, SSSE3 prototypes (plain, _Unaligned and _Any).
// These take a single source pointer with no stride argument, plus dst_u,
// dst_v and a width.
720void ARGBToUV444Row_SSSE3(const uint8* src_argb,
721                          uint8* dst_u, uint8* dst_v, int width);
722void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
723                                    uint8* dst_u, uint8* dst_v, int width);
724void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
725                              uint8* dst_u, uint8* dst_v, int width);
726
// ARGBToUV422Row, SSSE3 prototypes (plain, _Unaligned and _Any).
// Same parameter list as the ARGBToUV444Row prototypes: src_argb, dst_u,
// dst_v, width.
727void ARGBToUV422Row_SSSE3(const uint8* src_argb,
728                          uint8* dst_u, uint8* dst_v, int width);
729void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
730                                    uint8* dst_u, uint8* dst_v, int width);
731void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
732                              uint8* dst_u, uint8* dst_v, int width);
733
// _C prototypes for the single-row ARGB to U/V conversions (444, 422, 411).
// NOTE(review): the numeric suffix presumably names the chroma subsampling
// of the output - confirm against the implementations.
734void ARGBToUV444Row_C(const uint8* src_argb,
735                      uint8* dst_u, uint8* dst_v, int width);
736void ARGBToUV422Row_C(const uint8* src_argb,
737                      uint8* dst_u, uint8* dst_v, int width);
738void ARGBToUV411Row_C(const uint8* src_argb,
739                      uint8* dst_u, uint8* dst_v, int width);
740
// MirrorRow prototypes: const src, dst and a width, one per implementation
// suffix (AVX2, SSSE3, SSE2, NEON, MIPS_DSPR2, C).
// NOTE(review): presumably writes the source row reversed - confirm.
741void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
742void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
743void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
744void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
745void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
746void MirrorRow_C(const uint8* src, uint8* dst, int width);
747
// MirrorUVRow prototypes: a single src_uv input and separate dst_u / dst_v
// outputs, plus a width.
748void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
749                       int width);
750void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
751                      int width);
752void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
753                            int width);
754void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
755                   int width);
756
// ARGBMirrorRow prototypes: same (src, dst, width) parameter list as
// MirrorRow.
// NOTE(review): width is presumably in ARGB pixels rather than bytes - confirm.
757void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
758void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
759void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
760void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
761
// SplitUVRow prototypes: one src_uv input, separate dst_u and dst_v outputs,
// and a count named pix. Plain, _Unaligned and _Any variants are declared.
// NOTE(review): whether pix counts U/V pairs or bytes is not visible in this
// header - confirm against the implementations.
762void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
763void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
764void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
765void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
766void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
767                           int pix);
768void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
769                               int pix);
770void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
771                                     uint8* dst_v, int pix);
772void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
773                         int pix);
774void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
775                         int pix);
776void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
777                         int pix);
778void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
779                               int pix);
780
// MergeUVRow prototypes: separate src_u and src_v inputs, a single dst_uv
// output, and a width (the mirror image of the SplitUVRow parameter list).
781void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
782                  int width);
783void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
784                     int width);
785void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
786                     int width);
787void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
788                     int width);
789void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
790                               uint8* dst_uv, int width);
791void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
792                         int width);
793void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
794                         int width);
795void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
796                         int width);
797
// CopyRow prototypes over uint8 data: const src, dst and a count, one per
// implementation suffix (SSE2, ERMS, X86, NEON, MIPS, C).
798void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
799void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
800void CopyRow_X86(const uint8* src, uint8* dst, int count);
801void CopyRow_NEON(const uint8* src, uint8* dst, int count);
802void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
803void CopyRow_C(const uint8* src, uint8* dst, int count);
804
// CopyRow variant taking uint16 source and destination pointers.
// NOTE(review): count is presumably in uint16 elements, not bytes - confirm.
805void CopyRow_16_C(const uint16* src, uint16* dst, int count);
806
// ARGBCopyAlphaRow prototypes: ARGB source, ARGB destination, width.
// NOTE(review): presumably only the alpha channel of dst_argb is written -
// confirm against the implementations.
807void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
808void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
809void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
810
// ARGBCopyYToAlphaRow prototypes: a src_y source, an ARGB destination, width.
// NOTE(review): presumably stores each Y value into the alpha channel of
// dst_argb - confirm against the implementations.
811void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
812void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
813void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
814
// SetRow_*: take a destination, a 32-bit value v32 and a count.
// ARGBSetRows_*: take a destination, v32, a width, a destination stride and
// a height, i.e. the parameters of a multi-row region.
// NOTE(review): how v32 maps onto bytes for SetRow is not visible here -
// confirm against the implementations.
815void SetRow_X86(uint8* dst, uint32 v32, int count);
816void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
817                     int dst_stride, int height);
818void SetRow_NEON(uint8* dst, uint32 v32, int count);
819void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
820                      int dst_stride, int height);
821void SetRow_C(uint8* dst, uint32 v32, int count);
822void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
823                   int height);
824
825// ARGBShufflers for BGRAToARGB etc.
// Every variant shares one parameter list: src_argb, dst_argb, a const uint8*
// shuffler argument and a pixel count.
// NOTE(review): the layout and length of the shuffler data are not visible
// in this header - confirm against the callers.
826void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
827                      const uint8* shuffler, int pix);
828void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
829                         const uint8* shuffler, int pix);
830void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
831                          const uint8* shuffler, int pix);
832void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
833                         const uint8* shuffler, int pix);
834void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
835                         const uint8* shuffler, int pix);
836void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
837                                    const uint8* shuffler, int pix);
838void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
839                             const uint8* shuffler, int pix);
840void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
841                              const uint8* shuffler, int pix);
842void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
843                             const uint8* shuffler, int pix);
844void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
845                             const uint8* shuffler, int pix);
846
// <Format>ToARGBRow prototypes, SSSE3 / SSE2 variants: a const source row in
// the named format, an ARGB destination, and a pixel count.
847void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
848void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
849void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
850void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
851                            int pix);
852void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
853                            int pix);
854
// <Format>ToARGBRow prototypes, NEON variants. Parameter list: const source
// row, dst_argb, pixel count.
855void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
856void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
857void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
858void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
859                            int pix);
860void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
861                            int pix);
// _C variants. Note the source parameter of the RGB565 / ARGB1555 / ARGB4444
// prototypes is named generically (src_rgb / src_argb) here.
862void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
863void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
864void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
865void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
866void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
// _Any_SSSE3 / _Any_SSE2 variants.
867void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
868void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
869void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
870                              int pix);
871void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
872                                int pix);
873void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
874                                int pix);
// _Any_NEON variants.
875void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
876void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
877void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
878                              int pix);
879void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
880                                int pix);
881void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
882                                int pix);
883
// ARGBTo<Format>Row prototypes, SSSE3 / SSE2 variants: ARGB source, a
// destination named dst_rgb for every target format, and a pixel count.
884void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
885void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
886void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
887void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
888void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
889
// ARGBTo<Format>Row prototypes, NEON variants: src_argb, dst_rgb, pix.
890void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
891void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
892void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
893void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
894void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
895
// ARGBTo<Format>Row prototypes, _C variants: src_argb, dst_rgb, pix.
// ARGBToRGBARow is declared here only in its _C form.
896void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
897void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
898void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
899void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
900void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
901void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
902
// I400ToARGBRow prototypes: a src_y source, an ARGB destination and a pixel
// count; SSE2 (plain, _Unaligned, _Any), NEON (plain, _Any) and C variants.
903void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
904void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
905void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
906void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
907void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
908void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
909
// YUV to RGB row prototypes, _C variants. Three source layouts appear:
//   I4xx*: separate src_y, src_u and src_v pointers;
//   NV12* / NV21*: src_y plus one chroma pointer (src_uv / src_vu);
//   YUY2* / UYVY*: a single packed source pointer.
// All end with a destination pointer and a width.
910void I444ToARGBRow_C(const uint8* src_y,
911                     const uint8* src_u,
912                     const uint8* src_v,
913                     uint8* dst_argb,
914                     int width);
915void I422ToARGBRow_C(const uint8* src_y,
916                     const uint8* src_u,
917                     const uint8* src_v,
918                     uint8* dst_argb,
919                     int width);
920void I411ToARGBRow_C(const uint8* src_y,
921                     const uint8* src_u,
922                     const uint8* src_v,
923                     uint8* dst_argb,
924                     int width);
925void NV12ToARGBRow_C(const uint8* src_y,
926                     const uint8* src_uv,
927                     uint8* dst_argb,
928                     int width);
// NOTE(review): the two RGB565 prototypes below name their destination
// dst_argb although the function names target RGB565 - likely copy-paste.
929void NV21ToRGB565Row_C(const uint8* src_y,
930                       const uint8* src_vu,
931                       uint8* dst_argb,
932                       int width);
933void NV12ToRGB565Row_C(const uint8* src_y,
934                       const uint8* src_uv,
935                       uint8* dst_argb,
936                       int width);
937void NV21ToARGBRow_C(const uint8* src_y,
938                     const uint8* src_vu,
939                     uint8* dst_argb,
940                     int width);
941void YUY2ToARGBRow_C(const uint8* src_yuy2,
942                     uint8* dst_argb,
943                     int width);
944void UYVYToARGBRow_C(const uint8* src_uyvy,
945                     uint8* dst_argb,
946                     int width);
947void I422ToBGRARow_C(const uint8* src_y,
948                     const uint8* src_u,
949                     const uint8* src_v,
950                     uint8* dst_bgra,
951                     int width);
952void I422ToABGRRow_C(const uint8* src_y,
953                     const uint8* src_u,
954                     const uint8* src_v,
955                     uint8* dst_abgr,
956                     int width);
957void I422ToRGBARow_C(const uint8* src_y,
958                     const uint8* src_u,
959                     const uint8* src_v,
960                     uint8* dst_rgba,
961                     int width);
962void I422ToRGB24Row_C(const uint8* src_y,
963                      const uint8* src_u,
964                      const uint8* src_v,
965                      uint8* dst_rgb24,
966                      int width);
967void I422ToRAWRow_C(const uint8* src_y,
968                    const uint8* src_u,
969                    const uint8* src_v,
970                    uint8* dst_raw,
971                    int width);
972void I422ToARGB4444Row_C(const uint8* src_y,
973                         const uint8* src_u,
974                         const uint8* src_v,
975                         uint8* dst_argb4444,
976                         int width);
// I422ToARGB1555Row, _C variant: separate src_y / src_u / src_v pointers, a
// destination pointer and a width.
// The destination is named dst_argb1555 to match the function's target
// format (it was previously named dst_argb4444). Prototype parameter names
// are not part of the function type, so callers are unaffected.
977void I422ToARGB1555Row_C(const uint8* src_y,
978                         const uint8* src_u,
979                         const uint8* src_v,
980                         uint8* dst_argb1555,
981                         int width);
// I422ToRGB565Row, _C variant: separate Y/U/V source pointers, dst_rgb565,
// width.
982void I422ToRGB565Row_C(const uint8* src_y,
983                       const uint8* src_u,
984                       const uint8* src_v,
985                       uint8* dst_rgb565,
986                       int width);
// YToARGBRow, _C variant: a single src_y source, dst_argb, width.
987void YToARGBRow_C(const uint8* src_y,
988                  uint8* dst_argb,
989                  int width);
// YUV to RGB row prototypes, AVX2 / SSSE3 variants. Parameter lists mirror
// the _C prototypes: Y/U/V (or Y plus one chroma pointer, or one packed
// source), then a destination pointer and a width.
990void I422ToARGBRow_AVX2(const uint8* src_y,
991                        const uint8* src_u,
992                        const uint8* src_v,
993                        uint8* dst_argb,
994                        int width);
995void I444ToARGBRow_SSSE3(const uint8* src_y,
996                         const uint8* src_u,
997                         const uint8* src_v,
998                         uint8* dst_argb,
999                         int width);
1000void I422ToARGBRow_SSSE3(const uint8* src_y,
1001                         const uint8* src_u,
1002                         const uint8* src_v,
1003                         uint8* dst_argb,
1004                         int width);
1005void I411ToARGBRow_SSSE3(const uint8* src_y,
1006                         const uint8* src_u,
1007                         const uint8* src_v,
1008                         uint8* dst_argb,
1009                         int width);
1010void NV12ToARGBRow_SSSE3(const uint8* src_y,
1011                         const uint8* src_uv,
1012                         uint8* dst_argb,
1013                         int width);
1014void NV21ToARGBRow_SSSE3(const uint8* src_y,
1015                         const uint8* src_vu,
1016                         uint8* dst_argb,
1017                         int width);
// NOTE(review): the RGB565 / ARGB4444 / ARGB1555 prototypes in this group
// name their destination dst_argb regardless of the target format.
1018void NV12ToRGB565Row_SSSE3(const uint8* src_y,
1019                           const uint8* src_uv,
1020                           uint8* dst_argb,
1021                           int width);
1022void NV21ToRGB565Row_SSSE3(const uint8* src_y,
1023                           const uint8* src_vu,
1024                           uint8* dst_argb,
1025                           int width);
1026void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
1027                         uint8* dst_argb,
1028                         int width);
1029void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
1030                         uint8* dst_argb,
1031                         int width);
1032void I422ToBGRARow_SSSE3(const uint8* src_y,
1033                         const uint8* src_u,
1034                         const uint8* src_v,
1035                         uint8* dst_bgra,
1036                         int width);
1037void I422ToABGRRow_SSSE3(const uint8* src_y,
1038                         const uint8* src_u,
1039                         const uint8* src_v,
1040                         uint8* dst_abgr,
1041                         int width);
1042void I422ToRGBARow_SSSE3(const uint8* src_y,
1043                         const uint8* src_u,
1044                         const uint8* src_v,
1045                         uint8* dst_rgba,
1046                         int width);
1047void I422ToARGB4444Row_SSSE3(const uint8* src_y,
1048                             const uint8* src_u,
1049                             const uint8* src_v,
1050                             uint8* dst_argb,
1051                             int width);
1052void I422ToARGB1555Row_SSSE3(const uint8* src_y,
1053                             const uint8* src_u,
1054                             const uint8* src_v,
1055                             uint8* dst_argb,
1056                             int width);
1057void I422ToRGB565Row_SSSE3(const uint8* src_y,
1058                           const uint8* src_u,
1059                           const uint8* src_v,
1060                           uint8* dst_argb,
1061                           int width);
1062// RGB24/RAW are unaligned.
1063void I422ToRGB24Row_SSSE3(const uint8* src_y,
1064                          const uint8* src_u,
1065                          const uint8* src_v,
1066                          uint8* dst_rgb24,
1067                          int width);
1068void I422ToRAWRow_SSSE3(const uint8* src_y,
1069                        const uint8* src_u,
1070                        const uint8* src_v,
1071                        uint8* dst_raw,
1072                        int width);
1073
// YUV to RGB row prototypes, _Unaligned_SSSE3 variants. Parameter lists are
// identical to the corresponding plain _SSSE3 prototypes.
// NOTE(review): presumably these accept pointers without the alignment the
// plain _SSSE3 versions expect - confirm against the implementations.
1074void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1075                                   const uint8* src_u,
1076                                   const uint8* src_v,
1077                                   uint8* dst_argb,
1078                                   int width);
1079void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1080                                   const uint8* src_u,
1081                                   const uint8* src_v,
1082                                   uint8* dst_argb,
1083                                   int width);
1084void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1085                                   const uint8* src_u,
1086                                   const uint8* src_v,
1087                                   uint8* dst_argb,
1088                                   int width);
1089void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1090                                   const uint8* src_uv,
1091                                   uint8* dst_argb,
1092                                   int width);
1093void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
1094                                   const uint8* src_vu,
1095                                   uint8* dst_argb,
1096                                   int width);
1097void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
1098                                   uint8* dst_argb,
1099                                   int width);
1100void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
1101                                   uint8* dst_argb,
1102                                   int width);
1103void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
1104                                   const uint8* src_u,
1105                                   const uint8* src_v,
1106                                   uint8* dst_bgra,
1107                                   int width);
1108void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
1109                                   const uint8* src_u,
1110                                   const uint8* src_v,
1111                                   uint8* dst_abgr,
1112                                   int width);
1113void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
1114                                   const uint8* src_u,
1115                                   const uint8* src_v,
1116                                   uint8* dst_rgba,
1117                                   int width);
// YUV to RGB row prototypes, _Any_AVX2 / _Any_SSSE3 variants. Parameter
// lists match the corresponding plain prototypes.
// NOTE(review): "_Any" presumably means any width is accepted - confirm
// against the implementations.
1118void I422ToARGBRow_Any_AVX2(const uint8* src_y,
1119                            const uint8* src_u,
1120                            const uint8* src_v,
1121                            uint8* dst_argb,
1122                            int width);
1123void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
1124                             const uint8* src_u,
1125                             const uint8* src_v,
1126                             uint8* dst_argb,
1127                             int width);
1128void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
1129                             const uint8* src_u,
1130                             const uint8* src_v,
1131                             uint8* dst_argb,
1132                             int width);
1133void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
1134                             const uint8* src_u,
1135                             const uint8* src_v,
1136                             uint8* dst_argb,
1137                             int width);
1138void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
1139                             const uint8* src_uv,
1140                             uint8* dst_argb,
1141                             int width);
1142void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
1143                             const uint8* src_vu,
1144                             uint8* dst_argb,
1145                             int width);
1146void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
1147                               const uint8* src_uv,
1148                               uint8* dst_argb,
1149                               int width);
1150void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
1151                               const uint8* src_vu,
1152                               uint8* dst_argb,
1153                               int width);
1154void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
1155                             uint8* dst_argb,
1156                             int width);
1157void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
1158                             uint8* dst_argb,
1159                             int width);
1160void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
1161                             const uint8* src_u,
1162                             const uint8* src_v,
1163                             uint8* dst_bgra,
1164                             int width);
1165void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
1166                             const uint8* src_u,
1167                             const uint8* src_v,
1168                             uint8* dst_abgr,
1169                             int width);
1170void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
1171                             const uint8* src_u,
1172                             const uint8* src_v,
1173                             uint8* dst_rgba,
1174                             int width);
// I422 to ARGB4444 / ARGB1555 / RGB565, _Any_SSSE3 variants: separate
// src_y / src_u / src_v pointers, a destination pointer and a width.
// Each destination is named after its own target format, matching the _C
// prototypes (all three were previously named dst_rgba). Prototype parameter
// names are not part of the function type, so callers are unaffected.
1175void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
1176                                 const uint8* src_u,
1177                                 const uint8* src_v,
1178                                 uint8* dst_argb4444,
1179                                 int width);
1180void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
1181                                 const uint8* src_u,
1182                                 const uint8* src_v,
1183                                 uint8* dst_argb1555,
1184                                 int width);
1185void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
1186                               const uint8* src_u,
1187                               const uint8* src_v,
1188                               uint8* dst_rgb565,
1189                               int width);
1190// RGB24/RAW are unaligned.
// NOTE(review): both prototypes name their destination dst_argb although the
// plain _SSSE3 prototypes use dst_rgb24 / dst_raw - likely copy-paste.
1191void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
1192                              const uint8* src_u,
1193                              const uint8* src_v,
1194                              uint8* dst_argb,
1195                              int width);
1196void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
1197                            const uint8* src_u,
1198                            const uint8* src_v,
1199                            uint8* dst_argb,
1200                            int width);
// YToARGBRow prototypes, SSE2 / NEON variants and their _Any forms: a single
// src_y source, an ARGB destination and a width.
1201void YToARGBRow_SSE2(const uint8* src_y,
1202                     uint8* dst_argb,
1203                     int width);
1204void YToARGBRow_NEON(const uint8* src_y,
1205                     uint8* dst_argb,
1206                     int width);
1207void YToARGBRow_Any_SSE2(const uint8* src_y,
1208                         uint8* dst_argb,
1209                         int width);
1210void YToARGBRow_Any_NEON(const uint8* src_y,
1211                         uint8* dst_argb,
1212                         int width);
1213
1214// ARGB preattenuated alpha blend.
// Each variant takes two const ARGB sources (src_argb, src_argb1), one ARGB
// destination and a width.
// NOTE(review): which source is foreground and which is background is not
// visible in this header - confirm against the implementations.
1215void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
1216                        uint8* dst_argb, int width);
1217void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1218                       uint8* dst_argb, int width);
1219void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1220                       uint8* dst_argb, int width);
1221void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
1222                    uint8* dst_argb, int width);
1223
1224// ARGB multiply images. Same API as Blend, but these require
1225// pointer and width alignment for SSE2.
// Parameter list: src_argb, src_argb1, dst_argb, width. C, SSE2, AVX2 and
// NEON variants are declared, the SIMD ones also in an _Any form.
1226void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
1227                       uint8* dst_argb, int width);
1228void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1229                          uint8* dst_argb, int width);
1230void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1231                              uint8* dst_argb, int width);
1232void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1233                          uint8* dst_argb, int width);
1234void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1235                              uint8* dst_argb, int width);
1236void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1237                          uint8* dst_argb, int width);
1238void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1239                              uint8* dst_argb, int width);
1240
1241// ARGB add images.
// Parameter list: src_argb, src_argb1, dst_argb, width. C, SSE2, AVX2 and
// NEON variants are declared, the SIMD ones also in an _Any form.
// NOTE(review): overflow handling (saturate vs. wrap) is not visible in this
// header - confirm against the implementations.
1242void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
1243                  uint8* dst_argb, int width);
1244void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1245                     uint8* dst_argb, int width);
1246void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1247                         uint8* dst_argb, int width);
1248void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1249                     uint8* dst_argb, int width);
1250void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1251                         uint8* dst_argb, int width);
1252void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1253                     uint8* dst_argb, int width);
1254void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1255                         uint8* dst_argb, int width);
1256
1257// ARGB subtract images. Same API as Blend, but these require
1258// pointer and width alignment for SSE2.
// Parameter list: src_argb, src_argb1, dst_argb, width.
// NOTE(review): the operand order (src_argb - src_argb1 or the reverse) is
// not visible in this header - confirm against the implementations.
1259void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
1260                       uint8* dst_argb, int width);
1261void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1262                          uint8* dst_argb, int width);
1263void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1264                              uint8* dst_argb, int width);
1265void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1266                          uint8* dst_argb, int width);
1267void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1268                              uint8* dst_argb, int width);
1269void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1270                          uint8* dst_argb, int width);
1271void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1272                              uint8* dst_argb, int width);
1273
// ARGBTo<Format>Row prototypes, _Any_SSSE3 / _Any_SSE2 variants: src_argb,
// dst_rgb, pix (same parameter list as the plain SSSE3 / SSE2 prototypes).
1274void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
1275void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
1276void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1277void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1278void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
1279
// ARGBTo<Format>Row prototypes, _Any_NEON variants: src_argb, dst_rgb, pix.
1280void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1281void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1282void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1283void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1284void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
1285
// YUV to RGB row prototypes, _Any_NEON variants: Y/U/V (or Y plus one chroma
// pointer) sources, a destination pointer and a width.
// NOTE(review): every prototype in this group names its destination dst_argb
// regardless of the target format (BGRA, ABGR, RGBA, RGB24, RAW, ARGB4444,
// ARGB1555, RGB565) - likely copy-paste.
1286void I444ToARGBRow_Any_NEON(const uint8* src_y,
1287                            const uint8* src_u,
1288                            const uint8* src_v,
1289                            uint8* dst_argb,
1290                            int width);
1291void I422ToARGBRow_Any_NEON(const uint8* src_y,
1292                            const uint8* src_u,
1293                            const uint8* src_v,
1294                            uint8* dst_argb,
1295                            int width);
1296void I411ToARGBRow_Any_NEON(const uint8* src_y,
1297                            const uint8* src_u,
1298                            const uint8* src_v,
1299                            uint8* dst_argb,
1300                            int width);
1301void I422ToBGRARow_Any_NEON(const uint8* src_y,
1302                            const uint8* src_u,
1303                            const uint8* src_v,
1304                            uint8* dst_argb,
1305                            int width);
1306void I422ToABGRRow_Any_NEON(const uint8* src_y,
1307                            const uint8* src_u,
1308                            const uint8* src_v,
1309                            uint8* dst_argb,
1310                            int width);
1311void I422ToRGBARow_Any_NEON(const uint8* src_y,
1312                            const uint8* src_u,
1313                            const uint8* src_v,
1314                            uint8* dst_argb,
1315                            int width);
1316void I422ToRGB24Row_Any_NEON(const uint8* src_y,
1317                             const uint8* src_u,
1318                             const uint8* src_v,
1319                             uint8* dst_argb,
1320                             int width);
1321void I422ToRAWRow_Any_NEON(const uint8* src_y,
1322                           const uint8* src_u,
1323                           const uint8* src_v,
1324                           uint8* dst_argb,
1325                           int width);
1326void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
1327                                const uint8* src_u,
1328                                const uint8* src_v,
1329                                uint8* dst_argb,
1330                                int width);
1331void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
1332                                const uint8* src_u,
1333                                const uint8* src_v,
1334                                uint8* dst_argb,
1335                                int width);
1336void I422ToRGB565Row_Any_NEON(const uint8* src_y,
1337                              const uint8* src_u,
1338                              const uint8* src_v,
1339                              uint8* dst_argb,
1340                              int width);
1341void NV12ToARGBRow_Any_NEON(const uint8* src_y,
1342                            const uint8* src_uv,
1343                            uint8* dst_argb,
1344                            int width);
1345void NV21ToARGBRow_Any_NEON(const uint8* src_y,
1346                            const uint8* src_uv,
1347                            uint8* dst_argb,
1348                            int width);
1349void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
1350                              const uint8* src_uv,
1351                              uint8* dst_argb,
1352                              int width);
1353void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
1354                              const uint8* src_uv,
1355                              uint8* dst_argb,
1356                              int width);
1357void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
1358                            uint8* dst_argb,
1359                            int width);
1360void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
1361                            uint8* dst_argb,
1362                            int width);
1363void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
1364                              const uint8* src_u,
1365                              const uint8* src_v,
1366                              uint8* dst_argb,
1367                              int width);
1368void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
1369                              const uint8* src_u,
1370                              const uint8* src_v,
1371                              uint8* dst_argb,
1372                              int width);
1373void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
1374                              const uint8* src_u,
1375                              const uint8* src_v,
1376                              uint8* dst_argb,
1377                              int width);
1378void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
1379                              const uint8* src_u,
1380                              const uint8* src_v,
1381                              uint8* dst_argb,
1382                              int width);
1383void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
1384                              const uint8* src_u,
1385                              const uint8* src_v,
1386                              uint8* dst_argb,
1387                              int width);
1388void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
1389                              const uint8* src_u,
1390                              const uint8* src_v,
1391                              uint8* dst_argb,
1392                              int width);
1393
1394void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
1395void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
1396                      uint8* dst_u, uint8* dst_v, int pix);
1397void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
1398                         uint8* dst_u, uint8* dst_v, int pix);
1399void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
1400void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
1401                      uint8* dst_u, uint8* dst_v, int pix);
1402void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
1403                         uint8* dst_u, uint8* dst_v, int pix);
1404void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
1405                               uint8* dst_y, int pix);
1406void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
1407                                uint8* dst_u, uint8* dst_v, int pix);
1408void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
1409                                   uint8* dst_u, uint8* dst_v, int pix);
1410void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
1411void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
1412                      uint8* dst_u, uint8* dst_v, int pix);
1413void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
1414                         uint8* dst_u, uint8* dst_v, int pix);
1415void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
1416void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
1417                   uint8* dst_u, uint8* dst_v, int pix);
1418void YUY2ToUV422Row_C(const uint8* src_yuy2,
1419                      uint8* dst_u, uint8* dst_v, int pix);
1420void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
1421void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
1422                          uint8* dst_u, uint8* dst_v, int pix);
1423void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
1424                             uint8* dst_u, uint8* dst_v, int pix);
1425void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
1426void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
1427                          uint8* dst_u, uint8* dst_v, int pix);
1428void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
1429                             uint8* dst_u, uint8* dst_v, int pix);
1430void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
1431void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
1432                          uint8* dst_u, uint8* dst_v, int pix);
1433void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
1434                             uint8* dst_u, uint8* dst_v, int pix);
1435void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1436void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1437                      uint8* dst_u, uint8* dst_v, int pix);
1438void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1439                         uint8* dst_u, uint8* dst_v, int pix);
1440void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
1441void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
1442                      uint8* dst_u, uint8* dst_v, int pix);
1443void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
1444                         uint8* dst_u, uint8* dst_v, int pix);
1445void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
1446                               uint8* dst_y, int pix);
1447void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
1448                                uint8* dst_u, uint8* dst_v, int pix);
1449void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
1450                                   uint8* dst_u, uint8* dst_v, int pix);
1451void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1452void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1453                      uint8* dst_u, uint8* dst_v, int pix);
1454void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1455                         uint8* dst_u, uint8* dst_v, int pix);
1456void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
1457void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
1458                      uint8* dst_u, uint8* dst_v, int pix);
1459void UYVYToUV422Row_NEON(const uint8* src_uyvy,
1460                         uint8* dst_u, uint8* dst_v, int pix);
1461
1462void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
1463void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
1464                   uint8* dst_u, uint8* dst_v, int pix);
1465void UYVYToUV422Row_C(const uint8* src_uyvy,
1466                      uint8* dst_u, uint8* dst_v, int pix);
1467void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
1468void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
1469                          uint8* dst_u, uint8* dst_v, int pix);
1470void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
1471                             uint8* dst_u, uint8* dst_v, int pix);
1472void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
1473void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
1474                          uint8* dst_u, uint8* dst_v, int pix);
1475void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
1476                             uint8* dst_u, uint8* dst_v, int pix);
1477void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
1478void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
1479                          uint8* dst_u, uint8* dst_v, int pix);
1480void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
1481                             uint8* dst_u, uint8* dst_v, int pix);
1482
1483void HalfRow_C(const uint8* src_uv, int src_uv_stride,
1484               uint8* dst_uv, int pix);
1485void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
1486                  uint8* dst_uv, int pix);
1487void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
1488                  uint8* dst_uv, int pix);
1489void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
1490                  uint8* dst_uv, int pix);
1491
1492void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
1493                  uint16* dst_uv, int pix);
1494
1495void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
1496                      uint32 selector, int pix);
1497void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
1498                          uint32 selector, int pix);
1499void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
1500                         uint32 selector, int pix);
1501void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
1502                              uint32 selector, int pix);
1503void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
1504                             uint32 selector, int pix);
1505void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
1506                        uint32 /* selector */, int pix);
1507void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
1508                           uint32 /* selector */, int pix);
1509void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
1510                           uint32 /* selector */, int pix);
1511void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
1512                               uint32 /* selector */, int pix);
1513void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
1514                               uint32 /* selector */, int pix);
1515
1516void I422ToYUY2Row_C(const uint8* src_y,
1517                     const uint8* src_u,
1518                     const uint8* src_v,
1519                     uint8* dst_yuy2, int width);
1520void I422ToUYVYRow_C(const uint8* src_y,
1521                     const uint8* src_u,
1522                     const uint8* src_v,
1523                     uint8* dst_uyvy, int width);
1524void I422ToYUY2Row_SSE2(const uint8* src_y,
1525                        const uint8* src_u,
1526                        const uint8* src_v,
1527                        uint8* dst_yuy2, int width);
1528void I422ToUYVYRow_SSE2(const uint8* src_y,
1529                        const uint8* src_u,
1530                        const uint8* src_v,
1531                        uint8* dst_uyvy, int width);
1532void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
1533                            const uint8* src_u,
1534                            const uint8* src_v,
1535                            uint8* dst_yuy2, int width);
1536void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
1537                            const uint8* src_u,
1538                            const uint8* src_v,
1539                            uint8* dst_uyvy, int width);
1540void I422ToYUY2Row_NEON(const uint8* src_y,
1541                        const uint8* src_u,
1542                        const uint8* src_v,
1543                        uint8* dst_yuy2, int width);
1544void I422ToUYVYRow_NEON(const uint8* src_y,
1545                        const uint8* src_u,
1546                        const uint8* src_v,
1547                        uint8* dst_uyvy, int width);
1548void I422ToYUY2Row_Any_NEON(const uint8* src_y,
1549                            const uint8* src_u,
1550                            const uint8* src_v,
1551                            uint8* dst_yuy2, int width);
1552void I422ToUYVYRow_Any_NEON(const uint8* src_y,
1553                            const uint8* src_u,
1554                            const uint8* src_v,
1555                            uint8* dst_uyvy, int width);
1556
1557// Effects related row functions.
1558void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1559void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1560void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1561void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1562void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1563void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1564                               int width);
1565void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
1566                                int width);
1567void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1568                               int width);
1569void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
1570                               int width);
1571
1572// Inverse table for unattenuate, shared by C and SSE2.
1573extern const uint32 fixed_invtbl8[256];
1574void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1575void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1576void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1577void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1578                                 int width);
1579void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1580                                 int width);
1581
1582void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1583void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1584void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1585
1586void ARGBSepiaRow_C(uint8* dst_argb, int width);
1587void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
1588void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
1589
1590void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
1591                          const int8* matrix_argb, int width);
1592void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1593                              const int8* matrix_argb, int width);
1594void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
1595                             const int8* matrix_argb, int width);
1596
1597void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1598void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1599
1600void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1601void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1602
1603void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
1604                       int interval_offset, int width);
1605void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
1606                          int interval_offset, int width);
1607void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
1608                          int interval_offset, int width);
1609
1610void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1611                    uint32 value);
1612void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
1613                       uint32 value);
1614void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
1615                       uint32 value);
1616
1617// Used for blur.
1618void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
1619                                    int width, int area, uint8* dst, int count);
1620void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
1621                                  const int32* previous_cumsum, int width);
1622
1623void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
1624                                 int width, int area, uint8* dst, int count);
1625void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1626                               const int32* previous_cumsum, int width);
1627
1628LIBYUV_API
1629void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1630                     uint8* dst_argb, const float* uv_dudv, int width);
1631LIBYUV_API
1632void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
1633                        uint8* dst_argb, const float* uv_dudv, int width);
1634
1635// Used for I420Scale, ARGBScale, and ARGBInterpolate.
1636void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1637                      ptrdiff_t src_stride_ptr,
1638                      int width, int source_y_fraction);
1639void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1640                         ptrdiff_t src_stride_ptr, int width,
1641                         int source_y_fraction);
1642void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1643                          ptrdiff_t src_stride_ptr, int width,
1644                          int source_y_fraction);
1645void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1646                         ptrdiff_t src_stride_ptr, int width,
1647                         int source_y_fraction);
1648void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
1649                         ptrdiff_t src_stride_ptr, int width,
1650                         int source_y_fraction);
1651void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1652                                ptrdiff_t src_stride_ptr, int width,
1653                                int source_y_fraction);
1654void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1655                                   ptrdiff_t src_stride_ptr, int width,
1656                                   int source_y_fraction);
1657void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1658                                    ptrdiff_t src_stride_ptr, int width,
1659                                    int source_y_fraction);
1660void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
1661                             ptrdiff_t src_stride_ptr, int width,
1662                             int source_y_fraction);
1663void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
1664                             ptrdiff_t src_stride_ptr, int width,
1665                             int source_y_fraction);
1666void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1667                              ptrdiff_t src_stride_ptr, int width,
1668                              int source_y_fraction);
1669void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1670                             ptrdiff_t src_stride_ptr, int width,
1671                             int source_y_fraction);
1672void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1673                                    ptrdiff_t src_stride_ptr, int width,
1674                                    int source_y_fraction);
1675
1676void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
1677                         ptrdiff_t src_stride_ptr,
1678                         int width, int source_y_fraction);
1679
1680// Sobel images.
1681void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
1682                 uint8* dst_sobelx, int width);
1683void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1684                    const uint8* src_y2, uint8* dst_sobelx, int width);
1685void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
1686                    const uint8* src_y2, uint8* dst_sobelx, int width);
1687void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
1688                 uint8* dst_sobely, int width);
1689void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1690                    uint8* dst_sobely, int width);
1691void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
1692                    uint8* dst_sobely, int width);
1693void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1694                uint8* dst_argb, int width);
1695void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1696                   uint8* dst_argb, int width);
1697void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1698                   uint8* dst_argb, int width);
1699void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1700                       uint8* dst_y, int width);
1701void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1702                          uint8* dst_y, int width);
1703void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1704                          uint8* dst_y, int width);
1705void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1706                  uint8* dst_argb, int width);
1707void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1708                     uint8* dst_argb, int width);
1709void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1710                     uint8* dst_argb, int width);
1711
1712void ARGBPolynomialRow_C(const uint8* src_argb,
1713                         uint8* dst_argb, const float* poly,
1714                         int width);
1715void ARGBPolynomialRow_SSE2(const uint8* src_argb,
1716                            uint8* dst_argb, const float* poly,
1717                            int width);
1718void ARGBPolynomialRow_AVX2(const uint8* src_argb,
1719                            uint8* dst_argb, const float* poly,
1720                            int width);
1721
1722void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1723                             const uint8* luma, uint32 lumacoeff);
1724void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1725                                 int width,
1726                                 const uint8* luma, uint32 lumacoeff);
1727
1728#ifdef __cplusplus
1729}  // extern "C"
1730}  // namespace libyuv
1731#endif
1732
1733#endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
1734