/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11#ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
12#define INCLUDE_LIBYUV_ROW_H_
13
14#include <stdlib.h>  // For malloc.
15
16#include "libyuv/basic_types.h"
17
18#ifdef __cplusplus
19namespace libyuv {
20extern "C" {
21#endif
22
// Evaluates to non-zero when the pointer or integer |p| has none of the bits
// of (|a| - 1) set, i.e. when |p| is a multiple of |a| for a power-of-two |a|.
// |p| is converted to uintptr_t before masking.
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))

// Declares two uint8 pointers in the current scope:
//   var##_mem - the raw malloc() result for (size) + 63 bytes; this is the
//               pointer that must eventually be passed to free().
//   var       - var##_mem rounded up to the next multiple of 64.
// The rounding is done in uintptr_t so the "+ 63" cannot overflow a signed
// integer. If malloc() returns NULL, var is NULL as well.
// The macro expands to two declarations and deliberately omits the trailing
// semicolon; callers supply it.
#ifdef __cplusplus
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
  uint8* var = reinterpret_cast<uint8*>(                                       \
      (reinterpret_cast<uintptr_t>(var##_mem) + 63) &                          \
      ~static_cast<uintptr_t>(63))
#else
#define align_buffer_64(var, size)                                             \
  uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
  uint8* var =                                                                 \
      (uint8*)(((uintptr_t)(var##_mem) + 63) & ~(uintptr_t)63)    /* NOLINT */
#endif

// Releases a buffer created with align_buffer_64(var, ...): frees the raw
// allocation var##_mem and sets var to 0.
// Wrapped in do { } while (0) so the macro is a single statement and stays
// correct under an unbraced if/else. Callers still write the trailing ';'.
#define free_aligned_buffer_64(var) \
  do {                              \
    free(var##_mem);                \
    var = 0;                        \
  } while (0)

// LIBYUV_DISABLE_X86 switches off the x86 HAS_* feature lists below. It is
// set for PNaCl, for managed (CLR) builds, and for 32-bit x86 builds without
// SSE2. The !defined() guards avoid redefining it when the build already
// supplied it (e.g. -DLIBYUV_DISABLE_X86=1).
#if !defined(LIBYUV_DISABLE_X86) &&               \
    (defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__i386__) && !defined(__SSE2__)))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if !defined(LIBYUV_DISABLE_X86) && defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// True if compiling for SSSE3 as a requirement.
// NOTE(review): MSVC documents _M_IX86_FP as taking only the values 0, 1 and
// 2, so the second clause looks unreachable with that compiler - confirm.
#if !defined(LIBYUV_SSSE3_ONLY) && \
    (defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)))
#define LIBYUV_SSSE3_ONLY
#endif

// LIBYUV_DISABLE_NEON switches off the NEON HAS_* feature list below. It is
// set for Native Client builds and for Arm64 builds with a clang older than
// 3.5. Both checks skip the #define when the build already supplied it.
#if defined(__native_client__) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
// clang >= 3.5.0 required for Arm64.
#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
#if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
#define LIBYUV_DISABLE_NEON
#endif  // clang >= 3.5
#endif  // __clang__

// GCC >= 4.7.0 required for AVX2.
// GCC_HAS_AVX2 is defined to 1 on x86/x64 when __GNUC__/__GNUC_MINOR__ report
// version 4.7 or newer.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif  // GNUC >= 4.7
#endif  // __GNUC__

// clang >= 3.4.0 required for AVX2.
// CLANG_HAS_AVX2 is defined to 1 on x86/x64 when __clang_major__ and
// __clang_minor__ report version 3.4 or newer.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4
#endif  // __clang__

// Visual C 2012 required for AVX2.
// VISUALC_HAS_AVX2 is defined to 1 only for 32-bit (_M_IX86) builds with
// _MSC_VER >= 1700 that are not clang.
#if defined(_M_IX86) && !defined(__clang__) && \
    defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012

// The following are available on all x86 platforms:
// Each HAS_<NAME>_<ISA> macro below is a feature flag with an empty
// definition; the whole list is skipped when LIBYUV_DISABLE_X86 is defined.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions:
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBSETROW_X86
#define HAS_ARGBSHUFFLEROW_SSE2
#define HAS_ARGBSHUFFLEROW_SSSE3
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTORAWROW_SSSE3
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565DITHERROW_SSE2
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_ARGBEXTRACTALPHAROW_SSE2
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_H422TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3
#define HAS_I422TORGBAROW_SSSE3
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_J400TOARGBROW_SSE2
#define HAS_J422TOARGBROW_SSSE3
#define HAS_MERGEUVROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTORGB24ROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#define HAS_SETROW_ERMS
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
#define HAS_UYVYTOARGBROW_SSSE3
#define HAS_UYVYTOUV422ROW_SSE2
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2

// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_BLENDPLANEROW_SSSE3
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2

// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
// caveat: clangcl uses row_win.cc which works.
// The two flags are therefore left out only for a 32-bit (__i386__) build
// that defines _DEBUG, does not define NDEBUG and is not _MSC_VER.
#if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \
    defined(_MSC_VER)
// TODO(fbarchard): fix build error on x86 debug
// https://code.google.com/p/libyuv/issues/detail?id=524
#define HAS_I411TOARGBROW_SSSE3
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_SSSE3
#endif
#endif

// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
// The list is enabled when any of VISUALC_HAS_AVX2, CLANG_HAS_AVX2 or
// GCC_HAS_AVX2 is defined and LIBYUV_DISABLE_X86 is not.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOUVJROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2
#define HAS_I400TOARGBROW_AVX2
#if !(defined(_DEBUG) && defined(__i386__))
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2
#endif
#define HAS_I411TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_I422TORGB24ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I422TORGBAROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_NV12TOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TOARGBROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOARGBROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
#define HAS_YUY2TOARGBROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2

// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2
#endif

// The following are available for AVX2 Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
// Enabled only for _M_IX86 builds where VISUALC_HAS_AVX2 or CLANG_HAS_AVX2
// is defined and LIBYUV_DISABLE_X86 is not.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_ARGB1555TOARGBROW_AVX2
#define HAS_ARGB4444TOARGBROW_AVX2
#define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#endif

// The following are also available on x64 Visual C.
// When the compiler is clang (clang-cl), the flags are set only if __SSSE3__
// is also defined.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \
    (!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#endif

// The following are available on Neon platforms:
// Enabled for __aarch64__, for __ARM_NEON__, or when the build defines
// LIBYUV_NEON, unless LIBYUV_DISABLE_NEON is defined.
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBSETROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565DITHERROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_ARGBEXTRACTALPHAROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422ALPHATOARGBROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_J400TOARGBROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON

// Effects:
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSHUFFLEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
#endif

// The following are available on Mips platforms:
// Requires __mips__ with _MIPS_SIM == _MIPS_SIM_ABI32 and __mips_isa_rev < 6,
// unless LIBYUV_DISABLE_MIPS is defined. The DSPR2 flags additionally need
// __mips_dsp with __mips_dsp_rev >= 2.
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
    (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
#define HAS_COPYROW_MIPS
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_I422TOARGBROW_DSPR2
#define HAS_INTERPOLATEROW_DSPR2
#define HAS_MIRRORROW_DSPR2
#define HAS_MIRRORUVROW_DSPR2
#define HAS_SPLITUVROW_DSPR2
#endif
#endif

375#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
376#if defined(VISUALC_HAS_AVX2)
377#define SIMD_ALIGNED(var) __declspec(align(32)) var
378#else
379#define SIMD_ALIGNED(var) __declspec(align(16)) var
380#endif
381typedef __declspec(align(16)) int16 vec16[8];
382typedef __declspec(align(16)) int32 vec32[4];
383typedef __declspec(align(16)) int8 vec8[16];
384typedef __declspec(align(16)) uint16 uvec16[8];
385typedef __declspec(align(16)) uint32 uvec32[4];
386typedef __declspec(align(16)) uint8 uvec8[16];
387typedef __declspec(align(32)) int16 lvec16[16];
388typedef __declspec(align(32)) int32 lvec32[8];
389typedef __declspec(align(32)) int8 lvec8[32];
390typedef __declspec(align(32)) uint16 ulvec16[16];
391typedef __declspec(align(32)) uint32 ulvec32[8];
392typedef __declspec(align(32)) uint8 ulvec8[32];
393#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
394// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
395#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
396#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
397#else
398#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
399#endif
400typedef int16 __attribute__((vector_size(16))) vec16;
401typedef int32 __attribute__((vector_size(16))) vec32;
402typedef int8 __attribute__((vector_size(16))) vec8;
403typedef uint16 __attribute__((vector_size(16))) uvec16;
404typedef uint32 __attribute__((vector_size(16))) uvec32;
405typedef uint8 __attribute__((vector_size(16))) uvec8;
406typedef int16 __attribute__((vector_size(32))) lvec16;
407typedef int32 __attribute__((vector_size(32))) lvec32;
408typedef int8 __attribute__((vector_size(32))) lvec8;
409typedef uint16 __attribute__((vector_size(32))) ulvec16;
410typedef uint32 __attribute__((vector_size(32))) ulvec32;
411typedef uint8 __attribute__((vector_size(32))) ulvec8;
412#else
413#define SIMD_ALIGNED(var) var
414typedef int16 vec16[8];
415typedef int32 vec32[4];
416typedef int8 vec8[16];
417typedef uint16 uvec16[8];
418typedef uint32 uvec32[4];
419typedef uint8 uvec8[16];
420typedef int16 lvec16[16];
421typedef int32 lvec32[8];
422typedef int8 lvec8[32];
423typedef uint16 ulvec16[16];
424typedef uint32 ulvec32[8];
425typedef uint8 ulvec8[32];
426#endif
427
428#if defined(__aarch64__)
429// This struct is for Arm64 color conversion.
430struct YuvConstants {
431  uvec16 kUVToRB;
432  uvec16 kUVToRB2;
433  uvec16 kUVToG;
434  uvec16 kUVToG2;
435  vec16 kUVBiasBGR;
436  vec32 kYToRgb;
437};
438#elif defined(__arm__)
439// This struct is for ArmV7 color conversion.
440struct YuvConstants {
441  uvec8 kUVToRB;
442  uvec8 kUVToG;
443  vec16 kUVBiasBGR;
444  vec32 kYToRgb;
445};
446#else
447// This struct is for Intel color conversion.
448struct YuvConstants {
449  int8 kUVToB[32];
450  int8 kUVToG[32];
451  int8 kUVToR[32];
452  int16 kUVBiasB[16];
453  int16 kUVBiasG[16];
454  int16 kUVBiasR[16];
455  int16 kYToRgb[16];
456};
457
458// Offsets into YuvConstants structure
459#define KUVTOB   0
460#define KUVTOG   32
461#define KUVTOR   64
462#define KUVBIASB 96
463#define KUVBIASG 128
464#define KUVBIASR 160
465#define KYTORGB  192
466#endif
467
468// Conversion matrix for YUV to RGB
469extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants);  // BT.601
470extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants);  // JPeg
471extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants);  // BT.709
472
473// Conversion matrix for YVU to BGR
474extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants);  // BT.601
475extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants);  // JPeg
476extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants);  // BT.709
477
// OMITFP is a function decoration. It expands to nothing on Apple, x86-64
// and LLVM-based compilers, and to the "omit-frame-pointer" optimize
// attribute everywhere else.
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif

// NaCL macros for GCC x86 and x64.
// These macros build string fragments for inline assembly. Each macro
// stringizes its arguments, so callers pass operand numbers and register
// names as bare tokens, e.g. MEMOPREG(movdqu, 0x00, 0, 1, 1, xmm0).
#if defined(__native_client__)
#define LABELALIGN ".p2align 5\n"
#else
#define LABELALIGN
#endif
#if defined(__native_client__) && defined(__x86_64__)
// NaCl x64 variant: memory operands are formed relative to %r15, and the
// MEMOP* macros compute the address into %r14d inside a bundle lock first.
// r14 is used for MEMOP macros.
#define NACL_R14 "r14",
#define BUNDLELOCK ".bundle_lock\n"
#define BUNDLEUNLOCK ".bundle_unlock\n"
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
#define MEMLEA(offset, base) #offset "(%q" #base ")"
#define MEMLEA3(offset, index, scale) \
    #offset "(,%q" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
    #offset "(%q" #base ",%q" #index "," #scale ")"
#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " (%%r15,%%r14),%%" #reg "\n" \
    BUNDLEUNLOCK
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " %%" #reg ",(%%r15,%%r14)\n" \
    BUNDLEUNLOCK
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " (%%r15,%%r14),%" #arg "\n" \
    BUNDLEUNLOCK
#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
    BUNDLEUNLOCK
#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    BUNDLELOCK \
    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \
    BUNDLEUNLOCK
#else  // defined(__native_client__) && defined(__x86_64__)
// Default variant: plain AT&T-syntax operands. NACL_R14, BUNDLEALIGN,
// MEMMOVESTRING and MEMSTORESTRING expand to nothing here.
#define NACL_R14
#define BUNDLEALIGN
#define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
#define MEMLEA(offset, base) #offset "(%" #base ")"
#define MEMLEA3(offset, index, scale) \
    #offset "(,%" #index "," #scale ")"
#define MEMLEA4(offset, base, index, scale) \
    #offset "(%" #base ",%" #index "," #scale ")"
#define MEMMOVESTRING(s, d)
#define MEMSTORESTRING(reg, d)
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    #opcode " %%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
    #reg2 "\n"
#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    #op " $" #sel ",%%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n"
#endif  // defined(__native_client__) && defined(__x86_64__)

// On Arm targets MEMACCESS is redefined: under Native Client it emits an
// alignment directive plus a "bic" that clears the top two bits of |base|;
// otherwise it expands to nothing.
#if defined(__arm__) || defined(__aarch64__)
#undef MEMACCESS
#if defined(__native_client__)
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
#else
#define MEMACCESS(base)
#endif
#endif

563void I444ToARGBRow_NEON(const uint8* src_y,
564                        const uint8* src_u,
565                        const uint8* src_v,
566                        uint8* dst_argb,
567                        const struct YuvConstants* yuvconstants,
568                        int width);
569void I422ToARGBRow_NEON(const uint8* src_y,
570                        const uint8* src_u,
571                        const uint8* src_v,
572                        uint8* dst_argb,
573                        const struct YuvConstants* yuvconstants,
574                        int width);
575void I422AlphaToARGBRow_NEON(const uint8* y_buf,
576                             const uint8* u_buf,
577                             const uint8* v_buf,
578                             const uint8* a_buf,
579                             uint8* dst_argb,
580                             const struct YuvConstants* yuvconstants,
581                             int width);
582void I422ToARGBRow_NEON(const uint8* src_y,
583                        const uint8* src_u,
584                        const uint8* src_v,
585                        uint8* dst_argb,
586                        const struct YuvConstants* yuvconstants,
587                        int width);
588void I411ToARGBRow_NEON(const uint8* src_y,
589                        const uint8* src_u,
590                        const uint8* src_v,
591                        uint8* dst_argb,
592                        const struct YuvConstants* yuvconstants,
593                        int width);
594void I422ToRGBARow_NEON(const uint8* src_y,
595                        const uint8* src_u,
596                        const uint8* src_v,
597                        uint8* dst_rgba,
598                        const struct YuvConstants* yuvconstants,
599                        int width);
600void I422ToRGB24Row_NEON(const uint8* src_y,
601                         const uint8* src_u,
602                         const uint8* src_v,
603                         uint8* dst_rgb24,
604                         const struct YuvConstants* yuvconstants,
605                         int width);
606void I422ToRGB565Row_NEON(const uint8* src_y,
607                          const uint8* src_u,
608                          const uint8* src_v,
609                          uint8* dst_rgb565,
610                          const struct YuvConstants* yuvconstants,
611                          int width);
612void I422ToARGB1555Row_NEON(const uint8* src_y,
613                            const uint8* src_u,
614                            const uint8* src_v,
615                            uint8* dst_argb1555,
616                            const struct YuvConstants* yuvconstants,
617                            int width);
618void I422ToARGB4444Row_NEON(const uint8* src_y,
619                            const uint8* src_u,
620                            const uint8* src_v,
621                            uint8* dst_argb4444,
622                            const struct YuvConstants* yuvconstants,
623                            int width);
624void NV12ToARGBRow_NEON(const uint8* src_y,
625                        const uint8* src_uv,
626                        uint8* dst_argb,
627                        const struct YuvConstants* yuvconstants,
628                        int width);
629void NV12ToRGB565Row_NEON(const uint8* src_y,
630                          const uint8* src_uv,
631                          uint8* dst_rgb565,
632                          const struct YuvConstants* yuvconstants,
633                          int width);
634void NV21ToARGBRow_NEON(const uint8* src_y,
635                        const uint8* src_vu,
636                        uint8* dst_argb,
637                        const struct YuvConstants* yuvconstants,
638                        int width);
639void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
640                        uint8* dst_argb,
641                        const struct YuvConstants* yuvconstants,
642                        int width);
643void UYVYToARGBRow_NEON(const uint8* src_uyvy,
644                        uint8* dst_argb,
645                        const struct YuvConstants* yuvconstants,
646                        int width);
647
648void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
649void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
650void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
651void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
652void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
653void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
654void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
655void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
656void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
657void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
658void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
659void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
660void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
661void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
662                         int width);
663void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
664                         int width);
665void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
666                      uint8* dst_u, uint8* dst_v, int width);
667void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
668                       uint8* dst_u, uint8* dst_v, int width);
669void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
670                      uint8* dst_u, uint8* dst_v, int width);
671void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
672                      uint8* dst_u, uint8* dst_v, int width);
673void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
674                      uint8* dst_u, uint8* dst_v, int width);
675void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
676                       uint8* dst_u, uint8* dst_v, int width);
677void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
678                     uint8* dst_u, uint8* dst_v, int width);
679void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
680                        uint8* dst_u, uint8* dst_v, int width);
681void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
682                          uint8* dst_u, uint8* dst_v, int width);
683void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
684                          uint8* dst_u, uint8* dst_v, int width);
685void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width);
686void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width);
687void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width);
688void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
689void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
690void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
691void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
692void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
693void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
694void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
695void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
696void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width);
697void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width);
698void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width);
699void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width);
700void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width);
701void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width);
702void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width);
703void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
704void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
705void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
706void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
707void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
708void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
709void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
710void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
711void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
712void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width);
713void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width);
714void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width);
715void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
716void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width);
717void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
718void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y,
719                             int width);
720void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y,
721                             int width);
722
// <format>ToUVRow family - prototypes only, one per implementation suffix.
// Unless noted, each takes a read-only source pointer, an int source stride,
// two writable outputs (dst_u, dst_v) and an int width. The
// ARGBToUV444Row/ARGBToUV411Row _Any_NEON prototypes take no stride argument.
// NOTE(review): presumably the stride is used to reach a second source row
// for chroma subsampling - confirm against the definitions.

// Variants suffixed _AVX2 / _SSSE3.
723void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
724                      uint8* dst_u, uint8* dst_v, int width);
725void ARGBToUVJRow_AVX2(const uint8* src_argb, int src_stride_argb,
726                       uint8* dst_u, uint8* dst_v, int width);
727void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
728                       uint8* dst_u, uint8* dst_v, int width);
729void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
730                        uint8* dst_u, uint8* dst_v, int width);
731void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
732                       uint8* dst_u, uint8* dst_v, int width);
733void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
734                       uint8* dst_u, uint8* dst_v, int width);
735void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
736                       uint8* dst_u, uint8* dst_v, int width);
// Variants suffixed _Any_AVX2 / _Any_SSSE3 (same parameter lists as above).
737void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
738                          uint8* dst_u, uint8* dst_v, int width);
739void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
740                           uint8* dst_u, uint8* dst_v, int width);
741void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
742                           uint8* dst_u, uint8* dst_v, int width);
743void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
744                            uint8* dst_u, uint8* dst_v, int width);
745void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
746                           uint8* dst_u, uint8* dst_v, int width);
747void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
748                           uint8* dst_u, uint8* dst_v, int width);
749void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
750                           uint8* dst_u, uint8* dst_v, int width);
// Variants suffixed _Any_NEON. The first two (UV444 / UV411) have no stride
// parameter.
751void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
752                             int width);
753void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
754                             int width);
755void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
756                          uint8* dst_u, uint8* dst_v, int width);
757void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
758                           uint8* dst_u, uint8* dst_v, int width);
759void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
760                          uint8* dst_u, uint8* dst_v, int width);
761void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
762                          uint8* dst_u, uint8* dst_v, int width);
763void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
764                          uint8* dst_u, uint8* dst_v, int width);
765void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
766                           uint8* dst_u, uint8* dst_v, int width);
767void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
768                         uint8* dst_u, uint8* dst_v, int width);
769void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
770                            uint8* dst_u, uint8* dst_v, int width);
771void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
772                              int src_stride_argb1555,
773                              uint8* dst_u, uint8* dst_v, int width);
774void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
775                              int src_stride_argb4444,
776                              uint8* dst_u, uint8* dst_v, int width);
// Variants suffixed _C, one per source format.
777void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
778                   uint8* dst_u, uint8* dst_v, int width);
779void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
780                    uint8* dst_u, uint8* dst_v, int width);
781void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
782                   uint8* dst_u, uint8* dst_v, int width);
783void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
784                   uint8* dst_u, uint8* dst_v, int width);
785void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
786                   uint8* dst_u, uint8* dst_v, int width);
787void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
788                    uint8* dst_u, uint8* dst_v, int width);
789void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
790                  uint8* dst_u, uint8* dst_v, int width);
791void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
792                     uint8* dst_u, uint8* dst_v, int width);
793void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
794                       uint8* dst_u, uint8* dst_v, int width);
795void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
796                       uint8* dst_u, uint8* dst_v, int width);
797
// ARGBToUV444Row, _SSSE3 and _Any_SSSE3 prototypes: read-only src_argb,
// writable dst_u and dst_v, int width. No source stride parameter.
798void ARGBToUV444Row_SSSE3(const uint8* src_argb,
799                          uint8* dst_u, uint8* dst_v, int width);
800void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
801                              uint8* dst_u, uint8* dst_v, int width);
802
// _C prototypes for ARGBToUV444Row and ARGBToUV411Row: read-only src_argb,
// writable dst_u and dst_v, int width. No source stride parameter.
803void ARGBToUV444Row_C(const uint8* src_argb,
804                      uint8* dst_u, uint8* dst_v, int width);
805void ARGBToUV411Row_C(const uint8* src_argb,
806                      uint8* dst_u, uint8* dst_v, int width);
807
// MirrorRow family - prototypes only. Each takes a read-only src pointer, a
// writable dst pointer and an int width.
// NOTE(review): MirrorRow_Any_SSE2 is declared below but no MirrorRow_SSE2
// prototype appears in this group - check whether that declaration is stale.
808void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
809void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
810void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
811void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
812void MirrorRow_C(const uint8* src, uint8* dst, int width);
813void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
814void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
815void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
816void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
817
// MirrorUVRow family - prototypes only. Each takes one read-only src_uv
// pointer, two writable outputs (dst_u, dst_v) and an int width. No _Any_
// variants are declared in this group.
818void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
819                       int width);
820void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
821                      int width);
822void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
823                       int width);
824void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
825
// ARGBMirrorRow family - prototypes only. Each takes a read-only src
// pointer, a writable dst pointer and an int width.
826void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
827void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
828void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
829void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
830void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
831void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
832void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
833
// SplitUVRow family - prototypes only. Each takes one read-only src_uv
// pointer, two writable outputs (dst_u, dst_v) and an int width.
// NOTE(review): presumably de-interleaves a packed UV row into separate U
// and V rows - confirm against the definitions.
834void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
835void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
836                     int width);
837void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
838                     int width);
839void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
840                     int width);
841void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
842                      int width);
843void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
844                         int width);
845void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
846                         int width);
847void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
848                         int width);
849void SplitUVRow_Any_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
850                          int width);
851
// MergeUVRow family - prototypes only. Each takes two read-only inputs
// (src_u, src_v), one writable output (dst_uv) and an int width.
// NOTE(review): presumably the inverse of SplitUVRow (interleaves U and V) -
// confirm against the definitions.
852void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
853                  int width);
854void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
855                     int width);
856void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
857                     int width);
858void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
859                     int width);
860void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
861                         int width);
862void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
863                         int width);
864void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
865                         int width);
866
// CopyRow family - prototypes only. Each takes a read-only src pointer, a
// writable dst pointer and an int count (this family names its size
// parameter "count" rather than "width").
// NOTE(review): whether count is in bytes or pixels is not visible here -
// confirm against the definitions.
867void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
868void CopyRow_AVX(const uint8* src, uint8* dst, int count);
869void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
870void CopyRow_NEON(const uint8* src, uint8* dst, int count);
871void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
872void CopyRow_C(const uint8* src, uint8* dst, int count);
873void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
874void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
875void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
876
// Prototype for the uint16 counterpart of CopyRow_C: read-only uint16 src,
// writable uint16 dst, int count. Only a _C variant is declared here.
877void CopyRow_16_C(const uint16* src, uint16* dst, int count);
878
// ARGBCopyAlphaRow family - prototypes only. Each takes a read-only
// src_argb pointer, a writable dst_argb pointer and an int width.
// NOTE(review): presumably copies only the alpha channel from src_argb into
// dst_argb - confirm against the definitions.
879void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
880void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
881void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
882void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
883                               int width);
884void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
885                               int width);
886
// ARGBExtractAlphaRow family - prototypes only. Each takes a read-only
// src_argb pointer, a writable dst_a pointer and an int width.
// NOTE(review): presumably writes one alpha value per source pixel to dst_a -
// confirm against the definitions.
887void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
888void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
889void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
890void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
891                                  int width);
892void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
893                                  int width);
894
// ARGBCopyYToAlphaRow family - prototypes only. Each takes a read-only
// src_y pointer, a writable dst_argb pointer and an int width.
// NOTE(review): presumably stores each src_y value into the alpha channel of
// dst_argb - confirm against the definitions.
895void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
896void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
897void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
898void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb,
899                                  int width);
900void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb,
901                                  int width);
902
// SetRow family - prototypes only. Each takes a writable dst pointer, a
// uint8 value v8 and an int count; there is no source buffer.
// NOTE(review): presumably fills count bytes of dst with v8 - confirm against
// the definitions.
903void SetRow_C(uint8* dst, uint8 v8, int count);
904void SetRow_X86(uint8* dst, uint8 v8, int count);
905void SetRow_ERMS(uint8* dst, uint8 v8, int count);
906void SetRow_NEON(uint8* dst, uint8 v8, int count);
907void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
908void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
909
// ARGBSetRow family - prototypes only. Each takes a writable dst_argb
// pointer, a uint32 value v32 and an int count. Only the NEON variant has an
// _Any_ prototype in this group.
// NOTE(review): presumably v32 is one packed ARGB pixel written count times -
// confirm against the definitions.
910void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
911void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
912void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
913void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
914
915// ARGBShufflers for BGRAToARGB etc.
// Prototypes only. Each takes a read-only src_argb pointer, a writable
// dst_argb pointer, a read-only shuffler table pointer and an int width.
// NOTE(review): the length and layout of the shuffler table are not visible
// here - confirm against the definitions before constructing one.
916void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
917                      const uint8* shuffler, int width);
918void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
919                         const uint8* shuffler, int width);
920void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
921                          const uint8* shuffler, int width);
922void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
923                         const uint8* shuffler, int width);
924void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
925                         const uint8* shuffler, int width);
926void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
927                             const uint8* shuffler, int width);
928void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
929                              const uint8* shuffler, int width);
930void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
931                             const uint8* shuffler, int width);
932void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
933                             const uint8* shuffler, int width);
934
// Packed-RGB source rows to ARGB (and RAW to RGB24): prototypes suffixed
// _SSSE3, _SSE2 and _AVX2. Each takes a read-only source pointer, a writable
// destination pointer and an int width.
935void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width);
936void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
937void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
938void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width);
939void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
940                            int width);
941void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
942                            int width);
943void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width);
944void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
945                            int width);
946void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
947                            int width);
948
// Same conversions as the x86 group: _NEON prototypes, then _C prototypes,
// then the _Any_SSSE3 prototypes for the RGB24/RAW sources. Each takes a
// read-only source pointer, a writable destination pointer and an int width.
949void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
950void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
951void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
952void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
953void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
954                            int width);
955void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
956                            int width);
// _C prototypes. The last three name their source generically (src_rgb /
// src_argb) instead of after the source format as the SIMD prototypes do.
957void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width);
958void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width);
959void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width);
960void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width);
961void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
962void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
// _Any_SSSE3 prototypes.
963void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb,
964                              int width);
965void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
966void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
967
// _Any_SSE2 and _Any_AVX2 prototypes for the RGB565 / ARGB1555 / ARGB4444
// sources. Each takes a read-only source pointer, a writable dst_argb
// pointer and an int width.
968void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
969                              int width);
970void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
971                                int width);
972void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
973                                int width);
974void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
975                              int width);
976void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
977                                int width);
978void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
979                                int width);
980
// _Any_NEON prototypes for the packed-RGB source conversions. Each takes a
// read-only source pointer, a writable destination pointer and an int width.
981void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb,
982                             int width);
983void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
984void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
985void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
986                              int width);
987void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
988                                int width);
989void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
990                                int width);
991
// ARGB source to packed-RGB destination: _SSSE3 / _SSE2 prototypes. Each
// takes a read-only src_argb pointer, a writable dst_rgb pointer and an int
// width. The destination is named dst_rgb for every output format.
992void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
993void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
994void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
995void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
996void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
997
// ARGBToRGB565DitherRow: _C, _SSE2 and _AVX2 prototypes. Besides src_argb,
// dst_rgb and width, each takes a uint32 dither4 argument by value.
// NOTE(review): dither4 presumably packs four 8-bit dither values - confirm
// against the definitions.
998void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
999                             const uint32 dither4, int width);
1000void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
1001                                const uint32 dither4, int width);
1002void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
1003                                const uint32 dither4, int width);
1004
// _AVX2 prototypes for ARGB to RGB565 / ARGB1555 / ARGB4444: read-only
// src_argb, writable dst_rgb, int width.
1005void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
1006void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
1007void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
1008
// _NEON prototypes for ARGB to packed-RGB destinations, including the
// dithered RGB565 variant (which adds a by-value uint32 dither4 argument).
1009void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1010void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1011void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1012void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1013void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1014void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
1015                                const uint32 dither4, int width);
1016
// _C prototypes for ARGB to packed-RGB destinations: read-only src_argb,
// writable dst_rgb, int width. ARGBToRGBARow has only this _C prototype in
// the visible part of the header.
1017void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
1018void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
1019void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
1020void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
1021void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
1022void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
1023
// J400ToARGBRow family - prototypes only. Each takes a read-only src_y
// pointer, a writable dst_argb pointer and an int width.
// NOTE(review): presumably expands a grey (Y-only) row to ARGB - confirm
// against the definitions.
1024void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
1025void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
1026void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
1027void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
1028void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
1029void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
1030void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
1031
// Prototype only. Takes three read-only plane pointers (src_y, src_u,
// src_v), a writable dst_argb pointer, a read-only pointer to
// struct YuvConstants and an int width.
// NOTE(review): yuvconstants presumably selects the YUV->RGB conversion
// coefficients - confirm against the struct definition.
1032void I444ToARGBRow_C(const uint8* src_y,
1033                     const uint8* src_u,
1034                     const uint8* src_v,
1035                     uint8* dst_argb,
1036                     const struct YuvConstants* yuvconstants,
1037                     int width);
// Prototype only, declared once. Takes three read-only plane pointers
// (src_y, src_u, src_v), a writable dst_argb pointer, a read-only pointer to
// struct YuvConstants and an int width.
// NOTE(review): yuvconstants presumably selects the YUV->RGB conversion
// coefficients - confirm against the struct definition.
1038void I422ToARGBRow_C(const uint8* src_y,
1039                     const uint8* src_u,
1040                     const uint8* src_v,
1041                     uint8* dst_argb,
1042                     const struct YuvConstants* yuvconstants,
1043                     int width);
// _C prototypes for YUV-source to RGB-destination rows. Every prototype ends
// with a read-only struct YuvConstants pointer and an int width; they differ
// in the source planes they take:
//   - planar sources: separate Y, U and V pointers (plus a_buf for
//     I422Alpha),
//   - NV12 / NV21: a Y pointer and one src_uv pointer,
//   - YUY2 / UYVY: a single packed source pointer.
// NOTE(review): I422AlphaToARGBRow_C names its inputs y_buf/u_buf/v_buf/a_buf
// while the others use src_y/src_u/src_v; NV12ToRGB565Row_C names its
// destination dst_argb. Prototype parameter names do not affect callers, but
// consider aligning them with the definitions.
1050void I422AlphaToARGBRow_C(const uint8* y_buf,
1051                          const uint8* u_buf,
1052                          const uint8* v_buf,
1053                          const uint8* a_buf,
1054                          uint8* dst_argb,
1055                          const struct YuvConstants* yuvconstants,
1056                          int width);
1057void I411ToARGBRow_C(const uint8* src_y,
1058                     const uint8* src_u,
1059                     const uint8* src_v,
1060                     uint8* dst_argb,
1061                     const struct YuvConstants* yuvconstants,
1062                     int width);
1063void NV12ToARGBRow_C(const uint8* src_y,
1064                     const uint8* src_uv,
1065                     uint8* dst_argb,
1066                     const struct YuvConstants* yuvconstants,
1067                     int width);
1068void NV12ToRGB565Row_C(const uint8* src_y,
1069                       const uint8* src_uv,
1070                       uint8* dst_argb,
1071                       const struct YuvConstants* yuvconstants,
1072                       int width);
1073void NV21ToARGBRow_C(const uint8* src_y,
1074                     const uint8* src_uv,
1075                     uint8* dst_argb,
1076                     const struct YuvConstants* yuvconstants,
1077                     int width);
1078void YUY2ToARGBRow_C(const uint8* src_yuy2,
1079                     uint8* dst_argb,
1080                     const struct YuvConstants* yuvconstants,
1081                     int width);
1082void UYVYToARGBRow_C(const uint8* src_uyvy,
1083                     uint8* dst_argb,
1084                     const struct YuvConstants* yuvconstants,
1085                     int width);
1086void I422ToRGBARow_C(const uint8* src_y,
1087                     const uint8* src_u,
1088                     const uint8* src_v,
1089                     uint8* dst_rgba,
1090                     const struct YuvConstants* yuvconstants,
1091                     int width);
1092void I422ToRGB24Row_C(const uint8* src_y,
1093                      const uint8* src_u,
1094                      const uint8* src_v,
1095                      uint8* dst_rgb24,
1096                      const struct YuvConstants* yuvconstants,
1097                      int width);
1098void I422ToARGB4444Row_C(const uint8* src_y,
1099                         const uint8* src_u,
1100                         const uint8* src_v,
1101                         uint8* dst_argb4444,
1102                         const struct YuvConstants* yuvconstants,
1103                         int width);
// Prototype only. Takes three read-only plane pointers (src_y, src_u,
// src_v), a writable destination pointer, a read-only pointer to
// struct YuvConstants and an int width.
// The destination is named after this function's own output format
// (dst_argb1555), matching how the neighbouring _C prototypes name theirs.
1104void I422ToARGB1555Row_C(const uint8* src_y,
1105                         const uint8* src_u,
1106                         const uint8* src_v,
1107                         uint8* dst_argb1555,
1108                         const struct YuvConstants* yuvconstants,
1109                         int width);
// Prototype only. Takes three read-only plane pointers (src_y, src_u,
// src_v), a writable dst_rgb565 pointer, a read-only pointer to
// struct YuvConstants and an int width.
1110void I422ToRGB565Row_C(const uint8* src_y,
1111                       const uint8* src_u,
1112                       const uint8* src_v,
1113                       uint8* dst_rgb565,
1114                       const struct YuvConstants* yuvconstants,
1115                       int width);
// Prototype only, declared once. Same parameter list as I422ToARGBRow_C:
// three read-only plane pointers, a writable dst_argb pointer, a read-only
// struct YuvConstants pointer and an int width.
1116void I422ToARGBRow_AVX2(const uint8* src_y,
1117                        const uint8* src_u,
1118                        const uint8* src_v,
1119                        uint8* dst_argb,
1120                        const struct YuvConstants* yuvconstants,
1121                        int width);
// Prototype only. Takes three read-only plane pointers, a writable
// destination pointer, a read-only struct YuvConstants pointer and an int
// width.
// NOTE(review): the destination is named dst_argb here although the _C and
// _SSSE3 prototypes of I422ToRGBARow call it dst_rgba - cosmetic only.
1128void I422ToRGBARow_AVX2(const uint8* src_y,
1129                        const uint8* src_u,
1130                        const uint8* src_v,
1131                        uint8* dst_argb,
1132                        const struct YuvConstants* yuvconstants,
1133                        int width);
// I444ToARGBRow: _SSSE3 and _AVX2 prototypes, each declared once. Both take
// three read-only plane pointers (src_y, src_u, src_v), a writable dst_argb
// pointer, a read-only struct YuvConstants pointer and an int width.
1134void I444ToARGBRow_SSSE3(const uint8* src_y,
1135                         const uint8* src_u,
1136                         const uint8* src_v,
1137                         uint8* dst_argb,
1138                         const struct YuvConstants* yuvconstants,
1139                         int width);
1140void I444ToARGBRow_AVX2(const uint8* src_y,
1141                        const uint8* src_u,
1142                        const uint8* src_v,
1143                        uint8* dst_argb,
1144                        const struct YuvConstants* yuvconstants,
1145                        int width);
// I422ToARGBRow_SSSE3 (declared once) followed by the _SSSE3 and _AVX2
// prototypes of I422AlphaToARGBRow. The alpha variants take a fourth
// read-only plane pointer (a_buf) in addition to y_buf/u_buf/v_buf. All end
// with a writable dst_argb pointer, a read-only struct YuvConstants pointer
// and an int width.
1158void I422ToARGBRow_SSSE3(const uint8* src_y,
1159                         const uint8* src_u,
1160                         const uint8* src_v,
1161                         uint8* dst_argb,
1162                         const struct YuvConstants* yuvconstants,
1163                         int width);
1164void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
1165                              const uint8* u_buf,
1166                              const uint8* v_buf,
1167                              const uint8* a_buf,
1168                              uint8* dst_argb,
1169                              const struct YuvConstants* yuvconstants,
1170                              int width);
1171void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
1172                             const uint8* u_buf,
1173                             const uint8* v_buf,
1174                             const uint8* a_buf,
1175                             uint8* dst_argb,
1176                             const struct YuvConstants* yuvconstants,
1177                             int width);
// _SSSE3 / _AVX2 prototypes for the remaining YUV-source to RGB-destination
// rows. Every prototype ends with a read-only struct YuvConstants pointer
// and an int width; source arguments follow the same three shapes as the _C
// prototypes (separate Y/U/V planes, Y plus src_uv, or one packed pointer).
// NOTE(review): several prototypes whose names indicate a non-ARGB output
// (RGB565, ARGB4444, ARGB1555) still call the destination dst_argb. This has
// no effect on callers, but the output layout should be taken from the
// function name / definition, not from the parameter name.
1184void I411ToARGBRow_SSSE3(const uint8* src_y,
1185                         const uint8* src_u,
1186                         const uint8* src_v,
1187                         uint8* dst_argb,
1188                         const struct YuvConstants* yuvconstants,
1189                         int width);
1190void I411ToARGBRow_AVX2(const uint8* src_y,
1191                        const uint8* src_u,
1192                        const uint8* src_v,
1193                        uint8* dst_argb,
1194                        const struct YuvConstants* yuvconstants,
1195                        int width);
1196void NV12ToARGBRow_SSSE3(const uint8* src_y,
1197                         const uint8* src_uv,
1198                         uint8* dst_argb,
1199                         const struct YuvConstants* yuvconstants,
1200                         int width);
1201void NV12ToARGBRow_AVX2(const uint8* src_y,
1202                        const uint8* src_uv,
1203                        uint8* dst_argb,
1204                        const struct YuvConstants* yuvconstants,
1205                        int width);
1206void NV12ToRGB565Row_SSSE3(const uint8* src_y,
1207                           const uint8* src_uv,
1208                           uint8* dst_argb,
1209                           const struct YuvConstants* yuvconstants,
1210                           int width);
1211void NV12ToRGB565Row_AVX2(const uint8* src_y,
1212                          const uint8* src_uv,
1213                          uint8* dst_argb,
1214                          const struct YuvConstants* yuvconstants,
1215                          int width);
1216void NV21ToARGBRow_SSSE3(const uint8* src_y,
1217                         const uint8* src_uv,
1218                         uint8* dst_argb,
1219                         const struct YuvConstants* yuvconstants,
1220                         int width);
1221void NV21ToARGBRow_AVX2(const uint8* src_y,
1222                        const uint8* src_uv,
1223                        uint8* dst_argb,
1224                        const struct YuvConstants* yuvconstants,
1225                        int width);
1226void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
1227                         uint8* dst_argb,
1228                         const struct YuvConstants* yuvconstants,
1229                         int width);
1230void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
1231                         uint8* dst_argb,
1232                         const struct YuvConstants* yuvconstants,
1233                         int width);
1234void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
1235                        uint8* dst_argb,
1236                        const struct YuvConstants* yuvconstants,
1237                        int width);
1238void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
1239                        uint8* dst_argb,
1240                        const struct YuvConstants* yuvconstants,
1241                        int width);
1242void I422ToRGBARow_SSSE3(const uint8* src_y,
1243                         const uint8* src_u,
1244                         const uint8* src_v,
1245                         uint8* dst_rgba,
1246                         const struct YuvConstants* yuvconstants,
1247                         int width);
1248void I422ToARGB4444Row_SSSE3(const uint8* src_y,
1249                             const uint8* src_u,
1250                             const uint8* src_v,
1251                             uint8* dst_argb,
1252                             const struct YuvConstants* yuvconstants,
1253                             int width);
1254void I422ToARGB4444Row_AVX2(const uint8* src_y,
1255                            const uint8* src_u,
1256                            const uint8* src_v,
1257                            uint8* dst_argb,
1258                            const struct YuvConstants* yuvconstants,
1259                            int width);
1260void I422ToARGB1555Row_SSSE3(const uint8* src_y,
1261                             const uint8* src_u,
1262                             const uint8* src_v,
1263                             uint8* dst_argb,
1264                             const struct YuvConstants* yuvconstants,
1265                             int width);
1266void I422ToARGB1555Row_AVX2(const uint8* src_y,
1267                            const uint8* src_u,
1268                            const uint8* src_v,
1269                            uint8* dst_argb,
1270                            const struct YuvConstants* yuvconstants,
1271                            int width);
1272void I422ToRGB565Row_SSSE3(const uint8* src_y,
1273                           const uint8* src_u,
1274                           const uint8* src_v,
1275                           uint8* dst_argb,
1276                           const struct YuvConstants* yuvconstants,
1277                           int width);
1278void I422ToRGB565Row_AVX2(const uint8* src_y,
1279                          const uint8* src_u,
1280                          const uint8* src_v,
1281                          uint8* dst_argb,
1282                          const struct YuvConstants* yuvconstants,
1283                          int width);
1284void I422ToRGB24Row_SSSE3(const uint8* src_y,
1285                          const uint8* src_u,
1286                          const uint8* src_v,
1287                          uint8* dst_rgb24,
1288                          const struct YuvConstants* yuvconstants,
1289                          int width);
1290void I422ToRGB24Row_AVX2(const uint8* src_y,
1291                         const uint8* src_u,
1292                         const uint8* src_v,
1293                         uint8* dst_rgb24,
1294                         const struct YuvConstants* yuvconstants,
1295                         int width);
// _Any_SSSE3 / _Any_AVX2 prototypes for the YUV-source to RGB-destination
// rows. Each has the same parameter list as the correspondingly named
// non-_Any_ prototype: source plane pointer(s), a writable destination, a
// read-only struct YuvConstants pointer and an int width.
// NOTE(review): the _Any_ variants presumably accept widths that are not a
// multiple of the SIMD step - confirm against the definitions.
1296void I422ToARGBRow_Any_AVX2(const uint8* src_y,
1297                            const uint8* src_u,
1298                            const uint8* src_v,
1299                            uint8* dst_argb,
1300                            const struct YuvConstants* yuvconstants,
1301                            int width);
1302void I422ToRGBARow_Any_AVX2(const uint8* src_y,
1303                            const uint8* src_u,
1304                            const uint8* src_v,
1305                            uint8* dst_argb,
1306                            const struct YuvConstants* yuvconstants,
1307                            int width);
1308void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
1309                             const uint8* src_u,
1310                             const uint8* src_v,
1311                             uint8* dst_argb,
1312                             const struct YuvConstants* yuvconstants,
1313                             int width);
1314void I444ToARGBRow_Any_AVX2(const uint8* src_y,
1315                            const uint8* src_u,
1316                            const uint8* src_v,
1317                            uint8* dst_argb,
1318                            const struct YuvConstants* yuvconstants,
1319                            int width);
1320void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
1321                             const uint8* src_u,
1322                             const uint8* src_v,
1323                             uint8* dst_argb,
1324                             const struct YuvConstants* yuvconstants,
1325                             int width);
1326void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
1327                                  const uint8* u_buf,
1328                                  const uint8* v_buf,
1329                                  const uint8* a_buf,
1330                                  uint8* dst_argb,
1331                                  const struct YuvConstants* yuvconstants,
1332                                  int width);
1333void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
1334                                 const uint8* u_buf,
1335                                 const uint8* v_buf,
1336                                 const uint8* a_buf,
1337                                 uint8* dst_argb,
1338                                 const struct YuvConstants* yuvconstants,
1339                                 int width);
1340void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
1341                             const uint8* src_u,
1342                             const uint8* src_v,
1343                             uint8* dst_argb,
1344                             const struct YuvConstants* yuvconstants,
1345                             int width);
1346void I411ToARGBRow_Any_AVX2(const uint8* src_y,
1347                            const uint8* src_u,
1348                            const uint8* src_v,
1349                            uint8* dst_argb,
1350                            const struct YuvConstants* yuvconstants,
1351                            int width);
1352void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
1353                             const uint8* src_uv,
1354                             uint8* dst_argb,
1355                             const struct YuvConstants* yuvconstants,
1356                             int width);
1357void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
1358                            const uint8* src_uv,
1359                            uint8* dst_argb,
1360                            const struct YuvConstants* yuvconstants,
1361                            int width);
1362void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
1363                             const uint8* src_vu,
1364                             uint8* dst_argb,
1365                             const struct YuvConstants* yuvconstants,
1366                             int width);
1367void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
1368                            const uint8* src_vu,
1369                            uint8* dst_argb,
1370                            const struct YuvConstants* yuvconstants,
1371                            int width);
1372void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
1373                               const uint8* src_uv,
1374                               uint8* dst_argb,
1375                               const struct YuvConstants* yuvconstants,
1376                               int width);
1377void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
1378                              const uint8* src_uv,
1379                              uint8* dst_argb,
1380                              const struct YuvConstants* yuvconstants,
1381                              int width);
1382void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
1383                             uint8* dst_argb,
1384                             const struct YuvConstants* yuvconstants,
1385                             int width);
1386void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
1387                             uint8* dst_argb,
1388                             const struct YuvConstants* yuvconstants,
1389                             int width);
1390void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
1391                            uint8* dst_argb,
1392                            const struct YuvConstants* yuvconstants,
1393                            int width);
1394void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
1395                            uint8* dst_argb,
1396                            const struct YuvConstants* yuvconstants,
1397                            int width);
1398void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
1399                             const uint8* src_u,
1400                             const uint8* src_v,
1401                             uint8* dst_rgba,
1402                             const struct YuvConstants* yuvconstants,
1403                             int width);
1404void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
1405                                 const uint8* src_u,
1406                                 const uint8* src_v,
1407                                 uint8* dst_rgba,
1408                                 const struct YuvConstants* yuvconstants,
1409                                 int width);
1410void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
1411                                const uint8* src_u,
1412                                const uint8* src_v,
1413                                uint8* dst_rgba,
1414                                const struct YuvConstants* yuvconstants,
1415                                int width);
1416void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
1417                                 const uint8* src_u,
1418                                 const uint8* src_v,
1419                                 uint8* dst_rgba,
1420                                 const struct YuvConstants* yuvconstants,
1421                                 int width);
1422void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
1423                                const uint8* src_u,
1424                                const uint8* src_v,
1425                                uint8* dst_rgba,
1426                                const struct YuvConstants* yuvconstants,
1427                                int width);
1428void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
1429                               const uint8* src_u,
1430                               const uint8* src_v,
1431                               uint8* dst_rgba,
1432                               const struct YuvConstants* yuvconstants,
1433                               int width);
1434void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
1435                              const uint8* src_u,
1436                              const uint8* src_v,
1437                              uint8* dst_rgba,
1438                              const struct YuvConstants* yuvconstants,
1439                              int width);
1440void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
1441                              const uint8* src_u,
1442                              const uint8* src_v,
1443                              uint8* dst_argb,
1444                              const struct YuvConstants* yuvconstants,
1445                              int width);
1446void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
1447                             const uint8* src_u,
1448                             const uint8* src_v,
1449                             uint8* dst_argb,
1450                             const struct YuvConstants* yuvconstants,
1451                             int width);
1452
1453void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
1454void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
1455void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
1456void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
1457void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
1458void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
1459void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
1460
1461// ARGB preattenuated alpha blend.
1462void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
1463                        uint8* dst_argb, int width);
1464void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1465                       uint8* dst_argb, int width);
1466void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
1467                    uint8* dst_argb, int width);
1468
1469// Unattenuated planar alpha blend.
1470void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
1471                         const uint8* alpha, uint8* dst, int width);
1472void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1,
1473                             const uint8* alpha, uint8* dst, int width);
1474void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
1475                        const uint8* alpha, uint8* dst, int width);
1476void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1,
1477                            const uint8* alpha, uint8* dst, int width);
1478void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
1479                     const uint8* alpha, uint8* dst, int width);
1480
1481// ARGB multiply images. Same API as Blend, but these require
1482// pointer and width alignment for SSE2.
1483void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
1484                       uint8* dst_argb, int width);
1485void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1486                          uint8* dst_argb, int width);
1487void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1488                              uint8* dst_argb, int width);
1489void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1490                          uint8* dst_argb, int width);
1491void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1492                              uint8* dst_argb, int width);
1493void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1494                          uint8* dst_argb, int width);
1495void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1496                              uint8* dst_argb, int width);
1497
1498// ARGB add images.
1499void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
1500                  uint8* dst_argb, int width);
1501void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1502                     uint8* dst_argb, int width);
1503void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1504                         uint8* dst_argb, int width);
1505void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1506                     uint8* dst_argb, int width);
1507void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1508                         uint8* dst_argb, int width);
1509void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1510                     uint8* dst_argb, int width);
1511void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1512                         uint8* dst_argb, int width);
1513
1514// ARGB subtract images. Same API as Blend, but these require
1515// pointer and width alignment for SSE2.
1516void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
1517                       uint8* dst_argb, int width);
1518void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
1519                          uint8* dst_argb, int width);
1520void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
1521                              uint8* dst_argb, int width);
1522void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
1523                          uint8* dst_argb, int width);
1524void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
1525                              uint8* dst_argb, int width);
1526void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
1527                          uint8* dst_argb, int width);
1528void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
1529                              uint8* dst_argb, int width);
1530
1531void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
1532void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
1533void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
1534void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
1535                                int width);
1536void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
1537                                int width);
1538
1539void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
1540                                    const uint32 dither4, int width);
1541void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
1542                                    const uint32 dither4, int width);
1543
1544void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
1545void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
1546                                int width);
1547void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
1548                                int width);
1549
1550void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1551void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1552void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
1553void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
1554                                int width);
1555void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
1556                                int width);
1557void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
1558                                    const uint32 dither4, int width);
1559
1560void I444ToARGBRow_Any_NEON(const uint8* src_y,
1561                            const uint8* src_u,
1562                            const uint8* src_v,
1563                            uint8* dst_argb,
1564                            const struct YuvConstants* yuvconstants,
1565                            int width);
1566void I422ToARGBRow_Any_NEON(const uint8* src_y,
1567                            const uint8* src_u,
1568                            const uint8* src_v,
1569                            uint8* dst_argb,
1570                            const struct YuvConstants* yuvconstants,
1571                            int width);
1572void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
1573                                 const uint8* src_u,
1574                                 const uint8* src_v,
1575                                 const uint8* src_a,
1576                                 uint8* dst_argb,
1577                                 const struct YuvConstants* yuvconstants,
1578                                 int width);
1579void I411ToARGBRow_Any_NEON(const uint8* src_y,
1580                            const uint8* src_u,
1581                            const uint8* src_v,
1582                            uint8* dst_argb,
1583                            const struct YuvConstants* yuvconstants,
1584                            int width);
1585void I422ToRGBARow_Any_NEON(const uint8* src_y,
1586                            const uint8* src_u,
1587                            const uint8* src_v,
1588                            uint8* dst_argb,
1589                            const struct YuvConstants* yuvconstants,
1590                            int width);
1591void I422ToRGB24Row_Any_NEON(const uint8* src_y,
1592                             const uint8* src_u,
1593                             const uint8* src_v,
1594                             uint8* dst_argb,
1595                             const struct YuvConstants* yuvconstants,
1596                             int width);
1597void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
1598                                const uint8* src_u,
1599                                const uint8* src_v,
1600                                uint8* dst_argb,
1601                                const struct YuvConstants* yuvconstants,
1602                                int width);
1603void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
1604                                const uint8* src_u,
1605                                const uint8* src_v,
1606                                uint8* dst_argb,
1607                                const struct YuvConstants* yuvconstants,
1608                                int width);
1609void I422ToRGB565Row_Any_NEON(const uint8* src_y,
1610                              const uint8* src_u,
1611                              const uint8* src_v,
1612                              uint8* dst_argb,
1613                              const struct YuvConstants* yuvconstants,
1614                              int width);
1615void NV12ToARGBRow_Any_NEON(const uint8* src_y,
1616                            const uint8* src_uv,
1617                            uint8* dst_argb,
1618                            const struct YuvConstants* yuvconstants,
1619                            int width);
1620void NV21ToARGBRow_Any_NEON(const uint8* src_y,
1621                            const uint8* src_vu,
1622                            uint8* dst_argb,
1623                            const struct YuvConstants* yuvconstants,
1624                            int width);
1625void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
1626                              const uint8* src_uv,
1627                              uint8* dst_argb,
1628                              const struct YuvConstants* yuvconstants,
1629                              int width);
1630void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
1631                            uint8* dst_argb,
1632                            const struct YuvConstants* yuvconstants,
1633                            int width);
1634void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
1635                            uint8* dst_argb,
1636                            const struct YuvConstants* yuvconstants,
1637                            int width);
1638void I422ToARGBRow_DSPR2(const uint8* src_y,
1639                         const uint8* src_u,
1640                         const uint8* src_v,
1641                         uint8* dst_argb,
1642                         const struct YuvConstants* yuvconstants,
1643                         int width);
1644void I422ToARGBRow_DSPR2(const uint8* src_y,
1645                         const uint8* src_u,
1646                         const uint8* src_v,
1647                         uint8* dst_argb,
1648                         const struct YuvConstants* yuvconstants,
1649                         int width);
1650
1651void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
1652void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
1653                      uint8* dst_u, uint8* dst_v, int width);
1654void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
1655                         uint8* dst_u, uint8* dst_v, int width);
1656void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
1657void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
1658                      uint8* dst_u, uint8* dst_v, int width);
1659void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
1660                         uint8* dst_u, uint8* dst_v, int width);
1661void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
1662void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
1663                      uint8* dst_u, uint8* dst_v, int width);
1664void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
1665                         uint8* dst_u, uint8* dst_v, int width);
1666void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width);
1667void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
1668                   uint8* dst_u, uint8* dst_v, int width);
1669void YUY2ToUV422Row_C(const uint8* src_yuy2,
1670                      uint8* dst_u, uint8* dst_v, int width);
1671void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
1672void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
1673                          uint8* dst_u, uint8* dst_v, int width);
1674void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
1675                             uint8* dst_u, uint8* dst_v, int width);
1676void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
1677void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
1678                          uint8* dst_u, uint8* dst_v, int width);
1679void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
1680                             uint8* dst_u, uint8* dst_v, int width);
1681void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
1682void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
1683                          uint8* dst_u, uint8* dst_v, int width);
1684void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
1685                             uint8* dst_u, uint8* dst_v, int width);
1686void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
1687void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1688                      uint8* dst_u, uint8* dst_v, int width);
1689void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1690                         uint8* dst_u, uint8* dst_v, int width);
1691void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
1692void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
1693                      uint8* dst_u, uint8* dst_v, int width);
1694void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
1695                         uint8* dst_u, uint8* dst_v, int width);
1696void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
1697void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
1698                      uint8* dst_u, uint8* dst_v, int width);
1699void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
1700                         uint8* dst_u, uint8* dst_v, int width);
1701void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
1702void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
1703                      uint8* dst_u, uint8* dst_v, int width);
1704void UYVYToUV422Row_NEON(const uint8* src_uyvy,
1705                         uint8* dst_u, uint8* dst_v, int width);
1706
1707void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width);
1708void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
1709                   uint8* dst_u, uint8* dst_v, int width);
1710void UYVYToUV422Row_C(const uint8* src_uyvy,
1711                      uint8* dst_u, uint8* dst_v, int width);
1712void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
1713void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
1714                          uint8* dst_u, uint8* dst_v, int width);
1715void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
1716                             uint8* dst_u, uint8* dst_v, int width);
1717void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
1718void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
1719                          uint8* dst_u, uint8* dst_v, int width);
1720void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
1721                             uint8* dst_u, uint8* dst_v, int width);
1722void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
1723void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
1724                          uint8* dst_u, uint8* dst_v, int width);
1725void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
1726                             uint8* dst_u, uint8* dst_v, int width);
1727
1728void I422ToYUY2Row_C(const uint8* src_y,
1729                     const uint8* src_u,
1730                     const uint8* src_v,
1731                     uint8* dst_yuy2, int width);
1732void I422ToUYVYRow_C(const uint8* src_y,
1733                     const uint8* src_u,
1734                     const uint8* src_v,
1735                     uint8* dst_uyvy, int width);
1736void I422ToYUY2Row_SSE2(const uint8* src_y,
1737                        const uint8* src_u,
1738                        const uint8* src_v,
1739                        uint8* dst_yuy2, int width);
1740void I422ToUYVYRow_SSE2(const uint8* src_y,
1741                        const uint8* src_u,
1742                        const uint8* src_v,
1743                        uint8* dst_uyvy, int width);
1744void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
1745                            const uint8* src_u,
1746                            const uint8* src_v,
1747                            uint8* dst_yuy2, int width);
1748void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
1749                            const uint8* src_u,
1750                            const uint8* src_v,
1751                            uint8* dst_uyvy, int width);
1752void I422ToYUY2Row_NEON(const uint8* src_y,
1753                        const uint8* src_u,
1754                        const uint8* src_v,
1755                        uint8* dst_yuy2, int width);
1756void I422ToUYVYRow_NEON(const uint8* src_y,
1757                        const uint8* src_u,
1758                        const uint8* src_v,
1759                        uint8* dst_uyvy, int width);
1760void I422ToYUY2Row_Any_NEON(const uint8* src_y,
1761                            const uint8* src_u,
1762                            const uint8* src_v,
1763                            uint8* dst_yuy2, int width);
1764void I422ToUYVYRow_Any_NEON(const uint8* src_y,
1765                            const uint8* src_u,
1766                            const uint8* src_v,
1767                            uint8* dst_uyvy, int width);
1768
1769// Effects related row functions.
1770void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1771void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1772void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1773void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1774void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1775                               int width);
1776void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
1777                                int width);
1778void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1779                               int width);
1780void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
1781                               int width);
1782
1783// Inverse table for unattenuate, shared by C and SSE2.
1784extern const uint32 fixed_invtbl8[256];
1785void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1786void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
1787void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
1788void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
1789                                 int width);
1790void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
1791                                 int width);
1792
1793void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
1794void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
1795void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
1796
1797void ARGBSepiaRow_C(uint8* dst_argb, int width);
1798void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
1799void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
1800
1801void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
1802                          const int8* matrix_argb, int width);
1803void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1804                              const int8* matrix_argb, int width);
1805void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
1806                             const int8* matrix_argb, int width);
1807
1808void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1809void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1810
1811void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
1812void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
1813
1814void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
1815                       int interval_offset, int width);
1816void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
1817                          int interval_offset, int width);
1818void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
1819                          int interval_offset, int width);
1820
1821void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1822                    uint32 value);
1823void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
1824                       uint32 value);
1825void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
1826                       uint32 value);
1827
1828// Used for blur.
1829void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
1830                                    int width, int area, uint8* dst, int count);
1831void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
1832                                  const int32* previous_cumsum, int width);
1833
1834void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
1835                                 int width, int area, uint8* dst, int count);
1836void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1837                               const int32* previous_cumsum, int width);
1838
1839LIBYUV_API
1840void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1841                     uint8* dst_argb, const float* uv_dudv, int width);
1842LIBYUV_API
1843void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
1844                        uint8* dst_argb, const float* uv_dudv, int width);
1845
1846// Used for I420Scale, ARGBScale, and ARGBInterpolate.
1847void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1848                      ptrdiff_t src_stride_ptr,
1849                      int width, int source_y_fraction);
1850void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1851                          ptrdiff_t src_stride_ptr, int width,
1852                          int source_y_fraction);
1853void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1854                         ptrdiff_t src_stride_ptr, int width,
1855                         int source_y_fraction);
1856void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
1857                         ptrdiff_t src_stride_ptr, int width,
1858                         int source_y_fraction);
1859void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1860                          ptrdiff_t src_stride_ptr, int width,
1861                          int source_y_fraction);
// _Any_ counterparts of the NEON, SSSE3, AVX2 and DSPR2 InterpolateRow
// functions; parameter lists are identical to the non-_Any_ declarations.
// NOTE(review): by naming, these presumably accept any width (not only
// multiples of the SIMD step) - confirm against the definitions.
1862void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
1863                             ptrdiff_t src_stride_ptr, int width,
1864                             int source_y_fraction);
1865void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
1866                              ptrdiff_t src_stride_ptr, int width,
1867                              int source_y_fraction);
1868void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
1869                             ptrdiff_t src_stride_ptr, int width,
1870                             int source_y_fraction);
1871void InterpolateRow_Any_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
1872                              ptrdiff_t src_stride_ptr, int width,
1873                              int source_y_fraction);
1874
// 16-bit InterpolateRow: same parameter order as InterpolateRow_C, but the
// destination and source rows are uint16 instead of uint8.
// NOTE(review): whether src_stride_ptr is counted in bytes or in uint16
// elements is not visible from this declaration - confirm in the definition.
1875void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
1876                         ptrdiff_t src_stride_ptr,
1877                         int width, int source_y_fraction);
1878
1879// Sobel images.
// SobelXRow_*: reads three source rows (src_y0, src_y1, src_y2) and writes
// one 8-bit output row to dst_sobelx. Declared with _C, _SSE2 and _NEON
// suffixes, all with the same parameters.
// NOTE(review): presumably the three inputs are vertically adjacent rows and
// width is the number of output values - confirm against the definitions.
1880void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
1881                 uint8* dst_sobelx, int width);
1882void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1883                    const uint8* src_y2, uint8* dst_sobelx, int width);
1884void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
1885                    const uint8* src_y2, uint8* dst_sobelx, int width);
// SobelYRow_*: reads two source rows (src_y0, src_y1) and writes one 8-bit
// output row to dst_sobely. Unlike SobelXRow, only two input rows are taken.
// Declared with _C, _SSE2 and _NEON suffixes, all with the same parameters.
// NOTE(review): which two rows of the neighbourhood the caller passes is not
// visible here - confirm against the call sites.
1886void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
1887                 uint8* dst_sobely, int width);
1888void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
1889                    uint8* dst_sobely, int width);
1890void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
1891                    uint8* dst_sobely, int width);
// SobelRow_*: takes a row of Sobel X values and a row of Sobel Y values
// (both read-only) and writes to dst_argb. Declared with _C, _SSE2 and _NEON
// suffixes, all with the same parameters.
// NOTE(review): how the two inputs are combined and how the result is laid
// out across the ARGB channels is not visible here - confirm in the
// definitions.
1892void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1893                uint8* dst_argb, int width);
1894void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1895                   uint8* dst_argb, int width);
1896void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1897                   uint8* dst_argb, int width);
// SobelToPlaneRow_*: same inputs as SobelRow (Sobel X row and Sobel Y row),
// but the output parameter is dst_y rather than dst_argb.
// Declared with _C, _SSE2 and _NEON suffixes, all with the same parameters.
// NOTE(review): presumably writes a single-channel plane with one byte per
// pixel - confirm in the definitions.
1898void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1899                       uint8* dst_y, int width);
1900void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1901                          uint8* dst_y, int width);
1902void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1903                          uint8* dst_y, int width);
// SobelXYRow_*: takes a Sobel X row and a Sobel Y row (both read-only) and
// writes to dst_argb; the signature is identical to SobelRow_*.
// Declared with _C, _SSE2 and _NEON suffixes, all with the same parameters.
// NOTE(review): presumably X and Y are stored in separate output channels
// rather than merged as in SobelRow - confirm in the definitions.
1904void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
1905                  uint8* dst_argb, int width);
1906void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1907                     uint8* dst_argb, int width);
1908void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1909                     uint8* dst_argb, int width);
// _Any_ counterparts of the SSE2 and NEON SobelRow, SobelToPlaneRow and
// SobelXYRow functions; each has the same parameter list as its non-_Any_
// declaration. No _Any_ forms of SobelXRow/SobelYRow are declared here.
// NOTE(review): by naming, these presumably accept any width (not only
// multiples of the SIMD step) - confirm against the definitions.
1910void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1911                       uint8* dst_argb, int width);
1912void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1913                       uint8* dst_argb, int width);
1914void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1915                              uint8* dst_y, int width);
1916void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1917                              uint8* dst_y, int width);
1918void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
1919                         uint8* dst_argb, int width);
1920void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
1921                         uint8* dst_argb, int width);
1922
// ARGBPolynomialRow_*: reads src_argb, writes dst_argb, and takes a read-only
// float array `poly`. Declared with _C, _SSE2 and _AVX2 suffixes, all with
// the same parameters.
// NOTE(review): presumably applies a per-channel polynomial whose
// coefficients are stored in `poly`; the coefficient count and layout are
// not visible here - confirm in the definitions.
// NOTE(review): width is presumably the number of ARGB pixels - confirm.
1923void ARGBPolynomialRow_C(const uint8* src_argb,
1924                         uint8* dst_argb, const float* poly,
1925                         int width);
1926void ARGBPolynomialRow_SSE2(const uint8* src_argb,
1927                            uint8* dst_argb, const float* poly,
1928                            int width);
1929void ARGBPolynomialRow_AVX2(const uint8* src_argb,
1930                            uint8* dst_argb, const float* poly,
1931                            int width);
1932
// ARGBLumaColorTableRow_*: reads src_argb and writes dst_argb using a
// read-only byte table `luma` and a 32-bit `lumacoeff` value. Note that
// `width` comes before the table arguments in this signature.
// Declared with _C and _SSSE3 suffixes, both with the same parameters.
// NOTE(review): presumably lumacoeff packs per-channel luma weights and the
// computed luma selects a sub-table within `luma`; the table size and
// layout are not visible here - confirm in the definitions.
1933void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
1934                             const uint8* luma, uint32 lumacoeff);
1935void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
1936                                 int width,
1937                                 const uint8* luma, uint32 lumacoeff);
1938
1939#ifdef __cplusplus
1940}  // extern "C"
1941}  // namespace libyuv
1942#endif
1943
1944#endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
1945