1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/row.h"
12
13#include <string.h>  // For memcpy and memset.
14
15#include "libyuv/basic_types.h"
16
17#ifdef __cplusplus
18namespace libyuv {
19extern "C" {
20#endif
21
22// llvm x86 is poor at ternary operator, so use branchless min/max.
23
24#define USE_BRANCHLESS 1
25#if USE_BRANCHLESS
26static __inline int32 clamp0(int32 v) {
27  return ((-(v) >> 31) & (v));
28}
29
30static __inline int32 clamp255(int32 v) {
31  return (((255 - (v)) >> 31) | (v)) & 255;
32}
33
34static __inline uint32 Clamp(int32 val) {
35  int v = clamp0(val);
36  return (uint32)(clamp255(v));
37}
38
39static __inline uint32 Abs(int32 v) {
40  int m = v >> 31;
41  return (v + m) ^ m;
42}
43#else  // USE_BRANCHLESS
44static __inline int32 clamp0(int32 v) {
45  return (v < 0) ? 0 : v;
46}
47
48static __inline int32 clamp255(int32 v) {
49  return (v > 255) ? 255 : v;
50}
51
52static __inline uint32 Clamp(int32 val) {
53  int v = clamp0(val);
54  return (uint32)(clamp255(v));
55}
56
57static __inline uint32 Abs(int32 v) {
58  return (v < 0) ? -v : v;
59}
60#endif  // USE_BRANCHLESS
61
62#ifdef LIBYUV_LITTLE_ENDIAN
63#define WRITEWORD(p, v) *(uint32*)(p) = v
64#else
65static inline void WRITEWORD(uint8* p, uint32 v) {
66  p[0] = (uint8)(v & 255);
67  p[1] = (uint8)((v >> 8) & 255);
68  p[2] = (uint8)((v >> 16) & 255);
69  p[3] = (uint8)((v >> 24) & 255);
70}
71#endif
72
73void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
74  int x;
75  for (x = 0; x < width; ++x) {
76    uint8 b = src_rgb24[0];
77    uint8 g = src_rgb24[1];
78    uint8 r = src_rgb24[2];
79    dst_argb[0] = b;
80    dst_argb[1] = g;
81    dst_argb[2] = r;
82    dst_argb[3] = 255u;
83    dst_argb += 4;
84    src_rgb24 += 3;
85  }
86}
87
88void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
89  int x;
90  for (x = 0; x < width; ++x) {
91    uint8 r = src_raw[0];
92    uint8 g = src_raw[1];
93    uint8 b = src_raw[2];
94    dst_argb[0] = b;
95    dst_argb[1] = g;
96    dst_argb[2] = r;
97    dst_argb[3] = 255u;
98    dst_argb += 4;
99    src_raw += 3;
100  }
101}
102
103void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
104  int x;
105  for (x = 0; x < width; ++x) {
106    uint8 b = src_rgb565[0] & 0x1f;
107    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
108    uint8 r = src_rgb565[1] >> 3;
109    dst_argb[0] = (b << 3) | (b >> 2);
110    dst_argb[1] = (g << 2) | (g >> 4);
111    dst_argb[2] = (r << 3) | (r >> 2);
112    dst_argb[3] = 255u;
113    dst_argb += 4;
114    src_rgb565 += 2;
115  }
116}
117
118void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
119                         int width) {
120  int x;
121  for (x = 0; x < width; ++x) {
122    uint8 b = src_argb1555[0] & 0x1f;
123    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
124    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
125    uint8 a = src_argb1555[1] >> 7;
126    dst_argb[0] = (b << 3) | (b >> 2);
127    dst_argb[1] = (g << 3) | (g >> 2);
128    dst_argb[2] = (r << 3) | (r >> 2);
129    dst_argb[3] = -a;
130    dst_argb += 4;
131    src_argb1555 += 2;
132  }
133}
134
135void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
136                         int width) {
137  int x;
138  for (x = 0; x < width; ++x) {
139    uint8 b = src_argb4444[0] & 0x0f;
140    uint8 g = src_argb4444[0] >> 4;
141    uint8 r = src_argb4444[1] & 0x0f;
142    uint8 a = src_argb4444[1] >> 4;
143    dst_argb[0] = (b << 4) | b;
144    dst_argb[1] = (g << 4) | g;
145    dst_argb[2] = (r << 4) | r;
146    dst_argb[3] = (a << 4) | a;
147    dst_argb += 4;
148    src_argb4444 += 2;
149  }
150}
151
152void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
153  int x;
154  for (x = 0; x < width; ++x) {
155    uint8 b = src_argb[0];
156    uint8 g = src_argb[1];
157    uint8 r = src_argb[2];
158    dst_rgb[0] = b;
159    dst_rgb[1] = g;
160    dst_rgb[2] = r;
161    dst_rgb += 3;
162    src_argb += 4;
163  }
164}
165
166void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
167  int x;
168  for (x = 0; x < width; ++x) {
169    uint8 b = src_argb[0];
170    uint8 g = src_argb[1];
171    uint8 r = src_argb[2];
172    dst_rgb[0] = r;
173    dst_rgb[1] = g;
174    dst_rgb[2] = b;
175    dst_rgb += 3;
176    src_argb += 4;
177  }
178}
179
180void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
181  int x;
182  for (x = 0; x < width - 1; x += 2) {
183    uint8 b0 = src_argb[0] >> 3;
184    uint8 g0 = src_argb[1] >> 2;
185    uint8 r0 = src_argb[2] >> 3;
186    uint8 b1 = src_argb[4] >> 3;
187    uint8 g1 = src_argb[5] >> 2;
188    uint8 r1 = src_argb[6] >> 3;
189    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
190              (b1 << 16) | (g1 << 21) | (r1 << 27));
191    dst_rgb += 4;
192    src_argb += 8;
193  }
194  if (width & 1) {
195    uint8 b0 = src_argb[0] >> 3;
196    uint8 g0 = src_argb[1] >> 2;
197    uint8 r0 = src_argb[2] >> 3;
198    *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
199  }
200}
201
202void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
203  int x;
204  for (x = 0; x < width - 1; x += 2) {
205    uint8 b0 = src_argb[0] >> 3;
206    uint8 g0 = src_argb[1] >> 3;
207    uint8 r0 = src_argb[2] >> 3;
208    uint8 a0 = src_argb[3] >> 7;
209    uint8 b1 = src_argb[4] >> 3;
210    uint8 g1 = src_argb[5] >> 3;
211    uint8 r1 = src_argb[6] >> 3;
212    uint8 a1 = src_argb[7] >> 7;
213    *(uint32*)(dst_rgb) =
214        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
215        (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
216    dst_rgb += 4;
217    src_argb += 8;
218  }
219  if (width & 1) {
220    uint8 b0 = src_argb[0] >> 3;
221    uint8 g0 = src_argb[1] >> 3;
222    uint8 r0 = src_argb[2] >> 3;
223    uint8 a0 = src_argb[3] >> 7;
224    *(uint16*)(dst_rgb) =
225        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
226  }
227}
228
229void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
230  int x;
231  for (x = 0; x < width - 1; x += 2) {
232    uint8 b0 = src_argb[0] >> 4;
233    uint8 g0 = src_argb[1] >> 4;
234    uint8 r0 = src_argb[2] >> 4;
235    uint8 a0 = src_argb[3] >> 4;
236    uint8 b1 = src_argb[4] >> 4;
237    uint8 g1 = src_argb[5] >> 4;
238    uint8 r1 = src_argb[6] >> 4;
239    uint8 a1 = src_argb[7] >> 4;
240    *(uint32*)(dst_rgb) =
241        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
242        (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
243    dst_rgb += 4;
244    src_argb += 8;
245  }
246  if (width & 1) {
247    uint8 b0 = src_argb[0] >> 4;
248    uint8 g0 = src_argb[1] >> 4;
249    uint8 r0 = src_argb[2] >> 4;
250    uint8 a0 = src_argb[3] >> 4;
251    *(uint16*)(dst_rgb) =
252        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
253  }
254}
255
256static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
257  return (66 * r + 129 * g +  25 * b + 0x1080) >> 8;
258}
259
260static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
261  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
262}
263static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
264  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
265}
266
267#define MAKEROWY(NAME, R, G, B, BPP) \
268void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
269  int x;                                                                       \
270  for (x = 0; x < width; ++x) {                                                \
271    dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
272    src_argb0 += BPP;                                                          \
273    dst_y += 1;                                                                \
274  }                                                                            \
275}                                                                              \
276void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
277                       uint8* dst_u, uint8* dst_v, int width) {                \
278  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
279  int x;                                                                       \
280  for (x = 0; x < width - 1; x += 2) {                                         \
281    uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
282               src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
283    uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
284               src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
285    uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
286               src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
287    dst_u[0] = RGBToU(ar, ag, ab);                                             \
288    dst_v[0] = RGBToV(ar, ag, ab);                                             \
289    src_rgb0 += BPP * 2;                                                       \
290    src_rgb1 += BPP * 2;                                                       \
291    dst_u += 1;                                                                \
292    dst_v += 1;                                                                \
293  }                                                                            \
294  if (width & 1) {                                                             \
295    uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
296    uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
297    uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
298    dst_u[0] = RGBToU(ar, ag, ab);                                             \
299    dst_v[0] = RGBToV(ar, ag, ab);                                             \
300  }                                                                            \
301}
302
303MAKEROWY(ARGB, 2, 1, 0, 4)
304MAKEROWY(BGRA, 1, 2, 3, 4)
305MAKEROWY(ABGR, 0, 1, 2, 4)
306MAKEROWY(RGBA, 3, 2, 1, 4)
307MAKEROWY(RGB24, 2, 1, 0, 3)
308MAKEROWY(RAW, 0, 1, 2, 3)
309#undef MAKEROWY
310
311// JPeg uses a variation on BT.601-1 full range
312// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
313// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
314// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
315// BT.601 Mpeg range uses:
316// b 0.1016 * 255 = 25.908 = 25
317// g 0.5078 * 255 = 129.489 = 129
318// r 0.2578 * 255 = 65.739 = 66
319// JPeg 8 bit Y (not used):
320// b 0.11400 * 256 = 29.184 = 29
321// g 0.58700 * 256 = 150.272 = 150
322// r 0.29900 * 256 = 76.544 = 77
323// JPeg 7 bit Y:
324// b 0.11400 * 128 = 14.592 = 15
325// g 0.58700 * 128 = 75.136 = 75
326// r 0.29900 * 128 = 38.272 = 38
327// JPeg 8 bit U:
328// b  0.50000 * 255 = 127.5 = 127
329// g -0.33126 * 255 = -84.4713 = -84
330// r -0.16874 * 255 = -43.0287 = -43
331// JPeg 8 bit V:
332// b -0.08131 * 255 = -20.73405 = -20
333// g -0.41869 * 255 = -106.76595 = -107
334// r  0.50000 * 255 = 127.5 = 127
335
336static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
337  return (38 * r + 75 * g +  15 * b + 64) >> 7;
338}
339
340static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
341  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
342}
343static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
344  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
345}
346
347#define AVGB(a, b) (((a) + (b) + 1) >> 1)
348
349#define MAKEROWYJ(NAME, R, G, B, BPP) \
350void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
351  int x;                                                                       \
352  for (x = 0; x < width; ++x) {                                                \
353    dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
354    src_argb0 += BPP;                                                          \
355    dst_y += 1;                                                                \
356  }                                                                            \
357}                                                                              \
358void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
359                        uint8* dst_u, uint8* dst_v, int width) {               \
360  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
361  int x;                                                                       \
362  for (x = 0; x < width - 1; x += 2) {                                         \
363    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
364                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
365    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
366                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
367    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
368                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
369    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
370    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
371    src_rgb0 += BPP * 2;                                                       \
372    src_rgb1 += BPP * 2;                                                       \
373    dst_u += 1;                                                                \
374    dst_v += 1;                                                                \
375  }                                                                            \
376  if (width & 1) {                                                             \
377    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
378    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
379    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
380    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
381    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
382  }                                                                            \
383}
384
385MAKEROWYJ(ARGB, 2, 1, 0, 4)
386#undef MAKEROWYJ
387
388void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
389  int x;
390  for (x = 0; x < width; ++x) {
391    uint8 b = src_rgb565[0] & 0x1f;
392    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
393    uint8 r = src_rgb565[1] >> 3;
394    b = (b << 3) | (b >> 2);
395    g = (g << 2) | (g >> 4);
396    r = (r << 3) | (r >> 2);
397    dst_y[0] = RGBToY(r, g, b);
398    src_rgb565 += 2;
399    dst_y += 1;
400  }
401}
402
403void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
404  int x;
405  for (x = 0; x < width; ++x) {
406    uint8 b = src_argb1555[0] & 0x1f;
407    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
408    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
409    b = (b << 3) | (b >> 2);
410    g = (g << 3) | (g >> 2);
411    r = (r << 3) | (r >> 2);
412    dst_y[0] = RGBToY(r, g, b);
413    src_argb1555 += 2;
414    dst_y += 1;
415  }
416}
417
418void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
419  int x;
420  for (x = 0; x < width; ++x) {
421    uint8 b = src_argb4444[0] & 0x0f;
422    uint8 g = src_argb4444[0] >> 4;
423    uint8 r = src_argb4444[1] & 0x0f;
424    b = (b << 4) | b;
425    g = (g << 4) | g;
426    r = (r << 4) | r;
427    dst_y[0] = RGBToY(r, g, b);
428    src_argb4444 += 2;
429    dst_y += 1;
430  }
431}
432
433void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
434                     uint8* dst_u, uint8* dst_v, int width) {
435  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
436  int x;
437  for (x = 0; x < width - 1; x += 2) {
438    uint8 b0 = src_rgb565[0] & 0x1f;
439    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
440    uint8 r0 = src_rgb565[1] >> 3;
441    uint8 b1 = src_rgb565[2] & 0x1f;
442    uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
443    uint8 r1 = src_rgb565[3] >> 3;
444    uint8 b2 = next_rgb565[0] & 0x1f;
445    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
446    uint8 r2 = next_rgb565[1] >> 3;
447    uint8 b3 = next_rgb565[2] & 0x1f;
448    uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
449    uint8 r3 = next_rgb565[3] >> 3;
450    uint8 b = (b0 + b1 + b2 + b3);  // 565 * 4 = 787.
451    uint8 g = (g0 + g1 + g2 + g3);
452    uint8 r = (r0 + r1 + r2 + r3);
453    b = (b << 1) | (b >> 6);  // 787 -> 888.
454    r = (r << 1) | (r >> 6);
455    dst_u[0] = RGBToU(r, g, b);
456    dst_v[0] = RGBToV(r, g, b);
457    src_rgb565 += 4;
458    next_rgb565 += 4;
459    dst_u += 1;
460    dst_v += 1;
461  }
462  if (width & 1) {
463    uint8 b0 = src_rgb565[0] & 0x1f;
464    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
465    uint8 r0 = src_rgb565[1] >> 3;
466    uint8 b2 = next_rgb565[0] & 0x1f;
467    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
468    uint8 r2 = next_rgb565[1] >> 3;
469    uint8 b = (b0 + b2);  // 565 * 2 = 676.
470    uint8 g = (g0 + g2);
471    uint8 r = (r0 + r2);
472    b = (b << 2) | (b >> 4);  // 676 -> 888
473    g = (g << 1) | (g >> 6);
474    r = (r << 2) | (r >> 4);
475    dst_u[0] = RGBToU(r, g, b);
476    dst_v[0] = RGBToV(r, g, b);
477  }
478}
479
480void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
481                       uint8* dst_u, uint8* dst_v, int width) {
482  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
483  int x;
484  for (x = 0; x < width - 1; x += 2) {
485    uint8 b0 = src_argb1555[0] & 0x1f;
486    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
487    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
488    uint8 b1 = src_argb1555[2] & 0x1f;
489    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
490    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
491    uint8 b2 = next_argb1555[0] & 0x1f;
492    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
493    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
494    uint8 b3 = next_argb1555[2] & 0x1f;
495    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
496    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
497    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
498    uint8 g = (g0 + g1 + g2 + g3);
499    uint8 r = (r0 + r1 + r2 + r3);
500    b = (b << 1) | (b >> 6);  // 777 -> 888.
501    g = (g << 1) | (g >> 6);
502    r = (r << 1) | (r >> 6);
503    dst_u[0] = RGBToU(r, g, b);
504    dst_v[0] = RGBToV(r, g, b);
505    src_argb1555 += 4;
506    next_argb1555 += 4;
507    dst_u += 1;
508    dst_v += 1;
509  }
510  if (width & 1) {
511    uint8 b0 = src_argb1555[0] & 0x1f;
512    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
513    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
514    uint8 b2 = next_argb1555[0] & 0x1f;
515    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
516    uint8 r2 = next_argb1555[1] >> 3;
517    uint8 b = (b0 + b2);  // 555 * 2 = 666.
518    uint8 g = (g0 + g2);
519    uint8 r = (r0 + r2);
520    b = (b << 2) | (b >> 4);  // 666 -> 888.
521    g = (g << 2) | (g >> 4);
522    r = (r << 2) | (r >> 4);
523    dst_u[0] = RGBToU(r, g, b);
524    dst_v[0] = RGBToV(r, g, b);
525  }
526}
527
528void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
529                       uint8* dst_u, uint8* dst_v, int width) {
530  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
531  int x;
532  for (x = 0; x < width - 1; x += 2) {
533    uint8 b0 = src_argb4444[0] & 0x0f;
534    uint8 g0 = src_argb4444[0] >> 4;
535    uint8 r0 = src_argb4444[1] & 0x0f;
536    uint8 b1 = src_argb4444[2] & 0x0f;
537    uint8 g1 = src_argb4444[2] >> 4;
538    uint8 r1 = src_argb4444[3] & 0x0f;
539    uint8 b2 = next_argb4444[0] & 0x0f;
540    uint8 g2 = next_argb4444[0] >> 4;
541    uint8 r2 = next_argb4444[1] & 0x0f;
542    uint8 b3 = next_argb4444[2] & 0x0f;
543    uint8 g3 = next_argb4444[2] >> 4;
544    uint8 r3 = next_argb4444[3] & 0x0f;
545    uint8 b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
546    uint8 g = (g0 + g1 + g2 + g3);
547    uint8 r = (r0 + r1 + r2 + r3);
548    b = (b << 2) | (b >> 4);  // 666 -> 888.
549    g = (g << 2) | (g >> 4);
550    r = (r << 2) | (r >> 4);
551    dst_u[0] = RGBToU(r, g, b);
552    dst_v[0] = RGBToV(r, g, b);
553    src_argb4444 += 4;
554    next_argb4444 += 4;
555    dst_u += 1;
556    dst_v += 1;
557  }
558  if (width & 1) {
559    uint8 b0 = src_argb4444[0] & 0x0f;
560    uint8 g0 = src_argb4444[0] >> 4;
561    uint8 r0 = src_argb4444[1] & 0x0f;
562    uint8 b2 = next_argb4444[0] & 0x0f;
563    uint8 g2 = next_argb4444[0] >> 4;
564    uint8 r2 = next_argb4444[1] & 0x0f;
565    uint8 b = (b0 + b2);  // 444 * 2 = 555.
566    uint8 g = (g0 + g2);
567    uint8 r = (r0 + r2);
568    b = (b << 3) | (b >> 2);  // 555 -> 888.
569    g = (g << 3) | (g >> 2);
570    r = (r << 3) | (r >> 2);
571    dst_u[0] = RGBToU(r, g, b);
572    dst_v[0] = RGBToV(r, g, b);
573  }
574}
575
576void ARGBToUV444Row_C(const uint8* src_argb,
577                      uint8* dst_u, uint8* dst_v, int width) {
578  int x;
579  for (x = 0; x < width; ++x) {
580    uint8 ab = src_argb[0];
581    uint8 ag = src_argb[1];
582    uint8 ar = src_argb[2];
583    dst_u[0] = RGBToU(ar, ag, ab);
584    dst_v[0] = RGBToV(ar, ag, ab);
585    src_argb += 4;
586    dst_u += 1;
587    dst_v += 1;
588  }
589}
590
591void ARGBToUV422Row_C(const uint8* src_argb,
592                      uint8* dst_u, uint8* dst_v, int width) {
593  int x;
594  for (x = 0; x < width - 1; x += 2) {
595    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
596    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
597    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
598    dst_u[0] = RGBToU(ar, ag, ab);
599    dst_v[0] = RGBToV(ar, ag, ab);
600    src_argb += 8;
601    dst_u += 1;
602    dst_v += 1;
603  }
604  if (width & 1) {
605    uint8 ab = src_argb[0];
606    uint8 ag = src_argb[1];
607    uint8 ar = src_argb[2];
608    dst_u[0] = RGBToU(ar, ag, ab);
609    dst_v[0] = RGBToV(ar, ag, ab);
610  }
611}
612
613void ARGBToUV411Row_C(const uint8* src_argb,
614                      uint8* dst_u, uint8* dst_v, int width) {
615  int x;
616  for (x = 0; x < width - 3; x += 4) {
617    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
618    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
619    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
620    dst_u[0] = RGBToU(ar, ag, ab);
621    dst_v[0] = RGBToV(ar, ag, ab);
622    src_argb += 16;
623    dst_u += 1;
624    dst_v += 1;
625  }
626  if ((width & 3) == 3) {
627    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
628    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
629    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
630    dst_u[0] = RGBToU(ar, ag, ab);
631    dst_v[0] = RGBToV(ar, ag, ab);
632  } else if ((width & 3) == 2) {
633    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
634    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
635    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
636    dst_u[0] = RGBToU(ar, ag, ab);
637    dst_v[0] = RGBToV(ar, ag, ab);
638  } else if ((width & 3) == 1) {
639    uint8 ab = src_argb[0];
640    uint8 ag = src_argb[1];
641    uint8 ar = src_argb[2];
642    dst_u[0] = RGBToU(ar, ag, ab);
643    dst_v[0] = RGBToV(ar, ag, ab);
644  }
645}
646
647void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
648  int x;
649  for (x = 0; x < width; ++x) {
650    uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
651    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
652    dst_argb[3] = src_argb[3];
653    dst_argb += 4;
654    src_argb += 4;
655  }
656}
657
658// Convert a row of image to Sepia tone.
659void ARGBSepiaRow_C(uint8* dst_argb, int width) {
660  int x;
661  for (x = 0; x < width; ++x) {
662    int b = dst_argb[0];
663    int g = dst_argb[1];
664    int r = dst_argb[2];
665    int sb = (b * 17 + g * 68 + r * 35) >> 7;
666    int sg = (b * 22 + g * 88 + r * 45) >> 7;
667    int sr = (b * 24 + g * 98 + r * 50) >> 7;
668    // b does not over flow. a is preserved from original.
669    dst_argb[0] = sb;
670    dst_argb[1] = clamp255(sg);
671    dst_argb[2] = clamp255(sr);
672    dst_argb += 4;
673  }
674}
675
676// Apply color matrix to a row of image. Matrix is signed.
677// TODO(fbarchard): Consider adding rounding (+32).
678void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
679                          const int8* matrix_argb, int width) {
680  int x;
681  for (x = 0; x < width; ++x) {
682    int b = src_argb[0];
683    int g = src_argb[1];
684    int r = src_argb[2];
685    int a = src_argb[3];
686    int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
687              r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
688    int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
689              r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
690    int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
691              r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
692    int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
693              r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
694    dst_argb[0] = Clamp(sb);
695    dst_argb[1] = Clamp(sg);
696    dst_argb[2] = Clamp(sr);
697    dst_argb[3] = Clamp(sa);
698    src_argb += 4;
699    dst_argb += 4;
700  }
701}
702
703// Apply color table to a row of image.
704void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
705  int x;
706  for (x = 0; x < width; ++x) {
707    int b = dst_argb[0];
708    int g = dst_argb[1];
709    int r = dst_argb[2];
710    int a = dst_argb[3];
711    dst_argb[0] = table_argb[b * 4 + 0];
712    dst_argb[1] = table_argb[g * 4 + 1];
713    dst_argb[2] = table_argb[r * 4 + 2];
714    dst_argb[3] = table_argb[a * 4 + 3];
715    dst_argb += 4;
716  }
717}
718
719// Apply color table to a row of image.
720void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
721  int x;
722  for (x = 0; x < width; ++x) {
723    int b = dst_argb[0];
724    int g = dst_argb[1];
725    int r = dst_argb[2];
726    dst_argb[0] = table_argb[b * 4 + 0];
727    dst_argb[1] = table_argb[g * 4 + 1];
728    dst_argb[2] = table_argb[r * 4 + 2];
729    dst_argb += 4;
730  }
731}
732
733void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
734                       int interval_offset, int width) {
735  int x;
736  for (x = 0; x < width; ++x) {
737    int b = dst_argb[0];
738    int g = dst_argb[1];
739    int r = dst_argb[2];
740    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
741    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
742    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
743    dst_argb += 4;
744  }
745}
746
747#define REPEAT8(v) (v) | ((v) << 8)
748#define SHADE(f, v) v * f >> 24
749
750void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
751                    uint32 value) {
752  const uint32 b_scale = REPEAT8(value & 0xff);
753  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
754  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
755  const uint32 a_scale = REPEAT8(value >> 24);
756
757  int i;
758  for (i = 0; i < width; ++i) {
759    const uint32 b = REPEAT8(src_argb[0]);
760    const uint32 g = REPEAT8(src_argb[1]);
761    const uint32 r = REPEAT8(src_argb[2]);
762    const uint32 a = REPEAT8(src_argb[3]);
763    dst_argb[0] = SHADE(b, b_scale);
764    dst_argb[1] = SHADE(g, g_scale);
765    dst_argb[2] = SHADE(r, r_scale);
766    dst_argb[3] = SHADE(a, a_scale);
767    src_argb += 4;
768    dst_argb += 4;
769  }
770}
771#undef REPEAT8
772#undef SHADE
773
774#define REPEAT8(v) (v) | ((v) << 8)
775#define SHADE(f, v) v * f >> 16
776
777void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
778                       uint8* dst_argb, int width) {
779  int i;
780  for (i = 0; i < width; ++i) {
781    const uint32 b = REPEAT8(src_argb0[0]);
782    const uint32 g = REPEAT8(src_argb0[1]);
783    const uint32 r = REPEAT8(src_argb0[2]);
784    const uint32 a = REPEAT8(src_argb0[3]);
785    const uint32 b_scale = src_argb1[0];
786    const uint32 g_scale = src_argb1[1];
787    const uint32 r_scale = src_argb1[2];
788    const uint32 a_scale = src_argb1[3];
789    dst_argb[0] = SHADE(b, b_scale);
790    dst_argb[1] = SHADE(g, g_scale);
791    dst_argb[2] = SHADE(r, r_scale);
792    dst_argb[3] = SHADE(a, a_scale);
793    src_argb0 += 4;
794    src_argb1 += 4;
795    dst_argb += 4;
796  }
797}
798#undef REPEAT8
799#undef SHADE
800
801#define SHADE(f, v) clamp255(v + f)
802
803void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
804                  uint8* dst_argb, int width) {
805  int i;
806  for (i = 0; i < width; ++i) {
807    const int b = src_argb0[0];
808    const int g = src_argb0[1];
809    const int r = src_argb0[2];
810    const int a = src_argb0[3];
811    const int b_add = src_argb1[0];
812    const int g_add = src_argb1[1];
813    const int r_add = src_argb1[2];
814    const int a_add = src_argb1[3];
815    dst_argb[0] = SHADE(b, b_add);
816    dst_argb[1] = SHADE(g, g_add);
817    dst_argb[2] = SHADE(r, r_add);
818    dst_argb[3] = SHADE(a, a_add);
819    src_argb0 += 4;
820    src_argb1 += 4;
821    dst_argb += 4;
822  }
823}
824#undef SHADE
825
826#define SHADE(f, v) clamp0(f - v)
827
828void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
829                       uint8* dst_argb, int width) {
830  int i;
831  for (i = 0; i < width; ++i) {
832    const int b = src_argb0[0];
833    const int g = src_argb0[1];
834    const int r = src_argb0[2];
835    const int a = src_argb0[3];
836    const int b_sub = src_argb1[0];
837    const int g_sub = src_argb1[1];
838    const int r_sub = src_argb1[2];
839    const int a_sub = src_argb1[3];
840    dst_argb[0] = SHADE(b, b_sub);
841    dst_argb[1] = SHADE(g, g_sub);
842    dst_argb[2] = SHADE(r, r_sub);
843    dst_argb[3] = SHADE(a, a_sub);
844    src_argb0 += 4;
845    src_argb1 += 4;
846    dst_argb += 4;
847  }
848}
849#undef SHADE
850
851// Sobel functions which mimics SSSE3.
852void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
853                 uint8* dst_sobelx, int width) {
854  int i;
855  for (i = 0; i < width; ++i) {
856    int a = src_y0[i];
857    int b = src_y1[i];
858    int c = src_y2[i];
859    int a_sub = src_y0[i + 2];
860    int b_sub = src_y1[i + 2];
861    int c_sub = src_y2[i + 2];
862    int a_diff = a - a_sub;
863    int b_diff = b - b_sub;
864    int c_diff = c - c_sub;
865    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
866    dst_sobelx[i] = (uint8)(clamp255(sobel));
867  }
868}
869
870void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
871                 uint8* dst_sobely, int width) {
872  int i;
873  for (i = 0; i < width; ++i) {
874    int a = src_y0[i + 0];
875    int b = src_y0[i + 1];
876    int c = src_y0[i + 2];
877    int a_sub = src_y1[i + 0];
878    int b_sub = src_y1[i + 1];
879    int c_sub = src_y1[i + 2];
880    int a_diff = a - a_sub;
881    int b_diff = b - b_sub;
882    int c_diff = c - c_sub;
883    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
884    dst_sobely[i] = (uint8)(clamp255(sobel));
885  }
886}
887
888void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
889                uint8* dst_argb, int width) {
890  int i;
891  for (i = 0; i < width; ++i) {
892    int r = src_sobelx[i];
893    int b = src_sobely[i];
894    int s = clamp255(r + b);
895    dst_argb[0] = (uint8)(s);
896    dst_argb[1] = (uint8)(s);
897    dst_argb[2] = (uint8)(s);
898    dst_argb[3] = (uint8)(255u);
899    dst_argb += 4;
900  }
901}
902
903void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
904                       uint8* dst_y, int width) {
905  int i;
906  for (i = 0; i < width; ++i) {
907    int r = src_sobelx[i];
908    int b = src_sobely[i];
909    int s = clamp255(r + b);
910    dst_y[i] = (uint8)(s);
911  }
912}
913
914void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
915                  uint8* dst_argb, int width) {
916  int i;
917  for (i = 0; i < width; ++i) {
918    int r = src_sobelx[i];
919    int b = src_sobely[i];
920    int g = clamp255(r + b);
921    dst_argb[0] = (uint8)(b);
922    dst_argb[1] = (uint8)(g);
923    dst_argb[2] = (uint8)(r);
924    dst_argb[3] = (uint8)(255u);
925    dst_argb += 4;
926  }
927}
928
929void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
930  // Copy a Y to RGB.
931  int x;
932  for (x = 0; x < width; ++x) {
933    uint8 y = src_y[0];
934    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
935    dst_argb[3] = 255u;
936    dst_argb += 4;
937    ++src_y;
938  }
939}
940
941// C reference code that mimics the YUV assembly.
942
// Fixed point YUV to RGB coefficients with 6 fractional bits (scaled by 64).
// Every replacement list that is not a single literal is parenthesized so the
// macros expand correctly inside any surrounding expression (for example
// `-BG` or `2 * BG`).
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */

#define UB 127 /* min(127,(int8)(2.018 * 64)): 129 does not fit in int8 */
#define UG (-25) /* (int8)(-0.391 * 64 - 0.5) */
#define UR 0

#define VB 0
#define VG (-52) /* (int8)(-0.813 * 64 - 0.5) */
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */

// Bias: the chroma contribution at U = V = 128.  Subtracting it centers the
// chroma terms on 128.
#define BB (UB * 128 + VB * 128)
#define BG (UG * 128 + VG * 128)
#define BR (UR * 128 + VR * 128)
957
958static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
959                              uint8* b, uint8* g, uint8* r) {
960  int32 y1 = ((int32)(y) - 16) * YG;
961  *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
962  *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
963  *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
964}
965
966#if !defined(LIBYUV_DISABLE_NEON) && \
967    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
968// C mimic assembly.
969// TODO(fbarchard): Remove subsampling from Neon.
970void I444ToARGBRow_C(const uint8* src_y,
971                     const uint8* src_u,
972                     const uint8* src_v,
973                     uint8* rgb_buf,
974                     int width) {
975  int x;
976  for (x = 0; x < width - 1; x += 2) {
977    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
978    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
979    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
980    rgb_buf[3] = 255;
981    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
982    rgb_buf[7] = 255;
983    src_y += 2;
984    src_u += 2;
985    src_v += 2;
986    rgb_buf += 8;  // Advance 2 pixels.
987  }
988  if (width & 1) {
989    YuvPixel(src_y[0], src_u[0], src_v[0],
990             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
991  }
992}
993#else
994void I444ToARGBRow_C(const uint8* src_y,
995                     const uint8* src_u,
996                     const uint8* src_v,
997                     uint8* rgb_buf,
998                     int width) {
999  int x;
1000  for (x = 0; x < width; ++x) {
1001    YuvPixel(src_y[0], src_u[0], src_v[0],
1002             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1003    rgb_buf[3] = 255;
1004    src_y += 1;
1005    src_u += 1;
1006    src_v += 1;
1007    rgb_buf += 4;  // Advance 1 pixel.
1008  }
1009}
1010#endif
1011// Also used for 420
1012void I422ToARGBRow_C(const uint8* src_y,
1013                     const uint8* src_u,
1014                     const uint8* src_v,
1015                     uint8* rgb_buf,
1016                     int width) {
1017  int x;
1018  for (x = 0; x < width - 1; x += 2) {
1019    YuvPixel(src_y[0], src_u[0], src_v[0],
1020             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1021    rgb_buf[3] = 255;
1022    YuvPixel(src_y[1], src_u[0], src_v[0],
1023             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1024    rgb_buf[7] = 255;
1025    src_y += 2;
1026    src_u += 1;
1027    src_v += 1;
1028    rgb_buf += 8;  // Advance 2 pixels.
1029  }
1030  if (width & 1) {
1031    YuvPixel(src_y[0], src_u[0], src_v[0],
1032             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1033    rgb_buf[3] = 255;
1034  }
1035}
1036
1037void I422ToRGB24Row_C(const uint8* src_y,
1038                      const uint8* src_u,
1039                      const uint8* src_v,
1040                      uint8* rgb_buf,
1041                      int width) {
1042  int x;
1043  for (x = 0; x < width - 1; x += 2) {
1044    YuvPixel(src_y[0], src_u[0], src_v[0],
1045             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1046    YuvPixel(src_y[1], src_u[0], src_v[0],
1047             rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
1048    src_y += 2;
1049    src_u += 1;
1050    src_v += 1;
1051    rgb_buf += 6;  // Advance 2 pixels.
1052  }
1053  if (width & 1) {
1054    YuvPixel(src_y[0], src_u[0], src_v[0],
1055             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1056  }
1057}
1058
1059void I422ToRAWRow_C(const uint8* src_y,
1060                    const uint8* src_u,
1061                    const uint8* src_v,
1062                    uint8* rgb_buf,
1063                    int width) {
1064  int x;
1065  for (x = 0; x < width - 1; x += 2) {
1066    YuvPixel(src_y[0], src_u[0], src_v[0],
1067             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1068    YuvPixel(src_y[1], src_u[0], src_v[0],
1069             rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
1070    src_y += 2;
1071    src_u += 1;
1072    src_v += 1;
1073    rgb_buf += 6;  // Advance 2 pixels.
1074  }
1075  if (width & 1) {
1076    YuvPixel(src_y[0], src_u[0], src_v[0],
1077             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1078  }
1079}
1080
1081void I422ToARGB4444Row_C(const uint8* src_y,
1082                         const uint8* src_u,
1083                         const uint8* src_v,
1084                         uint8* dst_argb4444,
1085                         int width) {
1086  uint8 b0;
1087  uint8 g0;
1088  uint8 r0;
1089  uint8 b1;
1090  uint8 g1;
1091  uint8 r1;
1092  int x;
1093  for (x = 0; x < width - 1; x += 2) {
1094    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1095    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1096    b0 = b0 >> 4;
1097    g0 = g0 >> 4;
1098    r0 = r0 >> 4;
1099    b1 = b1 >> 4;
1100    g1 = g1 >> 4;
1101    r1 = r1 >> 4;
1102    *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1103        (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
1104    src_y += 2;
1105    src_u += 1;
1106    src_v += 1;
1107    dst_argb4444 += 4;  // Advance 2 pixels.
1108  }
1109  if (width & 1) {
1110    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1111    b0 = b0 >> 4;
1112    g0 = g0 >> 4;
1113    r0 = r0 >> 4;
1114    *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1115        0xf000;
1116  }
1117}
1118
1119void I422ToARGB1555Row_C(const uint8* src_y,
1120                         const uint8* src_u,
1121                         const uint8* src_v,
1122                         uint8* dst_argb1555,
1123                         int width) {
1124  uint8 b0;
1125  uint8 g0;
1126  uint8 r0;
1127  uint8 b1;
1128  uint8 g1;
1129  uint8 r1;
1130  int x;
1131  for (x = 0; x < width - 1; x += 2) {
1132    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1133    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1134    b0 = b0 >> 3;
1135    g0 = g0 >> 3;
1136    r0 = r0 >> 3;
1137    b1 = b1 >> 3;
1138    g1 = g1 >> 3;
1139    r1 = r1 >> 3;
1140    *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1141        (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
1142    src_y += 2;
1143    src_u += 1;
1144    src_v += 1;
1145    dst_argb1555 += 4;  // Advance 2 pixels.
1146  }
1147  if (width & 1) {
1148    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1149    b0 = b0 >> 3;
1150    g0 = g0 >> 3;
1151    r0 = r0 >> 3;
1152    *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1153        0x8000;
1154  }
1155}
1156
1157void I422ToRGB565Row_C(const uint8* src_y,
1158                       const uint8* src_u,
1159                       const uint8* src_v,
1160                       uint8* dst_rgb565,
1161                       int width) {
1162  uint8 b0;
1163  uint8 g0;
1164  uint8 r0;
1165  uint8 b1;
1166  uint8 g1;
1167  uint8 r1;
1168  int x;
1169  for (x = 0; x < width - 1; x += 2) {
1170    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1171    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1172    b0 = b0 >> 3;
1173    g0 = g0 >> 2;
1174    r0 = r0 >> 3;
1175    b1 = b1 >> 3;
1176    g1 = g1 >> 2;
1177    r1 = r1 >> 3;
1178    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1179        (b1 << 16) | (g1 << 21) | (r1 << 27);
1180    src_y += 2;
1181    src_u += 1;
1182    src_v += 1;
1183    dst_rgb565 += 4;  // Advance 2 pixels.
1184  }
1185  if (width & 1) {
1186    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1187    b0 = b0 >> 3;
1188    g0 = g0 >> 2;
1189    r0 = r0 >> 3;
1190    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1191  }
1192}
1193
1194void I411ToARGBRow_C(const uint8* src_y,
1195                     const uint8* src_u,
1196                     const uint8* src_v,
1197                     uint8* rgb_buf,
1198                     int width) {
1199  int x;
1200  for (x = 0; x < width - 3; x += 4) {
1201    YuvPixel(src_y[0], src_u[0], src_v[0],
1202             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1203    rgb_buf[3] = 255;
1204    YuvPixel(src_y[1], src_u[0], src_v[0],
1205             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1206    rgb_buf[7] = 255;
1207    YuvPixel(src_y[2], src_u[0], src_v[0],
1208             rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
1209    rgb_buf[11] = 255;
1210    YuvPixel(src_y[3], src_u[0], src_v[0],
1211             rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
1212    rgb_buf[15] = 255;
1213    src_y += 4;
1214    src_u += 1;
1215    src_v += 1;
1216    rgb_buf += 16;  // Advance 4 pixels.
1217  }
1218  if (width & 2) {
1219    YuvPixel(src_y[0], src_u[0], src_v[0],
1220             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1221    rgb_buf[3] = 255;
1222    YuvPixel(src_y[1], src_u[0], src_v[0],
1223             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1224    rgb_buf[7] = 255;
1225    src_y += 2;
1226    rgb_buf += 8;  // Advance 2 pixels.
1227  }
1228  if (width & 1) {
1229    YuvPixel(src_y[0], src_u[0], src_v[0],
1230             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1231    rgb_buf[3] = 255;
1232  }
1233}
1234
1235void NV12ToARGBRow_C(const uint8* src_y,
1236                     const uint8* usrc_v,
1237                     uint8* rgb_buf,
1238                     int width) {
1239  int x;
1240  for (x = 0; x < width - 1; x += 2) {
1241    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1242             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1243    rgb_buf[3] = 255;
1244    YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
1245             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1246    rgb_buf[7] = 255;
1247    src_y += 2;
1248    usrc_v += 2;
1249    rgb_buf += 8;  // Advance 2 pixels.
1250  }
1251  if (width & 1) {
1252    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1253             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1254    rgb_buf[3] = 255;
1255  }
1256}
1257
1258void NV21ToARGBRow_C(const uint8* src_y,
1259                     const uint8* src_vu,
1260                     uint8* rgb_buf,
1261                     int width) {
1262  int x;
1263  for (x = 0; x < width - 1; x += 2) {
1264    YuvPixel(src_y[0], src_vu[1], src_vu[0],
1265             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1266    rgb_buf[3] = 255;
1267
1268    YuvPixel(src_y[1], src_vu[1], src_vu[0],
1269             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1270    rgb_buf[7] = 255;
1271
1272    src_y += 2;
1273    src_vu += 2;
1274    rgb_buf += 8;  // Advance 2 pixels.
1275  }
1276  if (width & 1) {
1277    YuvPixel(src_y[0], src_vu[1], src_vu[0],
1278             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1279    rgb_buf[3] = 255;
1280  }
1281}
1282
1283void NV12ToRGB565Row_C(const uint8* src_y,
1284                       const uint8* usrc_v,
1285                       uint8* dst_rgb565,
1286                       int width) {
1287  uint8 b0;
1288  uint8 g0;
1289  uint8 r0;
1290  uint8 b1;
1291  uint8 g1;
1292  uint8 r1;
1293  int x;
1294  for (x = 0; x < width - 1; x += 2) {
1295    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1296    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
1297    b0 = b0 >> 3;
1298    g0 = g0 >> 2;
1299    r0 = r0 >> 3;
1300    b1 = b1 >> 3;
1301    g1 = g1 >> 2;
1302    r1 = r1 >> 3;
1303    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1304        (b1 << 16) | (g1 << 21) | (r1 << 27);
1305    src_y += 2;
1306    usrc_v += 2;
1307    dst_rgb565 += 4;  // Advance 2 pixels.
1308  }
1309  if (width & 1) {
1310    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1311    b0 = b0 >> 3;
1312    g0 = g0 >> 2;
1313    r0 = r0 >> 3;
1314    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1315  }
1316}
1317
1318void NV21ToRGB565Row_C(const uint8* src_y,
1319                       const uint8* vsrc_u,
1320                       uint8* dst_rgb565,
1321                       int width) {
1322  uint8 b0;
1323  uint8 g0;
1324  uint8 r0;
1325  uint8 b1;
1326  uint8 g1;
1327  uint8 r1;
1328  int x;
1329  for (x = 0; x < width - 1; x += 2) {
1330    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1331    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
1332    b0 = b0 >> 3;
1333    g0 = g0 >> 2;
1334    r0 = r0 >> 3;
1335    b1 = b1 >> 3;
1336    g1 = g1 >> 2;
1337    r1 = r1 >> 3;
1338    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1339        (b1 << 16) | (g1 << 21) | (r1 << 27);
1340    src_y += 2;
1341    vsrc_u += 2;
1342    dst_rgb565 += 4;  // Advance 2 pixels.
1343  }
1344  if (width & 1) {
1345    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1346    b0 = b0 >> 3;
1347    g0 = g0 >> 2;
1348    r0 = r0 >> 3;
1349    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1350  }
1351}
1352
1353void YUY2ToARGBRow_C(const uint8* src_yuy2,
1354                     uint8* rgb_buf,
1355                     int width) {
1356  int x;
1357  for (x = 0; x < width - 1; x += 2) {
1358    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1359             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1360    rgb_buf[3] = 255;
1361    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
1362             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1363    rgb_buf[7] = 255;
1364    src_yuy2 += 4;
1365    rgb_buf += 8;  // Advance 2 pixels.
1366  }
1367  if (width & 1) {
1368    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1369             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1370    rgb_buf[3] = 255;
1371  }
1372}
1373
1374void UYVYToARGBRow_C(const uint8* src_uyvy,
1375                     uint8* rgb_buf,
1376                     int width) {
1377  int x;
1378  for (x = 0; x < width - 1; x += 2) {
1379    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1380             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1381    rgb_buf[3] = 255;
1382    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
1383             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1384    rgb_buf[7] = 255;
1385    src_uyvy += 4;
1386    rgb_buf += 8;  // Advance 2 pixels.
1387  }
1388  if (width & 1) {
1389    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1390             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1391    rgb_buf[3] = 255;
1392  }
1393}
1394
1395void I422ToBGRARow_C(const uint8* src_y,
1396                     const uint8* src_u,
1397                     const uint8* src_v,
1398                     uint8* rgb_buf,
1399                     int width) {
1400  int x;
1401  for (x = 0; x < width - 1; x += 2) {
1402    YuvPixel(src_y[0], src_u[0], src_v[0],
1403             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
1404    rgb_buf[0] = 255;
1405    YuvPixel(src_y[1], src_u[0], src_v[0],
1406             rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
1407    rgb_buf[4] = 255;
1408    src_y += 2;
1409    src_u += 1;
1410    src_v += 1;
1411    rgb_buf += 8;  // Advance 2 pixels.
1412  }
1413  if (width & 1) {
1414    YuvPixel(src_y[0], src_u[0], src_v[0],
1415             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
1416    rgb_buf[0] = 255;
1417  }
1418}
1419
1420void I422ToABGRRow_C(const uint8* src_y,
1421                     const uint8* src_u,
1422                     const uint8* src_v,
1423                     uint8* rgb_buf,
1424                     int width) {
1425  int x;
1426  for (x = 0; x < width - 1; x += 2) {
1427    YuvPixel(src_y[0], src_u[0], src_v[0],
1428             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1429    rgb_buf[3] = 255;
1430    YuvPixel(src_y[1], src_u[0], src_v[0],
1431             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
1432    rgb_buf[7] = 255;
1433    src_y += 2;
1434    src_u += 1;
1435    src_v += 1;
1436    rgb_buf += 8;  // Advance 2 pixels.
1437  }
1438  if (width & 1) {
1439    YuvPixel(src_y[0], src_u[0], src_v[0],
1440             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1441    rgb_buf[3] = 255;
1442  }
1443}
1444
1445void I422ToRGBARow_C(const uint8* src_y,
1446                     const uint8* src_u,
1447                     const uint8* src_v,
1448                     uint8* rgb_buf,
1449                     int width) {
1450  int x;
1451  for (x = 0; x < width - 1; x += 2) {
1452    YuvPixel(src_y[0], src_u[0], src_v[0],
1453             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
1454    rgb_buf[0] = 255;
1455    YuvPixel(src_y[1], src_u[0], src_v[0],
1456             rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
1457    rgb_buf[4] = 255;
1458    src_y += 2;
1459    src_u += 1;
1460    src_v += 1;
1461    rgb_buf += 8;  // Advance 2 pixels.
1462  }
1463  if (width & 1) {
1464    YuvPixel(src_y[0], src_u[0], src_v[0],
1465             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
1466    rgb_buf[0] = 255;
1467  }
1468}
1469
1470void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
1471  int x;
1472  for (x = 0; x < width - 1; x += 2) {
1473    YuvPixel(src_y[0], 128, 128,
1474             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1475    rgb_buf[3] = 255;
1476    YuvPixel(src_y[1], 128, 128,
1477             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1478    rgb_buf[7] = 255;
1479    src_y += 2;
1480    rgb_buf += 8;  // Advance 2 pixels.
1481  }
1482  if (width & 1) {
1483    YuvPixel(src_y[0], 128, 128,
1484             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1485    rgb_buf[3] = 255;
1486  }
1487}
1488
1489void MirrorRow_C(const uint8* src, uint8* dst, int width) {
1490  int x;
1491  src += width - 1;
1492  for (x = 0; x < width - 1; x += 2) {
1493    dst[x] = src[0];
1494    dst[x + 1] = src[-1];
1495    src -= 2;
1496  }
1497  if (width & 1) {
1498    dst[width - 1] = src[0];
1499  }
1500}
1501
1502void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1503  int x;
1504  src_uv += (width - 1) << 1;
1505  for (x = 0; x < width - 1; x += 2) {
1506    dst_u[x] = src_uv[0];
1507    dst_u[x + 1] = src_uv[-2];
1508    dst_v[x] = src_uv[1];
1509    dst_v[x + 1] = src_uv[-2 + 1];
1510    src_uv -= 4;
1511  }
1512  if (width & 1) {
1513    dst_u[width - 1] = src_uv[0];
1514    dst_v[width - 1] = src_uv[1];
1515  }
1516}
1517
1518void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
1519  int x;
1520  const uint32* src32 = (const uint32*)(src);
1521  uint32* dst32 = (uint32*)(dst);
1522  src32 += width - 1;
1523  for (x = 0; x < width - 1; x += 2) {
1524    dst32[x] = src32[0];
1525    dst32[x + 1] = src32[-1];
1526    src32 -= 2;
1527  }
1528  if (width & 1) {
1529    dst32[width - 1] = src32[0];
1530  }
1531}
1532
1533void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1534  int x;
1535  for (x = 0; x < width - 1; x += 2) {
1536    dst_u[x] = src_uv[0];
1537    dst_u[x + 1] = src_uv[2];
1538    dst_v[x] = src_uv[1];
1539    dst_v[x + 1] = src_uv[3];
1540    src_uv += 4;
1541  }
1542  if (width & 1) {
1543    dst_u[width - 1] = src_uv[0];
1544    dst_v[width - 1] = src_uv[1];
1545  }
1546}
1547
1548void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
1549                  int width) {
1550  int x;
1551  for (x = 0; x < width - 1; x += 2) {
1552    dst_uv[0] = src_u[x];
1553    dst_uv[1] = src_v[x];
1554    dst_uv[2] = src_u[x + 1];
1555    dst_uv[3] = src_v[x + 1];
1556    dst_uv += 4;
1557  }
1558  if (width & 1) {
1559    dst_uv[0] = src_u[width - 1];
1560    dst_uv[1] = src_v[width - 1];
1561  }
1562}
1563
1564void CopyRow_C(const uint8* src, uint8* dst, int count) {
1565  memcpy(dst, src, count);
1566}
1567
1568void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
1569  memcpy(dst, src, count * 2);
1570}
1571
1572void SetRow_C(uint8* dst, uint32 v8, int count) {
1573#ifdef _MSC_VER
1574  // VC will generate rep stosb.
1575  int x;
1576  for (x = 0; x < count; ++x) {
1577    dst[x] = v8;
1578  }
1579#else
1580  memset(dst, v8, count);
1581#endif
1582}
1583
1584void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
1585                 int dst_stride, int height) {
1586  int y;
1587  for (y = 0; y < height; ++y) {
1588    uint32* d = (uint32*)(dst);
1589    int x;
1590    for (x = 0; x < width; ++x) {
1591      d[x] = v32;
1592    }
1593    dst += dst_stride;
1594  }
1595}
1596
1597// Filter 2 rows of YUY2 UV's (422) into U and V (420).
1598void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
1599                   uint8* dst_u, uint8* dst_v, int width) {
1600  // Output a row of UV values, filtering 2 rows of YUY2.
1601  int x;
1602  for (x = 0; x < width; x += 2) {
1603    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
1604    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
1605    src_yuy2 += 4;
1606    dst_u += 1;
1607    dst_v += 1;
1608  }
1609}
1610
1611// Copy row of YUY2 UV's (422) into U and V (422).
1612void YUY2ToUV422Row_C(const uint8* src_yuy2,
1613                      uint8* dst_u, uint8* dst_v, int width) {
1614  // Output a row of UV values.
1615  int x;
1616  for (x = 0; x < width; x += 2) {
1617    dst_u[0] = src_yuy2[1];
1618    dst_v[0] = src_yuy2[3];
1619    src_yuy2 += 4;
1620    dst_u += 1;
1621    dst_v += 1;
1622  }
1623}
1624
1625// Copy row of YUY2 Y's (422) into Y (420/422).
1626void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1627  // Output a row of Y values.
1628  int x;
1629  for (x = 0; x < width - 1; x += 2) {
1630    dst_y[x] = src_yuy2[0];
1631    dst_y[x + 1] = src_yuy2[2];
1632    src_yuy2 += 4;
1633  }
1634  if (width & 1) {
1635    dst_y[width - 1] = src_yuy2[0];
1636  }
1637}
1638
1639// Filter 2 rows of UYVY UV's (422) into U and V (420).
1640void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
1641                   uint8* dst_u, uint8* dst_v, int width) {
1642  // Output a row of UV values.
1643  int x;
1644  for (x = 0; x < width; x += 2) {
1645    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
1646    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
1647    src_uyvy += 4;
1648    dst_u += 1;
1649    dst_v += 1;
1650  }
1651}
1652
1653// Copy row of UYVY UV's (422) into U and V (422).
1654void UYVYToUV422Row_C(const uint8* src_uyvy,
1655                      uint8* dst_u, uint8* dst_v, int width) {
1656  // Output a row of UV values.
1657  int x;
1658  for (x = 0; x < width; x += 2) {
1659    dst_u[0] = src_uyvy[0];
1660    dst_v[0] = src_uyvy[2];
1661    src_uyvy += 4;
1662    dst_u += 1;
1663    dst_v += 1;
1664  }
1665}
1666
1667// Copy row of UYVY Y's (422) into Y (420/422).
1668void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
1669  // Output a row of Y values.
1670  int x;
1671  for (x = 0; x < width - 1; x += 2) {
1672    dst_y[x] = src_uyvy[1];
1673    dst_y[x + 1] = src_uyvy[3];
1674    src_uyvy += 4;
1675  }
1676  if (width & 1) {
1677    dst_y[width - 1] = src_uyvy[1];
1678  }
1679}
1680
1681#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
1682
1683// Blend src_argb0 over src_argb1 and store to dst_argb.
1684// dst_argb may be src_argb0 or src_argb1.
1685// This code mimics the SSSE3 version for better testability.
1686void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
1687                    uint8* dst_argb, int width) {
1688  int x;
1689  for (x = 0; x < width - 1; x += 2) {
1690    uint32 fb = src_argb0[0];
1691    uint32 fg = src_argb0[1];
1692    uint32 fr = src_argb0[2];
1693    uint32 a = src_argb0[3];
1694    uint32 bb = src_argb1[0];
1695    uint32 bg = src_argb1[1];
1696    uint32 br = src_argb1[2];
1697    dst_argb[0] = BLEND(fb, bb, a);
1698    dst_argb[1] = BLEND(fg, bg, a);
1699    dst_argb[2] = BLEND(fr, br, a);
1700    dst_argb[3] = 255u;
1701
1702    fb = src_argb0[4 + 0];
1703    fg = src_argb0[4 + 1];
1704    fr = src_argb0[4 + 2];
1705    a = src_argb0[4 + 3];
1706    bb = src_argb1[4 + 0];
1707    bg = src_argb1[4 + 1];
1708    br = src_argb1[4 + 2];
1709    dst_argb[4 + 0] = BLEND(fb, bb, a);
1710    dst_argb[4 + 1] = BLEND(fg, bg, a);
1711    dst_argb[4 + 2] = BLEND(fr, br, a);
1712    dst_argb[4 + 3] = 255u;
1713    src_argb0 += 8;
1714    src_argb1 += 8;
1715    dst_argb += 8;
1716  }
1717
1718  if (width & 1) {
1719    uint32 fb = src_argb0[0];
1720    uint32 fg = src_argb0[1];
1721    uint32 fr = src_argb0[2];
1722    uint32 a = src_argb0[3];
1723    uint32 bb = src_argb1[0];
1724    uint32 bg = src_argb1[1];
1725    uint32 br = src_argb1[2];
1726    dst_argb[0] = BLEND(fb, bb, a);
1727    dst_argb[1] = BLEND(fg, bg, a);
1728    dst_argb[2] = BLEND(fr, br, a);
1729    dst_argb[3] = 255u;
1730  }
1731}
1732#undef BLEND
1733#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
1734
1735// Multiply source RGB by alpha and store to destination.
1736// This code mimics the SSSE3 version for better testability.
1737void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1738  int i;
1739  for (i = 0; i < width - 1; i += 2) {
1740    uint32 b = src_argb[0];
1741    uint32 g = src_argb[1];
1742    uint32 r = src_argb[2];
1743    uint32 a = src_argb[3];
1744    dst_argb[0] = ATTENUATE(b, a);
1745    dst_argb[1] = ATTENUATE(g, a);
1746    dst_argb[2] = ATTENUATE(r, a);
1747    dst_argb[3] = a;
1748    b = src_argb[4];
1749    g = src_argb[5];
1750    r = src_argb[6];
1751    a = src_argb[7];
1752    dst_argb[4] = ATTENUATE(b, a);
1753    dst_argb[5] = ATTENUATE(g, a);
1754    dst_argb[6] = ATTENUATE(r, a);
1755    dst_argb[7] = a;
1756    src_argb += 8;
1757    dst_argb += 8;
1758  }
1759
1760  if (width & 1) {
1761    const uint32 b = src_argb[0];
1762    const uint32 g = src_argb[1];
1763    const uint32 r = src_argb[2];
1764    const uint32 a = src_argb[3];
1765    dst_argb[0] = ATTENUATE(b, a);
1766    dst_argb[1] = ATTENUATE(g, a);
1767    dst_argb[2] = ATTENUATE(r, a);
1768    dst_argb[3] = a;
1769  }
1770}
1771#undef ATTENUATE
1772
1773// Divide source RGB by alpha and store to destination.
1774// b = (b * 255 + (a / 2)) / a;
1775// g = (g * 255 + (a / 2)) / a;
1776// r = (r * 255 + (a / 2)) / a;
1777// Reciprocal method is off by 1 on some values. ie 125
1778// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
1779#define T(a) 0x01000000 + (0x10000 / a)
1780const uint32 fixed_invtbl8[256] = {
1781  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
1782  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
1783  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
1784  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
1785  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
1786  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
1787  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
1788  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
1789  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
1790  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
1791  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
1792  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
1793  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
1794  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
1795  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
1796  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
1797  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
1798  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
1799  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
1800  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
1801  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
1802  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
1803  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
1804  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
1805  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
1806  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
1807  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
1808  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
1809  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
1810  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
1811  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
1812  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
1813#undef T
1814
1815void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1816  int i;
1817  for (i = 0; i < width; ++i) {
1818    uint32 b = src_argb[0];
1819    uint32 g = src_argb[1];
1820    uint32 r = src_argb[2];
1821    const uint32 a = src_argb[3];
1822    const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
1823    b = (b * ia) >> 8;
1824    g = (g * ia) >> 8;
1825    r = (r * ia) >> 8;
1826    // Clamping should not be necessary but is free in assembly.
1827    dst_argb[0] = clamp255(b);
1828    dst_argb[1] = clamp255(g);
1829    dst_argb[2] = clamp255(r);
1830    dst_argb[3] = a;
1831    src_argb += 4;
1832    dst_argb += 4;
1833  }
1834}
1835
1836void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1837                               const int32* previous_cumsum, int width) {
1838  int32 row_sum[4] = {0, 0, 0, 0};
1839  int x;
1840  for (x = 0; x < width; ++x) {
1841    row_sum[0] += row[x * 4 + 0];
1842    row_sum[1] += row[x * 4 + 1];
1843    row_sum[2] += row[x * 4 + 2];
1844    row_sum[3] += row[x * 4 + 3];
1845    cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
1846    cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
1847    cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
1848    cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
1849  }
1850}
1851
// Convert cumulative sums into averaged 4-channel pixels.
// For each output pixel and channel c the box sum is
//   bl[w + c] + tl[c] - bl[c] - tl[w + c]
// which is then multiplied by 1 / area and truncated to a byte.
//   tl, bl: two rows of the cumulative sum table (presumably the top and
//           bottom edges of the box - confirm against the caller).
//   w:      offset, in int32 elements, added to reach the other box edge.
//   area:   divisor applied to every box sum.
//   dst:    output pixels, 4 bytes each.
//   count:  number of output pixels.
void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
                                int w, int area, uint8* dst, int count) {
  const float inv_area = 1.0f / area;
  int pixel;
  int c;
  for (pixel = 0; pixel < count; ++pixel) {
    const int32* top = tl + pixel * 4;
    const int32* bottom = bl + pixel * 4;
    uint8* out = dst + pixel * 4;
    for (c = 0; c < 4; ++c) {
      const int32 box_sum = bottom[w + c] + top[c] - bottom[c] - top[w + c];
      out[c] = (uint8)(box_sum * inv_area);
    }
  }
}
1866
// Copy pixels from rotated source to destination row with a slope.
//   src_argb:        base of the source image, 4 bytes per pixel.
//   src_argb_stride: byte distance between consecutive source rows.
//   dst_argb:        destination row; width * 4 bytes are written.
//   uv_dudv:         4 floats {u, v, du, dv}: the starting source coordinate
//                    and the step added after every destination pixel.
//   width:           number of destination pixels.
// The coordinate is truncated to an integer (x, y) for each pixel. No bounds
// check is done here, so every sampled (x, y) must lie inside the source.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width) {
  int i;
  // Running source coordinate.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    const int x = (int)(uv[0]);
    const int y = (int)(uv[1]);
    // Widen before multiplying so a large y * stride cannot overflow int.
    const uint8* src_pixel =
        src_argb + (ptrdiff_t)(y) * src_argb_stride + (ptrdiff_t)(x) * 4;
    uint32 pixel;
    // Copy through a local with memcpy instead of casting to uint32*: the
    // byte pointers carry no 4-byte alignment guarantee, and the cast also
    // breaks strict aliasing.
    memcpy(&pixel, src_pixel, sizeof(pixel));
    memcpy(dst_argb, &pixel, sizeof(pixel));
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
1887
// Average two rows of bytes into one, rounding up on ties.
// Used for conversions such as I422ToI420.
//   src_uv:        first source row.
//   src_uv_stride: element offset from the first source row to the second.
//   dst_uv:        output row.
//   pix:           number of bytes to produce.
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
               uint8* dst_uv, int pix) {
  const uint8* upper = src_uv;
  const uint8* lower = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < pix; ++i) {
    const int sum = upper[i] + lower[i];
    dst_uv[i] = (uint8)((sum + 1) >> 1);
  }
}
1896
// 16-bit variant of HalfRow_C: average two rows of uint16 samples into one,
// rounding up on ties.
//   src_uv:        first source row.
//   src_uv_stride: element offset from the first source row to the second.
//   dst_uv:        output row.
//   pix:           number of samples to produce.
void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
                  uint16* dst_uv, int pix) {
  const uint16* upper = src_uv;
  const uint16* lower = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < pix; ++i) {
    const int sum = upper[i] + lower[i];
    dst_uv[i] = (uint16)((sum + 1) >> 1);
  }
}
1904
// C version 2x2 -> 2x1.
// Blend two vertically adjacent rows of bytes into one output row.
//   dst_ptr:           output row.
//   src_ptr:           first source row; the second is src_ptr + src_stride.
//   width:             number of bytes to produce.
//   source_y_fraction: weight of the second row out of 256; the first row
//                      gets 256 - source_y_fraction.
// A fraction of 0 is a plain copy and 128 is delegated to HalfRow_C. Every
// other fraction uses (a * w0 + b * w1) >> 8, with no rounding term.
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width, int source_y_fraction) {
  const uint8* row0 = src_ptr;
  const uint8* row1 = src_ptr + src_stride;
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  int x;
  switch (source_y_fraction) {
    case 0:
      memcpy(dst_ptr, src_ptr, width);
      return;
    case 128:
      HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
      return;
    default:
      break;
  }
  // Two bytes per iteration; an odd trailing byte is handled below.
  for (x = 0; x + 1 < width; x += 2) {
    dst_ptr[x] = (uint8)((row0[x] * weight0 + row1[x] * weight1) >> 8);
    dst_ptr[x + 1] =
        (uint8)((row0[x + 1] * weight0 + row1[x + 1] * weight1) >> 8);
  }
  if (width & 1) {
    dst_ptr[x] = (uint8)((row0[x] * weight0 + row1[x] * weight1) >> 8);
  }
}
1932
// 16-bit variant of InterpolateRow_C: blend two vertically adjacent rows of
// uint16 samples into one output row.
//   dst_ptr:           output row.
//   src_ptr:           first source row; the second is src_ptr + src_stride
//                      (stride counted in uint16 elements).
//   width:             number of samples to produce.
//   source_y_fraction: weight of the second row out of 256; the first row
//                      gets 256 - source_y_fraction.
// A fraction of 0 is a plain copy and 128 is delegated to HalfRow_16_C. Every
// other fraction uses (a * w0 + b * w1) >> 8, with no rounding term.
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width, int source_y_fraction) {
  const uint16* row0 = src_ptr;
  const uint16* row1 = src_ptr + src_stride;
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  int x;
  switch (source_y_fraction) {
    case 0:
      // Two bytes per sample.
      memcpy(dst_ptr, src_ptr, width * 2);
      return;
    case 128:
      HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
      return;
    default:
      break;
  }
  // Two samples per iteration; an odd trailing sample is handled below.
  for (x = 0; x + 1 < width; x += 2) {
    dst_ptr[x] = (uint16)((row0[x] * weight0 + row1[x] * weight1) >> 8);
    dst_ptr[x + 1] =
        (uint16)((row0[x + 1] * weight0 + row1[x + 1] * weight1) >> 8);
  }
  if (width & 1) {
    dst_ptr[x] = (uint16)((row0[x] * weight0 + row1[x] * weight1) >> 8);
  }
}
1959
// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
//   src_argb:  input pixels, 4 bytes each.
//   dst_bayer: output, 1 byte per pixel.
//   selector:  byte 0 is the source byte offset for even outputs, byte 1 the
//              offset for odd outputs. Both are relative to the start of
//              each 8-byte pixel pair.
//   pix:       number of output bytes.
void ARGBToBayerRow_C(const uint8* src_argb,
                      uint8* dst_bayer, uint32 selector, int pix) {
  const int even_offset = selector & 0xff;
  const int odd_offset = (selector >> 8) & 0xff;
  int x;
  // One pixel pair (8 source bytes -> 2 output bytes) per iteration.
  for (x = 0; x + 1 < pix; x += 2) {
    const uint8* pair = src_argb + x * 4;
    dst_bayer[x] = pair[even_offset];
    dst_bayer[x + 1] = pair[odd_offset];
  }
  // Odd trailing pixel: only the even selector applies.
  if (pix & 1) {
    dst_bayer[x] = src_argb[x * 4 + even_offset];
  }
}
1977
// Select G channel from ARGB.  e.g.  GGGGGGGG
//   src_argb:  input pixels, 4 bytes each; byte 1 of every pixel is copied.
//   dst_bayer: output, 1 byte per pixel.
//   selector:  unused by this function.
//   pix:       number of output bytes.
void ARGBToBayerGGRow_C(const uint8* src_argb,
                        uint8* dst_bayer, uint32 selector, int pix) {
  int x;
  // Two pixels per iteration; an odd trailing pixel is handled below.
  for (x = 0; x + 1 < pix; x += 2) {
    const uint8* pair = src_argb + x * 4;
    dst_bayer[x] = pair[1];
    dst_bayer[x + 1] = pair[5];
  }
  if (pix & 1) {
    dst_bayer[x] = src_argb[x * 4 + 1];
  }
}
1993
// Use first 4 shuffler values to reorder ARGB channels.
//   src_argb: input pixels, 4 bytes each.
//   dst_argb: output pixels, 4 bytes each; may be the same buffer as
//             src_argb.
//   shuffler: shuffler[k] is the source byte offset, within each pixel, that
//             feeds output byte k (k = 0..3).
//   pix:      number of pixels.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
                      const uint8* shuffler, int pix) {
  // The selectors are read once, before any pixel is processed.
  const int sel0 = shuffler[0];
  const int sel1 = shuffler[1];
  const int sel2 = shuffler[2];
  const int sel3 = shuffler[3];
  int remaining;
  for (remaining = pix; remaining > 0; --remaining) {
    // Gather the whole pixel before storing, to support in-place conversion.
    uint8 gathered[4];
    gathered[0] = src_argb[sel0];
    gathered[1] = src_argb[sel1];
    gathered[2] = src_argb[sel2];
    gathered[3] = src_argb[sel3];
    dst_argb[0] = gathered[0];
    dst_argb[1] = gathered[1];
    dst_argb[2] = gathered[2];
    dst_argb[3] = gathered[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
2017
// Interleave planar Y, U and V rows into packed 4-byte groups laid out as
// Y0 U Y1 V.
//   src_y:     one Y sample per pixel.
//   src_u:     one U sample per pixel pair.
//   src_v:     one V sample per pixel pair.
//   dst_frame: packed output; 4 bytes are written per pixel pair, including
//              a trailing half pair.
//   width:     number of pixels.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    uint8* out = dst_frame + x * 2;
    const int chroma = x >> 1;
    out[0] = src_y[x];
    out[1] = src_u[chroma];
    out[2] = src_y[x + 1];
    out[3] = src_v[chroma];
  }
  if (width & 1) {
    // Odd width: the last Y sample fills both Y slots of the final group.
    uint8* out = dst_frame + x * 2;
    const int chroma = x >> 1;
    out[0] = src_y[x];
    out[1] = src_u[chroma];
    out[2] = src_y[x];
    out[3] = src_v[chroma];
  }
}
2040
// Interleave planar Y, U and V rows into packed 4-byte groups laid out as
// U Y0 V Y1.
//   src_y:     one Y sample per pixel.
//   src_u:     one U sample per pixel pair.
//   src_v:     one V sample per pixel pair.
//   dst_frame: packed output; 4 bytes are written per pixel pair, including
//              a trailing half pair.
//   width:     number of pixels.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    uint8* out = dst_frame + x * 2;
    const int chroma = x >> 1;
    out[0] = src_u[chroma];
    out[1] = src_y[x];
    out[2] = src_v[chroma];
    out[3] = src_y[x + 1];
  }
  if (width & 1) {
    // Odd width: the last Y sample fills both Y slots of the final group.
    uint8* out = dst_frame + x * 2;
    const int chroma = x >> 1;
    out[0] = src_u[chroma];
    out[1] = src_y[x];
    out[2] = src_v[chroma];
    out[3] = src_y[x];
  }
}
2063
2064#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2065// row_win.cc has asm version, but GCC uses 2 step wrapper.
2066#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2067void I422ToRGB565Row_SSSE3(const uint8* src_y,
2068                           const uint8* src_u,
2069                           const uint8* src_v,
2070                           uint8* rgb_buf,
2071                           int width) {
2072  // Allocate a row of ARGB.
2073  align_buffer_64(row, width * 4);
2074  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2075  ARGBToRGB565Row_SSE2(row, rgb_buf, width);
2076  free_aligned_buffer_64(row);
2077}
2078#endif  // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2079
2080#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2081void I422ToARGB1555Row_SSSE3(const uint8* src_y,
2082                             const uint8* src_u,
2083                             const uint8* src_v,
2084                             uint8* rgb_buf,
2085                             int width) {
2086  // Allocate a row of ARGB.
2087  align_buffer_64(row, width * 4);
2088  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2089  ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
2090  free_aligned_buffer_64(row);
2091}
2092
2093void I422ToARGB4444Row_SSSE3(const uint8* src_y,
2094                             const uint8* src_u,
2095                             const uint8* src_v,
2096                             uint8* rgb_buf,
2097                             int width) {
2098  // Allocate a row of ARGB.
2099  align_buffer_64(row, width * 4);
2100  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2101  ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
2102  free_aligned_buffer_64(row);
2103}
2104
2105void NV12ToRGB565Row_SSSE3(const uint8* src_y,
2106                           const uint8* src_uv,
2107                           uint8* dst_rgb565,
2108                           int width) {
2109  // Allocate a row of ARGB.
2110  align_buffer_64(row, width * 4);
2111  NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
2112  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2113  free_aligned_buffer_64(row);
2114}
2115
2116void NV21ToRGB565Row_SSSE3(const uint8* src_y,
2117                           const uint8* src_vu,
2118                           uint8* dst_rgb565,
2119                           int width) {
2120  // Allocate a row of ARGB.
2121  align_buffer_64(row, width * 4);
2122  NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
2123  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2124  free_aligned_buffer_64(row);
2125}
2126
2127void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
2128                         uint8* dst_argb,
2129                         int width) {
2130  // Allocate a rows of yuv.
2131  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2132  uint8* row_u = row_y + ((width + 63) & ~63);
2133  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2134  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
2135  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
2136  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2137  free_aligned_buffer_64(row_y);
2138}
2139
2140void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
2141                                   uint8* dst_argb,
2142                                   int width) {
2143  // Allocate a rows of yuv.
2144  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2145  uint8* row_u = row_y + ((width + 63) & ~63);
2146  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2147  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
2148  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
2149  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
2150  free_aligned_buffer_64(row_y);
2151}
2152
2153void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
2154                         uint8* dst_argb,
2155                         int width) {
2156  // Allocate a rows of yuv.
2157  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2158  uint8* row_u = row_y + ((width + 63) & ~63);
2159  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2160  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
2161  UYVYToYRow_SSE2(src_uyvy, row_y, width);
2162  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2163  free_aligned_buffer_64(row_y);
2164}
2165
2166void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
2167                                   uint8* dst_argb,
2168                                   int width) {
2169  // Allocate a rows of yuv.
2170  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2171  uint8* row_u = row_y + ((width + 63) & ~63);
2172  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2173  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
2174  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
2175  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
2176  free_aligned_buffer_64(row_y);
2177}
2178
2179#endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2180#endif  // !defined(LIBYUV_DISABLE_X86)
2181
// Apply a cubic polynomial to every channel of a row of ARGB pixels.
// For channel c (0..3) with input value v the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3
// converted to an integer and passed through Clamp() before being stored.
//   src_argb: input pixels, 4 bytes each.
//   dst_argb: output pixels, 4 bytes each.
//   poly:     16 coefficients: 4 constants, then 4 linear, 4 quadratic and
//             4 cubic terms, one per channel in each group.
//   width:    number of pixels.
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb, const float* poly,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    float in[4];
    uint32 out[4];
    int c;
    // Load the whole pixel before any store.
    for (c = 0; c < 4; ++c) {
      in[c] = (float)(src_argb[c]);
    }
    for (c = 0; c < 4; ++c) {
      const float v = in[c];
      const float v2 = v * v;
      const float v3 = v2 * v;
      // Terms are accumulated lowest order first; keep this order so the
      // floating point rounding stays the same.
      float acc = poly[c] + poly[c + 4] * v;
      acc += poly[c + 8] * v2;
      acc += poly[c + 12] * v3;
      out[c] = Clamp((int32)(acc));
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (uint8)(out[c]);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2220
// Remap one ARGB pixel through the luma-selected row of a color table.
// The weighted sum of B, G and R, masked with 0x7F00, is the byte offset of a
// 256-entry table row inside luma. B, G and R are each looked up in that
// row; alpha is copied unchanged.
static __inline void LumaColorTablePixel_C(const uint8* src, uint8* dst,
                                           const uint8* luma,
                                           uint32 bc, uint32 gc, uint32 rc) {
  // Luminance in rows, color values in columns.
  const uint8* table_row =
      ((src[0] * bc + src[1] * gc + src[2] * rc) & 0x7F00u) + luma;
  dst[0] = table_row[src[0]];
  dst[1] = table_row[src[1]];
  dst[2] = table_row[src[2]];
  dst[3] = src[3];
}

// Apply a luma-dependent color table to a row of ARGB pixels.
//   src_argb:  input pixels, 4 bytes each (B, G, R, A).
//   dst_argb:  output pixels, same layout.
//   width:     number of pixels.
//   luma:      table addressed as luma[(weighted sum & 0x7F00) + value], so
//              offsets up to 0x7F00 + 255 can be read.
//   lumacoeff: packed weights: bits 0-7 for B, 8-15 for G, 16-23 for R.
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                             const uint8* luma, uint32 lumacoeff) {
  const uint32 bc = lumacoeff & 0xff;
  const uint32 gc = (lumacoeff >> 8) & 0xff;
  const uint32 rc = (lumacoeff >> 16) & 0xff;
  int i;
  // Two pixels per iteration; an odd trailing pixel is handled below.
  for (i = 0; i + 1 < width; i += 2) {
    LumaColorTablePixel_C(src_argb, dst_argb, luma, bc, gc, rc);
    LumaColorTablePixel_C(src_argb + 4, dst_argb + 4, luma, bc, gc, rc);
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    LumaColorTablePixel_C(src_argb, dst_argb, luma, bc, gc, rc);
  }
}
2256
// Copy only the alpha byte (byte 3 of every 4-byte pixel) from src to dst.
// The other three bytes of each dst pixel are left untouched.
//   src:   source pixels, 4 bytes each.
//   dst:   destination pixels, 4 bytes each.
//   width: number of pixels.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int i;
  // Two pixels per iteration; an odd trailing pixel is handled below.
  for (i = 0; i + 1 < width; i += 2) {
    const int first = i * 4 + 3;
    dst[first] = src[first];
    dst[first + 4] = src[first + 4];
  }
  if (width & 1) {
    dst[i * 4 + 3] = src[i * 4 + 3];
  }
}
2269
// Write a row of single-byte samples into the alpha byte (byte 3) of each
// 4-byte dst pixel. The other three bytes of each dst pixel are left
// untouched.
//   src:   one byte per pixel.
//   dst:   destination pixels, 4 bytes each.
//   width: number of pixels.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int i;
  // Two pixels per iteration; an odd trailing pixel is handled below.
  for (i = 0; i + 1 < width; i += 2) {
    uint8* out = dst + i * 4;
    out[3] = src[i];
    out[7] = src[i + 1];
  }
  if (width & 1) {
    dst[i * 4 + 3] = src[i];
  }
}
2282
2283#ifdef __cplusplus
2284}  // extern "C"
2285}  // namespace libyuv
2286#endif
2287