1/*
2 *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "libyuv/convert.h"
12
13#include "conversion_tables.h"
14#include "libyuv/basic_types.h"
15#include "libyuv/cpu_id.h"
16#include "row.h"
17
18//#define SCALEOPT //Currently for windows only. June 2010
19
20#ifdef SCALEOPT
21#include <emmintrin.h>
22#endif
23
24namespace libyuv {
25
26static inline uint8 Clip(int32 val) {
27  if (val < 0) {
28    return (uint8) 0;
29  } else if (val > 255){
30    return (uint8) 255;
31  }
32  return (uint8) val;
33}
34
35int I420ToRGB24(const uint8* src_y, int src_stride_y,
36                const uint8* src_u, int src_stride_u,
37                const uint8* src_v, int src_stride_v,
38                uint8* dst_frame, int dst_stride_frame,
39                int width, int height) {
40  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
41    return -1;
42  }
43
44  // RGB orientation - bottom up
45  // TODO(fbarchard): support inversion
46  uint8* out = dst_frame + dst_stride_frame * height - dst_stride_frame;
47  uint8* out2 = out - dst_stride_frame;
48  int h, w;
49  int tmp_r, tmp_g, tmp_b;
50  const uint8 *y1, *y2 ,*u, *v;
51  y1 = src_y;
52  y2 = y1 + src_stride_y;
53  u = src_u;
54  v = src_v;
55  for (h = ((height + 1) >> 1); h > 0; h--){
56    // 2 rows at a time, 2 y's at a time
57    for (w = 0; w < ((width + 1) >> 1); w++){
58      // Vertical and horizontal sub-sampling
59      tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
60      tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
61      tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
62      out[0] = Clip(tmp_b);
63      out[1] = Clip(tmp_g);
64      out[2] = Clip(tmp_r);
65
66      tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
67      tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
68      tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
69      out[3] = Clip(tmp_b);
70      out[4] = Clip(tmp_g);
71      out[5] = Clip(tmp_r);
72
73      tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
74      tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
75      tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
76      out2[0] = Clip(tmp_b);
77      out2[1] = Clip(tmp_g);
78      out2[2] = Clip(tmp_r);
79
80      tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
81      tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
82      tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
83      out2[3] = Clip(tmp_b);
84      out2[4] = Clip(tmp_g);
85      out2[5] = Clip(tmp_r);
86
87      out += 6;
88      out2 += 6;
89      y1 += 2;
90      y2 += 2;
91      u++;
92      v++;
93    }
94    y1 += src_stride_y + src_stride_y - width;
95    y2 += src_stride_y + src_stride_y - width;
96    u += src_stride_u - ((width + 1) >> 1);
97    v += src_stride_v - ((width + 1) >> 1);
98    out -= dst_stride_frame * 3;
99    out2 -= dst_stride_frame * 3;
100  } // end height for
101  return 0;
102}
103
104// Little Endian...
105int I420ToARGB4444(const uint8* src_y, int src_stride_y,
106                   const uint8* src_u, int src_stride_u,
107                   const uint8* src_v, int src_stride_v,
108                   uint8* dst_frame, int dst_stride_frame,
109                   int width, int height) {
110  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
111    return -1;
112  }
113
114  // RGB orientation - bottom up
115  uint8* out = dst_frame + dst_stride_frame * (height - 1);
116  uint8* out2 = out - dst_stride_frame;
117  int tmp_r, tmp_g, tmp_b;
118  const uint8 *y1,*y2, *u, *v;
119  y1 = src_y;
120  y2 = y1 + src_stride_y;
121  u = src_u;
122  v = src_v;
123  int h, w;
124
125  for (h = ((height + 1) >> 1); h > 0; h--) {
126    // 2 rows at a time, 2 y's at a time
127    for (w = 0; w < ((width + 1) >> 1); w++) {
128        // Vertical and horizontal sub-sampling
129        // Convert to RGB888 and re-scale to 4 bits
130        tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
131        tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
132        tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
133        out[0] =(uint8)((Clip(tmp_g) & 0xf0) + (Clip(tmp_b) >> 4));
134        out[1] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
135
136        tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
137        tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
138        tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
139        out[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
140        out[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
141
142        tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
143        tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
144        tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
145        out2[0] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
146        out2[1] = (uint8) (0xf0 + (Clip(tmp_r) >> 4));
147
148        tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
149        tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
150        tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
151        out2[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
152        out2[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
153
154        out += 4;
155        out2 += 4;
156        y1 += 2;
157        y2 += 2;
158        u++;
159        v++;
160    }
161    y1 += 2 * src_stride_y - width;
162    y2 += 2 * src_stride_y - width;
163    u += src_stride_u - ((width + 1) >> 1);
164    v += src_stride_v - ((width + 1) >> 1);
165    out -= (dst_stride_frame + width) * 2;
166    out2 -= (dst_stride_frame + width) * 2;
167  } // end height for
168  return 0;
169}
170
171
172int I420ToRGB565(const uint8* src_y, int src_stride_y,
173                 const uint8* src_u, int src_stride_u,
174                 const uint8* src_v, int src_stride_v,
175                 uint8* dst_frame, int dst_stride_frame,
176                 int width, int height) {
177  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
178    return -1;
179  }
180
181  // Negative height means invert the image.
182  if (height < 0) {
183    height = -height;
184    src_y = src_y + (height - 1) * src_stride_y;
185    src_u = src_u + (height - 1) * src_stride_u;
186    src_v = src_v + (height - 1) * src_stride_v;
187    src_stride_y = -src_stride_y;
188    src_stride_u = -src_stride_u;
189    src_stride_v = -src_stride_v;
190  }
191  uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
192  uint16* out2 = out - dst_stride_frame;
193
194  int tmp_r, tmp_g, tmp_b;
195  const uint8* y1,* y2, * u, * v;
196  y1 = src_y;
197  y2 = y1 + src_stride_y;
198  u = src_u;
199  v = src_v;
200  int h, w;
201
202  for (h = ((height + 1) >> 1); h > 0; h--){
203    // 2 rows at a time, 2 y's at a time
204    for (w = 0; w < ((width + 1) >> 1); w++){
205      // Vertical and horizontal sub-sampling
206      // 1. Convert to RGB888
207      // 2. Shift to adequate location (in the 16 bit word) - RGB 565
208
209      tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
210      tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
211      tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
212      out[0]  = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
213                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
214
215      tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
216      tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
217      tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
218      out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
219                         & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
220
221      tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
222      tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
223      tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
224      out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
225                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
226
227      tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
228      tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
229      tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
230      out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
231                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
232
233      y1 += 2;
234      y2 += 2;
235      out += 2;
236      out2 += 2;
237      u++;
238      v++;
239    }
240    y1 += 2 * src_stride_y - width;
241    y2 += 2 * src_stride_y - width;
242    u += src_stride_u - ((width + 1) >> 1);
243    v += src_stride_v - ((width + 1) >> 1);
244    out -= 2 * dst_stride_frame + width;
245    out2 -=  2 * dst_stride_frame + width;
246  }
247  return 0;
248}
249
250
251int I420ToARGB1555(const uint8* src_y, int src_stride_y,
252                   const uint8* src_u, int src_stride_u,
253                   const uint8* src_v, int src_stride_v,
254                   uint8* dst_frame, int dst_stride_frame,
255                   int width, int height) {
256  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
257    return -1;
258  }
259  uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
260  uint16* out2 = out - dst_stride_frame ;
261  int32 tmp_r, tmp_g, tmp_b;
262  const uint8 *y1,*y2, *u, *v;
263  int h, w;
264
265  y1 = src_y;
266  y2 = y1 + src_stride_y;
267  u = src_u;
268  v = src_v;
269
270  for (h = ((height + 1) >> 1); h > 0; h--){
271    // 2 rows at a time, 2 y's at a time
272    for (w = 0; w < ((width + 1) >> 1); w++){
273      // Vertical and horizontal sub-sampling
274      // 1. Convert to RGB888
275      // 2. Shift to adequate location (in the 16 bit word) - RGB 555
276      // 3. Add 1 for alpha value
277      tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
278      tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
279      tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
280      out[0]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
281                ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
282
283      tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
284      tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]  + 128) >> 8);
285      tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
286      out[1]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
287                ((Clip(tmp_g) & 0xf8) << 3)  + (Clip(tmp_b) >> 3));
288
289      tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
290      tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
291      tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
292      out2[0]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
293                 ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
294
295      tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
296      tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
297      tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
298      out2[1]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
299                 ((Clip(tmp_g) & 0xf8) << 3)  + (Clip(tmp_b) >> 3));
300
301      y1 += 2;
302      y2 += 2;
303      out += 2;
304      out2 += 2;
305      u++;
306      v++;
307    }
308    y1 += 2 * src_stride_y - width;
309    y2 += 2 * src_stride_y - width;
310    u += src_stride_u - ((width + 1) >> 1);
311    v += src_stride_v - ((width + 1) >> 1);
312    out -= 2 * dst_stride_frame + width;
313    out2 -=  2 * dst_stride_frame + width;
314  }
315  return 0;
316}
317
318
319int I420ToYUY2(const uint8* src_y, int src_stride_y,
320               const uint8* src_u, int src_stride_u,
321               const uint8* src_v, int src_stride_v,
322               uint8* dst_frame, int dst_stride_frame,
323               int width, int height) {
324  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
325    return -1;
326  }
327
328  const uint8* in1 = src_y;
329  const uint8* in2 = src_y + src_stride_y;
330
331  uint8* out1 = dst_frame;
332  uint8* out2 = dst_frame + dst_stride_frame;
333
334  // YUY2 - Macro-pixel = 2 image pixels
335  // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
336#ifndef SCALEOPT
337  for (int i = 0; i < ((height + 1) >> 1); i++){
338    for (int j = 0; j < ((width + 1) >> 1); j++){
339      out1[0] = in1[0];
340      out1[1] = *src_u;
341      out1[2] = in1[1];
342      out1[3] = *src_v;
343
344      out2[0] = in2[0];
345      out2[1] = *src_u;
346      out2[2] = in2[1];
347      out2[3] = *src_v;
348      out1 += 4;
349      out2 += 4;
350      src_u++;
351      src_v++;
352      in1 += 2;
353      in2 += 2;
354    }
355    in1 += 2 * src_stride_y - width;
356    in2 += 2 * src_stride_y - width;
357    src_u += src_stride_u - ((width + 1) >> 1);
358    src_v += src_stride_v - ((width + 1) >> 1);
359    out1 += dst_stride_frame + dst_stride_frame - 2 * width;
360    out2 += dst_stride_frame + dst_stride_frame - 2 * width;
361  }
362#else
363  for (WebRtc_UWord32 i = 0; i < ((height + 1) >> 1);i++) {
364    int32 width__ = (width >> 4);
365    _asm
366    {
367      ;pusha
368      mov       eax, DWORD PTR [in1]                       ;1939.33
369      mov       ecx, DWORD PTR [in2]                       ;1939.33
370      mov       ebx, DWORD PTR [src_u]                       ;1939.33
371      mov       edx, DWORD PTR [src_v]                       ;1939.33
372      loop0:
373      movq      xmm6, QWORD PTR [ebx]          ;src_u
374      movq      xmm0, QWORD PTR [edx]          ;src_v
375      punpcklbw xmm6, xmm0                     ;src_u, src_v mix
376      ;movdqa    xmm1, xmm6
377      ;movdqa    xmm2, xmm6
378      ;movdqa    xmm4, xmm6
379
380      movdqu    xmm3, XMMWORD PTR [eax]        ;in1
381      movdqa    xmm1, xmm3
382      punpcklbw xmm1, xmm6                     ;in1, src_u, in1, src_v
383      mov       esi, DWORD PTR [out1]
384      movdqu    XMMWORD PTR [esi], xmm1        ;write to out1
385
386      movdqu    xmm5, XMMWORD PTR [ecx]        ;in2
387      movdqa    xmm2, xmm5
388      punpcklbw xmm2, xmm6                     ;in2, src_u, in2, src_v
389      mov       edi, DWORD PTR [out2]
390      movdqu    XMMWORD PTR [edi], xmm2        ;write to out2
391
392      punpckhbw xmm3, xmm6                     ;in1, src_u, in1, src_v again
393      movdqu    XMMWORD PTR [esi+16], xmm3     ;write to out1 again
394      add       esi, 32
395      mov       DWORD PTR [out1], esi
396
397      punpckhbw xmm5, xmm6                     ;src_u, in2, src_v again
398      movdqu    XMMWORD PTR [edi+16], xmm5     ;write to out2 again
399      add       edi, 32
400      mov       DWORD PTR [out2], edi
401
402      add       ebx, 8
403      add       edx, 8
404      add       eax, 16
405      add       ecx, 16
406
407      mov       esi, DWORD PTR [width__]
408      sub       esi, 1
409      mov       DWORD PTR [width__], esi
410      jg        loop0
411
412      mov       DWORD PTR [in1], eax                       ;1939.33
413      mov       DWORD PTR [in2], ecx                       ;1939.33
414      mov       DWORD PTR [src_u], ebx                       ;1939.33
415      mov       DWORD PTR [src_v], edx                       ;1939.33
416
417      ;popa
418      emms
419    }
420    in1 += 2 * src_stride_y - width;
421    in2 += 2 * src_stride_y - width;
422    out1 += dst_stride_frame + dst_stride_frame - 2 * width;
423    out2 += dst_stride_frame + dst_stride_frame - 2 * width;
424  }
425#endif
426  return 0;
427}
428
429int I420ToUYVY(const uint8* src_y, int src_stride_y,
430               const uint8* src_u, int src_stride_u,
431               const uint8* src_v, int src_stride_v,
432               uint8* dst_frame, int dst_stride_frame,
433               int width, int height) {
434  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
435    return -1;
436  }
437
438  int i = 0;
439  const uint8* y1 = src_y;
440  const uint8* y2 = y1 + src_stride_y;
441  const uint8* u = src_u;
442  const uint8* v = src_v;
443
444  uint8* out1 = dst_frame;
445  uint8* out2 = dst_frame + dst_stride_frame;
446
447  // Macro-pixel = 2 image pixels
448  // U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
449
450#ifndef SCALEOPT
451  for (; i < ((height + 1) >> 1); i++) {
452    for (int j = 0; j < ((width + 1) >> 1); j++) {
453      out1[0] = *u;
454      out1[1] = y1[0];
455      out1[2] = *v;
456      out1[3] = y1[1];
457
458      out2[0] = *u;
459      out2[1] = y2[0];
460      out2[2] = *v;
461      out2[3] = y2[1];
462      out1 += 4;
463      out2 += 4;
464      u++;
465      v++;
466      y1 += 2;
467      y2 += 2;
468    }
469    y1 += 2 * src_stride_y - width;
470    y2 += 2 * src_stride_y - width;
471    u += src_stride_u - ((width + 1) >> 1);
472    v += src_stride_v - ((width + 1) >> 1);
473    out1 += 2 * (dst_stride_frame - width);
474    out2 += 2 * (dst_stride_frame - width);
475  }
476#else
477  for (; i < (height >> 1);i++) {
478    int32 width__ = (width >> 4);
479    _asm
480    {
481      ;pusha
482      mov       eax, DWORD PTR [in1]                       ;1939.33
483      mov       ecx, DWORD PTR [in2]                       ;1939.33
484      mov       ebx, DWORD PTR [src_u]                       ;1939.33
485      mov       edx, DWORD PTR [src_v]                       ;1939.33
486loop0:
487      movq      xmm6, QWORD PTR [ebx]          ;src_u
488      movq      xmm0, QWORD PTR [edx]          ;src_v
489      punpcklbw xmm6, xmm0                     ;src_u, src_v mix
490      movdqa    xmm1, xmm6
491      movdqa    xmm2, xmm6
492      movdqa    xmm4, xmm6
493
494      movdqu    xmm3, XMMWORD PTR [eax]        ;in1
495      punpcklbw xmm1, xmm3                     ;src_u, in1, src_v
496      mov       esi, DWORD PTR [out1]
497      movdqu    XMMWORD PTR [esi], xmm1        ;write to out1
498
499      movdqu    xmm5, XMMWORD PTR [ecx]        ;in2
500      punpcklbw xmm2, xmm5                     ;src_u, in2, src_v
501      mov       edi, DWORD PTR [out2]
502      movdqu    XMMWORD PTR [edi], xmm2        ;write to out2
503
504      punpckhbw xmm4, xmm3                     ;src_u, in1, src_v again
505      movdqu    XMMWORD PTR [esi+16], xmm4     ;write to out1 again
506      add       esi, 32
507      mov       DWORD PTR [out1], esi
508
509      punpckhbw xmm6, xmm5                     ;src_u, in2, src_v again
510      movdqu    XMMWORD PTR [edi+16], xmm6     ;write to out2 again
511      add       edi, 32
512      mov       DWORD PTR [out2], edi
513
514      add       ebx, 8
515      add       edx, 8
516      add       eax, 16
517      add       ecx, 16
518
519      mov       esi, DWORD PTR [width__]
520      sub       esi, 1
521      mov       DWORD PTR [width__], esi
522      jg        loop0
523
524      mov       DWORD PTR [in1], eax                       ;1939.33
525      mov       DWORD PTR [in2], ecx                       ;1939.33
526      mov       DWORD PTR [src_u], ebx                       ;1939.33
527      mov       DWORD PTR [src_v], edx                       ;1939.33
528
529      ;popa
530      emms
531    }
532    in1 += width;
533    in2 += width;
534    out1 += 2 * (dst_stride_frame - width);
535    out2 += 2 * (dst_stride_frame - width);
536  }
537#endif
538  return 0;
539}
540
541
542int NV12ToRGB565(const uint8* src_y, int src_stride_y,
543                 const uint8* src_uv, int src_stride_uv,
544                 uint8* dst_frame, int dst_stride_frame,
545                 int width, int height) {
546  if (src_y == NULL || src_uv == NULL || dst_frame == NULL) {
547    return -1;
548  }
549
550  // Bi-Planar: Y plane followed by an interlaced U and V plane
551  const uint8* interlacedSrc = src_uv;
552  uint16* out = (uint16*)(src_y) + dst_stride_frame * (height - 1);
553  uint16* out2 = out - dst_stride_frame;
554  int32 tmp_r, tmp_g, tmp_b;
555  const uint8 *y1,*y2;
556  y1 = src_y;
557  y2 = y1 + src_stride_y;
558  int h, w;
559
560  for (h = ((height + 1) >> 1); h > 0; h--) {
561    // 2 rows at a time, 2 y's at a time
562    for (w = 0; w < ((width + 1) >> 1); w++) {
563      // Vertical and horizontal sub-sampling
564      // 1. Convert to RGB888
565      // 2. Shift to adequate location (in the 16 bit word) - RGB 565
566
567      tmp_r = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
568      tmp_g = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]]
569                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
570      tmp_b = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
571      out[0]  = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
572                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
573
574      tmp_r = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
575      tmp_g = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]]
576                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
577      tmp_b = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
578      out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
579                         & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
580
581      tmp_r = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
582      tmp_g = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]]
583                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
584      tmp_b = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
585      out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
586                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
587
588      tmp_r = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]]
589                      + 128) >> 8);
590      tmp_g = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]]
591                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
592      tmp_b = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
593      out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
594                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
595
596      y1 += 2;
597      y2 += 2;
598      out += 2;
599      out2 += 2;
600      interlacedSrc += 2;
601    }
602    y1 += 2 * src_stride_y - width;
603    y2 += 2 * src_stride_y - width;
604    interlacedSrc += src_stride_uv - ((width + 1) >> 1);
605    out -= 3 * dst_stride_frame + dst_stride_frame - width;
606    out2 -= 3 * dst_stride_frame + dst_stride_frame - width;
607  }
608  return 0;
609}
610
611// TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
612int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
613                uint8* dst_frame, int dst_stride_frame,
614                int width, int height) {
615  if (src_frame == NULL || dst_frame == NULL) {
616    return -1;
617  }
618
619  int i, j, offset;
620  uint8* outFrame = dst_frame;
621  const uint8* inFrame = src_frame;
622
623  outFrame += dst_stride_frame * (height - 1) * 4;
624  for (i = 0; i < height; i++) {
625    for (j = 0; j < width; j++) {
626      offset = j * 4;
627      outFrame[0 + offset] = inFrame[0];
628      outFrame[1 + offset] = inFrame[1];
629      outFrame[2 + offset] = inFrame[2];
630      outFrame[3 + offset] = 0xff;
631      inFrame += 3;
632    }
633    outFrame -= 4 * (dst_stride_frame - width);
634    inFrame += src_stride_frame - width;
635  }
636  return 0;
637}
638
639int ARGBToI420(const uint8* src_frame, int src_stride_frame,
640               uint8* dst_y, int dst_stride_y,
641               uint8* dst_u, int dst_stride_u,
642               uint8* dst_v, int dst_stride_v,
643               int width, int height) {
644  if (height < 0) {
645    height = -height;
646    src_frame = src_frame + (height - 1) * src_stride_frame;
647    src_stride_frame = -src_stride_frame;
648  }
649  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
650  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
651                      uint8* dst_u, uint8* dst_v, int width);
652#if defined(HAS_ARGBTOYROW_SSSE3)
653  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
654      (width % 16 == 0) &&
655      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
656      IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
657    ARGBToYRow = ARGBToYRow_SSSE3;
658  } else
659#endif
660  {
661    ARGBToYRow = ARGBToYRow_C;
662  }
663#if defined(HAS_ARGBTOUVROW_SSSE3)
664  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
665      (width % 16 == 0) &&
666      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
667      IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
668      IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
669    ARGBToUVRow = ARGBToUVRow_SSSE3;
670  } else
671#endif
672  {
673    ARGBToUVRow = ARGBToUVRow_C;
674  }
675
676  for (int y = 0; y < (height - 1); y += 2) {
677    ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
678    ARGBToYRow(src_frame, dst_y, width);
679    ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
680    src_frame += src_stride_frame * 2;
681    dst_y += dst_stride_y * 2;
682    dst_u += dst_stride_u;
683    dst_v += dst_stride_v;
684  }
685  if (height & 1) {
686    ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
687    ARGBToYRow(src_frame, dst_y, width);
688  }
689  return 0;
690}
691
692int BGRAToI420(const uint8* src_frame, int src_stride_frame,
693               uint8* dst_y, int dst_stride_y,
694               uint8* dst_u, int dst_stride_u,
695               uint8* dst_v, int dst_stride_v,
696               int width, int height) {
697  if (height < 0) {
698    height = -height;
699    src_frame = src_frame + (height - 1) * src_stride_frame;
700    src_stride_frame = -src_stride_frame;
701  }
702  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
703  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
704                      uint8* dst_u, uint8* dst_v, int width);
705#if defined(HAS_BGRATOYROW_SSSE3)
706  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
707      (width % 16 == 0) &&
708      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
709      IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
710    ARGBToYRow = BGRAToYRow_SSSE3;
711  } else
712#endif
713  {
714    ARGBToYRow = BGRAToYRow_C;
715  }
716#if defined(HAS_BGRATOUVROW_SSSE3)
717  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
718      (width % 16 == 0) &&
719      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
720      IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
721      IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
722    ARGBToUVRow = BGRAToUVRow_SSSE3;
723  } else
724#endif
725  {
726    ARGBToUVRow = BGRAToUVRow_C;
727  }
728
729  for (int y = 0; y < (height - 1); y += 2) {
730    ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
731    ARGBToYRow(src_frame, dst_y, width);
732    ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
733    src_frame += src_stride_frame * 2;
734    dst_y += dst_stride_y * 2;
735    dst_u += dst_stride_u;
736    dst_v += dst_stride_v;
737  }
738  if (height & 1) {
739    ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
740    ARGBToYRow(src_frame, dst_y, width);
741  }
742  return 0;
743}
744
745int ABGRToI420(const uint8* src_frame, int src_stride_frame,
746               uint8* dst_y, int dst_stride_y,
747               uint8* dst_u, int dst_stride_u,
748               uint8* dst_v, int dst_stride_v,
749               int width, int height) {
750  if (height < 0) {
751    height = -height;
752    src_frame = src_frame + (height - 1) * src_stride_frame;
753    src_stride_frame = -src_stride_frame;
754  }
755  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
756  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
757                      uint8* dst_u, uint8* dst_v, int width);
758#if defined(HAS_ABGRTOYROW_SSSE3)
759  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
760      (width % 16 == 0) &&
761      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
762      IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
763    ARGBToYRow = ABGRToYRow_SSSE3;
764  } else
765#endif
766  {
767    ARGBToYRow = ABGRToYRow_C;
768  }
769#if defined(HAS_ABGRTOUVROW_SSSE3)
770  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
771      (width % 16 == 0) &&
772      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
773      IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
774      IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
775    ARGBToUVRow = ABGRToUVRow_SSSE3;
776  } else
777#endif
778  {
779    ARGBToUVRow = ABGRToUVRow_C;
780  }
781
782  for (int y = 0; y < (height - 1); y += 2) {
783    ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
784    ARGBToYRow(src_frame, dst_y, width);
785    ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
786    src_frame += src_stride_frame * 2;
787    dst_y += dst_stride_y * 2;
788    dst_u += dst_stride_u;
789    dst_v += dst_stride_v;
790  }
791  if (height & 1) {
792    ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
793    ARGBToYRow(src_frame, dst_y, width);
794  }
795  return 0;
796}
797
798int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
799                uint8* dst_y, int dst_stride_y,
800                uint8* dst_u, int dst_stride_u,
801                uint8* dst_v, int dst_stride_v,
802                int width, int height) {
803  if (height < 0) {
804    height = -height;
805    src_frame = src_frame + (height - 1) * src_stride_frame;
806    src_stride_frame = -src_stride_frame;
807  }
808  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
809  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
810                      uint8* dst_u, uint8* dst_v, int width);
811#if defined(HAS_RGB24TOYROW_SSSE3)
812  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
813      (width % 16 == 0) &&
814      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
815      IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
816    ARGBToYRow = RGB24ToYRow_SSSE3;
817  } else
818#endif
819  {
820    ARGBToYRow = RGB24ToYRow_C;
821  }
822#if defined(HAS_RGB24TOUVROW_SSSE3)
823  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
824      (width % 16 == 0) &&
825      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
826      IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
827      IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
828    ARGBToUVRow = RGB24ToUVRow_SSSE3;
829  } else
830#endif
831  {
832    ARGBToUVRow = RGB24ToUVRow_C;
833  }
834
835  for (int y = 0; y < (height - 1); y += 2) {
836    ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
837    ARGBToYRow(src_frame, dst_y, width);
838    ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
839    src_frame += src_stride_frame * 2;
840    dst_y += dst_stride_y * 2;
841    dst_u += dst_stride_u;
842    dst_v += dst_stride_v;
843  }
844  if (height & 1) {
845    ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
846    ARGBToYRow(src_frame, dst_y, width);
847  }
848  return 0;
849}
850
851int RAWToI420(const uint8* src_frame, int src_stride_frame,
852                uint8* dst_y, int dst_stride_y,
853                uint8* dst_u, int dst_stride_u,
854                uint8* dst_v, int dst_stride_v,
855                int width, int height) {
856  if (height < 0) {
857    height = -height;
858    src_frame = src_frame + (height - 1) * src_stride_frame;
859    src_stride_frame = -src_stride_frame;
860  }
861  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
862  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
863                      uint8* dst_u, uint8* dst_v, int width);
864#if defined(HAS_RAWTOYROW_SSSE3)
865  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
866      (width % 16 == 0) &&
867      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
868      IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
869    ARGBToYRow = RAWToYRow_SSSE3;
870  } else
871#endif
872  {
873    ARGBToYRow = RAWToYRow_C;
874  }
875#if defined(HAS_RAWTOUVROW_SSSE3)
876  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
877      (width % 16 == 0) &&
878      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
879      IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
880      IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
881    ARGBToUVRow = RAWToUVRow_SSSE3;
882  } else
883#endif
884  {
885    ARGBToUVRow = RAWToUVRow_C;
886  }
887
888  for (int y = 0; y < (height - 1); y += 2) {
889    ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
890    ARGBToYRow(src_frame, dst_y, width);
891    ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
892    src_frame += src_stride_frame * 2;
893    dst_y += dst_stride_y * 2;
894    dst_u += dst_stride_u;
895    dst_v += dst_stride_v;
896  }
897  if (height & 1) {
898    ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
899    ARGBToYRow(src_frame, dst_y, width);
900  }
901  return 0;
902}
903
904} // namespace libyuv
905