row_posix.cc revision 7cd8149e2cbad8b1ff6d481c37a4775d3c8cf2fa
1/*
2 *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "row.h"
12
13extern "C" {
14
15#ifdef HAS_ARGBTOYROW_SSSE3
16
17// Constant multiplication table for converting ARGB to I400.
18extern "C" TALIGN16(const uint8, kMultiplyMaskARGBToI400[16]) = {
19  13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u
20};
21
22extern "C" TALIGN16(const uint8, kAdd16[16]) = {
23  1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u
24};
25
26// Shuffle table for converting BG24 to ARGB.
27extern "C" TALIGN16(const uint8, kShuffleMaskBG24ToARGB[16]) = {
28  0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
29};
30
31// Shuffle table for converting RAW to ARGB.
32extern "C" TALIGN16(const uint8, kShuffleMaskRAWToARGB[16]) = {
33  2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
34};
35
36void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
37  asm volatile(
38  "movdqa     (%3),%%xmm7\n"
39  "movdqa     (%4),%%xmm6\n"
40  "movdqa     %%xmm6,%%xmm5\n"
41  "psllw      $0x4,%%xmm5\n"  // Generate a mask of 0x10 on each byte.
42"1:"
43  "movdqa     (%0),%%xmm0\n"
44  "pmaddubsw  %%xmm7,%%xmm0\n"
45  "movdqa     0x10(%0),%%xmm1\n"
46  "psrlw      $0x7,%%xmm0\n"
47  "pmaddubsw  %%xmm7,%%xmm1\n"
48  "lea        0x20(%0),%0\n"
49  "psrlw      $0x7,%%xmm1\n"
50  "packuswb   %%xmm1,%%xmm0\n"
51  "pmaddubsw  %%xmm6,%%xmm0\n"
52  "packuswb   %%xmm0,%%xmm0\n"
53  "paddb      %%xmm5,%%xmm0\n"
54  "movq       %%xmm0,(%1)\n"
55  "lea        0x8(%1),%1\n"
56  "sub        $0x8,%2\n"
57  "ja         1b\n"
58  : "+r"(src_argb),   // %0
59    "+r"(dst_y),      // %1
60    "+r"(pix)         // %2
61  : "r"(kMultiplyMaskARGBToI400),    // %3
62    "r"(kAdd16)   // %4
63  : "memory"
64);
65}
66#endif
67
68#ifdef  HAS_BG24TOARGBROW_SSSE3
69void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
70  asm volatile(
71  "pcmpeqb    %%xmm7,%%xmm7\n"  // generate mask 0xff000000
72  "pslld      $0x18,%%xmm7\n"
73  "movdqa     (%3),%%xmm6\n"
74"1:"
75  "movdqa     (%0),%%xmm0\n"
76  "movdqa     0x10(%0),%%xmm1\n"
77  "movdqa     0x20(%0),%%xmm3\n"
78  "lea        0x30(%0),%0\n"
79  "movdqa     %%xmm3,%%xmm2\n"
80  "palignr    $0x8,%%xmm1,%%xmm2\n"  // xmm2 = { xmm3[0:3] xmm1[8:15] }
81  "pshufb     %%xmm6,%%xmm2\n"
82  "por        %%xmm7,%%xmm2\n"
83  "palignr    $0xc,%%xmm0,%%xmm1\n"  // xmm1 = { xmm3[0:7] xmm0[12:15] }
84  "pshufb     %%xmm6,%%xmm0\n"
85  "movdqa     %%xmm2,0x20(%1)\n"
86  "por        %%xmm7,%%xmm0\n"
87  "pshufb     %%xmm6,%%xmm1\n"
88  "movdqa     %%xmm0,(%1)\n"
89  "por        %%xmm7,%%xmm1\n"
90  "palignr    $0x4,%%xmm3,%%xmm3\n"  // xmm3 = { xmm3[4:15] }
91  "pshufb     %%xmm6,%%xmm3\n"
92  "movdqa     %%xmm1,0x10(%1)\n"
93  "por        %%xmm7,%%xmm3\n"
94  "movdqa     %%xmm3,0x30(%1)\n"
95  "lea        0x40(%1),%1\n"
96  "sub        $0x10,%2\n"
97  "ja         1b\n"
98  : "+r"(src_bg24),  // %0
99    "+r"(dst_argb),  // %1
100    "+r"(pix)        // %2
101  : "r"(kShuffleMaskBG24ToARGB)  // %3
102  : "memory"
103);
104}
105
106void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
107  asm volatile(
108  "pcmpeqb    %%xmm7,%%xmm7\n"  // generate mask 0xff000000
109  "pslld      $0x18,%%xmm7\n"
110  "movdqa     (%3),%%xmm6\n"
111"1:"
112  "movdqa     (%0),%%xmm0\n"
113  "movdqa     0x10(%0),%%xmm1\n"
114  "movdqa     0x20(%0),%%xmm3\n"
115  "lea        0x30(%0),%0\n"
116  "movdqa     %%xmm3,%%xmm2\n"
117  "palignr    $0x8,%%xmm1,%%xmm2\n"  // xmm2 = { xmm3[0:3] xmm1[8:15] }
118  "pshufb     %%xmm6,%%xmm2\n"
119  "por        %%xmm7,%%xmm2\n"
120  "palignr    $0xc,%%xmm0,%%xmm1\n"  // xmm1 = { xmm3[0:7] xmm0[12:15] }
121  "pshufb     %%xmm6,%%xmm0\n"
122  "movdqa     %%xmm2,0x20(%1)\n"
123  "por        %%xmm7,%%xmm0\n"
124  "pshufb     %%xmm6,%%xmm1\n"
125  "movdqa     %%xmm0,(%1)\n"
126  "por        %%xmm7,%%xmm1\n"
127  "palignr    $0x4,%%xmm3,%%xmm3\n"  // xmm3 = { xmm3[4:15] }
128  "pshufb     %%xmm6,%%xmm3\n"
129  "movdqa     %%xmm1,0x10(%1)\n"
130  "por        %%xmm7,%%xmm3\n"
131  "movdqa     %%xmm3,0x30(%1)\n"
132  "lea        0x40(%1),%1\n"
133  "sub        $0x10,%2\n"
134  "ja         1b\n"
135  : "+r"(src_raw),   // %0
136    "+r"(dst_argb),  // %1
137    "+r"(pix)        // %2
138  : "r"(kShuffleMaskRAWToARGB)  // %3
139  : "memory"
140);
141}
142#endif
143
144#if defined(__x86_64__)
145
146// 64 bit linux gcc version
147
148void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
149                              const uint8* u_buf,  // rsi
150                              const uint8* v_buf,  // rdx
151                              uint8* rgb_buf,      // rcx
152                              int width) {         // r8
153  asm volatile(
154"1:"
155  "movzb  (%1),%%r10\n"
156  "lea    1(%1),%1\n"
157  "movzb  (%2),%%r11\n"
158  "lea    1(%2),%2\n"
159  "movq   2048(%5,%%r10,8),%%xmm0\n"
160  "movzb  (%0),%%r10\n"
161  "movq   4096(%5,%%r11,8),%%xmm1\n"
162  "movzb  0x1(%0),%%r11\n"
163  "paddsw %%xmm1,%%xmm0\n"
164  "movq   (%5,%%r10,8),%%xmm2\n"
165  "lea    2(%0),%0\n"
166  "movq   (%5,%%r11,8),%%xmm3\n"
167  "paddsw %%xmm0,%%xmm2\n"
168  "paddsw %%xmm0,%%xmm3\n"
169  "shufps $0x44,%%xmm3,%%xmm2\n"
170  "psraw  $0x6,%%xmm2\n"
171  "packuswb %%xmm2,%%xmm2\n"
172  "movq   %%xmm2,0x0(%3)\n"
173  "lea    8(%3),%3\n"
174  "sub    $0x2,%4\n"
175  "ja     1b\n"
176  : "+r"(y_buf),    // %0
177    "+r"(u_buf),    // %1
178    "+r"(v_buf),    // %2
179    "+r"(rgb_buf),  // %3
180    "+r"(width)     // %4
181  : "r" (_kCoefficientsRgbY)  // %5
182  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
183);
184}
185
186void FastConvertYUVToBGRARow(const uint8* y_buf,  // rdi
187                             const uint8* u_buf,  // rsi
188                             const uint8* v_buf,  // rdx
189                             uint8* rgb_buf,      // rcx
190                             int width) {         // r8
191  asm volatile(
192"1:"
193  "movzb  (%1),%%r10\n"
194  "lea    1(%1),%1\n"
195  "movzb  (%2),%%r11\n"
196  "lea    1(%2),%2\n"
197  "movq   2048(%5,%%r10,8),%%xmm0\n"
198  "movzb  (%0),%%r10\n"
199  "movq   4096(%5,%%r11,8),%%xmm1\n"
200  "movzb  0x1(%0),%%r11\n"
201  "paddsw %%xmm1,%%xmm0\n"
202  "movq   (%5,%%r10,8),%%xmm2\n"
203  "lea    2(%0),%0\n"
204  "movq   (%5,%%r11,8),%%xmm3\n"
205  "paddsw %%xmm0,%%xmm2\n"
206  "paddsw %%xmm0,%%xmm3\n"
207  "shufps $0x44,%%xmm3,%%xmm2\n"
208  "psraw  $0x6,%%xmm2\n"
209  "packuswb %%xmm2,%%xmm2\n"
210  "movq   %%xmm2,0x0(%3)\n"
211  "lea    8(%3),%3\n"
212  "sub    $0x2,%4\n"
213  "ja     1b\n"
214  : "+r"(y_buf),    // %0
215    "+r"(u_buf),    // %1
216    "+r"(v_buf),    // %2
217    "+r"(rgb_buf),  // %3
218    "+r"(width)     // %4
219  : "r" (_kCoefficientsBgraY)  // %5
220  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
221);
222}
223
224void FastConvertYUVToABGRRow(const uint8* y_buf,  // rdi
225                             const uint8* u_buf,  // rsi
226                             const uint8* v_buf,  // rdx
227                             uint8* rgb_buf,      // rcx
228                             int width) {         // r8
229  asm volatile(
230"1:"
231  "movzb  (%1),%%r10\n"
232  "lea    1(%1),%1\n"
233  "movzb  (%2),%%r11\n"
234  "lea    1(%2),%2\n"
235  "movq   2048(%5,%%r10,8),%%xmm0\n"
236  "movzb  (%0),%%r10\n"
237  "movq   4096(%5,%%r11,8),%%xmm1\n"
238  "movzb  0x1(%0),%%r11\n"
239  "paddsw %%xmm1,%%xmm0\n"
240  "movq   (%5,%%r10,8),%%xmm2\n"
241  "lea    2(%0),%0\n"
242  "movq   (%5,%%r11,8),%%xmm3\n"
243  "paddsw %%xmm0,%%xmm2\n"
244  "paddsw %%xmm0,%%xmm3\n"
245  "shufps $0x44,%%xmm3,%%xmm2\n"
246  "psraw  $0x6,%%xmm2\n"
247  "packuswb %%xmm2,%%xmm2\n"
248  "movq   %%xmm2,0x0(%3)\n"
249  "lea    8(%3),%3\n"
250  "sub    $0x2,%4\n"
251  "ja     1b\n"
252  : "+r"(y_buf),    // %0
253    "+r"(u_buf),    // %1
254    "+r"(v_buf),    // %2
255    "+r"(rgb_buf),  // %3
256    "+r"(width)     // %4
257  : "r" (_kCoefficientsAbgrY)  // %5
258  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
259);
260}
261
262void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // rdi
263                                 const uint8* u_buf,  // rsi
264                                 const uint8* v_buf,  // rdx
265                                 uint8* rgb_buf,      // rcx
266                                 int width) {         // r8
267  asm volatile(
268"1:"
269  "movzb  (%1),%%r10\n"
270  "lea    1(%1),%1\n"
271  "movzb  (%2),%%r11\n"
272  "lea    1(%2),%2\n"
273  "movq   2048(%5,%%r10,8),%%xmm0\n"
274  "movzb  (%0),%%r10\n"
275  "movq   4096(%5,%%r11,8),%%xmm1\n"
276  "paddsw %%xmm1,%%xmm0\n"
277  "movq   (%5,%%r10,8),%%xmm2\n"
278  "lea    1(%0),%0\n"
279  "paddsw %%xmm0,%%xmm2\n"
280  "shufps $0x44,%%xmm2,%%xmm2\n"
281  "psraw  $0x6,%%xmm2\n"
282  "packuswb %%xmm2,%%xmm2\n"
283  "movd   %%xmm2,0x0(%3)\n"
284  "lea    4(%3),%3\n"
285  "sub    $0x1,%4\n"
286  "ja     1b\n"
287  : "+r"(y_buf),    // %0
288    "+r"(u_buf),    // %1
289    "+r"(v_buf),    // %2
290    "+r"(rgb_buf),  // %3
291    "+r"(width)     // %4
292  : "r" (_kCoefficientsRgbY)  // %5
293  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2"
294);
295}
296
297void FastConvertYToRGB32Row(const uint8* y_buf,  // rdi
298                            uint8* rgb_buf,      // rcx
299                            int width) {         // r8
300  asm volatile(
301"1:"
302  "movzb  (%0),%%r10\n"
303  "movzb  0x1(%0),%%r11\n"
304  "movq   (%3,%%r10,8),%%xmm2\n"
305  "lea    2(%0),%0\n"
306  "movq   (%3,%%r11,8),%%xmm3\n"
307  "shufps $0x44,%%xmm3,%%xmm2\n"
308  "psraw  $0x6,%%xmm2\n"
309  "packuswb %%xmm2,%%xmm2\n"
310  "movq   %%xmm2,0x0(%1)\n"
311  "lea    8(%1),%1\n"
312  "sub    $0x2,%2\n"
313  "ja     1b\n"
314  : "+r"(y_buf),    // %0
315    "+r"(rgb_buf),  // %1
316    "+r"(width)     // %2
317  : "r" (_kCoefficientsRgbY)  // %3
318  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
319);
320}
321
322#elif defined(__i386__)
323// 32 bit gcc version
324
325void FastConvertYUVToRGB32Row(const uint8* y_buf,
326                              const uint8* u_buf,
327                              const uint8* v_buf,
328                              uint8* rgb_buf,
329                              int width);
330  asm(
331  ".text\n"
332#if defined(OSX) || defined(IOS)
333  ".globl _FastConvertYUVToRGB32Row\n"
334"_FastConvertYUVToRGB32Row:\n"
335#else
336  ".global FastConvertYUVToRGB32Row\n"
337"FastConvertYUVToRGB32Row:\n"
338#endif
339  "pusha\n"
340  "mov    0x24(%esp),%edx\n"
341  "mov    0x28(%esp),%edi\n"
342  "mov    0x2c(%esp),%esi\n"
343  "mov    0x30(%esp),%ebp\n"
344  "mov    0x34(%esp),%ecx\n"
345
346"1:"
347  "movzbl (%edi),%eax\n"
348  "lea    1(%edi),%edi\n"
349  "movzbl (%esi),%ebx\n"
350  "lea    1(%esi),%esi\n"
351  "movq   _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
352  "movzbl (%edx),%eax\n"
353  "paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
354  "movzbl 0x1(%edx),%ebx\n"
355  "movq   _kCoefficientsRgbY(,%eax,8),%mm1\n"
356  "lea    2(%edx),%edx\n"
357  "movq   _kCoefficientsRgbY(,%ebx,8),%mm2\n"
358  "paddsw %mm0,%mm1\n"
359  "paddsw %mm0,%mm2\n"
360  "psraw  $0x6,%mm1\n"
361  "psraw  $0x6,%mm2\n"
362  "packuswb %mm2,%mm1\n"
363  "movntq %mm1,0x0(%ebp)\n"
364  "lea    8(%ebp),%ebp\n"
365  "sub    $0x2,%ecx\n"
366  "ja     1b\n"
367  "popa\n"
368  "ret\n"
369);
370
371void FastConvertYUVToBGRARow(const uint8* y_buf,
372                              const uint8* u_buf,
373                              const uint8* v_buf,
374                              uint8* rgb_buf,
375                              int width);
376  asm(
377  ".text\n"
378#if defined(OSX) || defined(IOS)
379  ".globl _FastConvertYUVToBGRARow\n"
380"_FastConvertYUVToBGRARow:\n"
381#else
382  ".global FastConvertYUVToBGRARow\n"
383"FastConvertYUVToBGRARow:\n"
384#endif
385  "pusha\n"
386  "mov    0x24(%esp),%edx\n"
387  "mov    0x28(%esp),%edi\n"
388  "mov    0x2c(%esp),%esi\n"
389  "mov    0x30(%esp),%ebp\n"
390  "mov    0x34(%esp),%ecx\n"
391
392"1:"
393  "movzbl (%edi),%eax\n"
394  "lea    1(%edi),%edi\n"
395  "movzbl (%esi),%ebx\n"
396  "lea    1(%esi),%esi\n"
397  "movq   _kCoefficientsBgraY+2048(,%eax,8),%mm0\n"
398  "movzbl (%edx),%eax\n"
399  "paddsw _kCoefficientsBgraY+4096(,%ebx,8),%mm0\n"
400  "movzbl 0x1(%edx),%ebx\n"
401  "movq   _kCoefficientsBgraY(,%eax,8),%mm1\n"
402  "lea    2(%edx),%edx\n"
403  "movq   _kCoefficientsBgraY(,%ebx,8),%mm2\n"
404  "paddsw %mm0,%mm1\n"
405  "paddsw %mm0,%mm2\n"
406  "psraw  $0x6,%mm1\n"
407  "psraw  $0x6,%mm2\n"
408  "packuswb %mm2,%mm1\n"
409  "movntq %mm1,0x0(%ebp)\n"
410  "lea    8(%ebp),%ebp\n"
411  "sub    $0x2,%ecx\n"
412  "ja     1b\n"
413  "popa\n"
414  "ret\n"
415);
416
417void FastConvertYUVToABGRRow(const uint8* y_buf,
418                              const uint8* u_buf,
419                              const uint8* v_buf,
420                              uint8* rgb_buf,
421                              int width);
422  asm(
423  ".text\n"
424#if defined(OSX) || defined(IOS)
425  ".globl _FastConvertYUVToABGRRow\n"
426"_FastConvertYUVToABGRRow:\n"
427#else
428  ".global FastConvertYUVToABGRRow\n"
429"FastConvertYUVToABGRRow:\n"
430#endif
431  "pusha\n"
432  "mov    0x24(%esp),%edx\n"
433  "mov    0x28(%esp),%edi\n"
434  "mov    0x2c(%esp),%esi\n"
435  "mov    0x30(%esp),%ebp\n"
436  "mov    0x34(%esp),%ecx\n"
437
438"1:"
439  "movzbl (%edi),%eax\n"
440  "lea    1(%edi),%edi\n"
441  "movzbl (%esi),%ebx\n"
442  "lea    1(%esi),%esi\n"
443  "movq   _kCoefficientsAbgrY+2048(,%eax,8),%mm0\n"
444  "movzbl (%edx),%eax\n"
445  "paddsw _kCoefficientsAbgrY+4096(,%ebx,8),%mm0\n"
446  "movzbl 0x1(%edx),%ebx\n"
447  "movq   _kCoefficientsAbgrY(,%eax,8),%mm1\n"
448  "lea    2(%edx),%edx\n"
449  "movq   _kCoefficientsAbgrY(,%ebx,8),%mm2\n"
450  "paddsw %mm0,%mm1\n"
451  "paddsw %mm0,%mm2\n"
452  "psraw  $0x6,%mm1\n"
453  "psraw  $0x6,%mm2\n"
454  "packuswb %mm2,%mm1\n"
455  "movntq %mm1,0x0(%ebp)\n"
456  "lea    8(%ebp),%ebp\n"
457  "sub    $0x2,%ecx\n"
458  "ja     1b\n"
459  "popa\n"
460  "ret\n"
461);
462
463void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
464                                 const uint8* u_buf,
465                                 const uint8* v_buf,
466                                 uint8* rgb_buf,
467                                 int width);
468  asm(
469  ".text\n"
470#if defined(OSX) || defined(IOS)
471  ".globl _FastConvertYUV444ToRGB32Row\n"
472"_FastConvertYUV444ToRGB32Row:\n"
473#else
474  ".global FastConvertYUV444ToRGB32Row\n"
475"FastConvertYUV444ToRGB32Row:\n"
476#endif
477  "pusha\n"
478  "mov    0x24(%esp),%edx\n"
479  "mov    0x28(%esp),%edi\n"
480  "mov    0x2c(%esp),%esi\n"
481  "mov    0x30(%esp),%ebp\n"
482  "mov    0x34(%esp),%ecx\n"
483
484"1:"
485  "movzbl (%edi),%eax\n"
486  "lea    1(%edi),%edi\n"
487  "movzbl (%esi),%ebx\n"
488  "lea    1(%esi),%esi\n"
489  "movq   _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
490  "movzbl (%edx),%eax\n"
491  "paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
492  "lea    1(%edx),%edx\n"
493  "paddsw _kCoefficientsRgbY(,%eax,8),%mm0\n"
494  "psraw  $0x6,%mm0\n"
495  "packuswb %mm0,%mm0\n"
496  "movd   %mm0,0x0(%ebp)\n"
497  "lea    4(%ebp),%ebp\n"
498  "sub    $0x1,%ecx\n"
499  "ja     1b\n"
500  "popa\n"
501  "ret\n"
502);
503
504void FastConvertYToRGB32Row(const uint8* y_buf,
505                            uint8* rgb_buf,
506                            int width);
507  asm(
508  ".text\n"
509#if defined(OSX) || defined(IOS)
510  ".globl _FastConvertYToRGB32Row\n"
511"_FastConvertYToRGB32Row:\n"
512#else
513  ".global FastConvertYToRGB32Row\n"
514"FastConvertYToRGB32Row:\n"
515#endif
516  "push   %ebx\n"
517  "mov    0x8(%esp),%eax\n"
518  "mov    0xc(%esp),%edx\n"
519  "mov    0x10(%esp),%ecx\n"
520
521"1:"
522  "movzbl (%eax),%ebx\n"
523  "movq   _kCoefficientsRgbY(,%ebx,8),%mm0\n"
524  "psraw  $0x6,%mm0\n"
525  "movzbl 0x1(%eax),%ebx\n"
526  "movq   _kCoefficientsRgbY(,%ebx,8),%mm1\n"
527  "psraw  $0x6,%mm1\n"
528  "packuswb %mm1,%mm0\n"
529  "lea    0x2(%eax),%eax\n"
530  "movq   %mm0,(%edx)\n"
531  "lea    0x8(%edx),%edx\n"
532  "sub    $0x2,%ecx\n"
533  "ja     1b\n"
534  "pop    %ebx\n"
535  "ret\n"
536);
537
538#else
539// C reference code that mimic the YUV assembly.
540#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
541#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
542    (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
543
544static inline void YuvPixel(uint8 y,
545                            uint8 u,
546                            uint8 v,
547                            uint8* rgb_buf,
548                            int ashift,
549                            int rshift,
550                            int gshift,
551                            int bshift) {
552
553  int b = _kCoefficientsRgbY[256+u][0];
554  int g = _kCoefficientsRgbY[256+u][1];
555  int r = _kCoefficientsRgbY[256+u][2];
556  int a = _kCoefficientsRgbY[256+u][3];
557
558  b = paddsw(b, _kCoefficientsRgbY[512+v][0]);
559  g = paddsw(g, _kCoefficientsRgbY[512+v][1]);
560  r = paddsw(r, _kCoefficientsRgbY[512+v][2]);
561  a = paddsw(a, _kCoefficientsRgbY[512+v][3]);
562
563  b = paddsw(b, _kCoefficientsRgbY[y][0]);
564  g = paddsw(g, _kCoefficientsRgbY[y][1]);
565  r = paddsw(r, _kCoefficientsRgbY[y][2]);
566  a = paddsw(a, _kCoefficientsRgbY[y][3]);
567
568  b >>= 6;
569  g >>= 6;
570  r >>= 6;
571  a >>= 6;
572
573  *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b) << bshift) |
574                                        (packuswb(g) << gshift) |
575                                        (packuswb(r) << rshift) |
576                                        (packuswb(a) << ashift);
577}
578
579void FastConvertYUVToRGB32Row(const uint8* y_buf,
580                              const uint8* u_buf,
581                              const uint8* v_buf,
582                              uint8* rgb_buf,
583                              int width) {
584  for (int x = 0; x < width; x += 2) {
585    uint8 u = u_buf[x >> 1];
586    uint8 v = v_buf[x >> 1];
587    uint8 y0 = y_buf[x];
588    YuvPixel(y0, u, v, rgb_buf, 24, 16, 8, 0);
589    if ((x + 1) < width) {
590      uint8 y1 = y_buf[x + 1];
591      YuvPixel(y1, u, v, rgb_buf + 4, 24, 16, 8, 0);
592    }
593    rgb_buf += 8;  // Advance 2 pixels.
594  }
595}
596
597void FastConvertYUVToBGRARow(const uint8* y_buf,
598                             const uint8* u_buf,
599                             const uint8* v_buf,
600                             uint8* rgb_buf,
601                             int width) {
602  for (int x = 0; x < width; x += 2) {
603    uint8 u = u_buf[x >> 1];
604    uint8 v = v_buf[x >> 1];
605    uint8 y0 = y_buf[x];
606    YuvPixel(y0, u, v, rgb_buf, 0, 8, 16, 24);
607    if ((x + 1) < width) {
608      uint8 y1 = y_buf[x + 1];
609      YuvPixel(y1, u, v, rgb_buf + 4, 0, 8, 16, 24);
610    }
611    rgb_buf += 8;  // Advance 2 pixels.
612  }
613}
614
615void FastConvertYUVToABGRRow(const uint8* y_buf,
616                             const uint8* u_buf,
617                             const uint8* v_buf,
618                             uint8* rgb_buf,
619                             int width) {
620  for (int x = 0; x < width; x += 2) {
621    uint8 u = u_buf[x >> 1];
622    uint8 v = v_buf[x >> 1];
623    uint8 y0 = y_buf[x];
624    YuvPixel(y0, u, v, rgb_buf, 24, 0, 8, 16);
625    if ((x + 1) < width) {
626      uint8 y1 = y_buf[x + 1];
627      YuvPixel(y1, u, v, rgb_buf + 4, 24, 0, 8, 16);
628    }
629    rgb_buf += 8;  // Advance 2 pixels.
630  }
631}
632
633void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
634                                 const uint8* u_buf,
635                                 const uint8* v_buf,
636                                 uint8* rgb_buf,
637                                 int width) {
638  for (int x = 0; x < width; ++x) {
639    uint8 u = u_buf[x];
640    uint8 v = v_buf[x];
641    uint8 y = y_buf[x];
642    YuvPixel(y, u, v, rgb_buf, 24, 16, 8, 0);
643    rgb_buf += 4;  // Advance 1 pixel.
644  }
645}
646
647void FastConvertYToRGB32Row(const uint8* y_buf,
648                            uint8* rgb_buf,
649                            int width) {
650  for (int x = 0; x < width; ++x) {
651    uint8 y = y_buf[x];
652    YuvPixel(y, 128, 128, rgb_buf, 24, 16, 8, 0);
653    rgb_buf += 4;  // Advance 1 pixel.
654  }
655}
656
657#endif
658
659}  // extern "C"
660