; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

;
; void SYMBOL(const uint8* argb, uint8* y, uint8* u, uint8* v, int width);
;
; The main code that converts RGB pixels to YUV pixels. This function roughly
; consists of three parts: converting one ARGB pixel to YUV pixels, converting
; two ARGB pixels to YUV pixels, and converting four ARGB pixels to YUV pixels.
; Written in C, the structure of this function is the snippet listed below.
;
;   if (width & 1) {
;     --width;
;     // Convert one ARGB pixel to one Y pixel, one U pixel, and one V pixel.
;   }
;
;   if (width & 2) {
;     width -= 2;
;     // Convert two ARGB pixels to two Y pixels, one U pixel, and one V pixel.
;   }
;
;   while (width) {
;     width -= 4;
;     // Convert four ARGB pixels to four Y pixels, two U pixels, and two V
;     // pixels.
;   }
;
; Arguments:
;   argb   source pixels; read through READ_ARGB (or movdqa in the aligned
;          PIXELSIZE == 4 loop).
;   y      destination for the Y values, one byte per pixel.
;   u, v   destinations for the U and V values, one byte per two pixels. When
;          u is NULL the U/V computation is skipped (in the four-pixel loops
;          this check is only assembled when SUBSAMPLING == 0).
;   width  number of pixels to convert. Pixels are processed from the right
;          end of the row towards the left.
;
; Throughout the body WIDTH holds the remaining width divided by two, so it
; can be used directly as the index into u[] and v[], and scaled by two as the
; index into y[].
;
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign stack_offset 0
  PROLOGUE 5, 6, 8, ARGB, Y, U, V, WIDTH, TEMP

  ; Initialize constants used in this function. (We use immediates to avoid
  ; dependency onto GOT.)
  LOAD_XMM  XMM_CONST_Y0, 0x00420219
  LOAD_XMM  XMM_CONST_Y1, 0x00007F00
  LOAD_XMM  XMM_CONST_U, 0x00DAB670
  LOAD_XMM  XMM_CONST_V, 0x0070A2EE
  LOAD_XMM  XMM_CONST_128, 0x00800080

.convert_one_pixel:
  ; Divide the input width by two so it represents the offsets for u[] and v[].
  ; The bit shifted out lands in the carry flag, so "no carry" means the width
  ; was even. When the width is odd, we read the rightmost ARGB pixel and
  ; convert its colorspace to YUV. This code stores one Y pixel, one U pixel,
  ; and one V pixel.
  sar       WIDTHq, 1
  jnc       .convert_two_pixels

  ; Read one ARGB (or RGB) pixel.
  READ_ARGB xmm0, 1

  ; Calculate y[0] from one RGB pixel read above.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       BYTE [Yq + WIDTHq * 2], TEMPb

  ; Skip calculating u and v if the output buffer is NULL. (Same check as in
  ; .convert_two_pixels; without it an odd width with a NULL chroma buffer
  ; would store through a near-NULL address.)
  test      Uq, Uq
  jz        .convert_two_pixels

  ; Calculate u[0] from one RGB pixel read above. If this is an odd line, the
  ; output pixel contains the U value calculated in the previous call. We also
  ; read this pixel and calculate their average.
  ; NOTE(review): the third INIT_UV argument is 4 here although only one pixel
  ; was read; INIT_UV is defined outside this file section -- confirm this is
  ; intended.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from one RGB pixel. Same as u[0], we read the result of the
  ; previous call and get their average. The previous result must come from
  ; the V plane (not U), matching the two-pixel and four-pixel paths.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_two_pixels:
  ; If the input width is not a multiple of four, read the rightmost two ARGB
  ; pixels and convert their colorspace to YUV. This code stores two Y pixels,
  ; one U pixel, and one V pixel. (WIDTH is already halved, hence "2 / 2".)
  test      WIDTHb, 2 / 2
  jz        .convert_four_pixels
  sub       WIDTHb, 2 / 2

  ; Read two ARGB (or RGB) pixels.
  READ_ARGB xmm0, 2

  ; Calculate y[0] and y[1] from two RGB pixels read above.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       WORD [Yq + WIDTHq * 2], TEMPw

  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels

  ; Calculate u[0] from two RGB pixels read above. (For details, read the above
  ; comment in .convert_one_pixel).
  INIT_UV   TEMPd, Uq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from two RGB pixels read above.
  INIT_UV   TEMPd, Vq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_four_pixels:
  ; Read four ARGB pixels and convert their colorspace to YUV. This code stores
  ; four Y pixels, two U pixels, and two V pixels.
  test      WIDTHq, WIDTHq
  jz        .convert_finish

%if PIXELSIZE == 4
  ; Check if the input buffer is aligned to a 16-byte boundary and use movdqa
  ; for reading the ARGB pixels. (When PIXELSIZE != 4 this whole aligned loop
  ; is not assembled and execution falls through to the unaligned loop.)
  test      ARGBw, 15
  jnz       .convert_four_pixels_unaligned

.convert_four_pixels_aligned:
  sub       WIDTHq, 4 / 2

  ; Read four ARGB pixels. (We can use movdqa here since we have checked if the
  ; source address is aligned.)
  movdqa    xmm0, DQWORD [ARGBq + WIDTHq * 4 * 2]

  ; Calculate y[0], y[1], y[2], and y[3] from the input ARGB pixels.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL, which means we are
  ; converting an odd line. (When we enable subsampling, these buffers must
  ; contain the u and v values for the previous call, i.e. these variables must
  ; not be NULL.)
  test      Uq, Uq
  jz        .convert_four_pixels_aligned_next
%endif

  ; Calculate u[0] and u[1] from four ARGB pixels read above.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from four ARGB pixels read above.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_aligned_next:
%endif

  ; Loop until every remaining group of four pixels has been converted.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_aligned

  jmp       .convert_finish
%endif

.convert_four_pixels_unaligned:
  sub       WIDTHq, 4 / 2

  ; Read four ARGB (or RGB) pixels.
  READ_ARGB xmm0, 4

  ; Calculate y[0], y[1], y[2], and y[3] from the input ARGB pixels.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels_unaligned_next
%endif

  ; Calculate u[0] and u[1] from the input ARGB pixels.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from the input ARGB pixels.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_unaligned_next:
%endif

  ; Loop until every remaining group of four pixels has been converted.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_unaligned

.convert_finish:
  ; Just exit this function since this is a void function.
  RET