; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses SSE2 instructions (all SIMD operations below work on XMM
; registers).
;
  SECTION_TEXT
  CPU       SSE2

; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
;                                  const uint8* u_buf,
;                                  const uint8* v_buf,
;                                  uint8* rgb_buf,
;                                  ptrdiff_t width,
;                                  ptrdiff_t source_dx,
;                                  const int16* convert_table);
;
; Converts one row of Y/U/V samples into 4-byte output pixels while stepping
; through the source with a fixed-point increment.
;
; The source position is kept as a 16.16 fixed-point value that starts at 0
; and advances by source_dx per output pixel:
;   - the Y sample index is (position >> 16);
;   - the U and V sample index is (position >> 17), i.e. the chroma rows are
;     addressed at half the horizontal resolution of the Y row.
;
; convert_table is read as three consecutive sub-tables of 8-byte entries
; (four 16-bit words each), indexed by the 8-bit sample value:
;   - byte offset 0:    contribution of Y
;   - byte offset 2048: contribution of U
;   - byte offset 4096: contribution of V
; The three contributions are summed with signed saturation, shifted right
; arithmetically by 6 and packed to unsigned bytes with saturation.
; NOTE(review): the C type of convert_table above is inferred from the 16-bit
; word arithmetic performed on its entries; confirm against the C header.
%define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
; 7. Convert table

; x86inc prologue: 7 arguments, 7 general-purpose registers, 3 XMM registers.
; The seventh argument is named R1; it is copied to TABLEq right away because
; the same register is reused as the COMP scratch register afterwards.
PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

; Register roles for the rest of the function:
;   TABLEq - base address of the conversion table
;   Xq     - current source position (16.16 fixed point)
;   INDEXq - scratch: sample index / position of the following pixel
;   COMPq  - scratch: zero-extended 8-bit sample used as table index
%define     TABLEq     r10
%define     Xq         r11
%define     INDEXq     r12
%define     COMPq      R1q
%define     COMPd      R1d

  ; Preserve the extra registers used below; restored at .scaledone.
  PUSH      r10
  PUSH      r11
  PUSH      r12

  ; Keep the table pointer before R1 is overwritten as COMP.
  mov       TABLEq, R1q

  ; Set Xq index to 0.
  xor       Xq, Xq
  ; Enter the loop at its bottom test so that a width below 2 skips the
  ; two-pixel body entirely.
  jmp       .scaleend

; Main loop: produces two output pixels (8 bytes) per iteration. Both pixels
; share the U/V contribution looked up at the first pixel's position.
.scaleloop:
  ; Read UV pixels.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPd, BYTE [Uq + INDEXq]
  movq      xmm0, [TABLEq + 2048 + 8 * COMPq]
  movzx     COMPd, BYTE [Vq + INDEXq]
  movq      xmm1, [TABLEq + 4096 + 8 * COMPq]

  ; Read first Y pixel.
  lea       INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel.
  sar       Xq, 16
  movzx     COMPd, BYTE [Yq + Xq]
  paddsw    xmm0, xmm1		      ; Hide an add after the memory load.
  movq      xmm1, [TABLEq + 8 * COMPq]

  ; Read next Y pixel.
  lea       Xq, [INDEXq + SOURCE_DXq] ; Xq now points to next pixel.
  sar       INDEXq, 16
  movzx     COMPd, BYTE [Yq + INDEXq]
  movq      xmm2, [TABLEq + 8 * COMPq]
  paddsw    xmm1, xmm0                ; first pixel:  Y + (U + V)
  paddsw    xmm2, xmm0                ; second pixel: Y + (U + V)
  shufps    xmm1, xmm2, 0x44          ; Join two pixels into one XMM register
  psraw     xmm1, 6
  packuswb  xmm1, xmm1
  movq      QWORD [ARGBq], xmm1
  add       ARGBq, 8

.scaleend:
  ; Continue while at least two pixels remain (WIDTH stays non-negative).
  sub       WIDTHq, 2
  jns       .scaleloop

  ; WIDTHq is now negative; its low bit is set only when exactly one pixel
  ; is left to convert.
  and       WIDTHq, 1                 ; odd number of pixels?
  jz        .scaledone

  ; Read U V components.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPd, BYTE [Uq + INDEXq]
  movq      xmm0, [TABLEq + 2048 + 8 * COMPq]
  movzx     COMPd, BYTE [Vq + INDEXq]
  movq      xmm1, [TABLEq + 4096 + 8 * COMPq]
  paddsw    xmm0, xmm1

  ; Read one Y component.
  mov       INDEXq, Xq
  sar       INDEXq, 16
  movzx     COMPd, BYTE [Yq + INDEXq]
  movq      xmm1, [TABLEq + 8 * COMPq]
  paddsw    xmm1, xmm0
  psraw     xmm1, 6
  packuswb  xmm1, xmm1
  movd      DWORD [ARGBq], xmm1       ; store the single remaining pixel

.scaledone:
  ; Restore in reverse order of the PUSHes above.
  POP       r12
  POP       r11
  POP       r10
  RET