; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses SSE and SSE2 instructions (XMM registers only).
;
  SECTION_TEXT
  CPU       SSE2

; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
;                                  const uint8* u_buf,
;                                  const uint8* v_buf,
;                                  uint8* rgb_buf,
;                                  ptrdiff_t width,
;                                  ptrdiff_t source_dx,
;                                  const void* convert_table);
;
; NOTE(review): the code below loads a seventh argument and uses it as the
; conversion table base. Its exact C type is declared outside this file;
; `const void*` above is a placeholder - confirm against the C header.
;
; Converts one row of YUV samples to 4-byte output pixels while scaling
; horizontally. A running position X starts at 0 and is advanced by
; source_dx for every output pixel. The Y sample is read at index X >> 16
; and the U/V samples at index X >> 17 (half the Y index).
;
; Each sample byte selects one 8-byte entry (four signed 16-bit words) from
; the convert table. The Y, U and V entries are summed with signed
; saturation, shifted right arithmetically by 6 bits and packed to unsigned
; bytes with saturation, giving one 4-byte pixel. Pixels are produced two at
; a time; an odd trailing pixel is handled separately.
;
; A width of zero or less writes nothing.
%define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0
  ; Declared here but not referenced by the code below; the table that is
  ; actually used is the seventh argument.
  extern    mangle(kCoefficientsRgbY)

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
; 7. Convert table

PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

; Register roles:
;   TABLEq - base address of the convert table (copied from argument 7).
;   Xq     - running source position, advanced by SOURCE_DX per pixel.
;   INDEXq - scratch sample index / position of the second pixel of a pair.
;   COMPq  - zero-extended sample byte used to index the table. It reuses
;            the register that carried argument 7, which is why the table
;            pointer is moved to TABLEq first.
%define     TABLEq   r10
%define     Xq       r11
%define     INDEXq   r12
%define     COMPq    R1q
%define     COMPd    R1d

; Table layout as used below: every lookup is indexed by a zero-extended
; byte (0..255) scaled by 8, so each component occupies 256 * 8 = 2048
; bytes. Y entries start at offset 0, U entries at 2048, V entries at 4096.
%define     U_TABLE_OFFSET  2048
%define     V_TABLE_OFFSET  4096

  ; r12 is callee-saved and must be preserved. r10 and r11 are saved as
  ; well, which is conservative.
  PUSH      r10
  PUSH      r11
  PUSH      r12

  ; Free up R1 for use as COMP.
  mov       TABLEq, R1q

  ; Set Xq index to 0.
  xor       Xq, Xq

  ; Nothing to do for width <= 0. Without this check a negative odd width
  ; would fall through the pair loop and still satisfy the "odd pixel"
  ; test below, storing one pixel.
  test      WIDTHq, WIDTHq
  jle       .scaledone
  jmp       .scaleend

.scaleloop:
  ; Read UV pixels. Both output pixels of the pair use the U/V samples
  ; selected by the position of the first pixel.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPd, BYTE [Uq + INDEXq]
  movq      xmm0, [TABLEq + U_TABLE_OFFSET + 8 * COMPq]
  movzx     COMPd, BYTE [Vq + INDEXq]
  movq      xmm1, [TABLEq + V_TABLE_OFFSET + 8 * COMPq]

  ; Read first Y pixel.
  lea       INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel.
  sar       Xq, 16
  movzx     COMPd, BYTE [Yq + Xq]
  paddsw    xmm0, xmm1                ; xmm0 = U + V; hides an ADD after the memory load.
  movq      xmm1, [TABLEq + 8 * COMPq]

  ; Read next Y pixel.
  lea       Xq, [INDEXq + SOURCE_DXq] ; Xq now points to next pixel.
  sar       INDEXq, 16
  movzx     COMPd, BYTE [Yq + INDEXq]
  movq      xmm2, [TABLEq + 8 * COMPq]
  paddsw    xmm1, xmm0                ; first pixel  = Y0 + (U + V)
  paddsw    xmm2, xmm0                ; second pixel = Y1 + (U + V)
  shufps    xmm1, xmm2, 0x44          ; Join two pixels into one XMM register:
                                      ; low qword of xmm1, then low qword of xmm2.
  psraw     xmm1, 6
  packuswb  xmm1, xmm1                ; Saturate eight words to eight bytes.
  movq      QWORD [ARGBq], xmm1       ; Store two 4-byte pixels.
  add       ARGBq, 8

.scaleend:
  ; Loop while at least two pixels remain.
  sub       WIDTHq, 2
  jns       .scaleloop

  ; WIDTHq is now -1 if one pixel remains and -2 if none, so bit 0 tells
  ; the two cases apart.
  and       WIDTHq, 1                 ; odd number of pixels?
  jz        .scaledone

  ; Read U V components.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPd, BYTE [Uq + INDEXq]
  movq      xmm0, [TABLEq + U_TABLE_OFFSET + 8 * COMPq]
  movzx     COMPd, BYTE [Vq + INDEXq]
  movq      xmm1, [TABLEq + V_TABLE_OFFSET + 8 * COMPq]
  paddsw    xmm0, xmm1

  ; Read one Y component.
  mov       INDEXq, Xq
  sar       INDEXq, 16
  movzx     COMPd, BYTE [Yq + INDEXq]
  movq      xmm1, [TABLEq + 8 * COMPq]
  paddsw    xmm1, xmm0
  psraw     xmm1, 6
  packuswb  xmm1, xmm1
  movd      DWORD [ARGBq], xmm1       ; Store the single trailing pixel.

.scaledone:
  ; Restore in reverse order of the pushes above.
  POP       r12
  POP       r11
  POP       r10
  RET