; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"

;-----------------------------------------------------------------------
; mangle(SYMBOL): convert one row of YUVA pixels to ARGB using MMX.
;
; Syntax: NASM/yasm, Intel operand order, x86inc-style macros (PROLOGUE,
;         DEFINE_ARGS, PUSH/POP, RET). SYMBOL, mangle, EXPORT and
;         function_align are expected to be defined by the including file
;         or by media_export.asm; they are not defined here.
;
; Args (in PROLOGUE order):
;   Y, A   - byte pointers, one byte consumed per output pixel.
;   U, V   - byte pointers, one byte consumed per TWO output pixels.
;   ARGB   - output pointer, 4 bytes written per pixel.
;   WIDTH  - number of pixels; only the low 32 bits are used (signed).
;   TABLE  - lookup table made of four consecutive sub-tables of
;            256 entries x 8 bytes, indexed by a source byte:
;              TABLE + 0     indexed by Y
;              TABLE + 2048  indexed by U
;              TABLE + 4096  indexed by V
;              TABLE + 6144  indexed by A
;            NOTE(review): each entry is treated as four signed 16-bit
;            words; the Y/U/V entries are presumably fixed-point
;            contributions with 6 fractional bits (they are summed and then
;            shifted right by 6), and the A entries presumably hold the
;            alpha value replicated per channel. The table contents are not
;            visible here - confirm against the table definition.
;
; Clobbers: mm0-mm2, flags, and the TEMP register.
; NOTE(review): no emms is executed in this routine; the caller presumably
; resets the x87/MMX state - confirm at the call sites.
;-----------------------------------------------------------------------

  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0
  PROLOGUE  7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE

  ; Spill WIDTH to the stack so that its register can be reused. After the
  ; DEFINE_ARGS below, the register that held WIDTH is named TABLE and the
  ; register that held TABLE is named TEMP, hence the move TEMP -> TABLE:
  ; it relocates the table pointer and frees the seventh register as scratch.
  PUSH      WIDTHq
  DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP
  mov       TABLEq, TEMPq
  jmp       .convertend

  ; Main loop: two pixels per iteration. The remaining pixel count lives in
  ; dword [rsp].
.convertloop:
  ; mm0 = U contribution + V contribution (shared by both pixels).
  movzx     TEMPd, BYTE [Uq]
  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
  add       Uq, 1

  movzx     TEMPd, BYTE [Vq]
  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
  add       Vq, 1

  ; mm1 / mm2 = Y contribution of the first / second pixel.
  movzx     TEMPd, BYTE [Yq]
  movq      mm1, [TABLEq + 8 * TEMPq]

  movzx     TEMPd, BYTE [Yq + 1]
  movq      mm2, [TABLEq + 8 * TEMPq]
  add       Yq, 2

  ; Add UV components to Y component.
  paddsw    mm1, mm0
  paddsw    mm2, mm0

  ; Down shift and then pack.
  psraw     mm1, 6
  psraw     mm2, 6
  packuswb  mm1, mm2

  ; Unpack: mm0 = first pixel, mm1 = second pixel, each as four
  ; zero-extended 16-bit words.
  movq      mm0, mm1
  pxor      mm2, mm2
  punpcklbw mm0, mm2
  punpckhbw mm1, mm2

  ; Add one to our alpha values, this is a somewhat unfortunate hack; while
  ; the pack/unpack above handle saturating any negative numbers to 0, they also
  ; truncate the alpha value to 255. The math ahead wants to produce the same
  ; ARGB alpha value as the source pixel in YUVA, but this depends on the alpha
  ; value in |mm0| and |mm1| being 256, (let A be the source image alpha,
  ; 256 * A >> 8 == A, whereas 255 * A >> 8 is off by one except at 0).
  ; mm2 = 0x0001_0000_0000_0000, i.e. 1 in the highest 16-bit word only.
  mov       TEMPq, 0x00010000
  movd      mm2, TEMPd
  psllq     mm2, 32
  paddsw    mm0, mm2
  paddsw    mm1, mm2

  ; Multiply by alpha value, then repack high bytes of words.
  movzx     TEMPd, BYTE [Aq]
  movq      mm2, [TABLEq + 6144 + 8 * TEMPq]
  pmullw    mm0, mm2
  psrlw     mm0, 8
  movzx     TEMPd, BYTE [Aq + 1]
  movq      mm2, [TABLEq + 6144 + 8 * TEMPq]
  add       Aq, 2
  pmullw    mm1, mm2
  psrlw     mm1, 8
  packuswb  mm0, mm1

  ; Store both pixels (8 bytes). MOVQ is written in upper case in the
  ; original; NOTE(review): presumably the including file may define it as a
  ; macro to choose the store instruction - confirm.
  MOVQ      [ARGBq], mm0
  add       ARGBq, 8

.convertend:
  ; Consume two pixels from the counter; keep looping while it stays >= 0.
  sub       dword [rsp], 2
  jns       .convertloop

  ; If number of pixels is odd then compute it.
  ; The counter is now -1 (one pixel left, low bit set) or -2 (none left).
  and       dword [rsp], 1
  jz        .convertdone

  ; Single trailing pixel: same arithmetic as above on one pixel. The source
  ; and destination pointers are not advanced here.
  movzx     TEMPd, BYTE [Uq]
  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
  movzx     TEMPd, BYTE [Vq]
  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
  movzx     TEMPd, BYTE [Yq]
  movq      mm1, [TABLEq + 8 * TEMPq]
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1

  ; Multiply ARGB by alpha value.
  pxor      mm0, mm0
  punpcklbw mm1, mm0

  ; Same alpha + 1 adjustment as in the main loop, so that the alpha word is
  ; 256 rather than 255 before the multiply and shift by 8.
  mov       TEMPq, 0x00010000
  movd      mm0, TEMPd
  psllq     mm0, 32
  paddsw    mm1, mm0

  movzx     TEMPd, BYTE [Aq]
  movq      mm0, [TABLEq + 6144 + 8 * TEMPq]
  pmullw    mm1, mm0
  psrlw     mm1, 8
  packuswb  mm1, mm1

  ; Store the single pixel (4 bytes).
  movd      [ARGBq], mm1

.convertdone:
  ; Discard the spilled width; the popped value itself is not used, the POP
  ; only rebalances the stack before returning.
  POP       TABLEq
  RET