; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses MMX instructions.
;
; NOTE(review): movntq (used for the two-pixel store below) was introduced
; with the SSE / extended-MMX instruction sets rather than base MMX.
; NOTE(review): this routine never executes emms, so the MMX state is left
; dirty on return; presumably the caller clears it - confirm.
;
  SECTION_TEXT
  CPU       MMX

; Converts one row of YUV to 32-bit ARGB while scaling horizontally with
; linear interpolation. Every sample (Y, U and V) is blended from its two
; neighbouring source bytes before being looked up in the conversion table.
;
;void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
;                                      const uint8* u_buf,
;                                      const uint8* v_buf,
;                                      uint8* rgb_buf,
;                                      ptrdiff_t width,
;                                      ptrdiff_t source_dx,
;                                      const int16* convert_table);
; NOTE(review): the name and element type of the 7th parameter are not
; visible in this file; confirm them against the C declaration.
%define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0
  ; NOTE(review): kCoefficientsRgbY is declared but not referenced below; the
  ; table pointer comes from the 7th argument instead. Kept as-is.
  extern    mangle(kCoefficientsRgbY)

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width (in output pixels; multiplied by source_dx below)
; 6. Source dx (16.16 fixed-point step through the source row)
; 7. Conversion lookup table

; x86inc convention: 7 arguments, 7 general-purpose registers, 3 mm registers.
PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

%define     TABLEq     r10
%define     Xq         r11
%define     INDEXq     r12
%define     COMPRd     r13d
%define     COMPRq     r13
%define     FRACTIONq  r14
; The 7th argument register is copied to TABLEq first and then recycled as
; the "left component" scratch register.
%define     COMPL      R1
%define     COMPLq     R1q
%define     COMPLd     R1d

  ; Save the extra registers used below; EPILOGUE restores them in reverse.
  PUSH      TABLEq
  PUSH      Xq
  PUSH      INDEXq
  PUSH      COMPRq
  PUSH      FRACTIONq

%macro EPILOGUE 0
  POP       FRACTIONq
  POP       COMPRq
  POP       INDEXq
  POP       Xq
  POP       TABLEq
%endmacro

  mov       TABLEq, R1q                  ; keep the table pointer before R1
                                         ; is reused as COMPL

  imul      WIDTHq, SOURCE_DXq           ; source_width = width * source_dx
  xor       Xq, Xq                       ; x = 0
  cmp       SOURCE_DXq, 0x20000          ; 0x20000 == 2.0 in 16.16
  jl        .lscaleend
  mov       Xq, 0x8000                   ; x = 0.5 when scaling down to 1/2
                                         ; or less (source_dx >= 2.0)
  jmp       .lscaleend

; Each iteration produces two output pixels that share one interpolated
; U/V pair (mm0) and use two separately interpolated Y values (mm1, mm2).
.lscaleloop:
  ; Interpolate U
  ; Chroma index is x >> 17 and the blend weight is the low 17 bits of x
  ; with bit 0 cleared, i.e. chroma advances at half the luma rate.
  mov       INDEXq, Xq
  sar       INDEXq, 0x11
  movzx     COMPLd, BYTE [Uq + INDEXq]
  movzx     COMPRd, BYTE [Uq + INDEXq + 1]
  mov       FRACTIONq, Xq
  and       FRACTIONq, 0x1fffe
  imul      COMPRq, FRACTIONq            ; right * fraction
  xor       FRACTIONq, 0x1fffe           ; fraction -> (0x1fffe - fraction)
  imul      COMPLq, FRACTIONq            ; left * inverted fraction
  add       COMPLq, COMPRq
  shr       COMPLq, 17
  ; 8-byte entries; the U entries start 2048 bytes (256 * 8) into the table.
  movq      mm0, [TABLEq + 2048 + 8 * COMPLq]

  ; Interpolate V
  movzx     COMPLd, BYTE [Vq + INDEXq]
  movzx     COMPRd, BYTE [Vq + INDEXq + 1]
  ; Trick here to imul COMPL first then COMPR.
  ; Saves two instructions. :)
  ; FRACTIONq still holds the inverted fraction left over from U, so the
  ; left sample is weighted first and the xor then restores the fraction.
  imul      COMPLq, FRACTIONq
  xor       FRACTIONq, 0x1fffe
  imul      COMPRq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17
  ; The V entries start 4096 bytes (2 * 256 * 8) into the table.
  paddsw    mm0, [TABLEq + 4096 + 8 * COMPLq]

  ; Interpolate first Y1.
  lea       INDEXq, [Xq + SOURCE_DXq]    ; INDEXq now points to next pixel.
                                         ; Xq points to current pixel.
  mov       FRACTIONq, Xq
  sar       Xq, 0x10                     ; integer part of 16.16 position
  movzx     COMPLd, BYTE [Yq + Xq]
  movzx     COMPRd, BYTE [Yq + Xq + 1]
  and       FRACTIONq, 0xffff            ; fractional part of 16.16 position
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0xffff
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm1, [TABLEq + 8 * COMPLq]   ; the Y entries start at offset 0

  ; Interpolate Y2 if available.
  ; If the second pixel's position is at or beyond source_width, only one
  ; pixel is left in the row.
  cmp       INDEXq, WIDTHq
  jge       .lscalelastpixel

  lea       Xq, [INDEXq + SOURCE_DXq]    ; Xq points to next pixel.
                                         ; INDEXq points to current pixel.
  mov       FRACTIONq, INDEXq
  sar       INDEXq, 0x10
  movzx     COMPLd, BYTE [Yq + INDEXq]
  movzx     COMPRd, BYTE [Yq + INDEXq + 1]
  and       FRACTIONq, 0xffff
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0xffff
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm2, [TABLEq + 8 * COMPLq]

  ; Add the shared U/V contribution to both Y values, drop the 6 low bits,
  ; saturate to unsigned bytes and store both pixels with one 8-byte write.
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  psraw     mm1, 0x6
  psraw     mm2, 0x6
  packuswb  mm1, mm2
  movntq    [ARGBq], mm1
  add       ARGBq, 0x8

.lscaleend:
  cmp       Xq, WIDTHq                   ; continue while x < source_width
  jl        .lscaleloop
  jmp       .epilogue

; Odd trailing pixel: same conversion as above but only 4 bytes are written.
.lscalelastpixel:
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1

.epilogue:
  EPILOGUE
  RET