;
;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;Note: tap3 and tap4 have to be applied and added after other taps to avoid
;overflow.

;-----------------------------------------------------------------------
; HIGH_GET_FILTERS_4
;
; Prepares the constants used by HIGH_APPLY_FILTER_4 and stores them in
; the stack slots the invoking function has %define'd.
;
; Reads:   arg(5) = pointer to eight 16-bit filter taps k0..k7; it is
;                   loaded with movdqa, so it must be 16-byte aligned
;          arg(6) = bps, read as a signed 32-bit value
; Writes:  k0k6, k1k7, k2k5, k3k4 = the named tap pair repeated 4 times
;          krd = 0x40 in each of the 4 dwords (rounding term for >> 7)
;          max = (1 << bps) - 1 in each of the 8 words
;          min = 0
; Clobbers: rcx, rdx, xmm0-xmm7
;-----------------------------------------------------------------------
%macro HIGH_GET_FILTERS_4 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rcx, 0x00000040

    ; Broadcast each tap across the low four words of its own register.
    movdqa      xmm7, [rdx]                 ;load filters
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    psrldq      xmm7, 8                     ;bring k4..k7 into the low qword
    pshuflw     xmm4, xmm7, 0b              ;k4
    pshuflw     xmm5, xmm7, 01010101b       ;k5
    pshuflw     xmm6, xmm7, 10101010b       ;k6
    pshuflw     xmm7, xmm7, 11111111b       ;k7

    ; Interleave the taps into the pairs that pmaddwd will consume.
    punpcklwd   xmm0, xmm6                  ;k0,k6 x4
    punpcklwd   xmm2, xmm5                  ;k2,k5 x4
    punpcklwd   xmm3, xmm4                  ;k3,k4 x4
    punpcklwd   xmm1, xmm7                  ;k1,k7 x4

    movdqa      k0k6, xmm0
    movdqa      k2k5, xmm2
    movdqa      k3k4, xmm3
    movdqa      k1k7, xmm1

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;0x40 in every dword
    movdqa      krd, xmm6

    ;Compute max and min values of a pixel
    mov         rdx, 0x00010001
    movsxd      rcx, DWORD PTR arg(6)       ;bps
    movq        xmm0, rdx
    movq        xmm1, rcx                   ;shift count for psllw
    pshufd      xmm0, xmm0, 0b              ;1 in every word
    movdqa      xmm2, xmm0
    psllw       xmm0, xmm1                  ;1 << bps
    psubw       xmm0, xmm2                  ;(1 << bps) - 1
    pxor        xmm1, xmm1
    movdqa      max, xmm0                   ;max value (for clamping)
    movdqa      min, xmm1                   ;min value (for clamping)

%endm

;-----------------------------------------------------------------------
; HIGH_APPLY_FILTER_4 avg
;
; Filters one row of four 16-bit samples and stores it at [rdi].
;
; In:      xmm0..xmm7 = source rows 0..7, four samples in the low qword
;          k0k6, k1k7, k2k5, k3k4, krd, max, min as written by
;          HIGH_GET_FILTERS_4
;          rdi = destination
; %1:      non-zero -> average the result with the samples already at
;          [rdi] (pavgw) before storing
; Clobbers: xmm0-xmm3
;-----------------------------------------------------------------------
%macro HIGH_APPLY_FILTER_4 1
    punpcklwd   xmm0, xmm6                  ;interleave rows 0 and 6
    punpcklwd   xmm1, xmm7                  ;rows 1 and 7
    punpcklwd   xmm2, xmm5                  ;rows 2 and 5
    punpcklwd   xmm3, xmm4                  ;rows 3 and 4

    pmaddwd     xmm0, k0k6                  ;multiply the filter factors
    pmaddwd     xmm1, k1k7
    pmaddwd     xmm2, k2k5
    pmaddwd     xmm3, k3k4

    paddd       xmm0, xmm1                  ;sum
    paddd       xmm0, xmm2
    paddd       xmm0, xmm3

    paddd       xmm0, krd                   ;rounding
    psrad       xmm0, 7                     ;shift
    packssdw    xmm0, xmm0                  ;pack to word

    ;clamp the values
    pminsw      xmm0, max
    pmaxsw      xmm0, min

%if %1
    movq        xmm1, [rdi]
    pavgw       xmm0, xmm1
%endif
    movq        [rdi], xmm0
%endm

;-----------------------------------------------------------------------
; HIGH_GET_FILTERS
;
; Prepares the constants used by HIGH_APPLY_FILTER_8 and loads the
; source and destination pointers.
;
; Reads:   arg(0) = src_ptr, arg(2) = output_ptr
;          arg(5) = pointer to eight 16-bit filter taps k0..k7; it is
;                   loaded with movdqa, so it must be 16-byte aligned
;          arg(6) = bps, read as a signed 32-bit value
; Writes:  rsi = src_ptr, rdi = output_ptr
;          k0k1, k6k7, k2k5, k3k4 = the named tap pair repeated 4 times
;          krd = 0x40 in each of the 4 dwords (rounding term for >> 7)
;          max = (1 << bps) - 1 in each of the 8 words
;          min = 0
; Clobbers: rcx, rdx, xmm0-xmm7
;-----------------------------------------------------------------------
%macro HIGH_GET_FILTERS 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    mov         rcx, 0x00000040

    ; k0..k3 are broadcast in the low four words, k4..k7 in the high
    ; four words of their registers.
    movdqa      xmm7, [rdx]                 ;load filters
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    pshufhw     xmm4, xmm7, 0b              ;k4
    pshufhw     xmm5, xmm7, 01010101b       ;k5
    pshufhw     xmm6, xmm7, 10101010b       ;k6
    pshufhw     xmm7, xmm7, 11111111b       ;k7

    punpcklqdq  xmm2, xmm2                  ;copy k2 into the high qword
    punpcklqdq  xmm3, xmm3                  ;copy k3 into the high qword
    punpcklwd   xmm0, xmm1                  ;k0,k1 x4
    punpckhwd   xmm6, xmm7                  ;k6,k7 x4
    punpckhwd   xmm2, xmm5                  ;k2,k5 x4
    punpckhwd   xmm3, xmm4                  ;k3,k4 x4

    movdqa      k0k1, xmm0                  ;store filter factors on stack
    movdqa      k6k7, xmm6
    movdqa      k2k5, xmm2
    movdqa      k3k4, xmm3

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;0x40 in every dword
    movdqa      krd, xmm6                   ;rounding

    ;Compute max and min values of a pixel
    mov         rdx, 0x00010001
    movsxd      rcx, DWORD PTR arg(6)       ;bps
    movq        xmm0, rdx
    movq        xmm1, rcx                   ;shift count for psllw
    pshufd      xmm0, xmm0, 0b              ;1 in every word
    movdqa      xmm2, xmm0
    psllw       xmm0, xmm1                  ;1 << bps
    psubw       xmm0, xmm2                  ;(1 << bps) - 1
    pxor        xmm1, xmm1
    movdqa      max, xmm0                   ;max value (for clamping)
    movdqa      min, xmm1                   ;min value (for clamping)
%endm

;-----------------------------------------------------------------------
; LOAD_VERT_8 offset
;
; Loads eight consecutive source rows (16 bytes each, unaligned) into
; xmm0..xmm7, row n into xmmn.
;
; In:      rsi = address of row 0
;          rax = row stride in bytes
;          rdx = 3 * rax
; %1:      byte offset added to every row address
; Out:     xmm0..xmm7 = rows 0..7
;          rsi advanced by one row (rsi += rax)
;-----------------------------------------------------------------------
%macro LOAD_VERT_8 1
    movdqu      xmm0, [rsi + %1]            ;0
    movdqu      xmm1, [rsi + rax + %1]      ;1
    movdqu      xmm6, [rsi + rdx * 2 + %1]  ;6
    lea         rsi,  [rsi + rax]           ;remaining rows are relative to row 1
    movdqu      xmm7, [rsi + rdx * 2 + %1]  ;7
    movdqu      xmm2, [rsi + rax + %1]      ;2
    movdqu      xmm3, [rsi + rax * 2 + %1]  ;3
    movdqu      xmm4, [rsi + rdx + %1]      ;4
    movdqu      xmm5, [rsi + rax * 4 + %1]  ;5
%endm

;-----------------------------------------------------------------------
; HIGH_APPLY_FILTER_8 avg, offset
;
; Filters one row of eight 16-bit samples and stores it at [rdi + %2].
;
; In:      xmm0..xmm7 = source rows 0..7, eight samples each
;          k0k1, k6k7, k2k5, k3k4, krd, max, min as written by
;          HIGH_GET_FILTERS
;          temp = 16-byte scratch slot
;          rdi = destination
; %1:      non-zero -> average the result with the samples already at
;          [rdi + %2] (pavgw) before storing
; %2:      byte offset into the destination row
; Clobbers: xmm0-xmm7, temp
;-----------------------------------------------------------------------
%macro HIGH_APPLY_FILTER_8 2
    ; Interleave row pairs (0,1), (6,7), (2,5), (3,4). The low halves
    ; stay in xmm0/xmm6/xmm2/xmm3, the high halves end up in
    ; xmm5/xmm1/xmm7/xmm4. temp is used because all eight registers are
    ; live.
    movdqu      temp, xmm4                  ;park row 4
    movdqa      xmm4, xmm0
    punpcklwd   xmm0, xmm1
    punpckhwd   xmm4, xmm1
    movdqa      xmm1, xmm6
    punpcklwd   xmm6, xmm7
    punpckhwd   xmm1, xmm7
    movdqa      xmm7, xmm2
    punpcklwd   xmm2, xmm5
    punpckhwd   xmm7, xmm5

    movdqu      xmm5, temp                  ;row 4 back
    movdqu      temp, xmm4                  ;park high half of rows 0/1
    movdqa      xmm4, xmm3
    punpcklwd   xmm3, xmm5
    punpckhwd   xmm4, xmm5
    movdqu      xmm5, temp                  ;high half of rows 0/1

    pmaddwd     xmm0, k0k1
    pmaddwd     xmm5, k0k1
    pmaddwd     xmm6, k6k7
    pmaddwd     xmm1, k6k7
    pmaddwd     xmm2, k2k5
    pmaddwd     xmm7, k2k5
    pmaddwd     xmm3, k3k4
    pmaddwd     xmm4, k3k4

    paddd       xmm0, xmm6                  ;low four samples
    paddd       xmm0, xmm2
    paddd       xmm0, xmm3
    paddd       xmm5, xmm1                  ;high four samples
    paddd       xmm5, xmm7
    paddd       xmm5, xmm4

    paddd       xmm0, krd                   ;rounding
    paddd       xmm5, krd
    psrad       xmm0, 7                     ;shift
    psrad       xmm5, 7
    packssdw    xmm0, xmm5                  ;pack back to word

    ;clamp the values
    pminsw      xmm0, max
    pmaxsw      xmm0, min

%if %1
    movdqu      xmm1, [rdi + %2]
    pavgw       xmm0, xmm1
%endif
    movdqu      [rdi + %2], xmm0
%endm

;void vp9_high_filter_block1d4_v8_sse2
;(
;    src_ptr,        arg(0): source samples, 16 bits each
;    src_pitch,      arg(1): source row stride in samples (read as a
;                            signed 32-bit value, doubled to bytes)
;    output_ptr,     arg(2): destination samples, 16 bits each
;    out_pitch,      arg(3): destination row stride in samples
;    output_height,  arg(4): rows to produce; must be >= 1 because the
;                            loop tests the count after the first row
;    filter,         arg(5): eight 16-bit taps, 16-byte aligned
;    bps             arg(6): results are clamped to [0, (1 << bps) - 1]
;)
; Writes 4 samples per row; output row r is filtered from source rows
; r..r+7 counted from src_ptr.
; NOTE(review): C parameter types are not visible in this file; confirm
; them against the C prototype.
global sym(vp9_high_filter_block1d4_v8_sse2) PRIVATE
sym(vp9_high_filter_block1d4_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 7
    %define k0k6 [rsp + 16 * 0]
    %define k2k5 [rsp + 16 * 1]
    %define k3k4 [rsp + 16 * 2]
    %define k1k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define max [rsp + 16 * 5]
    %define min [rsp + 16 * 6]

    HIGH_GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]
    lea         rdx, [rax + rax * 2]        ;3 * bytes per line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movq        xmm0, [rsi]                 ;load src: row 0
    movq        xmm1, [rsi + rax]           ;1
    movq        xmm6, [rsi + rdx * 2]       ;6
    lea         rsi,  [rsi + rax]           ;advance one source row
    movq        xmm7, [rsi + rdx * 2]       ;7
    movq        xmm2, [rsi + rax]           ;2
    movq        xmm3, [rsi + rax * 2]       ;3
    movq        xmm4, [rsi + rdx]           ;4
    movq        xmm5, [rsi + rax * 4]       ;5

    HIGH_APPLY_FILTER_4 0

    lea         rdi, [rdi + rbx]
    dec         rcx
    jnz         .loop

    add rsp, 16 * 7
    pop rsp                                 ;undo ALIGN_STACK
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp9_high_filter_block1d8_v8_sse2
;(
;    src_ptr,        arg(0): source samples, 16 bits each
;    src_pitch,      arg(1): source row stride in samples (read as a
;                            signed 32-bit value, doubled to bytes)
;    output_ptr,     arg(2): destination samples, 16 bits each
;    out_pitch,      arg(3): destination row stride in samples
;    output_height,  arg(4): rows to produce; must be >= 1 because the
;                            loop tests the count after the first row
;    filter,         arg(5): eight 16-bit taps, 16-byte aligned
;    bps             arg(6): results are clamped to [0, (1 << bps) - 1]
;)
; Writes 8 samples per row; output row r is filtered from source rows
; r..r+7 counted from src_ptr.
; NOTE(review): C parameter types are not visible in this file; confirm
; them against the C prototype.
global sym(vp9_high_filter_block1d8_v8_sse2) PRIVATE
sym(vp9_high_filter_block1d8_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 8
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS                        ;also loads rsi and rdi

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]
    lea         rdx, [rax + rax * 2]        ;3 * bytes per line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0                           ;advances rsi by one row
    HIGH_APPLY_FILTER_8 0, 0

    lea         rdi, [rdi + rbx]
    dec         rcx
    jnz         .loop

    add rsp, 16 * 8
    pop rsp                                 ;undo ALIGN_STACK
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp9_high_filter_block1d16_v8_sse2
;(
;    src_ptr,        arg(0): source samples, 16 bits each
;    src_pitch,      arg(1): source row stride in samples (read as a
;                            signed 32-bit value, doubled to bytes)
;    output_ptr,     arg(2): destination samples, 16 bits each
;    out_pitch,      arg(3): destination row stride in samples
;    output_height,  arg(4): rows to produce; must be >= 1 because the
;                            loop tests the count after the first row
;    filter,         arg(5): eight 16-bit taps, 16-byte aligned
;    bps             arg(6): results are clamped to [0, (1 << bps) - 1]
;)
; Writes 16 samples per row as two 8-sample halves (byte offsets 0 and
; 16); output row r is filtered from source rows r..r+7 counted from
; src_ptr.
; NOTE(review): C parameter types are not visible in this file; confirm
; them against the C prototype.
global sym(vp9_high_filter_block1d16_v8_sse2) PRIVATE
sym(vp9_high_filter_block1d16_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 8
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS                        ;also loads rsi and rdi

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]
    lea         rdx, [rax + rax * 2]        ;3 * bytes per line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0                           ;left half; advances rsi by one row
    HIGH_APPLY_FILTER_8 0, 0
    sub         rsi, rax                    ;back to row 0 for the right half

    LOAD_VERT_8 16                          ;right half; advances rsi by one row
    HIGH_APPLY_FILTER_8 0, 16
    add         rdi, rbx

    dec         rcx
    jnz         .loop

    add rsp, 16 * 8
    pop rsp                                 ;undo ALIGN_STACK
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

global sym(vp9_high_filter_block1d4_v8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d4_v8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 7
    %define k0k6 [rsp + 16 * 0]
    %define k2k5 [rsp + 16 * 1]
    %define k3k4 [rsp + 16 * 2]
    %define k1k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define max [rsp + 16 * 5]
    %define min [rsp + 16 * 6]

    HIGH_GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]
42287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdx, [rax + rax * 2] 42387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 42487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 42587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop: 42687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm0, [rsi] ;load src: row 0 42787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm1, [rsi + rax] ;1 42887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm6, [rsi + rdx * 2] ;6 42987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rsi, [rsi + rax] 43087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm7, [rsi + rdx * 2] ;7 43187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm2, [rsi + rax] ;2 43287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm3, [rsi + rax * 2] ;3 43387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm4, [rsi + rdx] ;4 43487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movq xmm5, [rsi + rax * 4] ;5 43587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 43687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_4 1 43787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 43887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdi, [rdi + rbx] 43987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org dec rcx 44087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org jnz .loop 44187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 44287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org add rsp, 16 * 7 44387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsp 44487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 
pop rbx 44587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; begin epilog 44687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rdi 44787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsi 44887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org RESTORE_XMM 44987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org UNSHADOW_ARGS 45087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rbp 45187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ret 45287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 45387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d8_v8_avg_sse2) PRIVATE 45487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d8_v8_avg_sse2): 45587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbp 45687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rbp, rsp 45787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SHADOW_ARGS_TO_STACK 7 45887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SAVE_XMM 7 45987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rsi 46087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rdi 46187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbx 46287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; end prolog 46387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 46487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ALIGN_STACK 16, rax 46587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org sub rsp, 16 * 8 46687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k0k1 [rsp + 16 * 0] 46787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k6k7 [rsp + 16 * 1] 
46887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k2k5 [rsp + 16 * 2] 46987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k3k4 [rsp + 16 * 3] 47087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define krd [rsp + 16 * 4] 47187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define temp [rsp + 16 * 5] 47287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define max [rsp + 16 * 6] 47387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define min [rsp + 16 * 7] 47487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 47587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_GET_FILTERS 47687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 47787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 47887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rbx, DWORD PTR arg(3) ;out_pitch 47987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rax, [rax + rax] ;bytes per line 48087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rbx, [rbx + rbx] 48187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdx, [rax + rax * 2] 48287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 48387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop: 48487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org LOAD_VERT_8 0 48587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_8 1, 0 48687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 48787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdi, [rdi + rbx] 48887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org dec rcx 48987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org jnz .loop 
49087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 49187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org add rsp, 16 * 8 49287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsp 49387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rbx 49487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; begin epilog 49587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rdi 49687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsi 49787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org RESTORE_XMM 49887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org UNSHADOW_ARGS 49987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rbp 50087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ret 50187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 50287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d16_v8_avg_sse2) PRIVATE 50387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d16_v8_avg_sse2): 50487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbp 50587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rbp, rsp 50687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SHADOW_ARGS_TO_STACK 7 50787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SAVE_XMM 7 50887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rsi 50987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rdi 51087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbx 51187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; end prolog 51287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 51387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ALIGN_STACK 16, rax 
51487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org sub rsp, 16 * 8 51587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k0k1 [rsp + 16 * 0] 51687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k6k7 [rsp + 16 * 1] 51787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k2k5 [rsp + 16 * 2] 51887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k3k4 [rsp + 16 * 3] 51987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define krd [rsp + 16 * 4] 52087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define temp [rsp + 16 * 5] 52187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define max [rsp + 16 * 6] 52287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define min [rsp + 16 * 7] 52387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 52487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_GET_FILTERS 52587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 52687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 52787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rbx, DWORD PTR arg(3) ;out_pitch 52887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rax, [rax + rax] ;bytes per line 52987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rbx, [rbx + rbx] 53087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdx, [rax + rax * 2] 53187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 53287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop: 53387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org LOAD_VERT_8 0 53487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_8 1, 0 
53587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org sub rsi, rax 53687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 53787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org LOAD_VERT_8 16 53887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_8 1, 16 53987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org add rdi, rbx 54087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 54187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org dec rcx 54287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org jnz .loop 54387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 54487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org add rsp, 16 * 8 54587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsp 54687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rbx 54787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; begin epilog 54887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rdi 54987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsi 55087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org RESTORE_XMM 55187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org UNSHADOW_ARGS 55287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rbp 55387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ret 55487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 55587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;void vp9_filter_block1d4_h8_sse2 55687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;( 55787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned char *src_ptr, 55887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int src_pixels_per_line, 
55987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned char *output_ptr, 56087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int output_pitch, 56187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int output_height, 56287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; short *filter 56387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;) 56487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d4_h8_sse2) PRIVATE 56587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d4_h8_sse2): 56687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbp 56787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rbp, rsp 56887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SHADOW_ARGS_TO_STACK 7 56987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SAVE_XMM 7 57087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rsi 57187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rdi 57287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; end prolog 57387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 57487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ALIGN_STACK 16, rax 57587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org sub rsp, 16 * 7 57687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k0k6 [rsp + 16 * 0] 57787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k2k5 [rsp + 16 * 1] 57887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k3k4 [rsp + 16 * 2] 57987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k1k7 [rsp + 16 * 3] 58087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define krd [rsp + 16 * 4] 
58187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define max [rsp + 16 * 5] 58287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define min [rsp + 16 * 6] 58387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 58487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_GET_FILTERS_4 58587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 58687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rsi, arg(0) ;src_ptr 58787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rdi, arg(2) ;output_ptr 58887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 58987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 59087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rdx, DWORD PTR arg(3) ;out_pitch 59187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rax, [rax + rax] ;bytes per line 59287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdx, [rdx + rdx] 59387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 59487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 59587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop: 59687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm0, [rsi - 6] ;load src 59787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm4, [rsi + 2] 59887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqa xmm1, xmm0 59987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqa xmm6, xmm4 60087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqa xmm7, xmm4 60187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqa xmm2, xmm0 60287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqa xmm3, xmm0 
60387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqa xmm5, xmm4 60487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 60587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org psrldq xmm1, 2 60687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org psrldq xmm6, 4 60787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org psrldq xmm7, 6 60887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org psrldq xmm2, 4 60987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org psrldq xmm3, 6 61087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org psrldq xmm5, 2 61187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 61287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_4 0 61387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 61487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rsi, [rsi + rax] 61587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdi, [rdi + rdx] 61687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org dec rcx 61787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org jnz .loop 61887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 61987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org add rsp, 16 * 7 62087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsp 62187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 62287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; begin epilog 62387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rdi 62487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsi 62587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org RESTORE_XMM 62687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org UNSHADOW_ARGS 62787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop 
rbp 62887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ret 62987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 63087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;void vp9_filter_block1d8_h8_sse2 63187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;( 63287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned char *src_ptr, 63387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int src_pixels_per_line, 63487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned char *output_ptr, 63587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int output_pitch, 63687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int output_height, 63787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; short *filter 63887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;) 63987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d8_h8_sse2) PRIVATE 64087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d8_h8_sse2): 64187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbp 64287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rbp, rsp 64387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SHADOW_ARGS_TO_STACK 7 64487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SAVE_XMM 7 64587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rsi 64687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rdi 64787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; end prolog 64887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 64987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ALIGN_STACK 16, rax 65087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org sub rsp, 
16 * 8 65187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k0k1 [rsp + 16 * 0] 65287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k6k7 [rsp + 16 * 1] 65387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k2k5 [rsp + 16 * 2] 65487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k3k4 [rsp + 16 * 3] 65587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define krd [rsp + 16 * 4] 65687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define temp [rsp + 16 * 5] 65787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define max [rsp + 16 * 6] 65887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define min [rsp + 16 * 7] 65987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 66087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_GET_FILTERS 66187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 66287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 66387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rdx, DWORD PTR arg(3) ;out_pitch 66487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rax, [rax + rax] ;bytes per line 66587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdx, [rdx + rdx] 66687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 66787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 66887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop: 66987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm0, [rsi - 6] ;load src 67087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm1, [rsi - 4] 67187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm2, [rsi - 2] 
67287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm3, [rsi] 67387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm4, [rsi + 2] 67487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm5, [rsi + 4] 67587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm6, [rsi + 6] 67687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm7, [rsi + 8] 67787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 67887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_8 0, 0 67987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 68087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rsi, [rsi + rax] 68187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdi, [rdi + rdx] 68287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org dec rcx 68387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org jnz .loop 68487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 68587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org add rsp, 16 * 8 68687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsp 68787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 68887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; begin epilog 68987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rdi 69087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rsi 69187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org RESTORE_XMM 69287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org UNSHADOW_ARGS 69387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org pop rbp 69487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ret 69587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 
69687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;void vp9_filter_block1d16_h8_sse2 69787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;( 69887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned char *src_ptr, 69987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int src_pixels_per_line, 70087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned char *output_ptr, 70187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int output_pitch, 70287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; unsigned int output_height, 70387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org; short *filter 70487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;) 70587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d16_h8_sse2) PRIVATE 70687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d16_h8_sse2): 70787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rbp 70887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org mov rbp, rsp 70987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SHADOW_ARGS_TO_STACK 7 71087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org SAVE_XMM 7 71187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rsi 71287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org push rdi 71387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ; end prolog 71487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 71587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org ALIGN_STACK 16, rax 71687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org sub rsp, 16 * 8 71787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k0k1 [rsp + 16 * 0] 
71887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k6k7 [rsp + 16 * 1] 71987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k2k5 [rsp + 16 * 2] 72087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define k3k4 [rsp + 16 * 3] 72187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define krd [rsp + 16 * 4] 72287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define temp [rsp + 16 * 5] 72387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define max [rsp + 16 * 6] 72487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org %define min [rsp + 16 * 7] 72587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 72687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_GET_FILTERS 72787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 72887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 72987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rdx, DWORD PTR arg(3) ;out_pitch 73087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rax, [rax + rax] ;bytes per line 73187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdx, [rdx + rdx] 73287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 73387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 73487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop: 73587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm0, [rsi - 6] ;load src 73687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm1, [rsi - 4] 73787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm2, [rsi - 2] 73887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm3, [rsi] 
73987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm4, [rsi + 2] 74087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm5, [rsi + 4] 74187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm6, [rsi + 6] 74287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm7, [rsi + 8] 74387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 74487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_8 0, 0 74587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 74687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm0, [rsi + 10] ;load src 74787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm1, [rsi + 12] 74887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm2, [rsi + 14] 74987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm3, [rsi + 16] 75087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm4, [rsi + 18] 75187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm5, [rsi + 20] 75287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm6, [rsi + 22] 75387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org movdqu xmm7, [rsi + 24] 75487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 75587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org HIGH_APPLY_FILTER_8 0, 16 75687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 75787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rsi, [rsi + rax] 75887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org lea rdi, [rdi + rdx] 75987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org dec rcx 76087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org jnz .loop 76187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 
; ---- tail of the preceding routine (its label, prologue and row loop lie
; ---- above this span); statements are reproduced unchanged ----
    add         rsp, 16 * 8                 ;release the 16 * 8 bytes of scratch
    pop         rsp                         ;presumably pairs with ALIGN_STACK - confirm in x86_abi_support.asm

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vp9_high_filter_block1d4_h8_avg_sse2
;
; Horizontal 8-tap filter over a block of 16-bit samples, one output row per
; loop iteration.  The "4" variant builds its eight shifted source vectors
; from two unaligned loads.
;
; Arguments (accessed through the arg(n) macro):
;   arg(0)  src_ptr
;   arg(1)  pixels_per_line   (source stride in samples; doubled to bytes)
;   arg(2)  output_ptr
;   arg(3)  out_pitch         (destination stride in samples; doubled to bytes)
;   arg(4)  output_height     (row count; the loop tests after the body, so
;                              it must be at least 1)
;   arg(5)  filter ptr        (read by HIGH_GET_FILTERS_4)
;   arg(6)  not referenced here; SHADOW_ARGS_TO_STACK 7 reserves it -
;           presumably consumed by the HIGH_* macros (TODO confirm)
;
; Stack scratch (7 * 16 bytes, 16-byte aligned): k0k6, k2k5, k3k4, k1k7, krd,
; max, min.  These names are consumed by the HIGH_* macros.
;
; NOTE(review): the first argument of HIGH_APPLY_FILTER_4 is 1 here; given the
; "_avg" suffix it presumably selects averaging with the existing destination
; - verify against the macro definition.
;------------------------------------------------------------------------------
global sym(vp9_high_filter_block1d4_h8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d4_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 7                 ;seven 16-byte scratch slots
    %define k0k6 [rsp + 16 * 0]
    %define k2k5 [rsp + 16 * 1]
    %define k3k4 [rsp + 16 * 2]
    %define k1k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define max [rsp + 16 * 5]
    %define min [rsp + 16 * 6]

    ; The macro uses rdx and rcx as scratch, so strides and the row count
    ; are loaded only after it has run.
    HIGH_GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rdx, [rdx + rdx]            ;bytes per output line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; Two loads, 8 bytes apart, cover the window starting 6 bytes
    ; (3 samples) left of the current position.
    movdqu      xmm0, [rsi - 6]             ;load src
    movdqu      xmm4, [rsi + 2]
    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm4
    movdqa      xmm7, xmm4
    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm5, xmm4

    ; After the byte shifts, xmmN starts at byte offset (2 * N - 6) from rsi.
    psrldq      xmm1, 2
    psrldq      xmm6, 4
    psrldq      xmm7, 6
    psrldq      xmm2, 4
    psrldq      xmm3, 6
    psrldq      xmm5, 2

    HIGH_APPLY_FILTER_4 1

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 7                 ;release scratch
    pop         rsp                         ;presumably pairs with ALIGN_STACK - confirm

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vp9_high_filter_block1d8_h8_avg_sse2
;
; Horizontal 8-tap filter over 16-bit samples; each row issues one
; HIGH_APPLY_FILTER_8 on eight unaligned 16-byte loads.
;
; Arguments: same layout as the 4-wide routine (arg(1) pixels_per_line,
; arg(3) out_pitch, arg(4) output_height; at least one row is processed).
;
; NOTE(review): rsi and rdi are never loaded in this routine; presumably
; HIGH_GET_FILTERS sets them from arg(0)/arg(2) - confirm in the macro.
;
; Stack scratch (8 * 16 bytes, 16-byte aligned): k0k1, k6k7, k2k5, k3k4, krd,
; temp, max, min.
;------------------------------------------------------------------------------
global sym(vp9_high_filter_block1d8_h8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d8_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 8                 ;eight 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rdx, [rdx + rdx]            ;bytes per output line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; xmmN is loaded from byte offset (2 * N - 6), i.e. eight windows each
    ; shifted by one 16-bit sample.
    movdqu      xmm0, [rsi - 6]             ;load src
    movdqu      xmm1, [rsi - 4]
    movdqu      xmm2, [rsi - 2]
    movdqu      xmm3, [rsi]
    movdqu      xmm4, [rsi + 2]
    movdqu      xmm5, [rsi + 4]
    movdqu      xmm6, [rsi + 6]
    movdqu      xmm7, [rsi + 8]

    HIGH_APPLY_FILTER_8 1, 0

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 8                 ;release scratch
    pop         rsp                         ;presumably pairs with ALIGN_STACK - confirm

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vp9_high_filter_block1d16_h8_avg_sse2
;
; Horizontal 8-tap filter over 16-bit samples; each row is handled as two
; HIGH_APPLY_FILTER_8 passes whose source windows are 16 bytes apart.
;
; Arguments: same layout as the 4-wide routine (arg(1) pixels_per_line,
; arg(3) out_pitch, arg(4) output_height; at least one row is processed).
;
; NOTE(review): rsi and rdi are never loaded in this routine; presumably
; HIGH_GET_FILTERS sets them from arg(0)/arg(2) - confirm in the macro.
; NOTE(review): the second macro argument (0, then 16) presumably is the byte
; offset of the pass within the destination row - confirm in the macro.
;
; Stack scratch (8 * 16 bytes, 16-byte aligned): k0k1, k6k7, k2k5, k3k4, krd,
; temp, max, min.
;------------------------------------------------------------------------------
global sym(vp9_high_filter_block1d16_h8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d16_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 8                 ;eight 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rdx, [rdx + rdx]            ;bytes per output line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; First pass: xmmN is loaded from byte offset (2 * N - 6).
    movdqu      xmm0, [rsi - 6]             ;load src
    movdqu      xmm1, [rsi - 4]
    movdqu      xmm2, [rsi - 2]
    movdqu      xmm3, [rsi]
    movdqu      xmm4, [rsi + 2]
    movdqu      xmm5, [rsi + 4]
    movdqu      xmm6, [rsi + 6]
    movdqu      xmm7, [rsi + 8]

    HIGH_APPLY_FILTER_8 1, 0

    ; Second pass: same window pattern, 16 bytes further along the row.
    movdqu      xmm0, [rsi + 10]            ;load src
    movdqu      xmm1, [rsi + 12]
    movdqu      xmm2, [rsi + 14]
    movdqu      xmm3, [rsi + 16]
    movdqu      xmm4, [rsi + 18]
    movdqu      xmm5, [rsi + 20]
    movdqu      xmm6, [rsi + 22]
    movdqu      xmm7, [rsi + 24]

    HIGH_APPLY_FILTER_8 1, 16

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 8                 ;release scratch
    pop         rsp                         ;presumably pairs with ALIGN_STACK - confirm

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret