;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;Note: tap3 and tap4 have to be applied and added after other taps to avoid
;overflow.
;-----------------------------------------------------------------------
; GET_FILTERS_4
;   Prepares the constants consumed by APPLY_FILTER_4.
;
; In:    arg(5) = pointer to eight 16-bit filter taps. The taps are read
;                 with movdqa, so the pointer must be 16-byte aligned.
; Out:   k0k1, k2k3, k5k4, k6k7 = tap pairs; the first tap named fills the
;                 low four words, the second the high four words
;        krd    = 64 in every word (rounding term for the later >> 7)
;        zero   = all-zero vector
;        (k0k1 ... zero are memory operands %define'd by the including
;        function.)
; Clobb: rcx, rdx, xmm0-xmm7
;-----------------------------------------------------------------------
%macro GET_FILTERS_4 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rcx, 0x0400040              ;64 in each of the two low words

    movdqa      xmm7, [rdx]                 ;load filters
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    psrldq      xmm7, 8                     ;bring taps 4..7 into the low half
    pshuflw     xmm4, xmm7, 0b              ;k4
    pshuflw     xmm5, xmm7, 01010101b       ;k5
    pshuflw     xmm6, xmm7, 10101010b       ;k6
    pshuflw     xmm7, xmm7, 11111111b       ;k7

    ;pair the broadcast taps: low qword = first tap, high qword = second tap
    punpcklqdq  xmm0, xmm1
    punpcklqdq  xmm2, xmm3
    punpcklqdq  xmm5, xmm4
    punpcklqdq  xmm6, xmm7

    movdqa      k0k1, xmm0
    movdqa      k2k3, xmm2
    movdqa      k5k4, xmm5
    movdqa      k6k7, xmm6

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;replicate the dword to all lanes
    movdqa      krd, xmm6

    pxor        xmm7, xmm7
    movdqa      zero, xmm7
%endm

;-----------------------------------------------------------------------
; APPLY_FILTER_4 avg
;   Filters one output row of 4 pixels and stores it at [rdi].
;
; Param: avg (parameter 1) - when non-zero, the result is averaged (pavgb)
;                 with the 4 bytes already at [rdi] before being stored.
; In:    xmm0-xmm7 = source rows 0-7, 4 pixels each in the low dword
;        rdi       = destination pointer
;        k0k1, k2k3, k5k4, k6k7, krd, zero as set up by GET_FILTERS_4
; Out:   4 bytes written to [rdi]
; Clobb: xmm0, xmm1, xmm2, xmm5, xmm6
;
; Products are accumulated with saturating adds in the order
; 0, 6, 1, 7, 2, 5, 3, 4 so that taps 3 and 4 are added last.
;-----------------------------------------------------------------------
%macro APPLY_FILTER_4 1
    punpckldq   xmm0, xmm1                  ;two row in one register
    punpckldq   xmm6, xmm7
    punpckldq   xmm2, xmm3
    punpckldq   xmm5, xmm4

    punpcklbw   xmm0, zero                  ;unpack to word
    punpcklbw   xmm6, zero
    punpcklbw   xmm2, zero
    punpcklbw   xmm5, zero

    pmullw      xmm0, k0k1                  ;multiply the filter factors
    pmullw      xmm6, k6k7
    pmullw      xmm2, k2k3
    pmullw      xmm5, k5k4

    paddsw      xmm0, xmm6                  ;sum
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8                     ;high-half products down to low half
    paddsw      xmm0, xmm1
    paddsw      xmm0, xmm2                  ;+ tap 2
    psrldq      xmm2, 8
    paddsw      xmm0, xmm5                  ;+ tap 5
    psrldq      xmm5, 8
    paddsw      xmm0, xmm2                  ;+ tap 3
    paddsw      xmm0, xmm5                  ;+ tap 4

    paddsw      xmm0, krd                   ;rounding
    psraw       xmm0, 7                     ;shift
    packuswb    xmm0, xmm0                  ;pack to byte

%if %1
    movd        xmm1, [rdi]
    pavgb       xmm0, xmm1
%endif
    movd        [rdi], xmm0
%endm

;-----------------------------------------------------------------------
; GET_FILTERS
;   Prepares the pointers and constants consumed by LOAD_VERT_8 and
;   APPLY_FILTER_8.
;
; In:    arg(0) = src_ptr, arg(2) = output_ptr
;        arg(5) = pointer to eight 16-bit filter taps. The taps are read
;                 with movdqa, so the pointer must be 16-byte aligned.
; Out:   rsi    = src_ptr
;        rdi    = output_ptr
;        k0..k7 = each tap replicated into all eight words
;        krd    = 64 in every word (rounding term for the later >> 7)
;        zero   = all-zero vector
;        (k0 ... zero are memory operands %define'd by the including
;        function.)
; Clobb: rcx, rdx, xmm0-xmm7
;-----------------------------------------------------------------------
%macro GET_FILTERS 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    mov         rcx, 0x0400040              ;64 in each of the two low words

    movdqa      xmm7, [rdx]                 ;load filters
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    pshufhw     xmm4, xmm7, 0b              ;k4
    pshufhw     xmm5, xmm7, 01010101b       ;k5
    pshufhw     xmm6, xmm7, 10101010b       ;k6
    pshufhw     xmm7, xmm7, 11111111b       ;k7

    ;k0..k3 were broadcast in the low half, k4..k7 in the high half;
    ;interleave each half with itself to fill all eight words
    punpcklwd   xmm0, xmm0
    punpcklwd   xmm1, xmm1
    punpcklwd   xmm2, xmm2
    punpcklwd   xmm3, xmm3
    punpckhwd   xmm4, xmm4
    punpckhwd   xmm5, xmm5
    punpckhwd   xmm6, xmm6
    punpckhwd   xmm7, xmm7

    movdqa      k0, xmm0                    ;store filter factors on stack
    movdqa      k1, xmm1
    movdqa      k2, xmm2
    movdqa      k3, xmm3
    movdqa      k4, xmm4
    movdqa      k5, xmm5
    movdqa      k6, xmm6
    movdqa      k7, xmm7

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0
    movdqa      krd, xmm6                   ;rounding

    pxor        xmm7, xmm7
    movdqa      zero, xmm7
%endm
;-----------------------------------------------------------------------
; LOAD_VERT_8 col
;   Loads 8 pixels from each of 8 consecutive source rows.
;
; Param: col (parameter 1) - byte offset added to every row address
; In:    rsi = address of row 0
;        rax = source pitch
;        rdx = 3 * source pitch
; Out:   xmm0-xmm7 = rows 0-7 (8 bytes each in the low qword)
;        rsi advanced by one row (rsi += rax)
;-----------------------------------------------------------------------
%macro LOAD_VERT_8 1
    movq        xmm0, [rsi + %1]            ;0
    movq        xmm1, [rsi + rax + %1]      ;1
    movq        xmm6, [rsi + rdx * 2 + %1]  ;6
    lea         rsi,  [rsi + rax]
    movq        xmm7, [rsi + rdx * 2 + %1]  ;7
    movq        xmm2, [rsi + rax + %1]      ;2
    movq        xmm3, [rsi + rax * 2 + %1]  ;3
    movq        xmm4, [rsi + rdx + %1]      ;4
    movq        xmm5, [rsi + rax * 4 + %1]  ;5
%endm

;-----------------------------------------------------------------------
; APPLY_FILTER_8 avg, off
;   Filters one output row of 8 pixels and stores it at [rdi + off].
;
; Param: avg (parameter 1) - when non-zero, the result is averaged (pavgb)
;                 with the 8 bytes already at the destination.
;        off (parameter 2) - byte offset added to rdi for the store
; In:    xmm0-xmm7 = source rows 0-7 (8 bytes each in the low qword)
;        k0..k7, krd, zero as set up by GET_FILTERS
; Out:   8 bytes written to [rdi + off]
; Clobb: xmm0-xmm7
;
; Products are accumulated with saturating adds in the order
; 0, 1, 6, 7, 2, 5, 3, 4 so that taps 3 and 4 are added last.
;-----------------------------------------------------------------------
%macro APPLY_FILTER_8 2
    punpcklbw   xmm0, zero
    punpcklbw   xmm1, zero
    punpcklbw   xmm6, zero
    punpcklbw   xmm7, zero
    punpcklbw   xmm2, zero
    punpcklbw   xmm5, zero
    punpcklbw   xmm3, zero
    punpcklbw   xmm4, zero

    pmullw      xmm0, k0
    pmullw      xmm1, k1
    pmullw      xmm6, k6
    pmullw      xmm7, k7
    pmullw      xmm2, k2
    pmullw      xmm5, k5
    pmullw      xmm3, k3
    pmullw      xmm4, k4

    paddsw      xmm0, xmm1
    paddsw      xmm0, xmm6
    paddsw      xmm0, xmm7
    paddsw      xmm0, xmm2
    paddsw      xmm0, xmm5
    paddsw      xmm0, xmm3
    paddsw      xmm0, xmm4

    paddsw      xmm0, krd                   ;rounding
    psraw       xmm0, 7                     ;shift
    packuswb    xmm0, xmm0                  ;pack back to byte
%if %1
    movq        xmm1, [rdi + %2]
    pavgb       xmm0, xmm1
%endif
    movq        [rdi + %2], xmm0
%endm

;void vp9_filter_block1d4_v8_sse2
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    short *filter
;)
;
; Vertical 8-tap filter over a 4-pixel-wide column. Each loop iteration
; reads 4 bytes from 8 consecutive source rows, writes 4 bytes to the
; destination, then steps the source by src_pitch and the destination by
; out_pitch. The loop is bottom-tested (dec/jnz), so output_height is
; expected to be non-zero.
global sym(vp9_filter_block1d4_v8_sse2) PRIVATE
sym(vp9_filter_block1d4_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 6                 ;six 16-byte slots for the constants
    %define k0k1 [rsp + 16 * 0]
    %define k2k3 [rsp + 16 * 1]
    %define k5k4 [rsp + 16 * 2]
    %define k6k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define zero [rsp + 16 * 5]

    GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rdx, [rax + rax * 2]        ;3 * pixels_per_line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movd        xmm0, [rsi]                 ;load src: row 0
    movd        xmm1, [rsi + rax]           ;1
    movd        xmm6, [rsi + rdx * 2]       ;6
    lea         rsi,  [rsi + rax]           ;advance one source row
    movd        xmm7, [rsi + rdx * 2]       ;7
    movd        xmm2, [rsi + rax]           ;2
    movd        xmm3, [rsi + rax * 2]       ;3
    movd        xmm4, [rsi + rdx]           ;4
    movd        xmm5, [rsi + rax * 4]       ;5

    APPLY_FILTER_4 0

    lea         rdi, [rdi + rbx]
    dec         rcx
    jnz         .loop

    add rsp, 16 * 6
    pop rsp                                 ;presumably the rsp saved by ALIGN_STACK
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp9_filter_block1d8_v8_sse2
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    short *filter
;)
;
; Vertical 8-tap filter over an 8-pixel-wide column. Each loop iteration
; filters one output row (LOAD_VERT_8 + APPLY_FILTER_8); LOAD_VERT_8 steps
; the source by one row and the loop steps the destination by out_pitch.
; The loop is bottom-tested (dec/jnz), so output_height is expected to be
; non-zero.
global sym(vp9_filter_block1d8_v8_sse2) PRIVATE
sym(vp9_filter_block1d8_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10                ;ten 16-byte slots for the constants
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rdx, [rax + rax * 2]        ;3 * pixels_per_line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0
    APPLY_FILTER_8 0, 0

    lea         rdi, [rdi + rbx]
    dec         rcx
    jnz         .loop

    add rsp, 16 * 10
    pop rsp                                 ;presumably the rsp saved by ALIGN_STACK
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp9_filter_block1d16_v8_sse2
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    short *filter
;)
;
; Vertical 8-tap filter over a 16-pixel-wide column, processed as two
; 8-pixel halves per output row. The loop is bottom-tested (dec/jnz), so
; output_height is expected to be non-zero.
global sym(vp9_filter_block1d16_v8_sse2) PRIVATE
sym(vp9_filter_block1d16_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10                ;ten 16-byte slots for the constants
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rdx, [rax + rax * 2]        ;3 * pixels_per_line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0
    APPLY_FILTER_8 0, 0
    sub         rsi, rax                    ;undo the row step made by LOAD_VERT_8

    LOAD_VERT_8 8                           ;right half; steps rsi one row again
    APPLY_FILTER_8 0, 8
    add         rdi, rbx

    dec         rcx
    jnz         .loop

    add rsp, 16 * 10
    pop rsp                                 ;presumably the rsp saved by ALIGN_STACK
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

global sym(vp9_filter_block1d4_v8_avg_sse2) PRIVATE
sym(vp9_filter_block1d4_v8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
380ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsp, 16 * 6 381ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k0k1 [rsp + 16 * 0] 382ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k2k3 [rsp + 16 * 1] 383ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k5k4 [rsp + 16 * 2] 384ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k6k7 [rsp + 16 * 3] 385ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define krd [rsp + 16 * 4] 386ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define zero [rsp + 16 * 5] 387ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 388ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org GET_FILTERS_4 389ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 390ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rsi, arg(0) ;src_ptr 391ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rdi, arg(2) ;output_ptr 392ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 393ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 394ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rbx, DWORD PTR arg(3) ;out_pitch 395ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdx, [rax + rax * 2] 396ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 397ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 398ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop: 399ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm0, [rsi] ;load src: row 0 400ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm1, [rsi + rax] ;1 401ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm6, 
[rsi + rdx * 2] ;6 402ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rsi, [rsi + rax] 403ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm7, [rsi + rdx * 2] ;7 404ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm2, [rsi + rax] ;2 405ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm3, [rsi + rax * 2] ;3 406ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm4, [rsi + rdx] ;4 407ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movd xmm5, [rsi + rax * 4] ;5 408ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 409ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_4 1 410ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 411ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdi, [rdi + rbx] 412ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org dec rcx 413ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org jnz .loop 414ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 415ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rsp, 16 * 6 416ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsp 417ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbx 418ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; begin epilog 419ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rdi 420ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsi 421ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org RESTORE_XMM 422ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org UNSHADOW_ARGS 423ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbp 424ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ret 425ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 
426ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d8_v8_avg_sse2) PRIVATE 427ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d8_v8_avg_sse2): 428ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbp 429ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rbp, rsp 430ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SHADOW_ARGS_TO_STACK 6 431ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SAVE_XMM 7 432ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rsi 433ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rdi 434ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbx 435ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; end prolog 436ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 437ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ALIGN_STACK 16, rax 438ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsp, 16 * 10 439ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k0 [rsp + 16 * 0] 440ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k1 [rsp + 16 * 1] 441ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k2 [rsp + 16 * 2] 442ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k3 [rsp + 16 * 3] 443ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k4 [rsp + 16 * 4] 444ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k5 [rsp + 16 * 5] 445ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k6 [rsp + 16 * 6] 446ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k7 [rsp + 16 * 7] 447ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define krd [rsp + 16 * 8] 
448ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define zero [rsp + 16 * 9] 449ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 450ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org GET_FILTERS 451ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 452ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 453ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rbx, DWORD PTR arg(3) ;out_pitch 454ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdx, [rax + rax * 2] 455ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 456ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop: 457ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org LOAD_VERT_8 0 458ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_8 1, 0 459ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 460ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdi, [rdi + rbx] 461ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org dec rcx 462ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org jnz .loop 463ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 464ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rsp, 16 * 10 465ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsp 466ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbx 467ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; begin epilog 468ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rdi 469ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsi 470ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org RESTORE_XMM 
471ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org UNSHADOW_ARGS 472ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbp 473ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ret 474ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 475ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d16_v8_avg_sse2) PRIVATE 476ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d16_v8_avg_sse2): 477ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbp 478ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rbp, rsp 479ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SHADOW_ARGS_TO_STACK 6 480ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SAVE_XMM 7 481ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rsi 482ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rdi 483ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbx 484ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; end prolog 485ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 486ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ALIGN_STACK 16, rax 487ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsp, 16 * 10 488ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k0 [rsp + 16 * 0] 489ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k1 [rsp + 16 * 1] 490ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k2 [rsp + 16 * 2] 491ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k3 [rsp + 16 * 3] 492ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k4 [rsp + 16 * 4] 493ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k5 [rsp + 16 * 5] 
494ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k6 [rsp + 16 * 6] 495ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k7 [rsp + 16 * 7] 496ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define krd [rsp + 16 * 8] 497ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define zero [rsp + 16 * 9] 498ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 499ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org GET_FILTERS 500ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 501ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 502ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rbx, DWORD PTR arg(3) ;out_pitch 503ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdx, [rax + rax * 2] 504ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 505ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop: 506ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org LOAD_VERT_8 0 507ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_8 1, 0 508ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsi, rax 509ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 510ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org LOAD_VERT_8 8 511ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_8 1, 8 512ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rdi, rbx 513ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 514ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org dec rcx 515ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org jnz .loop 516ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 
517ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rsp, 16 * 10 518ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsp 519ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbx 520ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; begin epilog 521ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rdi 522ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsi 523ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org RESTORE_XMM 524ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org UNSHADOW_ARGS 525ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbp 526ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ret 527ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 528ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;void vp9_filter_block1d4_h8_sse2 529ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;( 530ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned char *src_ptr, 531ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int src_pixels_per_line, 532ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned char *output_ptr, 533ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int output_pitch, 534ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int output_height, 535ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; short *filter 536ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;) 537ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d4_h8_sse2) PRIVATE 538ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d4_h8_sse2): 539ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbp 
540ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rbp, rsp 541ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SHADOW_ARGS_TO_STACK 6 542ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SAVE_XMM 7 543ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rsi 544ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rdi 545ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; end prolog 546ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 547ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ALIGN_STACK 16, rax 548ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsp, 16 * 6 549ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k0k1 [rsp + 16 * 0] 550ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k2k3 [rsp + 16 * 1] 551ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k5k4 [rsp + 16 * 2] 552ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k6k7 [rsp + 16 * 3] 553ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define krd [rsp + 16 * 4] 554ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define zero [rsp + 16 * 5] 555ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 556ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org GET_FILTERS_4 557ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 558ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rsi, arg(0) ;src_ptr 559ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rdi, arg(2) ;output_ptr 560ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 561ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 562ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rdx, DWORD 
PTR arg(3) ;out_pitch 563ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 564ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 565ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop: 566ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqu xmm0, [rsi - 3] ;load src 567ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 568ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm1, xmm0 569ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm6, xmm0 570ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm7, xmm0 571ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm2, xmm0 572ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm3, xmm0 573ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm5, xmm0 574ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm4, xmm0 575ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 576ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm1, 1 577ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm6, 6 578ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm7, 7 579ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm2, 2 580ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm3, 3 581ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm5, 5 582ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm4, 4 583ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 584ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_4 0 585ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 
586ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rsi, [rsi + rax] 587ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdi, [rdi + rdx] 588ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org dec rcx 589ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org jnz .loop 590ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 591ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rsp, 16 * 6 592ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsp 593ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 594ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; begin epilog 595ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rdi 596ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsi 597ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org RESTORE_XMM 598ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org UNSHADOW_ARGS 599ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbp 600ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ret 601ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 602ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;void vp9_filter_block1d8_h8_sse2 603ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;( 604ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned char *src_ptr, 605ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int src_pixels_per_line, 606ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned char *output_ptr, 607ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int output_pitch, 608ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int output_height, 609ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; short 
*filter 610ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;) 611ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d8_h8_sse2) PRIVATE 612ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d8_h8_sse2): 613ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbp 614ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rbp, rsp 615ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SHADOW_ARGS_TO_STACK 6 616ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SAVE_XMM 7 617ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rsi 618ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rdi 619ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; end prolog 620ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 621ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ALIGN_STACK 16, rax 622ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsp, 16 * 10 623ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k0 [rsp + 16 * 0] 624ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k1 [rsp + 16 * 1] 625ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k2 [rsp + 16 * 2] 626ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k3 [rsp + 16 * 3] 627ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k4 [rsp + 16 * 4] 628ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k5 [rsp + 16 * 5] 629ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k6 [rsp + 16 * 6] 630ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k7 [rsp + 16 * 7] 631ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define krd [rsp + 16 * 8] 
632ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define zero [rsp + 16 * 9] 633ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 634ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org GET_FILTERS 635ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 636ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 637ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rdx, DWORD PTR arg(3) ;out_pitch 638ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 639ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 640ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop: 641ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqu xmm0, [rsi - 3] ;load src 642ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 643ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm1, xmm0 644ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm6, xmm0 645ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm7, xmm0 646ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm2, xmm0 647ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm5, xmm0 648ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm3, xmm0 649ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm4, xmm0 650ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 651ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm1, 1 652ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm6, 6 653ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm7, 7 654ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm2, 2 
655ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm5, 5 656ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm3, 3 657ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm4, 4 658ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 659ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_8 0, 0 660ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 661ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rsi, [rsi + rax] 662ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdi, [rdi + rdx] 663ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org dec rcx 664ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org jnz .loop 665ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 666ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rsp, 16 * 10 667ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsp 668ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 669ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; begin epilog 670ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rdi 671ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsi 672ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org RESTORE_XMM 673ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org UNSHADOW_ARGS 674ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbp 675ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ret 676ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 677ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;void vp9_filter_block1d16_h8_sse2 678ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;( 679ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned char 
*src_ptr, 680ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int src_pixels_per_line, 681ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned char *output_ptr, 682ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int output_pitch, 683ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; unsigned int output_height, 684ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org; short *filter 685ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;) 686ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d16_h8_sse2) PRIVATE 687ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d16_h8_sse2): 688ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rbp 689ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org mov rbp, rsp 690ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SHADOW_ARGS_TO_STACK 6 691ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org SAVE_XMM 7 692ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rsi 693ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org push rdi 694ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; end prolog 695ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 696ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ALIGN_STACK 16, rax 697ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org sub rsp, 16 * 10 698ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k0 [rsp + 16 * 0] 699ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k1 [rsp + 16 * 1] 700ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k2 [rsp + 16 * 2] 701ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k3 [rsp + 16 * 3] 
702ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k4 [rsp + 16 * 4] 703ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k5 [rsp + 16 * 5] 704ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k6 [rsp + 16 * 6] 705ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define k7 [rsp + 16 * 7] 706ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define krd [rsp + 16 * 8] 707ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org %define zero [rsp + 16 * 9] 708ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 709ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org GET_FILTERS 710ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 711ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rax, DWORD PTR arg(1) ;pixels_per_line 712ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rdx, DWORD PTR arg(3) ;out_pitch 713ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movsxd rcx, DWORD PTR arg(4) ;output_height 714ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 715ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop: 716ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqu xmm0, [rsi - 3] ;load src 717ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 718ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm1, xmm0 719ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm6, xmm0 720ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm7, xmm0 721ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm2, xmm0 722ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm5, xmm0 723ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm3, xmm0 
724ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm4, xmm0 725ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 726ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm1, 1 727ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm6, 6 728ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm7, 7 729ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm2, 2 730ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm5, 5 731ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm3, 3 732ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm4, 4 733ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 734ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_8 0, 0 735ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 736ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqu xmm0, [rsi + 5] ;load src 737ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 738ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm1, xmm0 739ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm6, xmm0 740ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm7, xmm0 741ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm2, xmm0 742ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm5, xmm0 743ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm3, xmm0 744ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org movdqa xmm4, xmm0 745ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 746ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm1, 1 747ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm6, 6 
748ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm7, 7 749ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm2, 2 750ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm5, 5 751ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm3, 3 752ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org psrldq xmm4, 4 753ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 754ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org APPLY_FILTER_8 0, 8 755ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 756ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rsi, [rsi + rax] 757ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org lea rdi, [rdi + rdx] 758ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org dec rcx 759ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org jnz .loop 760ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 761ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org add rsp, 16 * 10 762ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsp 763ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 764ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ; begin epilog 765ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rdi 766ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rsi 767ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org RESTORE_XMM 768ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org UNSHADOW_ARGS 769ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org pop rbp 770ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org ret 771ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 772ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal 
sym(vp9_filter_block1d4_h8_avg_sse2) PRIVATE
;-----------------------------------------------------------------------
; vp9_filter_block1d4_h8_avg_sse2
;
; Row-by-row horizontal filter pass; per the name, the 4-pixel-wide,
; 8-tap "avg" variant. Six arguments are shadowed to the stack:
;   arg(0) src_ptr           arg(1) pixels_per_line (source stride)
;   arg(2) output_ptr        arg(3) out_pitch (destination stride)
;   arg(4) output_height     arg(5) filter ptr (read by GET_FILTERS_4
;                                   with movdqa, so 16-byte aligned)
; Loop registers: rsi = source row, rdi = destination row,
;                 rax = source stride, rdx = destination stride,
;                 rcx = rows remaining.
; The row counter is tested after the loop body, so output_height must
; be at least 1.
; NOTE(review): APPLY_FILTER_4 is defined elsewhere in this file; its
; argument (1 here) presumably selects averaging with the pixels already
; in the destination -- confirm against the macro.
;-----------------------------------------------------------------------
sym(vp9_filter_block1d4_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Six 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 6
    %define k0k1 [rsp + 16 * 0]
    %define k2k3 [rsp + 16 * 1]
    %define k5k4 [rsp + 16 * 2]
    %define k6k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define zero [rsp + 16 * 5]

    GET_FILTERS_4

    ; GET_FILTERS_4 uses rdx and rcx as scratch, so the loop registers
    ; are loaded only after it.
    mov         rsi, arg(0)                 ;src_ptr
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    mov         rdi, arg(2)                 ;output_ptr
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movdqu      xmm0, [rsi - 3]             ;16 bytes starting at src[-3]

    ; xmmN = the same 16 bytes shifted right by N bytes, i.e. the
    ; window that starts at src[-3 + N].
    movdqa      xmm1, xmm0
    psrldq      xmm1, 1                     ;starts at src[-2]
    movdqa      xmm2, xmm0
    psrldq      xmm2, 2                     ;starts at src[-1]
    movdqa      xmm3, xmm0
    psrldq      xmm3, 3                     ;starts at src[0]
    movdqa      xmm4, xmm0
    psrldq      xmm4, 4                     ;starts at src[1]
    movdqa      xmm5, xmm0
    psrldq      xmm5, 5                     ;starts at src[2]
    movdqa      xmm6, xmm0
    psrldq      xmm6, 6                     ;starts at src[3]
    movdqa      xmm7, xmm0
    psrldq      xmm7, 7                     ;starts at src[4]

    APPLY_FILTER_4 1

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx
    jnz         .loop

    ; Release the scratch slots. NOTE(review): pop rsp presumably
    ; restores the rsp value saved by ALIGN_STACK -- confirm.
    add         rsp, 16 * 6
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; vp9_filter_block1d8_h8_avg_sse2
;
; Same row loop as the 4-wide routine above, but with one 16-byte stack
; slot per filter tap (k0..k7) and APPLY_FILTER_8 as the kernel.
; Arguments read here:
;   arg(1) pixels_per_line   arg(3) out_pitch   arg(4) output_height
; NOTE(review): rsi and rdi are used as the source and destination row
; pointers but are not loaded in this routine; presumably GET_FILTERS
; (defined elsewhere) loads them from arg(0)/arg(2) -- confirm.
; NOTE(review): APPLY_FILTER_8's arguments (1, 0) presumably mean
; "average with destination" and "byte offset 0 in the row" -- confirm.
; output_height must be at least 1 (counter is tested after the body).
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d8_h8_avg_sse2) PRIVATE
sym(vp9_filter_block1d8_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Ten 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movdqu      xmm0, [rsi - 3]             ;16 bytes starting at src[-3]

    ; xmmN = window starting at src[-3 + N].
    movdqa      xmm1, xmm0
    psrldq      xmm1, 1
    movdqa      xmm2, xmm0
    psrldq      xmm2, 2
    movdqa      xmm3, xmm0
    psrldq      xmm3, 3
    movdqa      xmm4, xmm0
    psrldq      xmm4, 4
    movdqa      xmm5, xmm0
    psrldq      xmm5, 5
    movdqa      xmm6, xmm0
    psrldq      xmm6, 6
    movdqa      xmm7, xmm0
    psrldq      xmm7, 7

    APPLY_FILTER_8 1, 0

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx
    jnz         .loop

    ; Release the scratch slots. NOTE(review): pop rsp presumably
    ; restores the rsp value saved by ALIGN_STACK -- confirm.
    add         rsp, 16 * 10
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; vp9_filter_block1d16_h8_avg_sse2
;
; 16-wide version of the routine above: each row is processed as two
; APPLY_FILTER_8 invocations, the second one fed from source bytes
; 8 further along ([rsi + 5] = [rsi - 3 + 8]) and invoked with 8 as its
; second argument.
; Arguments read here:
;   arg(1) pixels_per_line   arg(3) out_pitch   arg(4) output_height
; NOTE(review): rsi and rdi are not loaded in this routine; presumably
; GET_FILTERS loads them from arg(0)/arg(2) -- confirm.
; NOTE(review): APPLY_FILTER_8's second argument is presumably the byte
; offset of the half-row within the destination row -- confirm.
; output_height must be at least 1 (counter is tested after the body).
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d16_h8_avg_sse2) PRIVATE
sym(vp9_filter_block1d16_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Ten 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; ---- first half of the row ----
    movdqu      xmm0, [rsi - 3]             ;16 bytes starting at src[-3]

    ; xmmN = window starting at src[-3 + N].
    movdqa      xmm1, xmm0
    psrldq      xmm1, 1
    movdqa      xmm2, xmm0
    psrldq      xmm2, 2
    movdqa      xmm3, xmm0
    psrldq      xmm3, 3
    movdqa      xmm4, xmm0
    psrldq      xmm4, 4
    movdqa      xmm5, xmm0
    psrldq      xmm5, 5
    movdqa      xmm6, xmm0
    psrldq      xmm6, 6
    movdqa      xmm7, xmm0
    psrldq      xmm7, 7

    APPLY_FILTER_8 1, 0

    ; ---- second half of the row ----
    movdqu      xmm0, [rsi + 5]             ;16 bytes starting at src[5]

    ; xmmN = window starting at src[5 + N].
    movdqa      xmm1, xmm0
    psrldq      xmm1, 1
    movdqa      xmm2, xmm0
    psrldq      xmm2, 2
    movdqa      xmm3, xmm0
    psrldq      xmm3, 3
    movdqa      xmm4, xmm0
    psrldq      xmm4, 4
    movdqa      xmm5, xmm0
    psrldq      xmm5, 5
    movdqa      xmm6, xmm0
    psrldq      xmm6, 6
    movdqa      xmm7, xmm0
    psrldq      xmm7, 7

    APPLY_FILTER_8 1, 8

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx
    jnz         .loop

    ; Release the scratch slots. NOTE(review): pop rsp presumably
    ; restores the rsp value saved by ALIGN_STACK -- confirm.
    add         rsp, 16 * 10
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret