;
;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; NOTE(review): this file relies on helpers that are not defined in it
; (arg(), sym(), PRIVATE, SHADOW_ARGS_TO_STACK, SAVE_XMM, RESTORE_XMM,
; ALIGN_STACK, UNSHADOW_ARGS); presumably they all come from this include.
%include "vpx_ports/x86_abi_support.asm"

;Note: tap3 and tap4 have to be applied and added after the other taps to
;avoid overflow.

;-----------------------------------------------------------------------
; HIGH_GET_FILTERS_4
;
; Prepares the constants used by HIGH_APPLY_FILTER_4 (4-pixel-wide path).
;
; In:    arg(5) = pointer to 8 16-bit filter taps k0..k7; loaded with
;                 movdqa, so it must be 16-byte aligned
;        arg(6) = bps (bits per sample), read as a 32-bit value
; Out:   k0k6, k2k5, k3k4, k1k7 = interleaved tap pairs (ka,kb,ka,kb,...)
;        krd = 0x40 in every dword (rounding term for the >> 7 shift)
;        max = (1 << bps) - 1 in every word, min = 0 in every word
; Clobb: rcx, rdx, xmm0-xmm7
;
; k0k6, k2k5, k3k4, k1k7, krd, max and min must be %define'd by the caller
; as 16-byte aligned memory operands before this macro is expanded (they
; are written with movdqa).
;-----------------------------------------------------------------------
%macro HIGH_GET_FILTERS_4 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rcx, 0x00000040             ;rounding constant: 64

    movdqa      xmm7, [rdx]                 ;load filters
    ;broadcast each tap of the low half into the 4 low words of a register
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    psrldq      xmm7, 8                     ;bring k4..k7 down to the low half
    pshuflw     xmm4, xmm7, 0b              ;k4
    pshuflw     xmm5, xmm7, 01010101b       ;k5
    pshuflw     xmm6, xmm7, 10101010b       ;k6
    pshuflw     xmm7, xmm7, 11111111b       ;k7

    ;interleave the taps in the same pairing HIGH_APPLY_FILTER_4 uses for
    ;the source rows, so that one pmaddwd handles two taps at once
    punpcklwd   xmm0, xmm6
    punpcklwd   xmm2, xmm5
    punpcklwd   xmm3, xmm4
    punpcklwd   xmm1, xmm7

    movdqa      k0k6, xmm0
    movdqa      k2k5, xmm2
    movdqa      k3k4, xmm3
    movdqa      k1k7, xmm1

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;64 in all four dwords
    movdqa      krd, xmm6

    ;Compute max and min values of a pixel
    mov         rdx, 0x00010001             ;two words of 1
    movsxd      rcx, DWORD PTR arg(6)      ;bps
    movq        xmm0, rdx
    movq        xmm1, rcx                   ;shift count for psllw
    pshufd      xmm0, xmm0, 0b              ;1 in every word
    movdqa      xmm2, xmm0
    psllw       xmm0, xmm1                  ;1 << bps
    psubw       xmm0, xmm2                  ;(1 << bps) - 1
    pxor        xmm1, xmm1
    movdqa      max, xmm0                  ;max value (for clamping)
    movdqa      min, xmm1                  ;min value (for clamping)

%endm

;-----------------------------------------------------------------------
; HIGH_APPLY_FILTER_4 avg
;
; Applies the 8-tap filter to 4 pixels (16-bit words) and stores 8 bytes
; at [rdi].
;
; %1:    0 = store the filtered result,
;        nonzero = pavgw the result with the 4 words already at [rdi]
; In:    xmm0..xmm7 = source rows 0..7, 4 pixels each in the low 4 words
;        rdi        = destination pointer
;        k0k6, k1k7, k2k5, k3k4, krd, max, min as set up by
;        HIGH_GET_FILTERS_4
; Out:   [rdi] = clamp((sum(row_i * k_i) + 64) >> 7, min, max), 4 words
; Clobb: xmm0-xmm3
;-----------------------------------------------------------------------
%macro HIGH_APPLY_FILTER_4 1
    punpcklwd   xmm0, xmm6                  ;two row in one register
    punpcklwd   xmm1, xmm7
    punpcklwd   xmm2, xmm5
    punpcklwd   xmm3, xmm4

    ;each dword becomes row_a * k_a + row_b * k_b for one pixel
    pmaddwd     xmm0, k0k6                  ;multiply the filter factors
    pmaddwd     xmm1, k1k7
    pmaddwd     xmm2, k2k5
    pmaddwd     xmm3, k3k4

    paddd       xmm0, xmm1                  ;sum
    paddd       xmm0, xmm2
    paddd       xmm0, xmm3

    paddd       xmm0, krd                   ;rounding
    psrad       xmm0, 7                     ;shift
    packssdw    xmm0, xmm0                  ;pack to word

    ;clamp the values
    pminsw      xmm0, max
    pmaxsw      xmm0, min

%if %1
    movq        xmm1, [rdi]                 ;current destination pixels
    pavgw       xmm0, xmm1
%endif
    movq        [rdi], xmm0
%endm

;-----------------------------------------------------------------------
; HIGH_GET_FILTERS
;
; Prepares the constants used by HIGH_APPLY_FILTER_8 and loads the source
; and destination pointers.
;
; In:    arg(0) = src_ptr, arg(2) = output_ptr
;        arg(5) = pointer to 8 16-bit filter taps k0..k7; loaded with
;                 movdqa, so it must be 16-byte aligned
;        arg(6) = bps (bits per sample), read as a 32-bit value
; Out:   rsi = src_ptr, rdi = output_ptr
;        k0k1, k6k7, k2k5, k3k4 = interleaved tap pairs (ka,kb,ka,kb,...)
;        krd = 0x40 in every dword (rounding term for the >> 7 shift)
;        max = (1 << bps) - 1 in every word, min = 0 in every word
; Clobb: rcx, rdx, xmm0-xmm7
;
; k0k1, k6k7, k2k5, k3k4, krd, max and min must be %define'd by the caller
; as 16-byte aligned memory operands before this macro is expanded (they
; are written with movdqa).
;-----------------------------------------------------------------------
%macro HIGH_GET_FILTERS 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    mov         rcx, 0x00000040             ;rounding constant: 64

    movdqa      xmm7, [rdx]                 ;load filters
    ;pshuflw broadcasts within the low 4 words, pshufhw within the high 4
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    pshufhw     xmm4, xmm7, 0b              ;k4
    pshufhw     xmm5, xmm7, 01010101b       ;k5
    pshufhw     xmm6, xmm7, 10101010b       ;k6
    pshufhw     xmm7, xmm7, 11111111b       ;k7
    ;copy k2 / k3 into the high half so they can be paired with k5 / k4,
    ;which live in the high half of their registers
    punpcklqdq  xmm2, xmm2
    punpcklqdq  xmm3, xmm3
    punpcklwd   xmm0, xmm1
    punpckhwd   xmm6, xmm7
    punpckhwd   xmm2, xmm5
    punpckhwd   xmm3, xmm4

    movdqa      k0k1, xmm0                  ;store filter factors on stack
    movdqa      k6k7, xmm6
    movdqa      k2k5, xmm2
    movdqa      k3k4, xmm3

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;64 in all four dwords
    movdqa      krd, xmm6                   ;rounding

    ;Compute max and min values of a pixel
    mov         rdx, 0x00010001             ;two words of 1
    movsxd      rcx, DWORD PTR arg(6)       ;bps
    movq        xmm0, rdx
    movq        xmm1, rcx                   ;shift count for psllw
    pshufd      xmm0, xmm0, 0b              ;1 in every word
    movdqa      xmm2, xmm0
    psllw       xmm0, xmm1                  ;1 << bps
    psubw       xmm0, xmm2                  ;(1 << bps) - 1
    pxor        xmm1, xmm1
    movdqa      max, xmm0                  ;max value (for clamping)
    movdqa      min, xmm1                  ;min value (for clamping)
%endm

;-----------------------------------------------------------------------
; LOAD_VERT_8 offset
;
; Loads 16 bytes from each of 8 consecutive source rows (unaligned loads).
;
; %1:    byte offset added to every row address
; In:    rsi = address of row 0
;        rax = row stride in bytes
;        rdx = 3 * rax
; Out:   xmm0..xmm7 = rows 0..7
;        rsi advanced by one row (rsi += rax)
;-----------------------------------------------------------------------
%macro LOAD_VERT_8 1
    movdqu      xmm0, [rsi + %1]            ;0
    movdqu      xmm1, [rsi + rax + %1]      ;1
    movdqu      xmm6, [rsi + rdx * 2 + %1]  ;6
    lea         rsi,  [rsi + rax]           ;step to the next row; the
                                            ;offsets below are relative to it
    movdqu      xmm7, [rsi + rdx * 2 + %1]  ;7
    movdqu      xmm2, [rsi + rax + %1]      ;2
    movdqu      xmm3, [rsi + rax * 2 + %1]  ;3
    movdqu      xmm4, [rsi + rdx + %1]      ;4
    movdqu      xmm5, [rsi + rax * 4 + %1]  ;5
%endm

;-----------------------------------------------------------------------
; HIGH_APPLY_FILTER_8 avg, offset
;
; Applies the 8-tap filter to 8 pixels (16-bit words) and stores 16 bytes
; at [rdi + offset].
;
; %1:    0 = store the filtered result,
;        nonzero = pavgw the result with the 8 words already at
;        [rdi + offset]
; %2:    byte offset into the destination row
; In:    xmm0..xmm7 = source rows 0..7, 8 pixels each
;        rdi        = destination pointer
;        k0k1, k6k7, k2k5, k3k4, krd, max, min as set up by HIGH_GET_FILTERS
;        temp       = caller-defined 16-byte scratch memory operand
; Out:   [rdi + %2] = clamp((sum(row_i * k_i) + 64) >> 7, min, max), 8 words
; Clobb: xmm0-xmm7, temp
;-----------------------------------------------------------------------
%macro HIGH_APPLY_FILTER_8 2
    ;Interleave row pairs (0,1) (6,7) (2,5) (3,4). Each pair yields a low
    ;half (pixels 0-3) and a high half (pixels 4-7); temp is used to free a
    ;register while all eight halves are built.
    movdqu      temp, xmm4                  ;park row 4
    movdqa      xmm4, xmm0
    punpcklwd   xmm0, xmm1                  ;rows 0,1 low
    punpckhwd   xmm4, xmm1                  ;rows 0,1 high
    movdqa      xmm1, xmm6
    punpcklwd   xmm6, xmm7                  ;rows 6,7 low
    punpckhwd   xmm1, xmm7                  ;rows 6,7 high
    movdqa      xmm7, xmm2
    punpcklwd   xmm2, xmm5                  ;rows 2,5 low
    punpckhwd   xmm7, xmm5                  ;rows 2,5 high

    movdqu      xmm5, temp                  ;row 4 back
    movdqu      temp, xmm4                  ;park rows 0,1 high
    movdqa      xmm4, xmm3
    punpcklwd   xmm3, xmm5                  ;rows 3,4 low
    punpckhwd   xmm4, xmm5                  ;rows 3,4 high
    movdqu      xmm5, temp                  ;rows 0,1 high

    ;each dword becomes row_a * k_a + row_b * k_b for one pixel
    pmaddwd     xmm0, k0k1
    pmaddwd     xmm5, k0k1
    pmaddwd     xmm6, k6k7
    pmaddwd     xmm1, k6k7
    pmaddwd     xmm2, k2k5
    pmaddwd     xmm7, k2k5
    pmaddwd     xmm3, k3k4
    pmaddwd     xmm4, k3k4

    ;xmm0 accumulates pixels 0-3, xmm5 accumulates pixels 4-7
    paddd       xmm0, xmm6
    paddd       xmm0, xmm2
    paddd       xmm0, xmm3
    paddd       xmm5, xmm1
    paddd       xmm5, xmm7
    paddd       xmm5, xmm4

    paddd       xmm0, krd                   ;rounding
    paddd       xmm5, krd
    psrad       xmm0, 7                     ;shift
    psrad       xmm5, 7
    packssdw    xmm0, xmm5                  ;pack back to word

    ;clamp the values
    pminsw      xmm0, max
    pmaxsw      xmm0, min

%if %1
    movdqu      xmm1, [rdi + %2]            ;current destination pixels
    pavgw       xmm0, xmm1
%endif
    movdqu      [rdi + %2], xmm0
%endm

;-----------------------------------------------------------------------
; 8-tap vertical filter, 4 pixels wide, 16-bit pixels.
;
; C types below are inferred from how the arguments are used in this
; function -- confirm against the C declaration.
;
;void vp9_high_filter_block1d4_v8_sse2
;(
;    unsigned short *src_ptr,        arg(0), first of the 8 rows filtered
;    int             src_pitch,      arg(1), in pixels (doubled to bytes)
;    unsigned short *output_ptr,     arg(2)
;    int             out_pitch,      arg(3), in pixels (doubled to bytes)
;    int             output_height,  arg(4), number of rows to produce
;    short          *filter,         arg(5), 8 taps, 16-byte aligned
;    int             bps             arg(6), bits per sample
;)
;
; Each output row is computed from 8 consecutive source rows starting at
; the current source row; 4 words (8 bytes) are written per row.
;
; output_height must be nonzero: the loop body runs before the count is
; tested.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d4_v8_sse2) PRIVATE
sym(vp9_high_filter_block1d4_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 7                 ;seven 16-byte slots, named below
    %define k0k6 [rsp + 16 * 0]
    %define k2k5 [rsp + 16 * 1]
    %define k3k4 [rsp + 16 * 2]
    %define k1k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define max [rsp + 16 * 5]
    %define min [rsp + 16 * 6]

    HIGH_GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]            ;output bytes per line
    lea         rdx, [rax + rax * 2]        ;3 * source stride
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movq        xmm0, [rsi]                 ;load src: row 0
    movq        xmm1, [rsi + rax]           ;1
    movq        xmm6, [rsi + rdx * 2]       ;6
    lea         rsi,  [rsi + rax]           ;advance one row; the offsets
                                            ;below are relative to the new rsi
    movq        xmm7, [rsi + rdx * 2]       ;7
    movq        xmm2, [rsi + rax]           ;2
    movq        xmm3, [rsi + rax * 2]       ;3
    movq        xmm4, [rsi + rdx]           ;4
    movq        xmm5, [rsi + rax * 4]       ;5

    HIGH_APPLY_FILTER_4 0

    lea         rdi, [rdi + rbx]            ;next output row
    dec         rcx
    jnz         .loop

    add rsp, 16 * 7
    ;NOTE(review): presumably restores the rsp value saved by ALIGN_STACK;
    ;that macro is not defined in this file -- confirm
    pop rsp
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; 8-tap vertical filter, 8 pixels wide, 16-bit pixels.
;
; C types below are inferred from how the arguments are used in this
; function and in HIGH_GET_FILTERS -- confirm against the C declaration.
;
;void vp9_high_filter_block1d8_v8_sse2
;(
;    unsigned short *src_ptr,        arg(0), first of the 8 rows filtered
;    int             src_pitch,      arg(1), in pixels (doubled to bytes)
;    unsigned short *output_ptr,     arg(2)
;    int             out_pitch,      arg(3), in pixels (doubled to bytes)
;    int             output_height,  arg(4), number of rows to produce
;    short          *filter,         arg(5), 8 taps, 16-byte aligned
;    int             bps             arg(6), bits per sample
;)
;
; Each output row is computed from 8 consecutive source rows starting at
; the current source row; 8 words (16 bytes) are written per row.
;
; output_height must be nonzero: the loop body runs before the count is
; tested.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d8_v8_sse2) PRIVATE
sym(vp9_high_filter_block1d8_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 8                 ;eight 16-byte slots, named below
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS                        ;also loads rsi = src, rdi = dst

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]            ;output bytes per line
    lea         rdx, [rax + rax * 2]        ;3 * source stride (LOAD_VERT_8)
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0                           ;rows 0..7; advances rsi one row
    HIGH_APPLY_FILTER_8 0, 0                ;no averaging, offset 0

    lea         rdi, [rdi + rbx]            ;next output row
    dec         rcx
    jnz         .loop

    add rsp, 16 * 8
    ;NOTE(review): presumably restores the rsp value saved by ALIGN_STACK;
    ;that macro is not defined in this file -- confirm
    pop rsp
    pop rbx
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; vp9_high_filter_block1d16_v8_sse2
;
; Vertical 8-tap filter over a block 16 pixels wide, processed as two
; 8-pixel halves per output row.  Strides are doubled to obtain byte
; strides, i.e. samples are two bytes wide.  (The previous header named
; vp9_filter_block1d16_v8_sse2 with "unsigned char" pointers, which did
; not match this symbol.)
;
; Arguments (names from the original prototype comment):
;   arg(0)  src_ptr        source samples
;   arg(1)  src_pitch      source stride in samples
;   arg(2)  output_ptr     destination samples
;   arg(3)  out_pitch      destination stride in samples
;   arg(4)  output_height  number of output rows
;   arg(5)  filter         short *, filter taps
;   SHADOW_ARGS_TO_STACK shadows seven arguments; the seventh is not
;   read in this body -- presumably the bit depth consumed by the
;   macros, TODO confirm against the C prototype.
;
; Register roles inside the loop:
;   rax = source stride in bytes      rbx = destination stride in bytes
;   rdx = 3 * source stride in bytes  rcx = rows remaining
;   rsi / rdi are not loaded in this body; presumably HIGH_GET_FILTERS
;   sets them to src_ptr / output_ptr -- confirm against the macro.
;
; Saved and restored here: rbp, rsi, rdi, rbx, plus whatever SAVE_XMM 7
; preserves.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d16_v8_sse2) PRIVATE
sym(vp9_high_filter_block1d16_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax                     ;rax is scratch for the alignment
    sub         rsp, 16 * 8                 ;eight 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]            ;output bytes per line
    lea         rdx, [rax + rax * 2]        ;three source lines, in bytes
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0                           ;left 8 pixels (byte offset 0)
    HIGH_APPLY_FILTER_8 0, 0
    sub         rsi, rax                    ;back up one row; LOAD_VERT_8
                                            ;presumably advanced rsi -- confirm

    LOAD_VERT_8 16                          ;right 8 pixels (byte offset 16)
    HIGH_APPLY_FILTER_8 0, 16
    add         rdi, rbx                    ;next output row

    dec         rcx
    jnz         .loop

    add         rsp, 16 * 8                 ;release the scratch slots
    pop         rsp                         ;undo ALIGN_STACK (presumably it
                                            ;pushed the unaligned rsp)
    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
39187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
;-----------------------------------------------------------------------
; vp9_high_filter_block1d4_v8_avg_sse2
;
; Vertical 8-tap filter over a block 4 pixels wide, "avg" variant.
; Strides are doubled to obtain byte strides, i.e. samples are two
; bytes wide; each movq below therefore fetches four samples.
;
; Arguments as read through arg(n):
;   arg(0)  src_ptr        source samples
;   arg(1)  src_pitch      source stride in samples
;   arg(2)  output_ptr     destination samples
;   arg(3)  out_pitch      destination stride in samples
;   arg(4)  output_height  number of output rows
;   arg(5)  filter         filter taps (read by HIGH_GET_FILTERS_4)
;   SHADOW_ARGS_TO_STACK shadows seven arguments; the seventh is not
;   read in this body -- presumably the bit depth consumed by the
;   macros, TODO confirm against the C prototype.
;
; Register roles inside the loop:
;   rsi = source row pointer          rdi = destination row pointer
;   rax = source stride in bytes      rbx = destination stride in bytes
;   rdx = 3 * source stride in bytes  rcx = rows remaining
;   xmmN = four samples from source row N of the current 8-row window
;
; Saved and restored here: rbp, rsi, rdi, rbx, plus whatever SAVE_XMM 7
; preserves.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d4_v8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d4_v8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax                     ;rax is scratch for the alignment
    sub         rsp, 16 * 7                 ;seven 16-byte scratch slots
    %define k0k6 [rsp + 16 * 0]
    %define k2k5 [rsp + 16 * 1]
    %define k3k4 [rsp + 16 * 2]
    %define k1k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define max [rsp + 16 * 5]
    %define min [rsp + 16 * 6]

    HIGH_GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]            ;output bytes per line
    lea         rdx, [rax + rax * 2]        ;three source lines, in bytes
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movq        xmm0, [rsi]                 ;load src: row 0
    movq        xmm1, [rsi + rax]           ;1
    movq        xmm6, [rsi + rdx * 2]       ;6  (2 * 3 lines)
    lea         rsi,  [rsi + rax]           ;advance one row; the loads below
                                            ;are relative to the new rsi
    movq        xmm7, [rsi + rdx * 2]       ;7  (1 + 6)
    movq        xmm2, [rsi + rax]           ;2  (1 + 1)
    movq        xmm3, [rsi + rax * 2]       ;3  (1 + 2)
    movq        xmm4, [rsi + rdx]           ;4  (1 + 3)
    movq        xmm5, [rsi + rax * 4]       ;5  (1 + 4)

    HIGH_APPLY_FILTER_4 1                   ;1: presumably selects averaging
                                            ;with the destination -- confirm

    lea         rdi, [rdi + rbx]            ;next output row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 7                 ;release the scratch slots
    pop         rsp                         ;undo ALIGN_STACK (presumably it
                                            ;pushed the unaligned rsp)
    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
45287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
;-----------------------------------------------------------------------
; vp9_high_filter_block1d8_v8_avg_sse2
;
; Vertical 8-tap filter over a block 8 pixels wide, "avg" variant.
; Strides are doubled to obtain byte strides, i.e. samples are two
; bytes wide.
;
; Arguments as read through arg(n):
;   arg(1)  src_pitch      source stride in samples
;   arg(3)  out_pitch      destination stride in samples
;   arg(4)  output_height  number of output rows
;   arg(0), arg(2), arg(5) are not read in this body; presumably
;   HIGH_GET_FILTERS consumes them as src_ptr, output_ptr and the
;   filter taps -- confirm against the macro definition.
;   SHADOW_ARGS_TO_STACK shadows seven arguments; the seventh is
;   presumably the bit depth -- TODO confirm against the C prototype.
;
; Register roles inside the loop:
;   rax = source stride in bytes      rbx = destination stride in bytes
;   rdx = 3 * source stride in bytes  rcx = rows remaining
;   rsi / rdi = source / destination pointers (see note above)
;
; Saved and restored here: rbp, rsi, rdi, rbx, plus whatever SAVE_XMM 7
; preserves.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d8_v8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d8_v8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax                     ;rax is scratch for the alignment
    sub         rsp, 16 * 8                 ;eight 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]            ;output bytes per line
    lea         rdx, [rax + rax * 2]        ;three source lines, in bytes
    movsxd      rcx, DWORD PTR arg(4)       ;output_height
.loop:
    LOAD_VERT_8 0                           ;8 pixels at byte offset 0
    HIGH_APPLY_FILTER_8 1, 0                ;1: presumably selects averaging
                                            ;with the destination -- confirm

    lea         rdi, [rdi + rbx]            ;next output row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 8                 ;release the scratch slots
    pop         rsp                         ;undo ALIGN_STACK (presumably it
                                            ;pushed the unaligned rsp)
    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
50187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
;-----------------------------------------------------------------------
; vp9_high_filter_block1d16_v8_avg_sse2
;
; Vertical 8-tap filter over a block 16 pixels wide, "avg" variant,
; processed as two 8-pixel halves per output row.  Strides are doubled
; to obtain byte strides, i.e. samples are two bytes wide.
;
; Arguments as read through arg(n):
;   arg(1)  src_pitch      source stride in samples
;   arg(3)  out_pitch      destination stride in samples
;   arg(4)  output_height  number of output rows
;   arg(0), arg(2), arg(5) are not read in this body; presumably
;   HIGH_GET_FILTERS consumes them as src_ptr, output_ptr and the
;   filter taps -- confirm against the macro definition.
;   SHADOW_ARGS_TO_STACK shadows seven arguments; the seventh is
;   presumably the bit depth -- TODO confirm against the C prototype.
;
; Register roles inside the loop:
;   rax = source stride in bytes      rbx = destination stride in bytes
;   rdx = 3 * source stride in bytes  rcx = rows remaining
;   rsi / rdi = source / destination pointers (see note above)
;
; Saved and restored here: rbp, rsi, rdi, rbx, plus whatever SAVE_XMM 7
; preserves.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d16_v8_avg_sse2) PRIVATE
sym(vp9_high_filter_block1d16_v8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax                     ;rax is scratch for the alignment
    sub         rsp, 16 * 8                 ;eight 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rbx, [rbx + rbx]            ;output bytes per line
    lea         rdx, [rax + rax * 2]        ;three source lines, in bytes
    movsxd      rcx, DWORD PTR arg(4)       ;output_height
.loop:
    LOAD_VERT_8 0                           ;left 8 pixels (byte offset 0)
    HIGH_APPLY_FILTER_8 1, 0                ;1: presumably selects averaging
                                            ;with the destination -- confirm
    sub         rsi, rax                    ;back up one row; LOAD_VERT_8
                                            ;presumably advanced rsi -- confirm

    LOAD_VERT_8 16                          ;right 8 pixels (byte offset 16)
    HIGH_APPLY_FILTER_8 1, 16
    add         rdi, rbx                    ;next output row

    dec         rcx
    jnz         .loop

    add         rsp, 16 * 8                 ;release the scratch slots
    pop         rsp                         ;undo ALIGN_STACK (presumably it
                                            ;pushed the unaligned rsp)
    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
55487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
;-----------------------------------------------------------------------
; vp9_high_filter_block1d4_h8_sse2
;
; Horizontal 8-tap filter over a block 4 pixels wide.  Strides are
; doubled to obtain byte strides, i.e. samples are two bytes wide.
; (The previous header named vp9_filter_block1d4_h8_sse2 with
; "unsigned char" pointers, which did not match this symbol.)
;
; Arguments as read through arg(n):
;   arg(0)  src_ptr              source samples
;   arg(1)  src_pixels_per_line  source stride in samples
;   arg(2)  output_ptr           destination samples
;   arg(3)  output_pitch         destination stride in samples
;   arg(4)  output_height        number of output rows
;   arg(5)  filter               filter taps (read by HIGH_GET_FILTERS_4)
;   SHADOW_ARGS_TO_STACK shadows seven arguments; the seventh is not
;   read in this body -- presumably the bit depth consumed by the
;   macros, TODO confirm against the C prototype.
;
; Register roles inside the loop:
;   rsi = source row pointer          rdi = destination row pointer
;   rax = source stride in bytes      rdx = destination stride in bytes
;   rcx = rows remaining
;   xmmN = samples starting at byte offset 2*N - 6 from rsi, i.e. the
;          window for tap N (only the low four samples matter for a
;          4-wide block -- presumably, per HIGH_APPLY_FILTER_4)
;
; Saved and restored here: rbp, rsi, rdi, plus whatever SAVE_XMM 7
; preserves.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d4_h8_sse2) PRIVATE
sym(vp9_high_filter_block1d4_h8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax                     ;rax is scratch for the alignment
    sub         rsp, 16 * 7                 ;seven 16-byte scratch slots
    %define k0k6 [rsp + 16 * 0]
    %define k2k5 [rsp + 16 * 1]
    %define k3k4 [rsp + 16 * 2]
    %define k1k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define max [rsp + 16 * 5]
    %define min [rsp + 16 * 6]

    HIGH_GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rdx, [rdx + rdx]            ;output bytes per line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movdqu      xmm0,   [rsi - 6]           ;load src: 3 samples left of rsi
    movdqu      xmm4,   [rsi + 2]           ;1 sample right of rsi
    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm4
    movdqa      xmm7, xmm4
    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm5, xmm4

    ; Byte shifts slide each copy so it starts one sample further right
    ; than the previous tap's window.
    psrldq      xmm1, 2                     ;starts at rsi - 4
    psrldq      xmm6, 4                     ;starts at rsi + 6
    psrldq      xmm7, 6                     ;starts at rsi + 8
    psrldq      xmm2, 4                     ;starts at rsi - 2
    psrldq      xmm3, 6                     ;starts at rsi
    psrldq      xmm5, 2                     ;starts at rsi + 4

    HIGH_APPLY_FILTER_4 0

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next output row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 7                 ;release the scratch slots
    pop         rsp                         ;undo ALIGN_STACK (presumably it
                                            ;pushed the unaligned rsp)

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
62987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
;-----------------------------------------------------------------------
; vp9_high_filter_block1d8_h8_sse2
;
; Horizontal 8-tap filter over a block 8 pixels wide.  Strides are
; doubled to obtain byte strides, i.e. samples are two bytes wide.
; (The previous header named vp9_filter_block1d8_h8_sse2 with
; "unsigned char" pointers, which did not match this symbol.)
;
; Arguments as read through arg(n):
;   arg(1)  src_pixels_per_line  source stride in samples
;   arg(3)  output_pitch         destination stride in samples
;   arg(4)  output_height        number of output rows
;   arg(0), arg(2), arg(5) are not read in this body; presumably
;   HIGH_GET_FILTERS consumes them as src_ptr, output_ptr and the
;   filter taps -- confirm against the macro definition.
;   SHADOW_ARGS_TO_STACK shadows seven arguments; the seventh is
;   presumably the bit depth -- TODO confirm against the C prototype.
;
; Register roles inside the loop:
;   rsi = source row pointer          rdi = destination row pointer
;   rax = source stride in bytes      rdx = destination stride in bytes
;   rcx = rows remaining
;   xmmN = eight samples starting at byte offset 2*N - 6 from rsi,
;          i.e. the window for tap N
;
; Saved and restored here: rbp, rsi, rdi, plus whatever SAVE_XMM 7
; preserves.
;-----------------------------------------------------------------------
global sym(vp9_high_filter_block1d8_h8_sse2) PRIVATE
sym(vp9_high_filter_block1d8_h8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax                     ;rax is scratch for the alignment
    sub         rsp, 16 * 8                 ;eight 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k6k7 [rsp + 16 * 1]
    %define k2k5 [rsp + 16 * 2]
    %define k3k4 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define temp [rsp + 16 * 5]
    %define max [rsp + 16 * 6]
    %define min [rsp + 16 * 7]

    HIGH_GET_FILTERS

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    lea         rax, [rax + rax]            ;bytes per line
    lea         rdx, [rdx + rdx]            ;output bytes per line
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; Unaligned loads, each one sample (2 bytes) further right.
    movdqu      xmm0,   [rsi - 6]           ;load src
    movdqu      xmm1,   [rsi - 4]
    movdqu      xmm2,   [rsi - 2]
    movdqu      xmm3,   [rsi]
    movdqu      xmm4,   [rsi + 2]
    movdqu      xmm5,   [rsi + 4]
    movdqu      xmm6,   [rsi + 6]
    movdqu      xmm7,   [rsi + 8]

    HIGH_APPLY_FILTER_8 0, 0

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next output row
    dec         rcx
    jnz         .loop

    add         rsp, 16 * 8                 ;release the scratch slots
    pop         rsp                         ;undo ALIGN_STACK (presumably it
                                            ;pushed the unaligned rsp)

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
69587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
69687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;void vp9_high_filter_block1d16_h8_sse2
69787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;(
69887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;    unsigned char  *src_ptr,
69987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;    unsigned int    src_pixels_per_line,
70087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;    unsigned char  *output_ptr,
70187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;    unsigned int    output_pitch,
70287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;    unsigned int    output_height,
70387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;    short *filter
70487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org;)
;
; Horizontal 8-tap filter pass over a block 16 samples wide. Every row is
; processed as two halves of 8 samples; each half is handed to the
; HIGH_APPLY_FILTER_8 macro, which is defined outside this section.
;
; NOTE(review): the pointer types in the prototype above look inherited from
; the 8-bit variant. This routine doubles both pitches to obtain byte strides
; and spaces neighbouring tap loads 2 bytes apart, i.e. it treats samples as
; 2 bytes wide - confirm against the C declaration.
; NOTE(review): the prolog shadows 7 arguments but only 6 are listed above and
; only arg(1), arg(3) and arg(4) are read directly here; the remaining ones are
; presumably consumed by HIGH_GET_FILTERS - confirm against the macro.
;
; Register use inside the loop:
;   rsi  current source row, rdi  current destination row (neither is loaded
;        in this body, so HIGH_GET_FILTERS presumably sets them - confirm)
;   rax  source stride in bytes, rdx  destination stride in bytes
;   rcx  rows remaining
;   xmm0-xmm7  the eight tap inputs for the half-row being filtered
70587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d16_h8_sse2) PRIVATE
70687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d16_h8_sse2):
70787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rbp
70887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    mov         rbp, rsp
70987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 7
71087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SAVE_XMM 7
71187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rsi
71287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rdi
71387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; end prolog
71487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Reserve eight 16-byte scratch slots on a 16-byte aligned stack. The
    ; names below are the slot aliases used by the filter macros.
71587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ALIGN_STACK 16, rax
71687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    sub         rsp, 16 * 8
71787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k0k1 [rsp + 16 * 0]
71887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k6k7 [rsp + 16 * 1]
71987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k2k5 [rsp + 16 * 2]
72087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k3k4 [rsp + 16 * 3]
72187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define krd [rsp + 16 * 4]
72287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define temp [rsp + 16 * 5]
72387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define max [rsp + 16 * 6]
72487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define min [rsp + 16 * 7]
72587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Macro defined outside this section; presumably fills the slots above.
72687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_GET_FILTERS
72787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
72887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
72987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
73087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rax, [rax + rax]            ;bytes per line
73187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdx, [rdx + rdx]            ;output bytes per line
73287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
73387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; One iteration per output row. The loop is bottom-tested, so the body
    ; runs before rcx is checked: output_height must be non-zero.
73487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop:
    ; First half: eight unaligned 16-byte loads, each one 2 bytes further
    ; along the row, from 6 bytes before rsi to 8 bytes after it.
73587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm0,   [rsi - 6]           ;load src
73687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm1,   [rsi - 4]
73787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm2,   [rsi - 2]
73887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm3,   [rsi]
73987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm4,   [rsi + 2]
74087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm5,   [rsi + 4]
74187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm6,   [rsi + 6]
74287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm7,   [rsi + 8]
74387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; NOTE(review): first macro argument is 0 here and 1 in the _avg
    ; routines; second argument looks like the byte offset into the output
    ; row - confirm against the macro definition.
74487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_APPLY_FILTER_8 0, 0
74587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Second half: same load pattern shifted 16 bytes along the row.
74687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm0,   [rsi + 10]           ;load src
74787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm1,   [rsi + 12]
74887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm2,   [rsi + 14]
74987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm3,   [rsi + 16]
75087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm4,   [rsi + 18]
75187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm5,   [rsi + 20]
75287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm6,   [rsi + 22]
75387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm7,   [rsi + 24]
75487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
75587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_APPLY_FILTER_8 0, 16
75687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
75787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rsi, [rsi + rax]            ;next source row
75887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdi, [rdi + rdx]            ;next destination row
75987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    dec         rcx
76087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    jnz         .loop
76187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Release the scratch slots; the following pop presumably reloads the
    ; rsp value saved by ALIGN_STACK - confirm against x86_abi_support.asm.
76287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    add rsp, 16 * 8
76387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsp
76487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
76587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; begin epilog
76687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rdi
76787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsi
76887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    RESTORE_XMM
76987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    UNSHADOW_ARGS
77087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop         rbp
77187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ret
77287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
; vp9_high_filter_block1d4_h8_avg_sse2
;
; Horizontal 8-tap filter pass over a block 4 samples wide, using the
; HIGH_APPLY_FILTER_4 macro (defined outside this section) with argument 1.
; NOTE(review): argument 1 presumably selects averaging the result with the
; existing destination contents, as the _avg suffix suggests - confirm against
; the macro definition.
;
; Arguments read directly in this body: arg(0) source pointer, arg(1) source
; pitch, arg(2) destination pointer, arg(3) destination pitch, arg(4) row
; count. Both pitches are doubled to obtain byte strides.
;
; Register use inside the loop:
;   rsi  current source row, rdi  current destination row
;   rax  source stride in bytes, rdx  destination stride in bytes
;   rcx  rows remaining
;   xmm0-xmm7  the eight tap inputs
77387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d4_h8_avg_sse2) PRIVATE
77487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d4_h8_avg_sse2):
77587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rbp
77687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    mov         rbp, rsp
77787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 7
77887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SAVE_XMM 7
77987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rsi
78087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rdi
78187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; end prolog
78287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Reserve seven 16-byte scratch slots on a 16-byte aligned stack. Note
    ; the coefficient pairing differs from the 8-wide routines and there is
    ; no temp slot.
78387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ALIGN_STACK 16, rax
78487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    sub         rsp, 16 * 7
78587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k0k6 [rsp + 16 * 0]
78687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k2k5 [rsp + 16 * 1]
78787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k3k4 [rsp + 16 * 2]
78887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k1k7 [rsp + 16 * 3]
78987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define krd [rsp + 16 * 4]
79087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define max [rsp + 16 * 5]
79187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define min [rsp + 16 * 6]
79287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Macro from the top of the file; it starts by reading the filter pointer
    ; from arg(5). It presumably fills the slots above - confirm.
79387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_GET_FILTERS_4
79487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
79587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    mov         rsi, arg(0)                 ;src_ptr
79687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    mov         rdi, arg(2)                 ;output_ptr
79787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
79887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
79987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
80087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rax, [rax + rax]            ;bytes per line
80187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdx, [rdx + rdx]            ;output bytes per line
80287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
80387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; One iteration per output row. The loop is bottom-tested, so the body
    ; runs before rcx is checked: output_height must be non-zero.
80487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop:
    ; Only two memory loads per row; the other six tap inputs are made by
    ; copying these two registers and shifting the copies right by whole
    ; bytes.
80587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm0,   [rsi - 6]           ;load src
80687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm4,   [rsi + 2]
80787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqa      xmm1, xmm0
80887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqa      xmm6, xmm4
80987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqa      xmm7, xmm4
81087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqa      xmm2, xmm0
81187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqa      xmm3, xmm0
81287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqa      xmm5, xmm4
81387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; After the shifts the low end of xmm0..xmm7 begins at byte offsets
    ; -6, -4, -2, 0, +2, +4, +6, +8 from rsi respectively.
81487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    psrldq      xmm1, 2
81587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    psrldq      xmm6, 4
81687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    psrldq      xmm7, 6
81787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    psrldq      xmm2, 4
81887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    psrldq      xmm3, 6
81987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    psrldq      xmm5, 2
82087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
82187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_APPLY_FILTER_4 1
82287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
82387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rsi, [rsi + rax]            ;next source row
82487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdi, [rdi + rdx]            ;next destination row
82587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    dec         rcx
82687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    jnz         .loop
82787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Release the scratch slots; the following pop presumably reloads the
    ; rsp value saved by ALIGN_STACK - confirm against x86_abi_support.asm.
82887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    add rsp, 16 * 7
82987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsp
83087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
83187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; begin epilog
83287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rdi
83387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsi
83487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    RESTORE_XMM
83587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    UNSHADOW_ARGS
83687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop         rbp
83787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ret
83887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
; vp9_high_filter_block1d8_h8_avg_sse2
;
; Horizontal 8-tap filter pass over a block 8 samples wide, using the
; HIGH_APPLY_FILTER_8 macro (defined outside this section) with first
; argument 1.
; NOTE(review): first argument 1 presumably selects averaging the result with
; the existing destination contents, as the _avg suffix suggests - confirm
; against the macro definition.
;
; Arguments read directly in this body: arg(1) source pitch, arg(3)
; destination pitch, arg(4) row count. Both pitches are doubled to obtain
; byte strides.
;
; Register use inside the loop:
;   rsi  current source row, rdi  current destination row (neither is loaded
;        in this body, so HIGH_GET_FILTERS presumably sets them - confirm)
;   rax  source stride in bytes, rdx  destination stride in bytes
;   rcx  rows remaining
;   xmm0-xmm7  the eight tap inputs
83987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d8_h8_avg_sse2) PRIVATE
84087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d8_h8_avg_sse2):
84187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rbp
84287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    mov         rbp, rsp
84387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 7
84487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SAVE_XMM 7
84587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rsi
84687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rdi
84787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; end prolog
84887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Reserve eight 16-byte scratch slots on a 16-byte aligned stack. The
    ; names below are the slot aliases used by the filter macros.
84987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ALIGN_STACK 16, rax
85087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    sub         rsp, 16 * 8
85187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k0k1 [rsp + 16 * 0]
85287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k6k7 [rsp + 16 * 1]
85387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k2k5 [rsp + 16 * 2]
85487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k3k4 [rsp + 16 * 3]
85587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define krd [rsp + 16 * 4]
85687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define temp [rsp + 16 * 5]
85787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define max [rsp + 16 * 6]
85887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define min [rsp + 16 * 7]
85987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Macro defined outside this section; presumably fills the slots above.
86087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_GET_FILTERS
86187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
86287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
86387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
86487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rax, [rax + rax]            ;bytes per line
86587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdx, [rdx + rdx]            ;output bytes per line
86687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
86787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; One iteration per output row. The loop is bottom-tested, so the body
    ; runs before rcx is checked: output_height must be non-zero.
86887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop:
    ; Eight unaligned 16-byte loads, each one 2 bytes further along the row,
    ; from 6 bytes before rsi to 8 bytes after it.
86987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm0,   [rsi - 6]           ;load src
87087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm1,   [rsi - 4]
87187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm2,   [rsi - 2]
87287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm3,   [rsi]
87387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm4,   [rsi + 2]
87487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm5,   [rsi + 4]
87587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm6,   [rsi + 6]
87687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm7,   [rsi + 8]
87787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
87887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_APPLY_FILTER_8 1, 0
87987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
88087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rsi, [rsi + rax]            ;next source row
88187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdi, [rdi + rdx]            ;next destination row
88287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    dec         rcx
88387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    jnz         .loop
88487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Release the scratch slots; the following pop presumably reloads the
    ; rsp value saved by ALIGN_STACK - confirm against x86_abi_support.asm.
88587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    add rsp, 16 * 8
88687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsp
88787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
88887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; begin epilog
88987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rdi
89087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsi
89187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    RESTORE_XMM
89287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    UNSHADOW_ARGS
89387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop         rbp
89487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ret
89587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
; vp9_high_filter_block1d16_h8_avg_sse2
;
; Horizontal 8-tap filter pass over a block 16 samples wide. Every row is
; processed as two halves of 8 samples, each handed to the
; HIGH_APPLY_FILTER_8 macro (defined outside this section) with first
; argument 1.
; NOTE(review): first argument 1 presumably selects averaging the result with
; the existing destination contents, as the _avg suffix suggests, and the
; second argument (0, then 16) looks like the byte offset into the output
; row - confirm against the macro definition.
;
; Arguments read directly in this body: arg(1) source pitch, arg(3)
; destination pitch, arg(4) row count. Both pitches are doubled to obtain
; byte strides.
;
; Register use inside the loop:
;   rsi  current source row, rdi  current destination row (neither is loaded
;        in this body, so HIGH_GET_FILTERS presumably sets them - confirm)
;   rax  source stride in bytes, rdx  destination stride in bytes
;   rcx  rows remaining
;   xmm0-xmm7  the eight tap inputs for the half-row being filtered
89687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgglobal sym(vp9_high_filter_block1d16_h8_avg_sse2) PRIVATE
89787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgsym(vp9_high_filter_block1d16_h8_avg_sse2):
89887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rbp
89987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    mov         rbp, rsp
90087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 7
90187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    SAVE_XMM 7
90287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rsi
90387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    push        rdi
90487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; end prolog
90587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Reserve eight 16-byte scratch slots on a 16-byte aligned stack. The
    ; names below are the slot aliases used by the filter macros.
90687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ALIGN_STACK 16, rax
90787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    sub         rsp, 16 * 8
90887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k0k1 [rsp + 16 * 0]
90987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k6k7 [rsp + 16 * 1]
91087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k2k5 [rsp + 16 * 2]
91187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define k3k4 [rsp + 16 * 3]
91287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define krd [rsp + 16 * 4]
91387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define temp [rsp + 16 * 5]
91487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define max [rsp + 16 * 6]
91587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    %define min [rsp + 16 * 7]
91687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Macro defined outside this section; presumably fills the slots above.
91787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_GET_FILTERS
91887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
91987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
92087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
92187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rax, [rax + rax]            ;bytes per line
92287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdx, [rdx + rdx]            ;output bytes per line
92387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
92487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; One iteration per output row. The loop is bottom-tested, so the body
    ; runs before rcx is checked: output_height must be non-zero.
92587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org.loop:
    ; First half: eight unaligned 16-byte loads, each one 2 bytes further
    ; along the row, from 6 bytes before rsi to 8 bytes after it.
92687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm0,   [rsi - 6]           ;load src
92787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm1,   [rsi - 4]
92887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm2,   [rsi - 2]
92987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm3,   [rsi]
93087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm4,   [rsi + 2]
93187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm5,   [rsi + 4]
93287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm6,   [rsi + 6]
93387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm7,   [rsi + 8]
93487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
93587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_APPLY_FILTER_8 1, 0
93687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Second half: same load pattern shifted 16 bytes along the row.
93787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm0,   [rsi + 10]           ;load src
93887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm1,   [rsi + 12]
93987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm2,   [rsi + 14]
94087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm3,   [rsi + 16]
94187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm4,   [rsi + 18]
94287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm5,   [rsi + 20]
94387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm6,   [rsi + 22]
94487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    movdqu      xmm7,   [rsi + 24]
94587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
94687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    HIGH_APPLY_FILTER_8 1, 16
94787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
94887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rsi, [rsi + rax]            ;next source row
94987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    lea         rdi, [rdi + rdx]            ;next destination row
95087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    dec         rcx
95187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    jnz         .loop
95287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
    ; Release the scratch slots; the following pop presumably reloads the
    ; rsp value saved by ALIGN_STACK - confirm against x86_abi_support.asm.
95387997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    add rsp, 16 * 8
95487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsp
95587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
95687997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ; begin epilog
95787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rdi
95887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop rsi
95987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    RESTORE_XMM
96087997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    UNSHADOW_ARGS
96187997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    pop         rbp
96287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    ret
963