;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; NOTE(review): xmm_filter_shift is not referenced by any routine visible in
; this part of the file; presumably it is a shift count used by filter
; routines further down -- confirm before relying on it.
%define xmm_filter_shift            7

;-----------------------------------------------------------------------
;unsigned int vp8_get_mb_ss_sse2
;(
;    short *src_ptr
;)
;
; Sum of squares of the 256 signed 16-bit values at src_ptr
; (8 loop iterations, 64 bytes = 32 values per iteration).
;
; In:   arg(0) = src_ptr. Loaded with movdqa, so it must be 16-byte
;       aligned.
; Out:  low 32 bits of rax = sum of squares. movq copies 64 bits, so the
;       upper half of rax holds a leftover partial sum that is not part
;       of the result.
; Uses: rax, rcx, xmm0-xmm4, flags (plus whatever the ABI macros touch).
;-----------------------------------------------------------------------
global sym(vp8_get_mb_ss_sse2)
sym(vp8_get_mb_ss_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 1
    GET_GOT     rbx
    push rsi
    push rdi
    sub         rsp, 16
    ; end prolog


        mov         rax, arg(0) ;[src_ptr]
        mov         rcx, 8                  ; iterations remaining
        pxor        xmm4, xmm4              ; xmm4 = four dword partial sums

.next_row:
        movdqa      xmm0, [rax]
        movdqa      xmm1, [rax+16]
        movdqa      xmm2, [rax+32]
        movdqa      xmm3, [rax+48]
        pmaddwd     xmm0, xmm0              ; x*x, adjacent pairs added into dwords
        pmaddwd     xmm1, xmm1
        pmaddwd     xmm2, xmm2
        pmaddwd     xmm3, xmm3

        paddd       xmm0, xmm1
        paddd       xmm2, xmm3
        paddd       xmm4, xmm0
        paddd       xmm4, xmm2

        add         rax, 0x40               ; advance 64 bytes (32 values)
        dec         rcx
        ; jnz, not ja: ja also tests CF, which dec leaves untouched, so
        ; the branch used to depend on the carry out of the add above.
        jnz         .next_row

        ; horizontal add of the four dwords of xmm4 into its low dword
        movdqa      xmm3,xmm4
        psrldq      xmm4,8
        paddd       xmm4,xmm3
        movdqa      xmm3,xmm4
        psrldq      xmm4,4
        paddd       xmm4,xmm3
        movq        rax,xmm4                ; result is the low dword


    ; begin epilog
    add rsp, 16
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_get16x16var_sse2
;(
;    unsigned char   *  src_ptr,
;    int             source_stride,
;    unsigned char   *  ref_ptr,
;    int             recon_stride,
;    unsigned int    *  SSE,
;    int             *  Sum
;)
;
; For a block of 16 rows x 16 bytes, with d = src byte - ref byte:
;   *Sum = sum of d
;   *SSE = sum of d*d
;
; In:   arg(0)..arg(5) as in the prototype above. Rows are loaded with
;       movdqu, so neither pointer needs to be aligned.
; Out:  results are written through the SSE and Sum pointers.
;       NOTE(review): rax is never loaded with a computed value; on
;       return it still holds the Sum pointer from arg(5), despite the
;       unsigned int prototype. Presumably callers ignore the return
;       value -- confirm.
; Uses: rax, rcx, rdx, xmm0-xmm7, flags; rbx, rsi, rdi are saved and
;       restored here.
;-----------------------------------------------------------------------
global sym(vp8_get16x16var_sse2)
sym(vp8_get16x16var_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push rbx
    push rsi
    push rdi
    ; end prolog

        mov         rsi,            arg(0) ;[src_ptr]
        mov         rdi,            arg(2) ;[ref_ptr]

        movsxd      rax,            DWORD PTR arg(1) ;[source_stride]
        movsxd      rdx,            DWORD PTR arg(3) ;[recon_stride]

        ; Prefetch data: rows 0..7 of the source block
        lea             rcx,    [rax+rax*2]             ; rcx = 3 * source_stride
        prefetcht0      [rsi]
        prefetcht0      [rsi+rax]
        prefetcht0      [rsi+rax*2]
        prefetcht0      [rsi+rcx]
        lea             rbx,    [rsi+rax*4]             ; rbx = source row 4
        prefetcht0      [rbx]
        prefetcht0      [rbx+rax]
        prefetcht0      [rbx+rax*2]
        prefetcht0      [rbx+rcx]

        ; rows 0..7 of the reference block
        lea             rcx,    [rdx+rdx*2]             ; rcx = 3 * recon_stride
        prefetcht0      [rdi]
        prefetcht0      [rdi+rdx]
        prefetcht0      [rdi+rdx*2]
        prefetcht0      [rdi+rcx]
        lea             rbx,    [rdi+rdx*4]             ; rbx = reference row 4
        prefetcht0      [rbx]
        prefetcht0      [rbx+rdx]
        prefetcht0      [rbx+rdx*2]
        prefetcht0      [rbx+rcx]

        pxor        xmm0,           xmm0                        ; clear xmm0 for unpack
        pxor        xmm7,           xmm7                        ; clear xmm7 for accumulating diffs

        pxor        xmm6,           xmm6                        ; clear xmm6 for accumulating sse
        mov         rcx,            16                          ; rows remaining

.var16loop:
        movdqu      xmm1,           XMMWORD PTR [rsi]
        movdqu      xmm2,           XMMWORD PTR [rdi]

        ; hint for the row 8 strides ahead; for the last 8 rows the
        ; address lies past the block (prefetch is only a hint)
        prefetcht0      [rsi+rax*8]
        prefetcht0      [rdi+rdx*8]

        movdqa      xmm3,           xmm1
        movdqa      xmm4,           xmm2


        ; widen bytes to words: xmm1/xmm2 = low 8 pixels, xmm3/xmm4 = high 8
        punpcklbw   xmm1,           xmm0
        punpckhbw   xmm3,           xmm0

        punpcklbw   xmm2,           xmm0
        punpckhbw   xmm4,           xmm0


        psubw       xmm1,           xmm2                        ; d = src - ref
        psubw       xmm3,           xmm4

        ; Each 16-bit lane of xmm7 receives 2 differences per row, 32 in
        ; total, each within [-255, 255], so the lanes cannot overflow.
        paddw       xmm7,           xmm1
        pmaddwd     xmm1,           xmm1                        ; d*d, pairs added into dwords

        paddw       xmm7,           xmm3
        pmaddwd     xmm3,           xmm3

        paddd       xmm6,           xmm1
        paddd       xmm6,           xmm3

        add         rsi,            rax
        add         rdi,            rdx

        sub         rcx,            1
        jnz         .var16loop


        ; xmm6 = four dword partial SSEs, xmm7 = eight word partial sums
        movdqa      xmm1,           xmm6
        pxor        xmm6,           xmm6

        ; Sign-extend the eight words of xmm7 to dwords: place each word
        ; in the upper half of a dword, then shift right arithmetically.
        pxor        xmm5,           xmm5
        punpcklwd   xmm6,           xmm7

        punpckhwd   xmm5,           xmm7
        psrad       xmm5,           16

        psrad       xmm6,           16
        paddd       xmm6,           xmm5                        ; four dword partial sums

        ; Horizontal adds. With xmm0 = 0, the unpack steps turn
        ; [a b c d] into [a 0 b 0] + [c 0 d 0] = [a+c 0 b+d 0].
        movdqa      xmm2,           xmm1
        punpckldq   xmm1,           xmm0

        punpckhdq   xmm2,           xmm0
        movdqa      xmm7,           xmm6

        paddd       xmm1,           xmm2
        punpckldq   xmm6,           xmm0

        punpckhdq   xmm7,           xmm0
        paddd       xmm6,           xmm7

        movdqa      xmm2,           xmm1
        movdqa      xmm7,           xmm6

        ; fold the upper quadword onto the lower one
        psrldq      xmm1,           8
        psrldq      xmm6,           8

        paddd       xmm7,           xmm6                        ; low dword = Sum
        paddd       xmm1,           xmm2                        ; low dword = SSE

        mov         rax,            arg(5) ;[Sum]
        mov         rdi,            arg(4) ;[SSE]

        movd DWORD PTR [rax],       xmm7
        movd DWORD PTR [rdi],       xmm1


    ; begin epilog
    pop rdi
    pop rsi
    pop rbx
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_get16x16pred_error_sse2
;(
;   unsigned char *src_ptr,
;    int src_stride,
;    unsigned char *ref_ptr,
;    int ref_stride
;)
;
; For a block of 16 rows x 16 bytes, with d = src byte - ref byte:
;   Sum = sum of d,  SSE = sum of d*d
;   returns SSE - ((Sum * Sum) >> 8)
;
; In:   arg(0)..arg(3) as in the prototype above. Rows are loaded with
;       movdqu, so neither pointer needs to be aligned.
; Out:  rax = SSE - ((Sum * Sum) >> 8)
; Uses: rax, rcx, rdx, xmm0-xmm7, flags; rsi, rdi are saved and
;       restored here. 16 bytes of stack scratch: [rsp] = Sum,
;       [rsp+4] = SSE.
;-----------------------------------------------------------------------
global sym(vp8_get16x16pred_error_sse2)
sym(vp8_get16x16pred_error_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    GET_GOT     rbx
    push rsi
    push rdi
    sub         rsp, 16
    ; end prolog

        mov         rsi,            arg(0) ;[src_ptr]
        mov         rdi,            arg(2) ;[ref_ptr]

        movsxd      rax,            DWORD PTR arg(1) ;[src_stride]
        movsxd      rdx,            DWORD PTR arg(3) ;[ref_stride]

        pxor        xmm0,           xmm0                        ; clear xmm0 for unpack
        pxor        xmm7,           xmm7                        ; clear xmm7 for accumulating diffs

        pxor        xmm6,           xmm6                        ; clear xmm6 for accumulating sse
        mov         rcx,            16                          ; rows remaining

.var16peloop:
        movdqu      xmm1,           XMMWORD PTR [rsi]
        movdqu      xmm2,           XMMWORD PTR [rdi]

        movdqa      xmm3,           xmm1
        movdqa      xmm4,           xmm2

        ; widen bytes to words: xmm1/xmm2 = low 8 pixels, xmm3/xmm4 = high 8
        punpcklbw   xmm1,           xmm0
        punpckhbw   xmm3,           xmm0

        punpcklbw   xmm2,           xmm0
        punpckhbw   xmm4,           xmm0

        psubw       xmm1,           xmm2                        ; d = src - ref
        psubw       xmm3,           xmm4

        ; Each 16-bit lane of xmm7 receives 2 differences per row, 32 in
        ; total, each within [-255, 255], so the lanes cannot overflow.
        paddw       xmm7,           xmm1
        pmaddwd     xmm1,           xmm1                        ; d*d, pairs added into dwords

        paddw       xmm7,           xmm3
        pmaddwd     xmm3,           xmm3

        paddd       xmm6,           xmm1
        paddd       xmm6,           xmm3

        add         rsi,            rax
        add         rdi,            rdx

        sub         rcx,            1
        jnz         .var16peloop


        ; xmm6 = four dword partial SSEs, xmm7 = eight word partial sums
        movdqa      xmm1,           xmm6
        pxor        xmm6,           xmm6

        ; Sign-extend the eight words of xmm7 to dwords: place each word
        ; in the upper half of a dword, then shift right arithmetically.
        pxor        xmm5,           xmm5
        punpcklwd   xmm6,           xmm7

        punpckhwd   xmm5,           xmm7
        psrad       xmm5,           16

        psrad       xmm6,           16
        paddd       xmm6,           xmm5                        ; four dword partial sums

        ; Horizontal adds. With xmm0 = 0, the unpack steps turn
        ; [a b c d] into [a 0 b 0] + [c 0 d 0] = [a+c 0 b+d 0].
        movdqa      xmm2,           xmm1
        punpckldq   xmm1,           xmm0

        punpckhdq   xmm2,           xmm0
        movdqa      xmm7,           xmm6

        paddd       xmm1,           xmm2
        punpckldq   xmm6,           xmm0

        punpckhdq   xmm7,           xmm0
        paddd       xmm6,           xmm7

        movdqa      xmm2,           xmm1
        movdqa      xmm7,           xmm6

        ; fold the upper quadword onto the lower one
        psrldq      xmm1,           8
        psrldq      xmm6,           8

        paddd       xmm7,           xmm6                        ; low dword = Sum
        paddd       xmm1,           xmm2                        ; low dword = SSE

        movd DWORD PTR [rsp],       xmm7  ;Sum
        movd DWORD PTR [rsp+4],     xmm1  ;SSE

        ; return (SSE-((Sum*Sum)>>8));
        movsxd      rdx, dword ptr [rsp]
        imul        rdx, rdx
        ; Sum*Sum is never negative, so a logical shift gives the same
        ; value as sar in 64-bit registers. Unlike sar it stays correct
        ; if the r* names are narrowed to 32-bit registers, where a
        ; square of 2^31 or more would be treated as negative.
        ; NOTE(review): presumably x86_abi_support.asm narrows them for
        ; 32-bit builds -- confirm.
        shr         rdx, 8
        movsxd      rax, dword ptr [rsp + 4]
        sub         rax, rdx

    ; begin epilog
    add rsp, 16
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret



33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;unsigned int vp8_get8x8var_sse2
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;(
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned char   *  src_ptr,
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int             source_stride,
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned char   *  ref_ptr,
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int             recon_stride,
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned int    *  SSE,
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int             *  Sum
33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;)
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberglobal sym(vp8_get8x8var_sse2)
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersym(vp8_get8x8var_sse2):
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push        rbp
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov         rbp, rsp
34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    SHADOW_ARGS_TO_STACK 6
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    GET_GOT     rbx
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push rsi
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push rdi
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub         rsp, 16
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; end prolog
34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov         rsi,            arg(0) ;[src_ptr]
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov         rdi,            arg(2) ;[ref_ptr]
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movsxd      rax,            DWORD PTR arg(1) ;[source_stride]
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movsxd      rdx,            DWORD PTR arg(3) ;[recon_stride]
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor        xmm0,           xmm0                        ; clear xmm0 for unpack
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor        xmm7,           xmm7                        ; clear xmm7 for accumulating diffs
35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm1,           QWORD PTR [rsi]
36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR [rdi]
36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm1,           xmm0
36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm1,           xmm2
36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm1
36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm1,           xmm1
36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax]
37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx]
37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax * 2]
38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx * 2]
38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        lea         rsi,            [rsi + rax * 2]
39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        lea         rdi,            [rdi + rdx * 2]
39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax]
39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx]
40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax *2]
41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx *2]
41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        lea         rsi,            [rsi + rax * 2]
42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        lea         rdi,            [rdi + rdx * 2]
42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax]
42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx]
42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax *2]
44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx *2]
44190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        lea         rsi,            [rsi + rax * 2]
45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        lea         rdi,            [rdi + rdx * 2]
45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm2,           QWORD PTR[rsi + rax]
45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        xmm3,           QWORD PTR[rdi + rdx]
45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm2,           xmm0
45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw   xmm3,           xmm0
46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubsw      xmm2,           xmm3
46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm2
46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd     xmm2,           xmm2
46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa      xmm6,           xmm7
46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd   xmm6,           xmm0
47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd   xmm7,           xmm0
47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa      xmm2,           xmm1
47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm6,           xmm7
47590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckldq   xmm1,           xmm0
47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhdq   xmm2,           xmm0
47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa      xmm7,           xmm6
47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckldq   xmm6,           xmm0
48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhdq   xmm7,           xmm0
48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm6,           xmm7
48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa      xmm2,           xmm1
48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa      xmm7,           xmm6
48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
48990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrldq      xmm1,           8
49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrldq      xmm6,           8
49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
49290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       xmm7,           xmm6
49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd       xmm1,           xmm2
49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov         rax,            arg(5) ;[Sum]
49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov         rdi,            arg(4) ;[SSE]
49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
498538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        movq        rdx,            xmm7
49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movsx       rcx,            dx
50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov  dword ptr [rax],       ecx
50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movd DWORD PTR [rdi],       xmm1
50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; begin epilog
50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add rsp, 16
50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rdi
50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rsi
50890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    RESTORE_GOT
50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    UNSHADOW_ARGS
51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop         rbp
51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ret
51290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;void vp8_filter_block2d_bil_var_sse2
51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;(
51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned char *ref_ptr,
51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int ref_pixels_per_line,
51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned char *src_ptr,
51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int src_pixels_per_line,
51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned int Height,
52079f15823c34ae1e423108295e416213200bb280fAndreas Huber;    int  xoffset,
52179f15823c34ae1e423108295e416213200bb280fAndreas Huber;    int  yoffset,
52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int *sum,
52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned int *sumsquared
52490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
52590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;)
52690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberglobal sym(vp8_filter_block2d_bil_var_sse2)
52790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersym(vp8_filter_block2d_bil_var_sse2):
52890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push        rbp
52990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov         rbp, rsp
53090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    SHADOW_ARGS_TO_STACK 9
53179f15823c34ae1e423108295e416213200bb280fAndreas Huber    SAVE_XMM
53290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    GET_GOT     rbx
53390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push rsi
53490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push rdi
53579f15823c34ae1e423108295e416213200bb280fAndreas Huber    push rbx
53690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; end prolog
53790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
53890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            xmm6,           xmm6                 ;
53990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            xmm7,           xmm7                 ;
54090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
54179f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [GLOBAL(xmm_bi_rd)]  ; rounding
54279f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa          xmm4,           XMMWORD PTR [rsi]
54390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
54479f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rcx,            [GLOBAL(vp8_bilinear_filters_sse2)]
54579f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rax,            dword ptr arg(5)     ; xoffset
54679f15823c34ae1e423108295e416213200bb280fAndreas Huber
54779f15823c34ae1e423108295e416213200bb280fAndreas Huber        cmp             rax,            0                    ; skip first_pass filter if xoffset=0
54879f15823c34ae1e423108295e416213200bb280fAndreas Huber        je              filter_block2d_bil_var_sse2_sp_only
54979f15823c34ae1e423108295e416213200bb280fAndreas Huber
55079f15823c34ae1e423108295e416213200bb280fAndreas Huber        shl             rax,            5                    ; point to filter coeff with xoffset
55179f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rax,            [rax + rcx]          ; HFilter
55279f15823c34ae1e423108295e416213200bb280fAndreas Huber
55379f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rdx,            dword ptr arg(6)     ; yoffset
55479f15823c34ae1e423108295e416213200bb280fAndreas Huber
55579f15823c34ae1e423108295e416213200bb280fAndreas Huber        cmp             rdx,            0                    ; skip second_pass filter if yoffset=0
55679f15823c34ae1e423108295e416213200bb280fAndreas Huber        je              filter_block2d_bil_var_sse2_fp_only
55779f15823c34ae1e423108295e416213200bb280fAndreas Huber
55879f15823c34ae1e423108295e416213200bb280fAndreas Huber        shl             rdx,            5
55979f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdx,            [rdx + rcx]          ; VFilter
56079f15823c34ae1e423108295e416213200bb280fAndreas Huber
56179f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rsi,            arg(0)               ;ref_ptr
56279f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rdi,            arg(2)               ;src_ptr
56379f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rcx,            dword ptr arg(4)     ;Height
56490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
56590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            xmm0,           xmm0                 ;
56679f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm1,           QWORD PTR [rsi]      ;
56779f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm3,           QWORD PTR [rsi+1]    ;
56890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
56990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm1,           xmm0                 ;
57079f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmullw          xmm1,           [rax]                ;
57190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm3,           xmm0
57290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw          xmm3,           [rax+16]             ;
57390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
57479f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm3                 ;
57579f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm4                 ;
57679f15823c34ae1e423108295e416213200bb280fAndreas Huber        psraw           xmm1,           xmm_filter_shift     ;
57790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa          xmm5,           xmm1
57879f15823c34ae1e423108295e416213200bb280fAndreas Huber
57979f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rbx,            dword ptr arg(1) ;ref_pixels_per_line
58079f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rbx]
58179f15823c34ae1e423108295e416213200bb280fAndreas Huber%if ABI_IS_32BIT=0
58279f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          r9,             dword ptr arg(3) ;src_pixels_per_line
58390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%endif
58490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
58579f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_var_sse2_loop:
58690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            xmm1,           QWORD PTR [rsi]               ;
58790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            xmm3,           QWORD PTR [rsi+1]             ;
58890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
58990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm1,           xmm0                 ;
59090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw          xmm1,           [rax]               ;
59190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm3,           xmm0                 ;
59290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw          xmm3,           [rax+16]             ;
59390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
59490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw           xmm1,           xmm3                 ;
59579f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm4               ;
59690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psraw           xmm1,           xmm_filter_shift    ;
59790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
59879f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa          xmm3,           xmm5                 ;
59990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdqa          xmm5,           xmm1                 ;
60090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
60179f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmullw          xmm3,           [rdx]               ;
60290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw          xmm1,           [rdx+16]             ;
60390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw           xmm1,           xmm3                 ;
60479f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm4                 ;
60590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psraw           xmm1,           xmm_filter_shift    ;
60690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
60790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            xmm3,           QWORD PTR [rdi]               ;
60890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm3,           xmm0                 ;
60990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
61090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw           xmm1,           xmm3                 ;
61190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw           xmm6,           xmm1                 ;
61290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
61390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd         xmm1,           xmm1                 ;
61490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           xmm7,           xmm1                 ;
61590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
61679f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rbx]          ;ref_pixels_per_line
61790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%if ABI_IS_32BIT
61879f15823c34ae1e423108295e416213200bb280fAndreas Huber        add             rdi,            dword ptr arg(3)     ;src_pixels_per_line
61990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%else
62079f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdi,            [rdi + r9]
62190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%endif
62290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
62390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        sub             rcx,            1                   ;
62490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        jnz             filter_block2d_bil_var_sse2_loop       ;
62590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
62679f15823c34ae1e423108295e416213200bb280fAndreas Huber        jmp             filter_block2d_bil_variance
62779f15823c34ae1e423108295e416213200bb280fAndreas Huber
62879f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_var_sse2_sp_only:
62979f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rdx,            dword ptr arg(6)     ; yoffset
63079f15823c34ae1e423108295e416213200bb280fAndreas Huber
63179f15823c34ae1e423108295e416213200bb280fAndreas Huber        cmp             rdx,            0                    ; skip all if both xoffset=0 and yoffset=0
63279f15823c34ae1e423108295e416213200bb280fAndreas Huber        je              filter_block2d_bil_var_sse2_full_pixel
63379f15823c34ae1e423108295e416213200bb280fAndreas Huber
63479f15823c34ae1e423108295e416213200bb280fAndreas Huber        shl             rdx,            5
63579f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdx,            [rdx + rcx]          ; VFilter
63679f15823c34ae1e423108295e416213200bb280fAndreas Huber
63779f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rsi,            arg(0)               ;ref_ptr
63879f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rdi,            arg(2)               ;src_ptr
63979f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rcx,            dword ptr arg(4)     ;Height
64079f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rax,            dword ptr arg(1)     ;ref_pixels_per_line
64179f15823c34ae1e423108295e416213200bb280fAndreas Huber
64279f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor            xmm0,           xmm0                 ;
64379f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm1,           QWORD PTR [rsi]      ;
64479f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm1,           xmm0                 ;
64579f15823c34ae1e423108295e416213200bb280fAndreas Huber
64679f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rbx,            dword ptr arg(3)     ;src_pixels_per_line
64779f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rax]
64879f15823c34ae1e423108295e416213200bb280fAndreas Huber
64979f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_sp_only_loop:
65079f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm3,           QWORD PTR [rsi]             ;
65179f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm3,           xmm0                 ;
65279f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa          xmm5,           xmm3
65379f15823c34ae1e423108295e416213200bb280fAndreas Huber
65479f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmullw          xmm1,           [rdx]               ;
65579f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmullw          xmm3,           [rdx+16]             ;
65679f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm3                 ;
65779f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm4                 ;
65879f15823c34ae1e423108295e416213200bb280fAndreas Huber        psraw           xmm1,           xmm_filter_shift    ;
65979f15823c34ae1e423108295e416213200bb280fAndreas Huber
66079f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm3,           QWORD PTR [rdi]               ;
66179f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm3,           xmm0                 ;
66279f15823c34ae1e423108295e416213200bb280fAndreas Huber
66379f15823c34ae1e423108295e416213200bb280fAndreas Huber        psubw           xmm1,           xmm3                 ;
66479f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm6,           xmm1                 ;
66579f15823c34ae1e423108295e416213200bb280fAndreas Huber
66679f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmaddwd         xmm1,           xmm1                 ;
66779f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd           xmm7,           xmm1                 ;
66879f15823c34ae1e423108295e416213200bb280fAndreas Huber
66979f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa          xmm1,           xmm5                 ;
67079f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rax]          ;ref_pixels_per_line
67179f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdi,            [rdi + rbx]          ;src_pixels_per_line
67279f15823c34ae1e423108295e416213200bb280fAndreas Huber
67379f15823c34ae1e423108295e416213200bb280fAndreas Huber        sub             rcx,            1                   ;
67479f15823c34ae1e423108295e416213200bb280fAndreas Huber        jnz             filter_block2d_bil_sp_only_loop       ;
67579f15823c34ae1e423108295e416213200bb280fAndreas Huber
67679f15823c34ae1e423108295e416213200bb280fAndreas Huber        jmp             filter_block2d_bil_variance
67779f15823c34ae1e423108295e416213200bb280fAndreas Huber
67879f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_var_sse2_full_pixel:
67979f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rsi,            arg(0)               ;ref_ptr
68079f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rdi,            arg(2)               ;src_ptr
68179f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rcx,            dword ptr arg(4)     ;Height
68279f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rax,            dword ptr arg(1)     ;ref_pixels_per_line
68379f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rbx,            dword ptr arg(3)     ;src_pixels_per_line
68479f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor            xmm0,           xmm0                 ;
68579f15823c34ae1e423108295e416213200bb280fAndreas Huber
68679f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_full_pixel_loop:
68779f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm1,           QWORD PTR [rsi]               ;
68879f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm1,           xmm0                 ;
68979f15823c34ae1e423108295e416213200bb280fAndreas Huber
69079f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm2,           QWORD PTR [rdi]               ;
69179f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm2,           xmm0                 ;
69279f15823c34ae1e423108295e416213200bb280fAndreas Huber
69379f15823c34ae1e423108295e416213200bb280fAndreas Huber        psubw           xmm1,           xmm2                 ;
69479f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm6,           xmm1                 ;
69579f15823c34ae1e423108295e416213200bb280fAndreas Huber
69679f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmaddwd         xmm1,           xmm1                 ;
69779f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd           xmm7,           xmm1                 ;
69879f15823c34ae1e423108295e416213200bb280fAndreas Huber
69979f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rax]          ;ref_pixels_per_line
70079f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdi,            [rdi + rbx]          ;src_pixels_per_line
70179f15823c34ae1e423108295e416213200bb280fAndreas Huber
70279f15823c34ae1e423108295e416213200bb280fAndreas Huber        sub             rcx,            1                   ;
70379f15823c34ae1e423108295e416213200bb280fAndreas Huber        jnz             filter_block2d_bil_full_pixel_loop       ;
70479f15823c34ae1e423108295e416213200bb280fAndreas Huber
70579f15823c34ae1e423108295e416213200bb280fAndreas Huber        jmp             filter_block2d_bil_variance
70679f15823c34ae1e423108295e416213200bb280fAndreas Huber
70779f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_var_sse2_fp_only:
70879f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rsi,            arg(0)               ;ref_ptr
70979f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rdi,            arg(2)               ;src_ptr
71079f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rcx,            dword ptr arg(4)     ;Height
71179f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rdx,            dword ptr arg(1)     ;ref_pixels_per_line
71279f15823c34ae1e423108295e416213200bb280fAndreas Huber
71379f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor            xmm0,           xmm0                 ;
71479f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rbx,            dword ptr arg(3)     ;src_pixels_per_line
71579f15823c34ae1e423108295e416213200bb280fAndreas Huber
71679f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_fp_only_loop:
71779f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm1,           QWORD PTR [rsi]       ;
71879f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm3,           QWORD PTR [rsi+1]     ;
71979f15823c34ae1e423108295e416213200bb280fAndreas Huber
72079f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm1,           xmm0                 ;
72179f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmullw          xmm1,           [rax]               ;
72279f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm3,           xmm0                 ;
72379f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmullw          xmm3,           [rax+16]             ;
72479f15823c34ae1e423108295e416213200bb280fAndreas Huber
72579f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm3                 ;
72679f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm1,           xmm4  ;
72779f15823c34ae1e423108295e416213200bb280fAndreas Huber        psraw           xmm1,           xmm_filter_shift    ;
72879f15823c34ae1e423108295e416213200bb280fAndreas Huber
72979f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm3,           QWORD PTR [rdi]     ;
73079f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm3,           xmm0                 ;
73179f15823c34ae1e423108295e416213200bb280fAndreas Huber
73279f15823c34ae1e423108295e416213200bb280fAndreas Huber        psubw           xmm1,           xmm3                 ;
73379f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm6,           xmm1                 ;
73479f15823c34ae1e423108295e416213200bb280fAndreas Huber
73579f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmaddwd         xmm1,           xmm1                 ;
73679f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd           xmm7,           xmm1                 ;
73779f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rdx]
73879f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdi,            [rdi + rbx]          ;src_pixels_per_line
73979f15823c34ae1e423108295e416213200bb280fAndreas Huber
74079f15823c34ae1e423108295e416213200bb280fAndreas Huber        sub             rcx,            1                   ;
74179f15823c34ae1e423108295e416213200bb280fAndreas Huber        jnz             filter_block2d_bil_fp_only_loop       ;
74279f15823c34ae1e423108295e416213200bb280fAndreas Huber
74379f15823c34ae1e423108295e416213200bb280fAndreas Huber        jmp             filter_block2d_bil_variance
74490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
74579f15823c34ae1e423108295e416213200bb280fAndreas Huberfilter_block2d_bil_variance:
74690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm6,            xmm6                ;
74790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm7,            xmm7                ;
74890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
74990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrldq          xmm6,           8
75090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrldq          xmm7,           8
75190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
75290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm2,            xmm6
75390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm3,            xmm7
75490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
75590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw           mm6,            mm2
75690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm7,            mm3
75790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
75890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            mm3,            mm3                 ;
75990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            mm2,            mm2                 ;
76090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
76190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd       mm2,            mm6                 ;
76290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd       mm3,            mm6                 ;
76390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
76490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm2,            mm3                 ;
76590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            mm6,            mm2                 ;
76690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
76790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrlq           mm6,            32                  ;
76890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm2,            mm6                 ;
76990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
77090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrad           mm2,            16                  ;
77190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            mm4,            mm7                 ;
77290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
77390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrlq           mm4,            32                  ;
77490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm4,            mm7                 ;
77590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
77690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov             rsi,            arg(7) ; sum
77790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov             rdi,            arg(8) ; sumsquared
77890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
77990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movd            [rsi],          mm2    ; xsum
78090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movd            [rdi],          mm4    ; xxsum
78190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
78290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; begin epilog
78379f15823c34ae1e423108295e416213200bb280fAndreas Huber    pop rbx
78490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rdi
78590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rsi
78690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    RESTORE_GOT
78779f15823c34ae1e423108295e416213200bb280fAndreas Huber    RESTORE_XMM
78890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    UNSHADOW_ARGS
78990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop         rbp
79090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ret
79190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
79290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;void vp8_half_horiz_vert_variance8x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
;
; For a block 8 pixels wide and Height rows tall, each predicted pixel is
; built with two levels of pavgb (rounded byte average): first ref[x] with
; ref[x+1] on every row, then that row average with the row average of the
; line below. The co-located src pixel is subtracted from the prediction and
;     *sum        receives the sum of the differences,
;     *sumsquared receives the sum of the squared differences.
;
; Memory read: Height+1 rows of 9 bytes from ref_ptr, Height rows of 8 bytes
; from src_ptr.
;
; Notes:
;  - The row loop is bottom-tested (sub/jnz), so Height == 0 is not handled.
;  - Differences are accumulated in eight 16-bit lanes (paddw) and the final
;    horizontal add of those lanes is also 16 bits wide.
;  - xmm6/xmm7 hold the accumulators. They are callee-saved under the
;    Microsoft x64 ABI, so the body is bracketed by SAVE_XMM / RESTORE_XMM in
;    the same positions vp8_half_horiz_vert_variance16x_h_sse2 uses them.
;  - The final reduction runs in MMX registers and no emms is issued here.
;    NOTE(review): presumably callers clear the MMX state - confirm.
global sym(vp8_half_horiz_vert_variance8x_h_sse2)
sym(vp8_half_horiz_vert_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM                                ; body clobbers xmm6 / xmm7
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

%if ABI_IS_32BIT=0
    ; 64-bit builds keep both strides in registers for the whole loop
    movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line
    movsxd          r9, dword ptr arg(3) ;src_pixels_per_line
%endif

        pxor            xmm6,           xmm6                ;  sum accumulator (8 x 16-bit)
        pxor            xmm7,           xmm7                ;  sse accumulator (4 x 32-bit)
        mov             rsi,            arg(0) ;ref_ptr

        mov             rdi,            arg(2) ;src_ptr
        movsxd          rcx,            dword ptr arg(4) ;Height = rows left to process

        pxor            xmm0,           xmm0                ;  zero, used to widen bytes to words

        ; horizontal average of the first ref row; carried in xmm5 into the loop
        movq            xmm5,           QWORD PTR [rsi]     ;  xmm5 = s0,s1,s2..s7
        movq            xmm3,           QWORD PTR [rsi+1]   ;  xmm3 = s1,s2,s3..s8
        pavgb           xmm5,           xmm3                ;  xmm5 = avg(xmm5,xmm3) horizontal line 1

%if ABI_IS_32BIT
        add             rsi,            dword ptr arg(1) ;ref_pixels_per_line    ;  next source
%else
        add             rsi, r8
%endif

vp8_half_horiz_vert_variance8x_h_1:
        ; on entry: xmm5 = horizontal average of the previous ref row,
        ;           rsi  = current ref row, rdi = current src row

        movq            xmm1,           QWORD PTR [rsi]     ;
        movq            xmm2,           QWORD PTR [rsi+1]   ;
        pavgb           xmm1,           xmm2                ;  xmm1 = avg(xmm1,xmm2) horizontal line i+1

        pavgb           xmm5,           xmm1                ;  xmm5 = vertical average of the two rows
        punpcklbw       xmm5,           xmm0                ;  xmm5 = words of above

        movq            xmm3,           QWORD PTR [rdi]     ;  xmm3 = d0,d1,d2..d7
        punpcklbw       xmm3,           xmm0                ;  xmm3 = words of above

        psubw           xmm5,           xmm3                ;  xmm5 = prediction - src
        paddw           xmm6,           xmm5                ;  xmm6 += column differences
        pmaddwd         xmm5,           xmm5                ;  square and add adjacent pairs
        paddd           xmm7,           xmm5                ;  xmm7 += squared column differences

        movdqa          xmm5,           xmm1                ;  save xmm1 for use on the next row

%if ABI_IS_32BIT
        add             esi,            dword ptr arg(1) ;ref_pixels_per_line    ;  next source
        add             edi,            dword ptr arg(3) ;src_pixels_per_line    ;  next destination
%else
        add             rsi, r8
        add             rdi, r9
%endif

        sub             rcx,            1                   ;
        jnz             vp8_half_horiz_vert_variance8x_h_1     ;

        ; fold the upper halves of both accumulators onto the lower halves
        movdq2q         mm6,            xmm6                ;  low four word sums
        movdq2q         mm7,            xmm7                ;  low two dword sse sums

        psrldq          xmm6,           8
        psrldq          xmm7,           8

        movdq2q         mm2,            xmm6                ;  high four word sums
        movdq2q         mm3,            xmm7                ;  high two dword sse sums

        paddw           mm6,            mm2
        paddd           mm7,            mm3

        pxor            mm3,            mm3                 ;
        pxor            mm2,            mm2                 ;

        ; place each word sum in the upper half of a dword so that the
        ; arithmetic shift below sign-extends the final total
        punpcklwd       mm2,            mm6                 ;
        punpckhwd       mm3,            mm6                 ;

        paddd           mm2,            mm3                 ;
        movq            mm6,            mm2                 ;

        psrlq           mm6,            32                  ;
        paddd           mm2,            mm6                 ;  low dword = total << 16

        psrad           mm2,            16                  ;  low dword = signed total
        movq            mm4,            mm7                 ;

        psrlq           mm4,            32                  ;
        paddd           mm4,            mm7                 ;  low dword = total sse

        mov             rsi,            arg(5) ; sum
        mov             rdi,            arg(6) ; sumsquared

        movd            [rsi],          mm2                 ;
        movd            [rdi],          mm4                 ;


    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
91290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;void vp8_half_horiz_vert_variance16x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
;
; 16-pixel-wide sum / sum-of-squares of (prediction - src) over Height rows.
; The prediction is the pavgb (rounded byte average) of two horizontally
; averaged ref rows: avg(ref[x], ref[x+1]) on row i and on row i+1.
;
; Register use:
;   rsi = ref row pointer       rax = ref stride
;   rdi = src row pointer       rdx = src stride
;   rcx = rows remaining (bottom-tested loop)
;   xmm0 = zero                 xmm5 = horizontal average of the previous ref row
;   xmm6 = 8 x 16-bit difference sums
;   xmm7 = 4 x 32-bit squared-difference sums
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
sym(vp8_half_horiz_vert_variance16x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

        mov             rsi,            arg(0)              ;  ref_ptr
        movsxd          rax,            dword ptr arg(1)    ;  ref_pixels_per_line
        mov             rdi,            arg(2)              ;  src_ptr
        movsxd          rdx,            dword ptr arg(3)    ;  src_pixels_per_line
        movsxd          rcx,            dword ptr arg(4)    ;  Height

        pxor            xmm0,           xmm0                ;  constant zero
        pxor            xmm6,           xmm6                ;  difference sums
        pxor            xmm7,           xmm7                ;  squared-difference sums

        ; prime xmm5 with the horizontal average of the first ref row
        movdqu          xmm5,           XMMWORD PTR [rsi]
        movdqu          xmm3,           XMMWORD PTR [rsi+1]
        lea             rsi,            [rsi + rax]
        pavgb           xmm5,           xmm3

.next_row:
        movdqu          xmm1,           XMMWORD PTR [rsi]   ;  ref row i+1
        movdqu          xmm2,           XMMWORD PTR [rsi+1] ;  ref row i+1, one pixel right
        movdqu          xmm3,           XMMWORD PTR [rdi]   ;  16 src pixels

        lea             rsi,            [rsi + rax]         ;  advance both row pointers
        lea             rdi,            [rdi + rdx]

        pavgb           xmm1,           xmm2                ;  horizontal average of row i+1
        pavgb           xmm5,           xmm1                ;  vertical average = prediction

        movdqa          xmm2,           xmm3                ;  copies for the upper eight pixels
        movdqa          xmm4,           xmm5

        punpcklbw       xmm5,           xmm0                ;  prediction, pixels 0..7 as words
        punpckhbw       xmm4,           xmm0                ;  prediction, pixels 8..15 as words
        punpcklbw       xmm3,           xmm0                ;  src, pixels 0..7 as words
        punpckhbw       xmm2,           xmm0                ;  src, pixels 8..15 as words

        psubw           xmm5,           xmm3                ;  differences 0..7
        psubw           xmm4,           xmm2                ;  differences 8..15

        paddw           xmm6,           xmm5
        paddw           xmm6,           xmm4

        pmaddwd         xmm5,           xmm5                ;  squares, adjacent pairs added
        pmaddwd         xmm4,           xmm4
        paddd           xmm7,           xmm5
        paddd           xmm7,           xmm4

        movdqa          xmm5,           xmm1                ;  row i+1 becomes the previous row

        sub             rcx,            1
        jnz             .next_row

        ; sign-extend the eight word sums to dwords: each word is placed in
        ; the top half of a dword (xmm0 is still zero) and shifted back down
        pxor        xmm1,           xmm1
        punpckhwd   xmm1,           xmm6
        punpcklwd   xmm0,           xmm6
        psrad       xmm1,           16
        psrad       xmm0,           16
        paddd       xmm0,           xmm1                    ;  four dword partial sums

        ; horizontal add: dword 0 ends up holding the total
        movdqa      xmm1,           xmm0
        psrldq      xmm1,           8
        paddd       xmm0,           xmm1
        movdqa      xmm1,           xmm0
        psrldq      xmm1,           4
        paddd       xmm0,           xmm1

        ; same horizontal add for the four squared-difference dwords
        movdqa      xmm6,           xmm7
        psrldq      xmm6,           8
        paddd       xmm6,           xmm7
        movdqa      xmm7,           xmm6
        psrldq      xmm7,           4
        paddd       xmm6,           xmm7

        mov         rsi,            arg(5) ;[Sum]
        mov         rdi,            arg(6) ;[SSE]

        movd        [rsi],       xmm0
        movd        [rdi],       xmm6

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
102779f15823c34ae1e423108295e416213200bb280fAndreas Huber
102879f15823c34ae1e423108295e416213200bb280fAndreas Huber
102979f15823c34ae1e423108295e416213200bb280fAndreas Huber;void vp8_half_vert_variance8x_h_sse2
103090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;(
103190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned char *ref_ptr,
103290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int ref_pixels_per_line,
103390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned char *src_ptr,
103490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int src_pixels_per_line,
103590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned int Height,
103690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    int *sum,
103790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;    unsigned int *sumsquared
103890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;)
103979f15823c34ae1e423108295e416213200bb280fAndreas Huberglobal sym(vp8_half_vert_variance8x_h_sse2)
104079f15823c34ae1e423108295e416213200bb280fAndreas Hubersym(vp8_half_vert_variance8x_h_sse2):
104190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push        rbp
104290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov         rbp, rsp
104390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    SHADOW_ARGS_TO_STACK 7
104490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    GET_GOT     rbx
104590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push rsi
104690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push rdi
104790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; end prolog
104890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
104990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%if ABI_IS_32BIT=0
105090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line
105190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    movsxd          r9, dword ptr arg(3) ;src_pixels_per_line
105290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%endif
105390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
105490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            xmm6,           xmm6                ;  error accumulator
105590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            xmm7,           xmm7                ;  sse eaccumulator
105690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov             rsi,            arg(0) ;ref_ptr              ;
105790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
105890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov             rdi,            arg(2) ;src_ptr              ;
105990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movsxd          rcx,            dword ptr arg(4) ;Height              ;
106090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movsxd          rax,            dword ptr arg(1) ;ref_pixels_per_line
106190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
106290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            xmm0,           xmm0                ;
106379f15823c34ae1e423108295e416213200bb280fAndreas Hubervp8_half_vert_variance8x_h_1:
106490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            xmm5,           QWORD PTR [rsi]     ;  xmm5 = s0,s1,s2..s8
106590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            xmm3,           QWORD PTR [rsi+rax] ;  xmm3 = s1,s2,s3..s9
106690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
106790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pavgb           xmm5,           xmm3                ;  xmm5 = avg(xmm1,xmm3)
106890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm5,           xmm0                ;  xmm5 = words of above
106990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
107090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            xmm3,           QWORD PTR [rdi]     ;  xmm3 = d0,d1,d2..d8
107190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklbw       xmm3,           xmm0                ;  xmm3 = words of above
107290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
107390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw           xmm5,           xmm3                ;  xmm5 -= xmm3
107490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw           xmm6,           xmm5                ;  xmm6 += accumulated column differences
107590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmaddwd         xmm5,           xmm5                ;  xmm5 *= xmm5
107690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           xmm7,           xmm5                ;  xmm7 += accumulated square column differences
107790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
107890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%if ABI_IS_32BIT
107990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        add             esi,            dword ptr arg(1) ;ref_pixels_per_line    ;  next source
108090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        add             edi,            dword ptr arg(3) ;src_pixels_per_line    ;  next destination
108190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%else
108290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        add             rsi, r8
108390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        add             rdi, r9
108490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%endif
108590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
108690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        sub             rcx,            1                   ;
108779f15823c34ae1e423108295e416213200bb280fAndreas Huber        jnz             vp8_half_vert_variance8x_h_1          ;
108890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
108990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm6,            xmm6                ;
109090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm7,            xmm7                ;
109190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
109290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrldq          xmm6,           8
109390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrldq          xmm7,           8
109490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
109590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm2,            xmm6
109690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movdq2q         mm3,            xmm7
109790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
109890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw           mm6,            mm2
109990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm7,            mm3
110090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
110190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            mm3,            mm3                 ;
110290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor            mm2,            mm2                 ;
110390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
110490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd       mm2,            mm6                 ;
110590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd       mm3,            mm6                 ;
110690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
110790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm2,            mm3                 ;
110890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            mm6,            mm2                 ;
110990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
111090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrlq           mm6,            32                  ;
111190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm2,            mm6                 ;
111290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
111390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrad           mm2,            16                  ;
111490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq            mm4,            mm7                 ;
111590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
111690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psrlq           mm4,            32                  ;
111790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddd           mm4,            mm7                 ;
111890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
111990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov             rsi,            arg(5) ; sum
112090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov             rdi,            arg(6) ; sumsquared
112190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
112290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movd            [rsi],          mm2                 ;
112390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movd            [rdi],          mm4                 ;
112490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
112590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
112690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; begin epilog
112790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rdi
112890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rsi
112990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    RESTORE_GOT
113090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    UNSHADOW_ARGS
113190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop         rbp
113290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ret
113390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;void vp8_half_vert_variance16x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
;
; 16-pixel-wide variance helper with vertical half-pel interpolation.
; For each of Height rows, every ref byte is averaged (pavgb) with the byte
; one ref stride below it, the matching src byte is subtracted, and the
; difference is accumulated:
;   *sum        = total of the differences
;   *sumsquared = total of the squared differences
; Height + 1 ref rows are read (one row is loaded before the loop, one more
; per iteration). The row count is tested at the bottom of the loop, so the
; body always runs at least once.
;
; Register roles inside the loop:
;   rsi = ref row below the current one   rax = ref stride
;   rdi = current src row                 rdx = src stride
;   rcx = rows remaining
;   xmm0 = zero   xmm5 = previous ref row (carried between iterations)
;   xmm6 = eight 16-bit difference sums   xmm7 = four 32-bit square sums
global sym(vp8_half_vert_variance16x_h_sse2)
sym(vp8_half_vert_variance16x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

        ; fetch the arguments
        mov             rsi, arg(0)                 ; ref_ptr
        mov             rdi, arg(2)                 ; src_ptr
        movsxd          rax, dword ptr arg(1)       ; ref_pixels_per_line
        movsxd          rdx, dword ptr arg(3)       ; src_pixels_per_line
        movsxd          rcx, dword ptr arg(4)       ; Height

        ; clear the constant-zero register and both accumulators
        pxor            xmm0, xmm0
        pxor            xmm6, xmm6
        pxor            xmm7, xmm7

        ; prime xmm5 with the first ref row and step to the second
        movdqu          xmm5, XMMWORD PTR [rsi]
        lea             rsi, [rsi + rax]

.next_row:
        movdqu          xmm3, XMMWORD PTR [rsi]     ; ref row below

        ; byte-wise average of the two ref rows, widened to words
        pavgb           xmm5, xmm3
        movdqa          xmm4, xmm5
        punpcklbw       xmm5, xmm0                  ; pixels 0..7
        punpckhbw       xmm4, xmm0                  ; pixels 8..15

        ; pixels 0..7: difference, running sum, running sum of squares
        movq            xmm2, QWORD PTR [rdi]
        punpcklbw       xmm2, xmm0
        psubw           xmm5, xmm2
        paddw           xmm6, xmm5
        pmaddwd         xmm5, xmm5
        paddd           xmm7, xmm5

        ; pixels 8..15: same again
        movq            xmm2, QWORD PTR [rdi+8]
        punpcklbw       xmm2, xmm0
        psubw           xmm4, xmm2
        paddw           xmm6, xmm4
        pmaddwd         xmm4, xmm4
        paddd           xmm7, xmm4

        movdqa          xmm5, xmm3                  ; lower row becomes the upper row next time

        add             rsi, rax
        add             rdi, rdx

        sub             rcx, 1
        jnz             .next_row

        pxor            xmm1, xmm1
        pxor            xmm5, xmm5                  ; zero operand for the dword interleaves

        ; Horizontal add of the eight word sums in xmm6. Interleaving under
        ; zeroed registers puts each word in the top half of a dword, so the
        ; arithmetic shift sign-extends it.
        punpcklwd       xmm0, xmm6
        punpckhwd       xmm1, xmm6
        psrad           xmm0, 16
        psrad           xmm1, 16
        paddd           xmm0, xmm1
        movdqa          xmm1, xmm0
        punpckldq       xmm0, xmm5
        punpckhdq       xmm1, xmm5
        paddd           xmm0, xmm1
        movdqa          xmm1, xmm0
        psrldq          xmm1, 8
        paddd           xmm0, xmm1                  ; low dword = sum

        ; Horizontal add of the four dword square sums in xmm7.
        movdqa          xmm6, xmm7
        punpckldq       xmm6, xmm5
        punpckhdq       xmm7, xmm5
        paddd           xmm6, xmm7
        movdqa          xmm7, xmm6
        psrldq          xmm7, 8
        paddd           xmm6, xmm7                  ; low dword = sum of squares

        ; write the results
        mov             rsi, arg(5)                 ; sum
        movd            [rsi], xmm0
        mov             rdi, arg(6)                 ; sumsquared
        movd            [rdi], xmm6

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
124079f15823c34ae1e423108295e416213200bb280fAndreas Huber
124179f15823c34ae1e423108295e416213200bb280fAndreas Huber
;void vp8_half_horiz_variance8x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
;
; 8-pixel-wide variance helper with horizontal half-pel interpolation.
; For each of Height rows, every ref byte is averaged (pavgb) with its
; right-hand neighbour, the matching src byte is subtracted, and the
; difference is accumulated:
;   *sum        = total of the differences
;   *sumsquared = total of the squared differences
; Nine ref bytes are read per row ([rsi] and [rsi+1], 8 bytes each).
;
; Notes:
;  - xmm6/xmm7 are used as accumulators. They are callee-saved under the
;    Microsoft x64 calling convention, so the prologue/epilogue wrap them in
;    SAVE_XMM / RESTORE_XMM, placed exactly as in the 16-pixel-wide variants
;    in this file. (The macros come from x86_abi_support.asm, which is not
;    visible here; presumably they expand to nothing on other ABIs - verify.)
;  - The final sum is folded with 16-bit adds and then sign-extended, so it
;    is exact only while the total fits in a signed 16-bit value.
;  - The row count is tested at the bottom of the loop, so the body always
;    runs at least once.
;  - The reduction uses MMX registers and no emms is executed here.
;    NOTE(review): callers presumably clear the MMX state before any x87
;    use - confirm.
global sym(vp8_half_horiz_variance8x_h_sse2)
sym(vp8_half_horiz_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

%if ABI_IS_32BIT=0
    ; 64-bit builds keep both strides in registers for the loop
    movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line
    movsxd          r9, dword ptr arg(3) ;src_pixels_per_line
%endif

        pxor            xmm6,           xmm6                ;  sum accumulator (8 words)
        pxor            xmm7,           xmm7                ;  sse accumulator (4 dwords)
        mov             rsi,            arg(0) ;ref_ptr

        mov             rdi,            arg(2) ;src_ptr
        movsxd          rcx,            dword ptr arg(4) ;Height

        pxor            xmm0,           xmm0                ;  zero, used to widen bytes to words
vp8_half_horiz_variance8x_h_1:
        movq            xmm5,           QWORD PTR [rsi]     ;  xmm5 = r0,r1,r2..r7
        movq            xmm3,           QWORD PTR [rsi+1]   ;  xmm3 = r1,r2,r3..r8

        pavgb           xmm5,           xmm3                ;  xmm5 = avg(xmm5,xmm3)
        punpcklbw       xmm5,           xmm0                ;  xmm5 = words of above

        movq            xmm3,           QWORD PTR [rdi]     ;  xmm3 = s0,s1,s2..s7
        punpcklbw       xmm3,           xmm0                ;  xmm3 = words of above

        psubw           xmm5,           xmm3                ;  xmm5 = interpolated ref - src
        paddw           xmm6,           xmm5                ;  xmm6 += differences
        pmaddwd         xmm5,           xmm5                ;  square and add adjacent pairs
        paddd           xmm7,           xmm5                ;  xmm7 += squared differences

%if ABI_IS_32BIT
        add             esi,            dword ptr arg(1) ;ref_pixels_per_line    ;  next ref row
        add             edi,            dword ptr arg(3) ;src_pixels_per_line    ;  next src row
%else
        add             rsi, r8                             ;  next ref row
        add             rdi, r9                             ;  next src row
%endif
        sub             rcx,            1
        jnz             vp8_half_horiz_variance8x_h_1

        ; fold the high 64 bits of each accumulator onto the low 64 bits (in MMX)
        movdq2q         mm6,            xmm6
        movdq2q         mm7,            xmm7

        psrldq          xmm6,           8
        psrldq          xmm7,           8

        movdq2q         mm2,            xmm6
        movdq2q         mm3,            xmm7

        paddw           mm6,            mm2                 ;  4 word sums
        paddd           mm7,            mm3                 ;  2 dword square sums

        pxor            mm3,            mm3
        pxor            mm2,            mm2

        ; place each word sum in the upper half of a dword, add them all there,
        ; then shift back down arithmetically to sign-extend the 16-bit total
        punpcklwd       mm2,            mm6
        punpckhwd       mm3,            mm6

        paddd           mm2,            mm3
        movq            mm6,            mm2

        psrlq           mm6,            32
        paddd           mm2,            mm6

        psrad           mm2,            16                  ;  low dword = sum
        movq            mm4,            mm7

        psrlq           mm4,            32
        paddd           mm4,            mm7                 ;  low dword = sum of squares

        mov             rsi,            arg(5) ; sum
        mov             rdi,            arg(6) ; sumsquared

        movd            [rsi],          mm2
        movd            [rdi],          mm4


    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
134490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
134579f15823c34ae1e423108295e416213200bb280fAndreas Huber;void vp8_half_horiz_variance16x_h_sse2
134679f15823c34ae1e423108295e416213200bb280fAndreas Huber;(
134779f15823c34ae1e423108295e416213200bb280fAndreas Huber;    unsigned char *ref_ptr,
134879f15823c34ae1e423108295e416213200bb280fAndreas Huber;    int ref_pixels_per_line,
134979f15823c34ae1e423108295e416213200bb280fAndreas Huber;    unsigned char *src_ptr,
135079f15823c34ae1e423108295e416213200bb280fAndreas Huber;    int src_pixels_per_line,
135179f15823c34ae1e423108295e416213200bb280fAndreas Huber;    unsigned int Height,
135279f15823c34ae1e423108295e416213200bb280fAndreas Huber;    int *sum,
135379f15823c34ae1e423108295e416213200bb280fAndreas Huber;    unsigned int *sumsquared
135479f15823c34ae1e423108295e416213200bb280fAndreas Huber;)
135579f15823c34ae1e423108295e416213200bb280fAndreas Huberglobal sym(vp8_half_horiz_variance16x_h_sse2)
135679f15823c34ae1e423108295e416213200bb280fAndreas Hubersym(vp8_half_horiz_variance16x_h_sse2):
135779f15823c34ae1e423108295e416213200bb280fAndreas Huber    push        rbp
135879f15823c34ae1e423108295e416213200bb280fAndreas Huber    mov         rbp, rsp
135979f15823c34ae1e423108295e416213200bb280fAndreas Huber    SHADOW_ARGS_TO_STACK 7
136079f15823c34ae1e423108295e416213200bb280fAndreas Huber    SAVE_XMM
136179f15823c34ae1e423108295e416213200bb280fAndreas Huber    GET_GOT     rbx
136279f15823c34ae1e423108295e416213200bb280fAndreas Huber    push rsi
136379f15823c34ae1e423108295e416213200bb280fAndreas Huber    push rdi
136479f15823c34ae1e423108295e416213200bb280fAndreas Huber    ; end prolog
136579f15823c34ae1e423108295e416213200bb280fAndreas Huber
136679f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor            xmm6,           xmm6                ;  error accumulator
136779f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor            xmm7,           xmm7                ;  sse eaccumulator
136879f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rsi,            arg(0) ;ref_ptr              ;
136979f15823c34ae1e423108295e416213200bb280fAndreas Huber
137079f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov             rdi,            arg(2) ;src_ptr              ;
137179f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rcx,            dword ptr arg(4) ;Height              ;
137279f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rax,            dword ptr arg(1) ;ref_pixels_per_line
137379f15823c34ae1e423108295e416213200bb280fAndreas Huber        movsxd          rdx,            dword ptr arg(3)    ;src_pixels_per_line
137479f15823c34ae1e423108295e416213200bb280fAndreas Huber
137579f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor            xmm0,           xmm0                ;
137679f15823c34ae1e423108295e416213200bb280fAndreas Huber
137779f15823c34ae1e423108295e416213200bb280fAndreas Hubervp8_half_horiz_variance16x_h_1:
137879f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqu          xmm5,           XMMWORD PTR [rsi]     ;  xmm5 = s0,s1,s2..s15
137979f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqu          xmm3,           XMMWORD PTR [rsi+1]   ;  xmm3 = s1,s2,s3..s16
138079f15823c34ae1e423108295e416213200bb280fAndreas Huber
138179f15823c34ae1e423108295e416213200bb280fAndreas Huber        pavgb           xmm5,           xmm3                ;  xmm5 = avg(xmm1,xmm3)
138279f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa          xmm1,           xmm5
138379f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm5,           xmm0                ;  xmm5 = words of above
138479f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpckhbw       xmm1,           xmm0
138579f15823c34ae1e423108295e416213200bb280fAndreas Huber
138679f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm3,           QWORD PTR [rdi]     ;  xmm3 = d0,d1,d2..d7
138779f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm3,           xmm0                ;  xmm3 = words of above
138879f15823c34ae1e423108295e416213200bb280fAndreas Huber        movq            xmm2,           QWORD PTR [rdi+8]
138979f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklbw       xmm2,           xmm0
139079f15823c34ae1e423108295e416213200bb280fAndreas Huber
139179f15823c34ae1e423108295e416213200bb280fAndreas Huber        psubw           xmm5,           xmm3                ;  xmm5 -= xmm3
139279f15823c34ae1e423108295e416213200bb280fAndreas Huber        psubw           xmm1,           xmm2
139379f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm6,           xmm5                ;  xmm6 += accumulated column differences
139479f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddw           xmm6,           xmm1
139579f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmaddwd         xmm5,           xmm5                ;  xmm5 *= xmm5
139679f15823c34ae1e423108295e416213200bb280fAndreas Huber        pmaddwd         xmm1,           xmm1
139779f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd           xmm7,           xmm5                ;  xmm7 += accumulated square column differences
139879f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd           xmm7,           xmm1
139979f15823c34ae1e423108295e416213200bb280fAndreas Huber
140079f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rsi,            [rsi + rax]
140179f15823c34ae1e423108295e416213200bb280fAndreas Huber        lea             rdi,            [rdi + rdx]
140279f15823c34ae1e423108295e416213200bb280fAndreas Huber
140379f15823c34ae1e423108295e416213200bb280fAndreas Huber        sub             rcx,            1                   ;
140479f15823c34ae1e423108295e416213200bb280fAndreas Huber        jnz             vp8_half_horiz_variance16x_h_1        ;
140579f15823c34ae1e423108295e416213200bb280fAndreas Huber
140679f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor        xmm1,           xmm1
140779f15823c34ae1e423108295e416213200bb280fAndreas Huber        pxor        xmm5,           xmm5
140879f15823c34ae1e423108295e416213200bb280fAndreas Huber
140979f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpcklwd   xmm0,           xmm6
141079f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpckhwd   xmm1,           xmm6
141179f15823c34ae1e423108295e416213200bb280fAndreas Huber        psrad       xmm0,           16
141279f15823c34ae1e423108295e416213200bb280fAndreas Huber        psrad       xmm1,           16
141379f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd       xmm0,           xmm1
141479f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa      xmm1,           xmm0
141579f15823c34ae1e423108295e416213200bb280fAndreas Huber
141679f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa      xmm6,           xmm7
141779f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpckldq   xmm6,           xmm5
141879f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpckhdq   xmm7,           xmm5
141979f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd       xmm6,           xmm7
142079f15823c34ae1e423108295e416213200bb280fAndreas Huber
142179f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpckldq   xmm0,           xmm5
142279f15823c34ae1e423108295e416213200bb280fAndreas Huber        punpckhdq   xmm1,           xmm5
142379f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd       xmm0,           xmm1
142479f15823c34ae1e423108295e416213200bb280fAndreas Huber
142579f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa      xmm7,           xmm6
142679f15823c34ae1e423108295e416213200bb280fAndreas Huber        movdqa      xmm1,           xmm0
142779f15823c34ae1e423108295e416213200bb280fAndreas Huber
142879f15823c34ae1e423108295e416213200bb280fAndreas Huber        psrldq      xmm7,           8
142979f15823c34ae1e423108295e416213200bb280fAndreas Huber        psrldq      xmm1,           8
143079f15823c34ae1e423108295e416213200bb280fAndreas Huber
143179f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd       xmm6,           xmm7
143279f15823c34ae1e423108295e416213200bb280fAndreas Huber        paddd       xmm0,           xmm1
143379f15823c34ae1e423108295e416213200bb280fAndreas Huber
143479f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov         rsi,            arg(5) ;[Sum]
143579f15823c34ae1e423108295e416213200bb280fAndreas Huber        mov         rdi,            arg(6) ;[SSE]
143679f15823c34ae1e423108295e416213200bb280fAndreas Huber
143779f15823c34ae1e423108295e416213200bb280fAndreas Huber        movd        [rsi],       xmm0
143879f15823c34ae1e423108295e416213200bb280fAndreas Huber        movd        [rdi],       xmm6
143979f15823c34ae1e423108295e416213200bb280fAndreas Huber
144079f15823c34ae1e423108295e416213200bb280fAndreas Huber    ; begin epilog
144179f15823c34ae1e423108295e416213200bb280fAndreas Huber    pop rdi
144279f15823c34ae1e423108295e416213200bb280fAndreas Huber    pop rsi
144379f15823c34ae1e423108295e416213200bb280fAndreas Huber    RESTORE_GOT
144479f15823c34ae1e423108295e416213200bb280fAndreas Huber    RESTORE_XMM
144579f15823c34ae1e423108295e416213200bb280fAndreas Huber    UNSHADOW_ARGS
144679f15823c34ae1e423108295e416213200bb280fAndreas Huber    pop         rbp
144779f15823c34ae1e423108295e416213200bb280fAndreas Huber    ret
144890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
SECTION_RODATA

; xmm_bi_rd
;   One 16-byte-aligned vector of eight 16-bit words, every lane equal to 64.
;   C view: short xmm_bi_rd[8] = { 64, 64, 64, 64, 64, 64, 64, 64 };
;   NOTE(review): 64 == 1 << (xmm_filter_shift - 1) with xmm_filter_shift = 7,
;   so this is presumably the rounding term added before the filter shift;
;   confirm against the code that loads it.
align 16
xmm_bi_rd:
    times 8 dw 64

; vp8_bilinear_filters_sse2
;   Eight rows of sixteen 16-bit words (32 bytes per row), 16-byte aligned.
;   Each row is the first weight repeated in 8 words, followed by the second
;   weight repeated in 8 words. The two weights of a row always sum to 128;
;   the first weight steps down from 128 to 16 in decrements of 16.
;   NOTE(review): presumably indexed as (offset * 32) bytes from the label;
;   verify against the callers.
align 16
vp8_bilinear_filters_sse2:
    ; row 0: weights 128 / 0
    times 8 dw 128
    times 8 dw 0
    ; row 1: weights 112 / 16
    times 8 dw 112
    times 8 dw 16
    ; row 2: weights 96 / 32
    times 8 dw 96
    times 8 dw 32
    ; row 3: weights 80 / 48
    times 8 dw 80
    times 8 dw 48
    ; row 4: weights 64 / 64
    times 8 dw 64
    times 8 dw 64
    ; row 5: weights 48 / 80
    times 8 dw 48
    times 8 dw 80
    ; row 6: weights 32 / 96
    times 8 dw 32
    times 8 dw 96
    ; row 7: weights 16 / 112
    times 8 dw 16
    times 8 dw 112
1464