;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%include "vpx_ports/x86_abi_support.asm"
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)
;
; Sum of squared differences over 16 signed 16-bit values:
;     sum_{i=0..15} (coeff_ptr[i] - dcoef_ptr[i])^2
;
; In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr.  32 bytes are read from
;        each pointer with movdqa, so both must be 16-byte aligned.
; Out:   rax = 32-bit sum, zero-extended.
; Notes: differences are formed with wrapping 16-bit subtraction (psubw)
;        and the dword accumulation wraps modulo 2^32.
;        Only xmm0-xmm5 are used as scratch: xmm6 and above are
;        callee-saved under the Microsoft x64 calling convention, so
;        staying below them avoids corrupting the caller's registers.
;        SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() / sym() come from
;        the included ABI support file.
;-----------------------------------------------------------------------
global sym(vp8_block_error_xmm)
sym(vp8_block_error_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push rsi
    push rdi
    ; end prolog

        mov         rsi,        arg(0) ;coeff_ptr
        mov         rdi,        arg(1) ;dcoef_ptr

        movdqa      xmm0,       [rsi]       ; coeff[0..7]
        movdqa      xmm1,       [rdi]       ; dcoef[0..7]

        movdqa      xmm2,       [rsi+16]    ; coeff[8..15]
        movdqa      xmm3,       [rdi+16]    ; dcoef[8..15]

        psubw       xmm0,       xmm1        ; diff[0..7]
        psubw       xmm2,       xmm3        ; diff[8..15]

        pmaddwd     xmm0,       xmm0        ; 4 dwords: d0^2+d1^2, ...
        pmaddwd     xmm2,       xmm2

        paddd       xmm0,       xmm2        ; 4 partial sums

        ; Horizontal add of the 4 dword partial sums.  Interleaving with
        ; zero spreads them into the low dword of each 64-bit half so
        ; the final movq sees a clean zero-extended value.
        pxor        xmm5,       xmm5
        movdqa      xmm1,       xmm0

        punpckldq   xmm0,       xmm5        ; [s0, 0, s1, 0]
        punpckhdq   xmm1,       xmm5        ; [s2, 0, s3, 0]

        paddd       xmm0,       xmm1        ; [s0+s2, 0, s1+s3, 0]
        movdqa      xmm1,       xmm0

        psrldq      xmm0,       8           ; [s1+s3, 0, 0, 0]
        paddd       xmm0,       xmm1        ; low dword = s0+s1+s2+s3

        movq        rax,        xmm0

    pop rdi
    pop rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
;int vp8_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)
;
; Sum of squared differences over 16 signed 16-bit values:
;     sum_{i=0..15} (coeff_ptr[i] - dcoef_ptr[i])^2
;
; In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr (32 bytes read from each).
; Out:   low 32 bits of rax = sum (wraps modulo 2^32).  The upper dword
;        of mm0 still holds a partial sum when it is copied out, so any
;        bits of rax above bit 31 are not meaningful; the C prototype
;        returns int.
; Notes: differences are formed with wrapping 16-bit subtraction (psubw).
;        Uses MMX registers and does not execute emms; NOTE(review):
;        presumably the caller is responsible for that - confirm.
;        The previous revision built an all-ones "DC mask" from
;        constants and ANDed it into the first four differences; that
;        mask could never clear anything, so it has been dropped.
;-----------------------------------------------------------------------
global sym(vp8_block_error_mmx)
sym(vp8_block_error_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push rsi
    push rdi
    ; end prolog

        mov         rsi,        arg(0) ;coeff_ptr
        mov         rdi,        arg(1) ;dcoef_ptr

        ; ---- values 0..7 ------------------------------------------
        movq        mm1,        [rsi]       ; coeff[0..3]
        movq        mm4,        [rdi]       ; dcoef[0..3]

        movq        mm5,        [rsi+8]     ; coeff[4..7]
        movq        mm6,        [rdi+8]     ; dcoef[4..7]

        psubw       mm1,        mm4
        psubw       mm5,        mm6

        pmaddwd     mm1,        mm1         ; 2 dwords of squared pairs
        pmaddwd     mm5,        mm5

        paddd       mm1,        mm5         ; mm1 = running partial sums

        ; ---- values 8..15 -----------------------------------------
        movq        mm3,        [rsi+16]    ; coeff[8..11]
        movq        mm4,        [rdi+16]    ; dcoef[8..11]

        movq        mm5,        [rsi+24]    ; coeff[12..15]
        movq        mm6,        [rdi+24]    ; dcoef[12..15]

        psubw       mm3,        mm4
        psubw       mm5,        mm6

        pmaddwd     mm3,        mm3
        pmaddwd     mm5,        mm5

        paddd       mm3,        mm5
        paddd       mm1,        mm3         ; mm1 = [lo, hi] partial sums

        ; ---- horizontal add ---------------------------------------
        movq        mm0,        mm1
        psrlq       mm1,        32          ; [hi, 0]
        paddd       mm0,        mm1         ; low dword = lo + hi

        movq        rax,        mm0

    pop rdi
    pop rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
;
; Sum of squared differences over 16 consecutive groups of 16 signed
; 16-bit values (512 bytes read from each pointer).
;
; In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr, arg(2) = dc.
; Out:   low 32 bits of rax = sum (wraps modulo 2^32).  The upper dword
;        of mm0 still holds a partial sum when it is copied out, so any
;        bits of rax above bit 31 are not meaningful.
; Mask:  mm1 is built by comparing each 16-bit lane of the zero-extended
;        dc dword against zero.  Lanes 2 and 3 are always all-ones; lane
;        0 (lane 1) is all-ones only when the low (high) 16 bits of dc
;        are zero.  The mask is ANDed into the first four differences of
;        every group, so a non-zero low half of dc removes the first
;        value of each group from the sum.
; Notes: uses MMX registers and does not execute emms.
;-----------------------------------------------------------------------
global sym(vp8_mbblock_error_mmx_impl)
sym(vp8_mbblock_error_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push rsi
    push rdi
    ; end prolog

        mov         rsi,        arg(0) ;coeff_ptr
        mov         rdi,        arg(1) ;dcoef_ptr

        pxor        mm7,        mm7                 ; constant zero
        pxor        mm2,        mm2                 ; running partial sums

        movd        mm1,        dword ptr arg(2) ;dc
        por         mm1,        mm2                 ; mm2 is zero: mm1 unchanged
        pcmpeqw     mm1,        mm7                 ; per-lane (dc lane == 0) mask

        mov         rcx,        16                  ; groups remaining

.mberror_loop_mmx:
        ; values 0..3 of the group (masked)
        movq        mm3,        [rsi]
        movq        mm4,        [rdi]
        psubw       mm3,        mm4
        pand        mm3,        mm1
        pmaddwd     mm3,        mm3

        ; values 4..7
        movq        mm5,        [rsi+8]
        movq        mm6,        [rdi+8]
        psubw       mm5,        mm6
        pmaddwd     mm5,        mm5

        paddd       mm2,        mm3
        paddd       mm2,        mm5

        ; values 8..11
        movq        mm3,        [rsi+16]
        movq        mm4,        [rdi+16]
        psubw       mm3,        mm4
        pmaddwd     mm3,        mm3

        ; values 12..15
        movq        mm5,        [rsi+24]
        movq        mm6,        [rdi+24]
        psubw       mm5,        mm6
        pmaddwd     mm5,        mm5

        paddd       mm2,        mm3
        paddd       mm2,        mm5

        add         rsi,        32                  ; next group
        add         rdi,        32

        sub         rcx,        1
        jnz         .mberror_loop_mmx

        ; horizontal add of the two dword partial sums
        movq        mm0,        mm2
        psrlq       mm2,        32
        paddd       mm0,        mm2

        movq        rax,        mm0

    pop rdi
    pop rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
;
; Sum of squared differences over 16 consecutive groups of 16 signed
; 16-bit values (512 bytes read from each pointer).
;
; In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr, arg(2) = dc.
;        Data is loaded with movdqa, so both pointers must be 16-byte
;        aligned.
; Out:   rax = 32-bit sum (wraps modulo 2^32), zero-extended.
; Mask:  xmm1 is built by comparing each 16-bit lane of the zero-extended
;        dc dword against zero.  Lanes 2..7 are always all-ones; lane 0
;        (lane 1) is all-ones only when the low (high) 16 bits of dc are
;        zero.  The mask is ANDed into the first eight differences of
;        every group, so a non-zero low half of dc removes the first
;        value of each group from the sum.
; Notes: only xmm0-xmm5 are used as scratch: xmm6 and above are
;        callee-saved under the Microsoft x64 calling convention, so
;        staying below them avoids corrupting the caller's registers.
;-----------------------------------------------------------------------
global sym(vp8_mbblock_error_xmm_impl)
sym(vp8_mbblock_error_xmm_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push rsi
    push rdi
    ; end prolog

        mov         rsi,        arg(0) ;coeff_ptr
        mov         rdi,        arg(1) ;dcoef_ptr

        pxor        xmm2,       xmm2                ; running partial sums

        movd        xmm1,       dword ptr arg(2) ;dc
        pcmpeqw     xmm1,       xmm2                ; xmm2 is still zero here:
                                                    ; per-lane (dc lane == 0) mask
        mov         rcx,        16                  ; groups remaining

.mberror_loop:
        movdqa      xmm0,       [rsi]               ; coeff[0..7] of group
        movdqa      xmm3,       [rdi]               ; dcoef[0..7]

        movdqa      xmm4,       [rsi+16]            ; coeff[8..15]
        movdqa      xmm5,       [rdi+16]            ; dcoef[8..15]

        psubw       xmm4,       xmm5
        pmaddwd     xmm4,       xmm4

        psubw       xmm0,       xmm3
        pand        xmm0,       xmm1                ; optionally drop value 0
        pmaddwd     xmm0,       xmm0

        add         rsi,        32                  ; next group
        add         rdi,        32

        paddd       xmm2,       xmm4
        paddd       xmm2,       xmm0

        sub         rcx,        1
        jnz         .mberror_loop

        ; Horizontal add of the 4 dword partial sums; interleaving with
        ; zero keeps the upper dword of the low qword clear for movq.
        pxor        xmm5,       xmm5
        movdqa      xmm0,       xmm2

        punpckldq   xmm0,       xmm5                ; [s0, 0, s1, 0]
        punpckhdq   xmm2,       xmm5                ; [s2, 0, s3, 0]
        paddd       xmm0,       xmm2

        movdqa      xmm1,       xmm0
        psrldq      xmm0,       8

        paddd       xmm0,       xmm1                ; low dword = total
        movq        rax,        xmm0

    pop rdi
    pop rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
;
; Sum of squared differences over 128 signed 16-bit values
; (16 iterations x 16 bytes read from each pointer).
;
; In:    arg(0) = s_ptr, arg(1) = d_ptr.
; Out:   low 32 bits of rax = sum (wraps modulo 2^32).  The upper dword
;        of mm0 still holds a partial sum when it is copied out, so any
;        bits of rax above bit 31 are not meaningful.
; Notes: differences are formed with wrapping 16-bit subtraction (psubw).
;        Uses MMX registers and does not execute emms.
;-----------------------------------------------------------------------
global sym(vp8_mbuverror_mmx_impl)
sym(vp8_mbuverror_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;s_ptr
        mov             rdi,        arg(1) ;d_ptr

        pxor            mm7,        mm7         ; running partial sums
        mov             rcx,        16          ; iterations remaining

.mbuverror_loop_mmx:
        movq            mm1,        [rsi]       ; s[0..3]
        movq            mm3,        [rsi+8]     ; s[4..7]

        psubw           mm1,        [rdi]       ; s[0..3] - d[0..3]
        psubw           mm3,        [rdi+8]     ; s[4..7] - d[4..7]

        pmaddwd         mm1,        mm1
        pmaddwd         mm3,        mm3

        paddd           mm7,        mm1
        paddd           mm7,        mm3

        add             rsi,        16
        add             rdi,        16

        dec             rcx
        jnz             .mbuverror_loop_mmx

        ; horizontal add of the two dword partial sums
        movq            mm0,        mm7
        psrlq           mm7,        32
        paddd           mm0,        mm7

        movq            rax,        mm0

    pop rdi
    pop rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
;
; Sum of squared differences over 128 signed 16-bit values
; (16 iterations x 16 bytes read from each pointer).
;
; In:    arg(0) = s_ptr, arg(1) = d_ptr.  Data is loaded with movdqa, so
;        both pointers must be 16-byte aligned.
; Out:   rax = 32-bit sum (wraps modulo 2^32), zero-extended.
; Notes: differences are formed with wrapping 16-bit subtraction (psubw).
;        Only xmm0-xmm3 are used as scratch: xmm6 and above are
;        callee-saved under the Microsoft x64 calling convention, so the
;        accumulator is kept in xmm3 to avoid corrupting the caller's
;        registers.
;-----------------------------------------------------------------------
global sym(vp8_mbuverror_xmm_impl)
sym(vp8_mbuverror_xmm_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;s_ptr
        mov             rdi,        arg(1) ;d_ptr

        mov             rcx,        16          ; iterations remaining
        pxor            xmm3,       xmm3        ; running partial sums

.mbuverror_loop:
        movdqa          xmm1,       [rsi]       ; s[0..7]
        movdqa          xmm2,       [rdi]       ; d[0..7]

        psubw           xmm1,       xmm2
        pmaddwd         xmm1,       xmm1

        paddd           xmm3,       xmm1

        add             rsi,        16
        add             rdi,        16

        dec             rcx
        jnz             .mbuverror_loop

        ; Horizontal add of the 4 dword partial sums; interleaving with
        ; zero keeps the upper dword of the low qword clear for movq.
        pxor            xmm0,       xmm0
        movdqa          xmm1,       xmm3
        movdqa          xmm2,       xmm3

        punpckldq       xmm1,       xmm0        ; [s0, 0, s1, 0]
        punpckhdq       xmm2,       xmm0        ; [s2, 0, s3, 0]
        paddd           xmm1,       xmm2

        movdqa          xmm2,       xmm1
        psrldq          xmm1,       8
        paddd           xmm1,       xmm2        ; low dword = total

        movq            rax,        xmm1

    pop rdi
    pop rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
385