;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; Entry points made visible to the linker.
; NOTE(review): sym() is not defined in this file; presumably it comes from
; the include above and applies the platform's symbol decoration - confirm.
global sym(vp8_sad16x16_mmx)
global sym(vp8_sad8x16_mmx)
global sym(vp8_sad8x8_mmx)
global sym(vp8_sad4x4_mmx)
global sym(vp8_sad16x8_mmx)

;-----------------------------------------------------------------------
;unsigned int vp8_sad16x16_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute byte differences between a block 16 bytes wide and
; 16 rows high at src_ptr and a block of the same size at ref_ptr.
;
; Register roles inside the loop:
;   rsi = current src row          rax = src_stride (sign-extended)
;   rdi = current ref row          rdx = ref_stride (sign-extended)
;   rcx = src_ptr + 16*src_stride  (loop sentinel)
;   mm6 = 0 (unpack helper)        mm7 = four 16-bit running sums
;
; Notes:
;   * The loop exits only when rsi == rcx exactly, so src_stride has to
;     be non-zero for all 16 rows to be visited.
;   * Worst case total is 16*16*255 = 65280, which still fits the
;     16-bit lanes used by paddw.
;   * mm0-mm7 are overwritten and no emms is issued here.
;   * arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are macros that are
;     not defined in this file (presumably from x86_abi_support.asm).
;-----------------------------------------------------------------------
sym(vp8_sad16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rcx = src_ptr + 16*src_stride, built in two steps because 8 is
        ; the largest index scale an x86 address can encode
        lea             rcx,        [rsi+rax*8]
        lea             rcx,        [rcx+rax*8]

        pxor            mm7,        mm7                 ; running sums = 0
        pxor            mm6,        mm6                 ; constant zero

.x16x16sad_mmx_loop:

        movq            mm0,        QWORD PTR [rsi]     ; src bytes 0-7
        movq            mm2,        QWORD PTR [rsi+8]   ; src bytes 8-15

        movq            mm1,        QWORD PTR [rdi]     ; ref bytes 0-7
        movq            mm3,        QWORD PTR [rdi+8]   ; ref bytes 8-15

        ; keep copies of src: psubusb overwrites its destination
        movq            mm4,        mm0
        movq            mm5,        mm2

        ; |src-ref| per byte: the two saturating differences are OR-ed,
        ; and at most one of them is non-zero for any byte
        psubusb         mm0,        mm1
        psubusb         mm1,        mm4

        psubusb         mm2,        mm3
        psubusb         mm3,        mm5

        por             mm0,        mm1
        por             mm2,        mm3

        ; widen the 16 byte differences to words by interleaving with 0
        movq            mm1,        mm0
        movq            mm3,        mm2

        punpcklbw       mm0,        mm6
        punpcklbw       mm2,        mm6

        punpckhbw       mm1,        mm6
        punpckhbw       mm3,        mm6

        paddw           mm0,        mm2
        paddw           mm1,        mm3

        ; advance both row pointers
        lea             rsi,        [rsi+rax]
        add             rdi,        rdx

        paddw           mm7,        mm0
        paddw           mm7,        mm1

        cmp             rsi,        rcx
        jne             .x16x16sad_mmx_loop

        ; horizontal add of the four words w0..w3 held in mm7
        movq            mm0,        mm7

        punpcklwd       mm0,        mm6                 ; w0, 0, w1, 0
        punpckhwd       mm7,        mm6                 ; w2, 0, w3, 0

        paddw           mm0,        mm7                 ; w0+w2, 0, w1+w3, 0
        movq            mm7,        mm0

        psrlq           mm0,        32                  ; w1+w3, 0, 0, 0
        paddw           mm7,        mm0                 ; low word = total

        ; Low 32 bits of the result are the SAD (word 1 is zero). If rax
        ; is a 64-bit register here, bits 32-47 also receive the partial
        ; sum w1+w3; that lies outside the 32-bit return value.
        movq            rax,        mm7

    ; begin epilog
    pop rdi
    pop rsi
    mov rsp, rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad8x16_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute byte differences between a block 8 bytes wide and
; 16 rows high at src_ptr and a block of the same size at ref_ptr.
;
; Register roles inside the loop:
;   rsi = current src row          rax = src_stride (sign-extended)
;   rdi = current ref row          rdx = ref_stride (sign-extended)
;   rcx = src_ptr + 16*src_stride  (loop sentinel)
;   mm6 = 0 (unpack helper)        mm7 = four 16-bit running sums
;
; Notes:
;   * The loop exits only when rsi == rcx exactly, so src_stride has to
;     be non-zero for all 16 rows to be visited.
;   * Worst case total is 8*16*255 = 32640, well inside 16 bits.
;   * mm0-mm2, mm6 and mm7 are overwritten and no emms is issued here.
;   * arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are macros that are
;     not defined in this file (presumably from x86_abi_support.asm).
;-----------------------------------------------------------------------
sym(vp8_sad8x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rcx = src_ptr + 16*src_stride, built in two steps because 8 is
        ; the largest index scale an x86 address can encode
        lea             rcx,        [rsi+rax*8]
        lea             rcx,        [rcx+rax*8]

        pxor            mm7,        mm7                 ; running sums = 0
        pxor            mm6,        mm6                 ; constant zero

.x8x16sad_mmx_loop:

        movq            mm0,        QWORD PTR [rsi]     ; 8 src bytes
        movq            mm1,        QWORD PTR [rdi]     ; 8 ref bytes

        ; |src-ref| per byte: OR of the two saturating differences
        movq            mm2,        mm0                 ; src copy for ref-src
        psubusb         mm0,        mm1

        psubusb         mm1,        mm2
        por             mm0,        mm1

        ; widen to words by interleaving with zero
        movq            mm2,        mm0
        punpcklbw       mm0,        mm6

        punpckhbw       mm2,        mm6
        lea             rsi,        [rsi+rax]           ; next src row

        add             rdi,        rdx                 ; next ref row
        paddw           mm7,        mm0

        paddw           mm7,        mm2
        cmp             rsi,        rcx

        jne             .x8x16sad_mmx_loop

        ; horizontal add of the four words w0..w3 held in mm7
        movq            mm0,        mm7
        punpcklwd       mm0,        mm6                 ; w0, 0, w1, 0

        punpckhwd       mm7,        mm6                 ; w2, 0, w3, 0
        paddw           mm0,        mm7                 ; w0+w2, 0, w1+w3, 0

        movq            mm7,        mm0
        psrlq           mm0,        32                  ; w1+w3, 0, 0, 0

        paddw           mm7,        mm0                 ; low word = total

        ; Low 32 bits of the result are the SAD (word 1 is zero). If rax
        ; is a 64-bit register here, bits 32-47 also receive the partial
        ; sum w1+w3; that lies outside the 32-bit return value.
        movq            rax,        mm7

    ; begin epilog
    pop rdi
    pop rsi
    mov rsp, rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad8x8_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute byte differences between a block 8 bytes wide and
; 8 rows high at src_ptr and a block of the same size at ref_ptr.
;
; Register roles inside the loop:
;   rsi = current src row          rax = src_stride (sign-extended)
;   rdi = current ref row          rdx = ref_stride (sign-extended)
;   rcx = src_ptr + 8*src_stride   (loop sentinel)
;   mm6 = 0 (unpack helper)        mm7 = four 16-bit running sums
;
; Notes:
;   * The loop exits only when rsi == rcx exactly, so src_stride has to
;     be non-zero for all 8 rows to be visited.
;   * Worst case total is 8*8*255 = 16320, well inside 16 bits.
;   * mm0-mm2, mm6 and mm7 are overwritten and no emms is issued here.
;   * arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are macros that are
;     not defined in this file (presumably from x86_abi_support.asm).
;-----------------------------------------------------------------------
sym(vp8_sad8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        lea             rcx,        [rsi+rax*8]         ; one past the last src row
        pxor            mm7,        mm7                 ; running sums = 0

        pxor            mm6,        mm6                 ; constant zero

.x8x8sad_mmx_loop:

        movq            mm0,        QWORD PTR [rsi]     ; 8 src bytes
        movq            mm1,        QWORD PTR [rdi]     ; 8 ref bytes

        ; |src-ref| per byte: OR of the two saturating differences
        movq            mm2,        mm0                 ; src copy for ref-src
        psubusb         mm0,        mm1

        psubusb         mm1,        mm2
        por             mm0,        mm1

        ; widen to words by interleaving with zero, then fold both halves
        movq            mm2,        mm0
        punpcklbw       mm0,        mm6

        punpckhbw       mm2,        mm6
        paddw           mm0,        mm2

        lea             rsi,        [rsi+rax]           ; next src row
        add             rdi,        rdx                 ; next ref row

        paddw           mm7,        mm0
        cmp             rsi,        rcx

        jne             .x8x8sad_mmx_loop

        ; horizontal add of the four words w0..w3 held in mm7
        movq            mm0,        mm7
        punpcklwd       mm0,        mm6                 ; w0, 0, w1, 0

        punpckhwd       mm7,        mm6                 ; w2, 0, w3, 0
        paddw           mm0,        mm7                 ; w0+w2, 0, w1+w3, 0

        movq            mm7,        mm0
        psrlq           mm0,        32                  ; w1+w3, 0, 0, 0

        paddw           mm7,        mm0                 ; low word = total

        ; Low 32 bits of the result are the SAD (word 1 is zero). If rax
        ; is a 64-bit register here, bits 32-47 also receive the partial
        ; sum w1+w3; that lies outside the 32-bit return value.
        movq            rax,        mm7

    ; begin epilog
    pop rdi
    pop rsi
    mov rsp, rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad4x4_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute byte differences between a block 4 bytes wide and
; 4 rows high at src_ptr and a block of the same size at ref_ptr.
;
; Straight-line code: rows are handled two at a time by interleaving
; the 4 bytes of each row pair into one 8-byte MMX register.
;
;   rsi = src row pointer          rax = src_stride (sign-extended)
;   rdi = ref row pointer          rdx = ref_stride (sign-extended)
;
; Notes:
;   * Worst case total is 4*4*255 = 4080, well inside 16 bits.
;   * mm0-mm7 are overwritten and no emms is issued here.
;   * arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are macros that are
;     not defined in this file (presumably from x86_abi_support.asm).
;-----------------------------------------------------------------------
sym(vp8_sad4x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; ---- rows 0 and 1 ----
        movd            mm0,        DWORD PTR [rsi]     ; src row 0
        movd            mm1,        DWORD PTR [rdi]     ; ref row 0

        movd            mm2,        DWORD PTR [rsi+rax] ; src row 1
        movd            mm3,        DWORD PTR [rdi+rdx] ; ref row 1

        ; interleave the two rows so one register carries all 8 bytes;
        ; src and ref are interleaved the same way, so bytes still pair up
        punpcklbw       mm0,        mm2
        punpcklbw       mm1,        mm3

        ; |src-ref| per byte: OR of the two saturating differences
        movq            mm2,        mm0                 ; src copy for ref-src
        psubusb         mm0,        mm1

        psubusb         mm1,        mm2
        por             mm0,        mm1

        movq            mm2,        mm0
        pxor            mm3,        mm3                 ; mm3 = 0 from here on

        ; widen to words and fold both halves
        punpcklbw       mm0,        mm3
        punpckhbw       mm2,        mm3

        paddw           mm0,        mm2

        ; ---- rows 2 and 3 ----
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        movd            mm4,        DWORD PTR [rsi]     ; src row 2
        movd            mm5,        DWORD PTR [rdi]     ; ref row 2

        movd            mm6,        DWORD PTR [rsi+rax] ; src row 3
        movd            mm7,        DWORD PTR [rdi+rdx] ; ref row 3

        punpcklbw       mm4,        mm6
        punpcklbw       mm5,        mm7

        movq            mm6,        mm4                 ; src copy for ref-src
        psubusb         mm4,        mm5

        psubusb         mm5,        mm6
        por             mm4,        mm5

        movq            mm5,        mm4
        punpcklbw       mm4,        mm3

        punpckhbw       mm5,        mm3
        paddw           mm4,        mm5

        ; combine both row pairs: mm0 = four 16-bit sums w0..w3
        paddw           mm0,        mm4
        movq            mm1,        mm0

        ; horizontal add of the four words
        punpcklwd       mm0,        mm3                 ; w0, 0, w1, 0
        punpckhwd       mm1,        mm3                 ; w2, 0, w3, 0

        paddw           mm0,        mm1                 ; w0+w2, 0, w1+w3, 0
        movq            mm1,        mm0

        psrlq           mm0,        32                  ; w1+w3, 0, 0, 0
        paddw           mm0,        mm1                 ; low word = total

        ; Low 32 bits of the result are the SAD (word 1 is zero). If rax
        ; is a 64-bit register here, bits 32-47 also receive the partial
        ; sum w1+w3; that lies outside the 32-bit return value.
        movq            rax,        mm0

    ; begin epilog
    pop rdi
    pop rsi
    mov rsp, rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad16x8_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute byte differences between a block 16 bytes wide and
; 8 rows high at src_ptr and a block of the same size at ref_ptr.
;
; Register roles inside the loop:
;   rsi = current src row          rax = src_stride (sign-extended)
;   rdi = current ref row          rdx = ref_stride (sign-extended)
;   rcx = src_ptr + 8*src_stride   (loop sentinel)
;   mm6 = 0 (unpack helper)        mm7 = four 16-bit running sums
;
; Notes:
;   * The loop exits only when rsi == rcx exactly, so src_stride has to
;     be non-zero for all 8 rows to be visited.
;   * Worst case total is 16*8*255 = 32640, well inside 16 bits.
;   * mm0-mm7 are overwritten and no emms is issued here.
;   * arg(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are macros that are
;     not defined in this file (presumably from x86_abi_support.asm).
;-----------------------------------------------------------------------
sym(vp8_sad16x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push rsi
    push rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        lea             rcx,        [rsi+rax*8]         ; one past the last src row
        pxor            mm7,        mm7                 ; running sums = 0

        pxor            mm6,        mm6                 ; constant zero

.x16x8sad_mmx_loop:

        movq            mm0,        QWORD PTR [rsi]     ; src bytes 0-7
        movq            mm1,        QWORD PTR [rdi]     ; ref bytes 0-7

        movq            mm2,        QWORD PTR [rsi+8]   ; src bytes 8-15
        movq            mm3,        QWORD PTR [rdi+8]   ; ref bytes 8-15

        ; keep copies of src: psubusb overwrites its destination
        movq            mm4,        mm0
        movq            mm5,        mm2

        ; |src-ref| per byte: the two saturating differences are OR-ed,
        ; and at most one of them is non-zero for any byte
        psubusb         mm0,        mm1
        psubusb         mm1,        mm4

        psubusb         mm2,        mm3
        psubusb         mm3,        mm5

        por             mm0,        mm1
        por             mm2,        mm3

        ; widen the 16 byte differences to words by interleaving with 0
        movq            mm1,        mm0
        movq            mm3,        mm2

        punpcklbw       mm0,        mm6
        punpckhbw       mm1,        mm6

        punpcklbw       mm2,        mm6
        punpckhbw       mm3,        mm6

        ; fold the four word vectors of this row into one
        paddw           mm0,        mm2
        paddw           mm1,        mm3

        paddw           mm0,        mm1
        lea             rsi,        [rsi+rax]           ; next src row

        add             rdi,        rdx                 ; next ref row
        paddw           mm7,        mm0

        cmp             rsi,        rcx
        jne             .x16x8sad_mmx_loop

        ; horizontal add of the four words w0..w3 held in mm7
        movq            mm0,        mm7
        punpcklwd       mm0,        mm6                 ; w0, 0, w1, 0

        punpckhwd       mm7,        mm6                 ; w2, 0, w3, 0
        paddw           mm0,        mm7                 ; w0+w2, 0, w1+w3, 0

        movq            mm7,        mm0
        psrlq           mm0,        32                  ; w1+w3, 0, 0, 0

        paddw           mm7,        mm0                 ; low word = total

        ; Low 32 bits of the result are the SAD (word 1 is zero). If rax
        ; is a 64-bit register here, bits 32-47 also receive the partial
        ; sum w1+w3; that lies outside the 32-bit return value.
        movq            rax,        mm7

    ; begin epilog
    pop rdi
    pop rsi
    mov rsp, rbp
    UNSHADOW_ARGS
    pop         rbp
    ret