; sad_ssse3.asm revision 538f6170b788de7408b06efc6613dc98579aa6a6
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
; PROCESS_16X2X3 init
;
; Accumulates the sum of absolute differences (psadbw) of two 16-byte
; source rows against three reference candidates: ref, ref+1 and ref+2.
; Reference rows are fetched with unaligned loads (lddqu).
;
;   %1    nonzero: the first row's results overwrite xmm5/xmm6/xmm7
;                  (starts a new accumulation)
;         zero:    the first row's results are added to xmm5/xmm6/xmm7
;
; In:     rsi = source row; read with movdqa, so it must be 16-byte aligned
;         rdi = reference row; any alignment
;         rax = byte distance to the next source row
;         rdx = byte distance to the next reference row
; Out:    xmm5 / xmm6 / xmm7 = running sums for ref+0 / ref+1 / ref+2
;         (psadbw leaves one partial sum in the low word of each 64-bit half)
;         rsi, rdi advanced by two rows
; Clobb:  xmm0-xmm3
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3 1
%if %1
        ; first row, initialising form: results land directly in the accumulators
        movdqa          xmm0,       XMMWORD PTR [rsi]
        lddqu           xmm5,       XMMWORD PTR [rdi]
        lddqu           xmm6,       XMMWORD PTR [rdi+1]
        lddqu           xmm7,       XMMWORD PTR [rdi+2]

        psadbw          xmm5,       xmm0
        psadbw          xmm6,       xmm0
        psadbw          xmm7,       xmm0
%else
        ; first row, accumulating form: compute in xmm1-xmm3, then add
        movdqa          xmm0,       XMMWORD PTR [rsi]
        lddqu           xmm1,       XMMWORD PTR [rdi]
        lddqu           xmm2,       XMMWORD PTR [rdi+1]
        lddqu           xmm3,       XMMWORD PTR [rdi+2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endif
        ; second row (one stride further down); always accumulates
        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]

        ; step both pointers past the two rows just loaded
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X2X3_OFFSET init, offset
;
; Same computation as PROCESS_16X2X3, but the reference rows are read as
; two aligned 16-byte chunks ([rdi] and [rdi+16]) and the three 16-byte
; windows at byte offsets %2, %2+1 and %2+2 are extracted with palignr.
;
;   %1    nonzero: the first row's results overwrite xmm5/xmm6/xmm7
;         zero:    the first row's results are added to xmm5/xmm6/xmm7
;   %2    byte offset of the first reference window inside the 32 bytes
;         loaded; %2+2 is used as a palignr immediate, so %2+2 must not
;         exceed 16 for the window to stay inside those 32 bytes
;
; In:     rsi = source row; read with movdqa, must be 16-byte aligned
;         rdi = reference row rounded down so that [rdi] is 16-byte
;               aligned (movdqa is used for the reference loads too)
;         rax = byte distance to the next source row
;         rdx = byte distance to the next reference row
; Out:    xmm5 / xmm6 / xmm7 = running sums for offsets %2 / %2+1 / %2+2
;         rsi, rdi advanced by two rows
; Clobb:  xmm0-xmm4
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3_OFFSET 2
%if %1
        ; first row, initialising form
        movdqa          xmm0,       XMMWORD PTR [rsi]
        movdqa          xmm4,       XMMWORD PTR [rdi]       ; low chunk
        movdqa          xmm7,       XMMWORD PTR [rdi+16]    ; high chunk

        ; palignr dst, src, n: bytes n..n+15 of the 32-byte value dst:src
        movdqa          xmm5,       xmm7
        palignr         xmm5,       xmm4,       %2

        movdqa          xmm6,       xmm7
        palignr         xmm6,       xmm4,       (%2+1)

        palignr         xmm7,       xmm4,       (%2+2)

        psadbw          xmm5,       xmm0
        psadbw          xmm6,       xmm0
        psadbw          xmm7,       xmm0
%else
        ; first row, accumulating form
        movdqa          xmm0,       XMMWORD PTR [rsi]
        movdqa          xmm4,       XMMWORD PTR [rdi]       ; low chunk
        movdqa          xmm3,       XMMWORD PTR [rdi+16]    ; high chunk

        movdqa          xmm1,       xmm3
        palignr         xmm1,       xmm4,       %2

        movdqa          xmm2,       xmm3
        palignr         xmm2,       xmm4,       (%2+1)

        palignr         xmm3,       xmm4,       (%2+2)

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endif
        ; second row (one stride further down); always accumulates
        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
        movdqa          xmm4,       XMMWORD PTR [rdi+rdx]
        movdqa          xmm3,       XMMWORD PTR [rdi+rdx+16]

        movdqa          xmm1,       xmm3
        palignr         xmm1,       xmm4,       %2

        movdqa          xmm2,       xmm3
        palignr         xmm2,       xmm4,       (%2+1)

        palignr         xmm3,       xmm4,       (%2+2)

        ; step both pointers past the two rows just loaded
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X16X3_OFFSET offset, name
;
; Emits the jump-table target "<name>_aligned_by_<offset>": the 16-row
; SAD body for a reference pointer whose low four address bits equal
; <offset>.
;
;   %1    misalignment of rdi in bytes; subtracted from rdi so that the
;         aligned loads in PROCESS_16X2X3_OFFSET are legal, and passed on
;         as the palignr offset
;   %2    label prefix (the enclosing function's name); the block ends
;         by jumping to "<name>_store_off"
;
; Registers: as for PROCESS_16X2X3_OFFSET (invoked 8 times, 2 rows each).
;-----------------------------------------------------------------------
%macro PROCESS_16X16X3_OFFSET 2
%2_aligned_by_%1:

        sub             rdi,        %1          ; round rdi down to its aligned chunk

        PROCESS_16X2X3_OFFSET 1, %1             ; rows 0-1 initialise the sums
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1

        jmp             %2_store_off

%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X8X3_OFFSET offset, name
;
; Emits the jump-table target "<name>_aligned_by_<offset>": the 8-row
; SAD body for a reference pointer whose low four address bits equal
; <offset>.
;
;   %1    misalignment of rdi in bytes; subtracted from rdi so that the
;         aligned loads in PROCESS_16X2X3_OFFSET are legal, and passed on
;         as the palignr offset
;   %2    label prefix (the enclosing function's name); the block ends
;         by jumping to "<name>_store_off"
;
; Registers: as for PROCESS_16X2X3_OFFSET (invoked 4 times, 2 rows each).
;-----------------------------------------------------------------------
%macro PROCESS_16X8X3_OFFSET 2
%2_aligned_by_%1:

        sub             rdi,        %1          ; round rdi down to its aligned chunk

        PROCESS_16X2X3_OFFSET 1, %1             ; rows 0-1 initialise the sums
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1

        jmp             %2_store_off

%endmacro

;-----------------------------------------------------------------------
; void vp8_sad16x16x3_ssse3(
;     unsigned char *src_ptr,
;     int  src_stride,
;     unsigned char *ref_ptr,
;     int  ref_stride,
;     int  *results)
;
; Sum of absolute differences of a 16x16 source block against the three
; reference blocks starting at ref_ptr, ref_ptr+1 and ref_ptr+2.  The
; three sums are written to results[0], results[1] and results[2].
;
; Arguments are reached through arg(n); arg, sym, SHADOW_ARGS_TO_STACK
; and UNSHADOW_ARGS come from the included x86_abi_support.asm.
;
; Requirements visible in this code:
;   - source rows are read with movdqa, so src_ptr and src_stride must
;     keep every row 16-byte aligned
;   - for ref_ptr misalignments 0..14 each reference row is read as the
;     two aligned 16-byte chunks enclosing it (32 bytes per row)
;
; Register roles in the row loop:
;   rsi = source row        rax = src_stride (sign-extended)
;   rdi = reference row     rdx = ref_stride (sign-extended)
;   xmm5/xmm6/xmm7 = running sums for ref+0 / ref+1 / ref+2
;   xmm0-xmm4 = scratch
;
; The 16-bit accumulators cannot overflow: each psadbw partial sum is at
; most 8*255, and 16 rows plus the final fold give at most 65280.
;-----------------------------------------------------------------------
global sym(vp8_sad16x16x3_ssse3)
sym(vp8_sad16x16x3_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
%ifidn __OUTPUT_FORMAT__,x64
    ; xmm6/xmm7 are callee-saved in the Microsoft x64 calling convention
    ; and are used as accumulators below, so preserve them.  movdqu is
    ; used because rsp alignment is not established at this point.
    sub         rsp, 32
    movdqu      [rsp], xmm6
    movdqu      [rsp+16], xmm7
%endif
    push        rsi
    push        rdi
    push        rcx
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rdx = ref_ptr & 15: misalignment of the reference pointer,
        ; used as the jump-table index
        mov             rdx,        0xf
        and             rdx,        rdi

        jmp vp8_sad16x16x3_ssse3_skiptable
        ; Table of 32-bit offsets, each relative to the do_jump label, so
        ; the table itself needs no absolute relocations.
vp8_sad16x16x3_ssse3_jumptable:
        dd vp8_sad16x16x3_ssse3_aligned_by_0  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_1  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_2  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_3  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_4  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_5  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_6  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_7  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_8  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_9  - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_10 - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_11 - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_12 - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_13 - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_14 - vp8_sad16x16x3_ssse3_do_jump
        dd vp8_sad16x16x3_ssse3_aligned_by_15 - vp8_sad16x16x3_ssse3_do_jump
vp8_sad16x16x3_ssse3_skiptable:

        ; call/pop pair: the pushed return address is the run-time address
        ; of do_jump, which anchors the relative table entries
        call vp8_sad16x16x3_ssse3_do_jump
vp8_sad16x16x3_ssse3_do_jump:
        pop             rcx                         ; get the address of do_jump
        mov             rax,  vp8_sad16x16x3_ssse3_jumptable - vp8_sad16x16x3_ssse3_do_jump
        add             rax,  rcx  ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable

        movsxd          rax,  dword [rax + 4*rdx]   ; get the 32 bit offset from the jumptable
        add             rcx,        rax             ; rcx = address of aligned_by_<rdx>

        ; rax/rdx are free again; load the strides the row macros expect
        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        jmp             rcx

        ; Misalignments 0..14: aligned loads + palignr.  Each expansion
        ; defines its own aligned_by_N label and ends with a jump to
        ; store_off.
        PROCESS_16X16X3_OFFSET 0,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 1,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 2,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 3,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 4,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 5,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 6,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 7,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 8,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 9,  vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 10, vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 11, vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 12, vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 13, vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 14, vp8_sad16x16x3_ssse3

        ; Misalignment 15: the ref+2 window would start at byte 17 and
        ; run past the two aligned chunks, so use unaligned loads here.
        ; Falls through into store_off.
vp8_sad16x16x3_ssse3_aligned_by_15:
        PROCESS_16X2X3 1
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0

        ; Each accumulator holds two partial sums (one per 64-bit half).
        ; Fold the high half onto the low half and store the low dword.
vp8_sad16x16x3_ssse3_store_off:
        mov             rdi,        arg(4) ;Results

        movq            xmm0,       xmm5
        psrldq          xmm5,       8

        paddw           xmm0,       xmm5
        movd            [rdi],      xmm0        ; results[0]: SAD at ref_ptr
;-
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movd            [rdi+4],    xmm0        ; results[1]: SAD at ref_ptr+1
;-
        movq            xmm0,       xmm7
        psrldq          xmm7,       8

        paddw           xmm0,       xmm7
        movd            [rdi+8],    xmm0        ; results[2]: SAD at ref_ptr+2

    ; begin epilog
    pop         rcx
    pop         rdi
    pop         rsi
%ifidn __OUTPUT_FORMAT__,x64
    ; restore the callee-saved vector registers saved in the prolog
    movdqu      xmm6, [rsp]
    movdqu      xmm7, [rsp+16]
    add         rsp, 32
%endif
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; void vp8_sad16x8x3_ssse3(
;     unsigned char *src_ptr,
;     int  src_stride,
;     unsigned char *ref_ptr,
;     int  ref_stride,
;     int  *results)
;
; Sum of absolute differences of a 16-wide, 8-row source block against
; the three reference blocks starting at ref_ptr, ref_ptr+1 and
; ref_ptr+2.  The three sums are written to results[0], results[1] and
; results[2].
;
; Arguments are reached through arg(n); arg, sym, SHADOW_ARGS_TO_STACK
; and UNSHADOW_ARGS come from the included x86_abi_support.asm.
;
; Requirements visible in this code:
;   - source rows are read with movdqa, so src_ptr and src_stride must
;     keep every row 16-byte aligned
;   - for ref_ptr misalignments 0..14 each reference row is read as the
;     two aligned 16-byte chunks enclosing it (32 bytes per row)
;
; Register roles in the row loop:
;   rsi = source row        rax = src_stride (sign-extended)
;   rdi = reference row     rdx = ref_stride (sign-extended)
;   xmm5/xmm6/xmm7 = running sums for ref+0 / ref+1 / ref+2
;   xmm0-xmm4 = scratch
;-----------------------------------------------------------------------
global sym(vp8_sad16x8x3_ssse3)
sym(vp8_sad16x8x3_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
%ifidn __OUTPUT_FORMAT__,x64
    ; xmm6/xmm7 are callee-saved in the Microsoft x64 calling convention
    ; and are used as accumulators below, so preserve them.  movdqu is
    ; used because rsp alignment is not established at this point.
    sub         rsp, 32
    movdqu      [rsp], xmm6
    movdqu      [rsp+16], xmm7
%endif
    push        rsi
    push        rdi
    push        rcx
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rdx = ref_ptr & 15: misalignment of the reference pointer,
        ; used as the jump-table index
        mov             rdx,        0xf
        and             rdx,        rdi

        jmp vp8_sad16x8x3_ssse3_skiptable
        ; Table of 32-bit offsets, each relative to the do_jump label, so
        ; the table itself needs no absolute relocations.
vp8_sad16x8x3_ssse3_jumptable:
        dd vp8_sad16x8x3_ssse3_aligned_by_0  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_1  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_2  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_3  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_4  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_5  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_6  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_7  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_8  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_9  - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_10 - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_11 - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_12 - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_13 - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_14 - vp8_sad16x8x3_ssse3_do_jump
        dd vp8_sad16x8x3_ssse3_aligned_by_15 - vp8_sad16x8x3_ssse3_do_jump
vp8_sad16x8x3_ssse3_skiptable:

        ; call/pop pair: the pushed return address is the run-time address
        ; of do_jump, which anchors the relative table entries
        call vp8_sad16x8x3_ssse3_do_jump
vp8_sad16x8x3_ssse3_do_jump:
        pop             rcx                         ; get the address of do_jump
        mov             rax,  vp8_sad16x8x3_ssse3_jumptable - vp8_sad16x8x3_ssse3_do_jump
        add             rax,  rcx  ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable

        movsxd          rax,  dword [rax + 4*rdx]   ; get the 32 bit offset from the jumptable
        add             rcx,        rax             ; rcx = address of aligned_by_<rdx>

        ; rax/rdx are free again; load the strides the row macros expect
        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        jmp             rcx

        ; Misalignments 0..14: aligned loads + palignr.  Each expansion
        ; defines its own aligned_by_N label and ends with a jump to
        ; store_off.
        PROCESS_16X8X3_OFFSET 0,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 1,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 2,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 3,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 4,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 5,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 6,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 7,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 8,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 9,  vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 10, vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 11, vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 12, vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 13, vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 14, vp8_sad16x8x3_ssse3

        ; Misalignment 15: the ref+2 window would start at byte 17 and
        ; run past the two aligned chunks, so use unaligned loads here.
        ; Falls through into store_off.
vp8_sad16x8x3_ssse3_aligned_by_15:

        PROCESS_16X2X3 1
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0

        ; Each accumulator holds two partial sums (one per 64-bit half).
        ; Fold the high half onto the low half and store the low dword.
vp8_sad16x8x3_ssse3_store_off:
        mov             rdi,        arg(4) ;Results

        movq            xmm0,       xmm5
        psrldq          xmm5,       8

        paddw           xmm0,       xmm5
        movd            [rdi],      xmm0        ; results[0]: SAD at ref_ptr
;-
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movd            [rdi+4],    xmm0        ; results[1]: SAD at ref_ptr+1
;-
        movq            xmm0,       xmm7
        psrldq          xmm7,       8

        paddw           xmm0,       xmm7
        movd            [rdi+8],    xmm0        ; results[2]: SAD at ref_ptr+2

    ; begin epilog
    pop         rcx
    pop         rdi
    pop         rsi
%ifidn __OUTPUT_FORMAT__,x64
    ; restore the callee-saved vector registers saved in the prolog
    movdqu      xmm6, [rsp]
    movdqu      xmm7, [rsp+16]
    add         rsp, 32
%endif
    UNSHADOW_ARGS
    pop         rbp
    ret