;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK, SAVE_XMM and friends are
; not defined in this file; presumably they all come from this include.
%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
; PROCESS_16X2X3 first
;
; Handles two 16-byte rows. For each row the source bytes are compared
; against three reference candidates starting at rdi, rdi+1 and rdi+2, and
; the psadbw results are accumulated in xmm5, xmm6 and xmm7 respectively.
; The reference is read with unaligned loads (lddqu), so this variant works
; for any reference alignment.
;
; Parameter:
;   %1   non-zero: the first row initialises xmm5/xmm6/xmm7
;        zero:     the first row is added to the running totals
; In:
;   rsi  source row; read with movdqa, so it must be 16-byte aligned
;   rdi  reference row
;   rax  byte distance between source rows
;   rdx  byte distance between reference rows
; Out:
;   xmm5/xmm6/xmm7  running psadbw totals for offsets 0 / +1 / +2
;   rsi, rdi        advanced by two rows
; Clobbers: xmm0-xmm3
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3 1
%if %1
        ; first row of a block: psadbw writes straight into the accumulators
        movdqa          xmm0,       XMMWORD PTR [rsi]
        lddqu           xmm5,       XMMWORD PTR [rdi]
        lddqu           xmm6,       XMMWORD PTR [rdi+1]
        lddqu           xmm7,       XMMWORD PTR [rdi+2]

        psadbw          xmm5,       xmm0
        psadbw          xmm6,       xmm0
        psadbw          xmm7,       xmm0
%else
        ; later rows: compute into xmm1-xmm3, then add to the accumulators
        movdqa          xmm0,       XMMWORD PTR [rsi]
        lddqu           xmm1,       XMMWORD PTR [rdi]
        lddqu           xmm2,       XMMWORD PTR [rdi+1]
        lddqu           xmm3,       XMMWORD PTR [rdi+2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endif
        ; second row of the pair, one stride further down
        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]

        ; step both pointers past the two rows just loaded
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X2X3_OFFSET first, offset
;
; Same job as PROCESS_16X2X3 (two rows, three reference candidates,
; totals in xmm5/xmm6/xmm7), but the reference is fetched with two aligned
; 16-byte loads at rdi and rdi+16. The three candidates are then cut out of
; that 32-byte window with palignr, using byte shifts of offset, offset+1
; and offset+2.
;
; The largest shift is offset+2, so the window only covers offsets 0..14
; (shift <= 16). Offset 15 would need a shift of 17 and is handled by the
; callers with PROCESS_16X2X3 instead.
;
; Parameters:
;   %1   non-zero: the first row initialises xmm5/xmm6/xmm7
;        zero:     the first row is added to the running totals
;   %2   byte offset of the first candidate inside the aligned window
; In:
;   rsi  source row; read with movdqa, so it must be 16-byte aligned
;   rdi  start of the reference window; read with movdqa, so it must be
;        16-byte aligned (the *_OFFSET wrappers subtract the offset first)
;   rax  byte distance between source rows
;   rdx  byte distance between reference rows
; Out:
;   xmm5/xmm6/xmm7  running psadbw totals for offset / +1 / +2
;   rsi, rdi        advanced by two rows
; Clobbers: xmm0-xmm4
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3_OFFSET 2
%if %1
        ; first row of a block: results land directly in the accumulators
        movdqa          xmm0,       XMMWORD PTR [rsi]
        movdqa          xmm4,       XMMWORD PTR [rdi]
        movdqa          xmm7,       XMMWORD PTR [rdi+16]

        movdqa          xmm5,       xmm7
        palignr         xmm5,       xmm4,       %2

        movdqa          xmm6,       xmm7
        palignr         xmm6,       xmm4,       (%2+1)

        ; xmm7 still holds the high half, so it can be shifted in place
        palignr         xmm7,       xmm4,       (%2+2)

        psadbw          xmm5,       xmm0
        psadbw          xmm6,       xmm0
        psadbw          xmm7,       xmm0
%else
        ; later rows: build the candidates in xmm1-xmm3, then accumulate
        movdqa          xmm0,       XMMWORD PTR [rsi]
        movdqa          xmm4,       XMMWORD PTR [rdi]
        movdqa          xmm3,       XMMWORD PTR [rdi+16]

        movdqa          xmm1,       xmm3
        palignr         xmm1,       xmm4,       %2

        movdqa          xmm2,       xmm3
        palignr         xmm2,       xmm4,       (%2+1)

        palignr         xmm3,       xmm4,       (%2+2)

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endif
        ; second row of the pair, one stride further down
        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
        movdqa          xmm4,       XMMWORD PTR [rdi+rdx]
        movdqa          xmm3,       XMMWORD PTR [rdi+rdx+16]

        movdqa          xmm1,       xmm3
        palignr         xmm1,       xmm4,       %2

        movdqa          xmm2,       xmm3
        palignr         xmm2,       xmm4,       (%2+1)

        palignr         xmm3,       xmm4,       (%2+2)

        ; step both pointers past the two rows just loaded
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X16X3_OFFSET offset, label_prefix
;
; Emits one jump-table target for the 16x16 kernel:
;   - defines the label <label_prefix>_aligned_by_<offset>
;   - subtracts <offset> from rdi; the dispatcher only jumps here when
;     (ref_ptr & 0xf) == offset, so rdi becomes 16-byte aligned
;   - runs PROCESS_16X2X3_OFFSET eight times (8 x 2 = 16 rows)
;   - jumps to <label_prefix>_store_off, which the caller must define
;
; Parameters:
;   %1   reference misalignment in bytes; PROCESS_16X2X3_OFFSET supports 0..14
;   %2   label prefix shared with the enclosing function
; Register contract: same as PROCESS_16X2X3_OFFSET.
;-----------------------------------------------------------------------
%macro PROCESS_16X16X3_OFFSET 2
%2_aligned_by_%1:

        sub             rdi,        %1

        PROCESS_16X2X3_OFFSET 1, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1

        jmp             %2_store_off

%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X8X3_OFFSET offset, label_prefix
;
; Emits one jump-table target for the 16x8 kernel:
;   - defines the label <label_prefix>_aligned_by_<offset>
;   - subtracts <offset> from rdi; the dispatcher only jumps here when
;     (ref_ptr & 0xf) == offset, so rdi becomes 16-byte aligned
;   - runs PROCESS_16X2X3_OFFSET four times (4 x 2 = 8 rows)
;   - jumps to <label_prefix>_store_off, which the caller must define
;
; Parameters:
;   %1   reference misalignment in bytes; PROCESS_16X2X3_OFFSET supports 0..14
;   %2   label prefix shared with the enclosing function
; Register contract: same as PROCESS_16X2X3_OFFSET.
;-----------------------------------------------------------------------
%macro PROCESS_16X8X3_OFFSET 2
%2_aligned_by_%1:

        sub             rdi,        %1

        PROCESS_16X2X3_OFFSET 1, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1

        jmp             %2_store_off

%endmacro

;-----------------------------------------------------------------------
;void vp8_sad16x16x3_ssse3(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  *results)
;
; Sums of absolute differences of one 16x16 source block against three
; reference blocks starting at ref_ptr, ref_ptr+1 and ref_ptr+2. The three
; totals are written to results[0], results[1] and results[2].
;
; Source rows are read with movdqa, so src_ptr and src_stride must keep
; every row 16-byte aligned. ref_ptr may have any alignment: its low four
; bits select one of 16 unrolled code paths through a jump table.
;
; Register roles after the prolog:
;   rsi = source row        rax = src_stride (after the dispatch)
;   rdi = reference row     rdx = ref_ptr & 0xf, later ref_stride
;   rcx = dispatch target   xmm5/xmm6/xmm7 = totals for offsets 0/+1/+2
;-----------------------------------------------------------------------
global sym(vp8_sad16x16x3_ssse3) PRIVATE
sym(vp8_sad16x16x3_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rcx
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rdx = misalignment of ref_ptr within a 16-byte block (0..15)
        mov             rdx,        0xf
        and             rdx,        rdi

        ; The table lives in the instruction stream, so jump over it.
        ; Each entry is the 32-bit distance from do_jump to one code path.
        jmp .vp8_sad16x16x3_ssse3_skiptable
.vp8_sad16x16x3_ssse3_jumptable:
        dd .vp8_sad16x16x3_ssse3_aligned_by_0  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_1  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_2  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_3  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_4  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_5  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_6  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_7  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_8  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_9  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_10 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_11 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_12 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_13 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_14 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_15 - .vp8_sad16x16x3_ssse3_do_jump
.vp8_sad16x16x3_ssse3_skiptable:

        ; call/pop yields the run-time address of do_jump without an
        ; absolute relocation; the pop also removes the pushed return address
        call .vp8_sad16x16x3_ssse3_do_jump
.vp8_sad16x16x3_ssse3_do_jump:
        pop             rcx                         ; get the address of do_jump
        mov             rax,  .vp8_sad16x16x3_ssse3_jumptable - .vp8_sad16x16x3_ssse3_do_jump
        add             rax,  rcx  ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable

        movsxd          rax,  dword [rax + 4*rdx]   ; get the 32 bit offset from the jumptable
        add             rcx,        rax             ; rcx = address of the selected code path

        ; rax/rdx are free again: load the strides the row macros expect
        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        jmp             rcx

        ; Offsets 0..14: aligned loads + palignr. Every expansion ends with
        ; a jump to store_off, so the paths never fall into each other.
        PROCESS_16X16X3_OFFSET 0,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 1,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 2,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 3,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 4,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 5,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 6,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 7,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 8,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 9,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 10, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 11, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 12, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 13, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 14, .vp8_sad16x16x3_ssse3

        ; Offset 15: the +2 candidate would need a palignr shift of 17, so
        ; this path uses unaligned loads. It falls through into store_off.
.vp8_sad16x16x3_ssse3_aligned_by_15:
        PROCESS_16X2X3 1
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0

.vp8_sad16x16x3_ssse3_store_off:
        mov             rdi,        arg(4) ;Results

        ; psadbw leaves one partial sum in the low word of each 64-bit half.
        ; Per half: 16 rows * 8 bytes * 255 = 32640, and both halves together
        ; are at most 65280, so the 16-bit paddw below cannot overflow.
        movq            xmm0,       xmm5            ; low half, upper qword cleared
        psrldq          xmm5,       8               ; bring the high half down

        paddw           xmm0,       xmm5
        movd            [rdi],      xmm0            ; results[0]: ref_ptr
;-
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movd            [rdi+4],    xmm0            ; results[1]: ref_ptr + 1
;-
        movq            xmm0,       xmm7
        psrldq          xmm7,       8

        paddw           xmm0,       xmm7
        movd            [rdi+8],    xmm0            ; results[2]: ref_ptr + 2

    ; begin epilog
    pop         rcx
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
;void vp8_sad16x8x3_ssse3(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  *results)
;
; Sums of absolute differences of one 16x8 source block (16 bytes wide,
; 8 rows) against three reference blocks starting at ref_ptr, ref_ptr+1
; and ref_ptr+2. The three totals are written to results[0], results[1]
; and results[2].
;
; Source rows are read with movdqa, so src_ptr and src_stride must keep
; every row 16-byte aligned. ref_ptr may have any alignment: its low four
; bits select one of 16 unrolled code paths through a jump table.
;
; Register roles after the prolog:
;   rsi = source row        rax = src_stride (after the dispatch)
;   rdi = reference row     rdx = ref_ptr & 0xf, later ref_stride
;   rcx = dispatch target   xmm5/xmm6/xmm7 = totals for offsets 0/+1/+2
;-----------------------------------------------------------------------
global sym(vp8_sad16x8x3_ssse3) PRIVATE
sym(vp8_sad16x8x3_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rcx
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rdx = misalignment of ref_ptr within a 16-byte block (0..15)
        mov             rdx,        0xf
        and             rdx,        rdi

        ; The table lives in the instruction stream, so jump over it.
        ; Each entry is the 32-bit distance from do_jump to one code path.
        jmp .vp8_sad16x8x3_ssse3_skiptable
.vp8_sad16x8x3_ssse3_jumptable:
        dd .vp8_sad16x8x3_ssse3_aligned_by_0  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_1  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_2  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_3  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_4  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_5  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_6  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_7  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_8  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_9  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_10 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_11 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_12 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_13 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_14 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_15 - .vp8_sad16x8x3_ssse3_do_jump
.vp8_sad16x8x3_ssse3_skiptable:

        ; call/pop yields the run-time address of do_jump without an
        ; absolute relocation; the pop also removes the pushed return address
        call .vp8_sad16x8x3_ssse3_do_jump
.vp8_sad16x8x3_ssse3_do_jump:
        pop             rcx                         ; get the address of do_jump
        mov             rax,  .vp8_sad16x8x3_ssse3_jumptable - .vp8_sad16x8x3_ssse3_do_jump
        add             rax,  rcx  ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable

        movsxd          rax,  dword [rax + 4*rdx]   ; get the 32 bit offset from the jumptable
        add             rcx,        rax             ; rcx = address of the selected code path

        ; rax/rdx are free again: load the strides the row macros expect
        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        jmp             rcx

        ; Offsets 0..14: aligned loads + palignr. Every expansion ends with
        ; a jump to store_off, so the paths never fall into each other.
        PROCESS_16X8X3_OFFSET 0,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 1,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 2,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 3,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 4,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 5,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 6,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 7,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 8,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 9,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 10, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 11, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 12, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 13, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 14, .vp8_sad16x8x3_ssse3

        ; Offset 15: the +2 candidate would need a palignr shift of 17, so
        ; this path uses unaligned loads. It falls through into store_off.
.vp8_sad16x8x3_ssse3_aligned_by_15:

        PROCESS_16X2X3 1
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0

.vp8_sad16x8x3_ssse3_store_off:
        mov             rdi,        arg(4) ;Results

        ; psadbw leaves one partial sum in the low word of each 64-bit half.
        ; Per half: 8 rows * 8 bytes * 255 = 16320, and both halves together
        ; are at most 32640, so the 16-bit paddw below cannot overflow.
        movq            xmm0,       xmm5            ; low half, upper qword cleared
        psrldq          xmm5,       8               ; bring the high half down

        paddw           xmm0,       xmm5
        movd            [rdi],      xmm0            ; results[0]: ref_ptr
;-
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movd            [rdi+4],    xmm0            ; results[1]: ref_ptr + 1
;-
        movq            xmm0,       xmm7
        psrldq          xmm7,       8

        paddw           xmm0,       xmm7
        movd            [rdi+8],    xmm0            ; results[2]: ref_ptr + 2

    ; begin epilog
    pop         rcx
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret