;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Syntax: NASM/Yasm (Intel operand order).
; SSSE3 kernels that compute three sums of absolute differences (SAD) at once:
; one 16-pixel-wide source block against the reference block at horizontal
; byte offsets +0, +1 and +2.
;
; Register roles shared by every macro and function in this file:
;   rsi  = current source row          rax = src_stride (bytes)
;   rdi  = current reference row       rdx = ref_stride (bytes)
;   xmm5 = running SAD for ref+0
;   xmm6 = running SAD for ref+1
;   xmm7 = running SAD for ref+2
;   xmm0-xmm4 are scratch.
;
; sym(), arg(), SHADOW_ARGS_TO_STACK, SAVE_XMM, RESTORE_XMM, UNSHADOW_ARGS,
; PRIVATE, XMMWORD and PTR are not defined here; presumably they come from
; the include below and hide the calling-convention differences -- confirm
; against vpx_ports/x86_abi_support.asm.

%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
; PROCESS_16X2X3 first
;   Accumulates two rows of 16 pixels into xmm5/xmm6/xmm7 using unaligned
;   reference loads (lddqu) at rdi, rdi+1 and rdi+2.
;   Arg 1 (first): non-zero -> the first row initialises the accumulators,
;                  zero     -> the first row is added to them.
;   The second row is always added.
;   On exit rsi and rdi have advanced by two rows.
;   Source rows are read with movdqa, so rsi and rsi+rax must be 16-byte
;   aligned.
;   Clobbers: xmm0-xmm3.
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3 1
%if %1
        movdqa          xmm0,       XMMWORD PTR [rsi]
        lddqu           xmm5,       XMMWORD PTR [rdi]
        lddqu           xmm6,       XMMWORD PTR [rdi+1]
        lddqu           xmm7,       XMMWORD PTR [rdi+2]

        psadbw          xmm5,       xmm0
        psadbw          xmm6,       xmm0
        psadbw          xmm7,       xmm0
%else
        movdqa          xmm0,       XMMWORD PTR [rsi]
        lddqu           xmm1,       XMMWORD PTR [rdi]
        lddqu           xmm2,       XMMWORD PTR [rdi+1]
        lddqu           xmm3,       XMMWORD PTR [rdi+2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endif
        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]

        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X2X3_OFFSET first, offset
;   Same contract as PROCESS_16X2X3, but the reference row is fetched with
;   two aligned 16-byte loads (rdi and rdi+16) and the three 16-byte
;   windows at byte offsets offset, offset+1 and offset+2 are extracted
;   with palignr.
;   Arg 1 (first):  non-zero -> initialise accumulators, zero -> add.
;   Arg 2 (offset): immediate in 0..14, so the largest palignr shift is 16.
;   rdi must already be rounded down to a 16-byte boundary by the caller.
;   Because movdqa is used on rdi+rdx as well, later rows stay aligned only
;   if ref_stride is a multiple of 16 -- NOTE(review): confirm with callers.
;   On exit rsi and rdi have advanced by two rows.
;   Clobbers: xmm0-xmm4.
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3_OFFSET 2
%if %1
        movdqa          xmm0,       XMMWORD PTR [rsi]
        movdqa          xmm4,       XMMWORD PTR [rdi]
        movdqa          xmm7,       XMMWORD PTR [rdi+16]

        movdqa          xmm5,       xmm7
        palignr         xmm5,       xmm4,       %2

        movdqa          xmm6,       xmm7
        palignr         xmm6,       xmm4,       (%2+1)

        palignr         xmm7,       xmm4,       (%2+2)

        psadbw          xmm5,       xmm0
        psadbw          xmm6,       xmm0
        psadbw          xmm7,       xmm0
%else
        movdqa          xmm0,       XMMWORD PTR [rsi]
        movdqa          xmm4,       XMMWORD PTR [rdi]
        movdqa          xmm3,       XMMWORD PTR [rdi+16]

        movdqa          xmm1,       xmm3
        palignr         xmm1,       xmm4,       %2

        movdqa          xmm2,       xmm3
        palignr         xmm2,       xmm4,       (%2+1)

        palignr         xmm3,       xmm4,       (%2+2)

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endif
        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
        movdqa          xmm4,       XMMWORD PTR [rdi+rdx]
        movdqa          xmm3,       XMMWORD PTR [rdi+rdx+16]

        movdqa          xmm1,       xmm3
        palignr         xmm1,       xmm4,       %2

        movdqa          xmm2,       xmm3
        palignr         xmm2,       xmm4,       (%2+1)

        palignr         xmm3,       xmm4,       (%2+2)

        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          xmm1,       xmm0
        psadbw          xmm2,       xmm0
        psadbw          xmm3,       xmm0

        paddw           xmm5,       xmm1
        paddw           xmm6,       xmm2
        paddw           xmm7,       xmm3
%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X16X3_OFFSET offset, label_prefix
;   Emits the jump-table target <label_prefix>_aligned_by_<offset> for a
;   16-row block: rounds rdi down by offset bytes, accumulates 8 row pairs
;   and jumps to <label_prefix>_store_off.
;-----------------------------------------------------------------------
%macro PROCESS_16X16X3_OFFSET 2
%2_aligned_by_%1:

        sub             rdi,        %1

        PROCESS_16X2X3_OFFSET 1, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1

        jmp             %2_store_off

%endmacro

;-----------------------------------------------------------------------
; PROCESS_16X8X3_OFFSET offset, label_prefix
;   Same as PROCESS_16X16X3_OFFSET but for an 8-row block (4 row pairs).
;-----------------------------------------------------------------------
%macro PROCESS_16X8X3_OFFSET 2
%2_aligned_by_%1:

        sub             rdi,        %1

        PROCESS_16X2X3_OFFSET 1, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1
        PROCESS_16X2X3_OFFSET 0, %1

        jmp             %2_store_off

%endmacro

;-----------------------------------------------------------------------
;void vp8_sad16x16x3_ssse3(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  *results)
;
; Writes three 32-bit SADs of the 16x16 source block:
;   results[0] vs ref_ptr, results[1] vs ref_ptr+1, results[2] vs ref_ptr+2.
; Dispatches on (ref_ptr & 15) through a table of 32-bit offsets relative
; to the do_jump label, so the code contains no absolute addresses.
; Source rows are loaded with movdqa and must be 16-byte aligned.
; Uses rax, rcx, rdx, rsi, rdi and xmm0-xmm7.
;-----------------------------------------------------------------------
global sym(vp8_sad16x16x3_ssse3) PRIVATE
sym(vp8_sad16x16x3_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rcx
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rdx = misalignment of ref_ptr within a 16-byte line (0..15)
        mov             rdx,        0xf
        and             rdx,        rdi

        ; the table lives in the instruction stream, so step over it
        jmp .vp8_sad16x16x3_ssse3_skiptable
.vp8_sad16x16x3_ssse3_jumptable:
        dd .vp8_sad16x16x3_ssse3_aligned_by_0  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_1  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_2  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_3  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_4  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_5  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_6  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_7  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_8  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_9  - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_10 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_11 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_12 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_13 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_14 - .vp8_sad16x16x3_ssse3_do_jump
        dd .vp8_sad16x16x3_ssse3_aligned_by_15 - .vp8_sad16x16x3_ssse3_do_jump
.vp8_sad16x16x3_ssse3_skiptable:

        ; call/pop pair: obtains the run-time address of do_jump in rcx
        call .vp8_sad16x16x3_ssse3_do_jump
.vp8_sad16x16x3_ssse3_do_jump:
        pop             rcx                         ; get the address of do_jump
        mov             rax,  .vp8_sad16x16x3_ssse3_jumptable - .vp8_sad16x16x3_ssse3_do_jump
        add             rax,  rcx  ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable

        movsxd          rax,  dword [rax + 4*rdx]   ; get the 32 bit offset from the jumptable
        add             rcx,        rax

        ; rdx is free again now that the table entry has been read
        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        jmp             rcx

        PROCESS_16X16X3_OFFSET 0,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 1,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 2,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 3,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 4,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 5,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 6,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 7,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 8,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 9,  .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 10, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 11, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 12, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 13, .vp8_sad16x16x3_ssse3
        PROCESS_16X16X3_OFFSET 14, .vp8_sad16x16x3_ssse3

; Offset 15 would need windows at 15, 16 and 17 bytes; the last one reaches
; past the 32 bytes held by the palignr register pair, so this case uses
; unaligned loads and falls through into store_off.
.vp8_sad16x16x3_ssse3_aligned_by_15:
        PROCESS_16X2X3 1
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0

; Each accumulator holds one partial sum per 64-bit half. A half gains at
; most 8*255 = 2040 per row, so 16 rows give at most 32640 and both halves
; together at most 65280: the 16-bit paddw below cannot overflow.
.vp8_sad16x16x3_ssse3_store_off:
        mov             rdi,        arg(4) ;Results

        movq            xmm0,       xmm5
        psrldq          xmm5,       8

        paddw           xmm0,       xmm5
        movd            [rdi],      xmm0
;-
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movd            [rdi+4],    xmm0
;-
        movq            xmm0,       xmm7
        psrldq          xmm7,       8

        paddw           xmm0,       xmm7
        movd            [rdi+8],    xmm0

    ; begin epilog
    pop         rcx
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
;void vp8_sad16x8x3_ssse3(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  *results)
;
; Writes three 32-bit SADs of the 16x8 source block:
;   results[0] vs ref_ptr, results[1] vs ref_ptr+1, results[2] vs ref_ptr+2.
; Same structure as the 16x16 routine above, with 4 row pairs instead of 8.
; Source rows are loaded with movdqa and must be 16-byte aligned.
; Uses rax, rcx, rdx, rsi, rdi and xmm0-xmm7.
;-----------------------------------------------------------------------
global sym(vp8_sad16x8x3_ssse3) PRIVATE
sym(vp8_sad16x8x3_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rcx
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rdx = misalignment of ref_ptr within a 16-byte line (0..15)
        mov             rdx,        0xf
        and             rdx,        rdi

        ; the table lives in the instruction stream, so step over it
        jmp .vp8_sad16x8x3_ssse3_skiptable
.vp8_sad16x8x3_ssse3_jumptable:
        dd .vp8_sad16x8x3_ssse3_aligned_by_0  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_1  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_2  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_3  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_4  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_5  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_6  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_7  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_8  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_9  - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_10 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_11 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_12 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_13 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_14 - .vp8_sad16x8x3_ssse3_do_jump
        dd .vp8_sad16x8x3_ssse3_aligned_by_15 - .vp8_sad16x8x3_ssse3_do_jump
.vp8_sad16x8x3_ssse3_skiptable:

        ; call/pop pair: obtains the run-time address of do_jump in rcx
        call .vp8_sad16x8x3_ssse3_do_jump
.vp8_sad16x8x3_ssse3_do_jump:
        pop             rcx                         ; get the address of do_jump
        mov             rax,  .vp8_sad16x8x3_ssse3_jumptable - .vp8_sad16x8x3_ssse3_do_jump
        add             rax,  rcx  ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable

        movsxd          rax,  dword [rax + 4*rdx]   ; get the 32 bit offset from the jumptable
        add             rcx,        rax

        ; rdx is free again now that the table entry has been read
        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        jmp             rcx

        PROCESS_16X8X3_OFFSET 0,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 1,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 2,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 3,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 4,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 5,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 6,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 7,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 8,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 9,  .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 10, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 11, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 12, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 13, .vp8_sad16x8x3_ssse3
        PROCESS_16X8X3_OFFSET 14, .vp8_sad16x8x3_ssse3

; Offset 15 cannot use the palignr path (it would need a 17-byte shift),
; so it uses unaligned loads and falls through into store_off.
.vp8_sad16x8x3_ssse3_aligned_by_15:

        PROCESS_16X2X3 1
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0
        PROCESS_16X2X3 0

; Fold the two 64-bit halves of each accumulator and store the low dword.
; With 8 rows each half is at most 8*2040 = 16320, so paddw cannot overflow.
.vp8_sad16x8x3_ssse3_store_off:
        mov             rdi,        arg(4) ;Results

        movq            xmm0,       xmm5
        psrldq          xmm5,       8

        paddw           xmm0,       xmm5
        movd            [rdi],      xmm0
;-
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movd            [rdi+4],    xmm0
;-
        movq            xmm0,       xmm7
        psrldq          xmm7,       8

        paddw           xmm0,       xmm7
        movd            [rdi+8],    xmm0

    ; begin epilog
    pop         rcx
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret