;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; General notes for the routines in this file:
;  * Arguments are read through the arg(n) macro after
;    SHADOW_ARGS_TO_STACK; strides are sign-extended from 32 bits.
;  * The SAD result is returned in rax.
;  * The routines that use mm0-mm7 do not execute emms.
;    NOTE(review): callers presumably clear the MMX state before any x87
;    floating-point code runs -- confirm against the call sites.

;unsigned int vp8_sad16x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between a 16x16 byte block at src_ptr and a
; 16x16 byte block at ref_ptr. Two rows are processed per loop iteration.
; There is no early exit in this routine.
global sym(vp8_sad16x16_wmt) PRIVATE
sym(vp8_sad16x16_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    SAVE_XMM 6
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rcx = src_ptr + 16 * src_stride: the loop ends when rsi reaches it
        lea             rcx,        [rsi+rax*8]

        lea             rcx,        [rcx+rax*8]
        pxor            xmm6,       xmm6            ; running SAD (two qword lanes)

.x16x16sad_wmt_loop:

        ; Each 16-byte row is loaded as two 8-byte halves and interleaved
        ; with punpcklbw. src and ref are shuffled the same way, so the
        ; byte pairing seen by psadbw is unchanged.
        movq            xmm0,       QWORD PTR [rsi]
        movq            xmm2,       QWORD PTR [rsi+8]

        movq            xmm1,       QWORD PTR [rdi]
        movq            xmm3,       QWORD PTR [rdi+8]

        movq            xmm4,       QWORD PTR [rsi+rax]
        movq            xmm5,       QWORD PTR [rdi+rdx]


        punpcklbw       xmm0,       xmm2
        punpcklbw       xmm1,       xmm3

        psadbw          xmm0,       xmm1            ; row 0 SAD
        movq            xmm2,       QWORD PTR [rsi+rax+8]

        movq            xmm3,       QWORD PTR [rdi+rdx+8]
        lea             rsi,        [rsi+rax*2]     ; advance src by two rows

        lea             rdi,        [rdi+rdx*2]     ; advance ref by two rows
        punpcklbw       xmm4,       xmm2

        punpcklbw       xmm5,       xmm3
        psadbw          xmm4,       xmm5            ; row 1 SAD

        ; 16-bit adds are enough: each lane grows by at most 8*255 per row,
        ; i.e. at most 32640 over 16 rows.
        paddw           xmm6,       xmm0
        paddw           xmm6,       xmm4

        cmp             rsi,        rcx
        jne             .x16x16sad_wmt_loop

        ; fold the high qword lane onto the low one and return the total
        movq            xmm0,       xmm6
        psrldq          xmm6,       8

        paddw           xmm0,       xmm6
        movq            rax,        xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;unsigned int vp8_sad8x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_sad)
;
; Sum of absolute differences over an 8-wide, 16-high byte block, two rows
; per iteration. At the top of every iteration the running sum is compared
; (unsigned, ja) against max_sad; if it is already larger the routine
; returns that partial sum immediately.
global sym(vp8_sad8x16_wmt) PRIVATE
sym(vp8_sad8x16_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        ; rbx holds the source stride because rax carries the running sum
        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rcx = src_ptr + 16 * src_stride (loop end marker)
        lea             rcx,        [rsi+rbx*8]

        lea             rcx,        [rcx+rbx*8]
        pxor            mm7,        mm7             ; running SAD

.x8x16sad_wmt_loop:

        movq            rax,        mm7
        cmp             eax,        arg(4)          ; running sum > max_sad ?
        ja              .x8x16sad_wmt_early_exit

        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]

        movq            mm2,        QWORD PTR [rsi+rbx]
        movq            mm3,        QWORD PTR [rdi+rdx]

        psadbw          mm0,        mm1
        psadbw          mm2,        mm3

        lea             rsi,        [rsi+rbx*2]
        lea             rdi,        [rdi+rdx*2]

        paddw           mm7,        mm0
        paddw           mm7,        mm2

        cmp             rsi,        rcx
        jne             .x8x16sad_wmt_loop

        movq            rax,        mm7             ; full-block SAD

.x8x16sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret


;unsigned int vp8_sad8x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_sad)
;
; Sum of absolute differences over an 8x8 byte block, one row per
; iteration. The running sum is compared (unsigned, ja) against max_sad
; before each row; once it is larger, the partial sum is returned.
global sym(vp8_sad8x8_wmt) PRIVATE
sym(vp8_sad8x8_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        lea             rcx,        [rsi+rbx*8]     ; src_ptr + 8 * src_stride
        pxor            mm7,        mm7             ; running SAD

.x8x8sad_wmt_loop:

        movq            rax,        mm7
        cmp             eax,        arg(4)          ; running sum > max_sad ?
        ja              .x8x8sad_wmt_early_exit

        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]

        psadbw          mm0,        mm1
        lea             rsi,        [rsi+rbx]

        add             rdi,        rdx
        paddw           mm7,        mm0

        cmp             rsi,        rcx
        jne             .x8x8sad_wmt_loop

        movq            rax,        mm7             ; full-block SAD
.x8x8sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret

;unsigned int vp8_sad4x4_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences over a 4x4 byte block, fully unrolled.
; Two 4-byte rows are interleaved into one 8-byte register so that a single
; psadbw covers both rows; src and ref are interleaved the same way.
global sym(vp8_sad4x4_wmt) PRIVATE
sym(vp8_sad4x4_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rows 0 and 1
        movd            mm0,        DWORD PTR [rsi]
        movd            mm1,        DWORD PTR [rdi]

        movd            mm2,        DWORD PTR [rsi+rax]
        movd            mm3,        DWORD PTR [rdi+rdx]

        punpcklbw       mm0,        mm2
        punpcklbw       mm1,        mm3

        psadbw          mm0,        mm1
        lea             rsi,        [rsi+rax*2]

        ; rows 2 and 3
        lea             rdi,        [rdi+rdx*2]
        movd            mm4,        DWORD PTR [rsi]

        movd            mm5,        DWORD PTR [rdi]
        movd            mm6,        DWORD PTR [rsi+rax]

        movd            mm7,        DWORD PTR [rdi+rdx]
        punpcklbw       mm4,        mm6

        punpcklbw       mm5,        mm7
        psadbw          mm4,        mm5

        paddw           mm0,        mm4
        movq            rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;unsigned int vp8_sad16x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_sad)
;
; Sum of absolute differences over a 16-wide, 8-high byte block, two rows
; per iteration, each row handled as two 8-byte halves. The running sum is
; compared (unsigned, ja) against max_sad at the top of every iteration;
; once it is larger, the partial sum is returned.
global sym(vp8_sad16x8_wmt) PRIVATE
sym(vp8_sad16x8_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog


        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        lea             rcx,        [rsi+rbx*8]     ; src_ptr + 8 * src_stride
        pxor            mm7,        mm7             ; running SAD

.x16x8sad_wmt_loop:

        movq            rax,        mm7
        cmp             eax,        arg(4)          ; running sum > max_sad ?
        ja              .x16x8sad_wmt_early_exit

        movq            mm0,        QWORD PTR [rsi]
        movq            mm2,        QWORD PTR [rsi+8]

        movq            mm1,        QWORD PTR [rdi]
        movq            mm3,        QWORD PTR [rdi+8]

        movq            mm4,        QWORD PTR [rsi+rbx]
        movq            mm5,        QWORD PTR [rdi+rdx]

        psadbw          mm0,        mm1             ; row 0, left half
        psadbw          mm2,        mm3             ; row 0, right half

        movq            mm1,        QWORD PTR [rsi+rbx+8]
        movq            mm3,        QWORD PTR [rdi+rdx+8]

        psadbw          mm4,        mm5             ; row 1, left half
        psadbw          mm1,        mm3             ; row 1, right half

        lea             rsi,        [rsi+rbx*2]
        lea             rdi,        [rdi+rdx*2]

        paddw           mm0,        mm2
        paddw           mm4,        mm1

        paddw           mm7,        mm0
        paddw           mm7,        mm4

        cmp             rsi,        rcx
        jne             .x16x8sad_wmt_loop

        movq            rax,        mm7             ; full-block SAD

.x16x8sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_copy32xn_sse2(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int  height);
;
; Copies `height` rows of 32 bytes from src_ptr to dst_ptr. Rows are copied
; four at a time while at least four remain, then one at a time.
; Loads use movdqu (no alignment requirement on the source); stores use
; movdqa, so every destination row address must be 16-byte aligned.
; A height of zero or less copies nothing.
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;dst_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;dst_stride
        movsxd          rcx,        dword ptr arg(4) ;height

        ; Skip the unrolled loop when fewer than four rows are requested;
        ; entering it unconditionally would copy four rows regardless and
        ; leave rcx negative for the tail loop.
        cmp             rcx,        4
        jl              .block_copy_sse2_tail

.block_copy_sse2_loopx4:
        movdqu          xmm0,       XMMWORD PTR [rsi]
        movdqu          xmm1,       XMMWORD PTR [rsi + 16]
        movdqu          xmm2,       XMMWORD PTR [rsi + rax]
        movdqu          xmm3,       XMMWORD PTR [rsi + rax + 16]

        lea             rsi,        [rsi+rax*2]

        movdqu          xmm4,       XMMWORD PTR [rsi]
        movdqu          xmm5,       XMMWORD PTR [rsi + 16]
        movdqu          xmm6,       XMMWORD PTR [rsi + rax]
        movdqu          xmm7,       XMMWORD PTR [rsi + rax + 16]

        lea             rsi,        [rsi+rax*2]

        movdqa          XMMWORD PTR [rdi], xmm0
        movdqa          XMMWORD PTR [rdi + 16], xmm1
        movdqa          XMMWORD PTR [rdi + rdx], xmm2
        movdqa          XMMWORD PTR [rdi + rdx + 16], xmm3

        lea             rdi,        [rdi+rdx*2]

        movdqa          XMMWORD PTR [rdi], xmm4
        movdqa          XMMWORD PTR [rdi + 16], xmm5
        movdqa          XMMWORD PTR [rdi + rdx], xmm6
        movdqa          XMMWORD PTR [rdi + rdx + 16], xmm7

        lea             rdi,        [rdi+rdx*2]

        sub             rcx,        4
        cmp             rcx,        4
        jge             .block_copy_sse2_loopx4

.block_copy_sse2_tail:
        ; jle (not je) so a non-positive remainder can never enter the
        ; count-down loop below, which only terminates on exactly zero.
        cmp             rcx,        0
        jle             .copy_is_done

.block_copy_sse2_loop:
        movdqu          xmm0,       XMMWORD PTR [rsi]
        movdqu          xmm1,       XMMWORD PTR [rsi + 16]
        lea             rsi,        [rsi+rax]

        movdqa          XMMWORD PTR [rdi], xmm0
        movdqa          XMMWORD PTR [rdi + 16], xmm1
        lea             rdi,        [rdi+rdx]

        sub             rcx,        1
        jne             .block_copy_sse2_loop

.copy_is_done:
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret