;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; Every routine in this file returns a sum of squared differences between two
; arrays of 16-bit values.  The shared recipe is:
;   psubw   -> per-lane 16-bit difference
;   pmaddwd -> square each difference and add adjacent pairs into 32-bit lanes
;   paddd   -> accumulate the 32-bit partial sums
; followed by a horizontal add of the 32-bit lanes into the low dword, which is
; returned in eax/rax.
;
; The SSE2 routines only touch xmm0-xmm5.  xmm6-xmm15 are callee-saved under
; the Microsoft x64 calling convention, so staying below xmm6 avoids having to
; save and restore them.
;
; NOTE(review): the MMX routines do not execute emms; callers are presumably
; responsible for clearing MMX state before using x87 code -- confirm.

;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
; Sum of squared differences over 16 shorts (32 bytes) from each pointer.
; Both pointers are read with movdqa and must therefore be 16-byte aligned.
global sym(vp8_block_error_xmm)
sym(vp8_block_error_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)      ;coeff_ptr
    mov         rdi,        arg(1)      ;dcoef_ptr

    movdqa      xmm0,       [rsi]
    movdqa      xmm1,       [rdi]
    movdqa      xmm2,       [rsi+16]
    movdqa      xmm3,       [rdi+16]

    psubw       xmm0,       xmm1        ; differences 0..7
    psubw       xmm2,       xmm3        ; differences 8..15

    pmaddwd     xmm0,       xmm0
    pmaddwd     xmm2,       xmm2
    paddd       xmm0,       xmm2        ; four 32-bit partial sums

    ; Horizontal add: interleave the partial sums with zero, fold the
    ; high pair onto the low pair, then fold the upper qword onto the lower.
    pxor        xmm4,       xmm4
    movdqa      xmm1,       xmm0
    punpckldq   xmm0,       xmm4        ; s0, 0, s1, 0
    punpckhdq   xmm1,       xmm4        ; s2, 0, s3, 0
    paddd       xmm0,       xmm1

    movdqa      xmm1,       xmm0
    psrldq      xmm0,       8
    paddd       xmm0,       xmm1

    movq        rax,        xmm0

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret

;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
; Sum of squared differences over 16 shorts (32 bytes) from each pointer,
; processed as four 8-byte MMX loads per input.
global sym(vp8_block_error_mmx)
sym(vp8_block_error_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)      ;coeff_ptr
    mov         rdi,        arg(1)      ;dcoef_ptr

    ; coefficients 0..7
    movq        mm0,        [rsi]
    movq        mm1,        [rdi]
    movq        mm2,        [rsi+8]
    movq        mm3,        [rdi+8]

    psubw       mm0,        mm1
    psubw       mm2,        mm3

    pmaddwd     mm0,        mm0
    pmaddwd     mm2,        mm2
    paddd       mm0,        mm2

    ; coefficients 8..15
    movq        mm4,        [rsi+16]
    movq        mm5,        [rdi+16]
    movq        mm6,        [rsi+24]
    movq        mm7,        [rdi+24]

    psubw       mm4,        mm5
    psubw       mm6,        mm7

    pmaddwd     mm4,        mm4
    pmaddwd     mm6,        mm6
    paddd       mm4,        mm6

    paddd       mm0,        mm4         ; two 32-bit partial sums

    ; Fold the high dword onto the low dword; only the low dword of the
    ; result is meaningful (the function returns int).
    movq        mm1,        mm0
    psrlq       mm1,        32
    paddd       mm0,        mm1

    movq        rax,        mm0

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
; Sum of squared differences over 16 consecutive groups of 16 shorts
; (16 iterations x 32 bytes).  A word mask derived from dc is ANDed onto the
; first four differences of every group: each 16-bit half of dc that is
; non-zero clears the matching lane, so dc == 0 keeps every lane and dc == 1
; drops the first coefficient of each group.
global sym(vp8_mbblock_error_mmx_impl)
sym(vp8_mbblock_error_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)      ;coeff_ptr
    mov         rdi,        arg(1)      ;dcoef_ptr

    pxor        mm7,        mm7         ; zero, used to build the mask
    pxor        mm2,        mm2         ; accumulator

    movd        mm1,        dword ptr arg(2)  ;dc
    pcmpeqw     mm1,        mm7         ; 0xffff where the dc word is zero

    mov         rcx,        16

mberror_loop_mmx:
    ; coefficients 0..7 of this group; the mask applies to 0..3 only
    movq        mm3,        [rsi]
    movq        mm4,        [rdi]
    movq        mm5,        [rsi+8]
    movq        mm6,        [rdi+8]

    psubw       mm3,        mm4
    pand        mm3,        mm1
    psubw       mm5,        mm6

    pmaddwd     mm3,        mm3
    pmaddwd     mm5,        mm5

    paddd       mm2,        mm5
    paddd       mm2,        mm3

    ; coefficients 8..15 of this group
    movq        mm3,        [rsi+16]
    movq        mm4,        [rdi+16]
    movq        mm5,        [rsi+24]
    movq        mm6,        [rdi+24]

    psubw       mm3,        mm4
    psubw       mm5,        mm6

    pmaddwd     mm3,        mm3
    pmaddwd     mm5,        mm5

    paddd       mm2,        mm5
    paddd       mm2,        mm3

    add         rsi,        32
    add         rdi,        32

    sub         rcx,        1
    jnz         mberror_loop_mmx

    ; Fold the high dword onto the low dword.
    movq        mm0,        mm2
    psrlq       mm2,        32
    paddd       mm0,        mm2

    movq        rax,        mm0

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
; SSE2 counterpart of vp8_mbblock_error_mmx_impl: 16 iterations of 32 bytes,
; with the dc-derived word mask ANDed onto the first eight differences of
; every group (only the two lanes that come from dc can ever be cleared).
; Both pointers are read with movdqa and must be 16-byte aligned.
global sym(vp8_mbblock_error_xmm_impl)
sym(vp8_mbblock_error_xmm_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)      ;coeff_ptr
    mov         rdi,        arg(1)      ;dcoef_ptr

    pxor        xmm0,       xmm0        ; zero, kept for the final reduction
    pxor        xmm2,       xmm2        ; accumulator

    movd        xmm1,       dword ptr arg(2)  ;dc
    pcmpeqw     xmm1,       xmm0        ; 0xffff where the dc word is zero

    mov         rcx,        16

mberror_loop:
    ; coefficients 0..7 of this group (masked)
    movdqa      xmm3,       [rsi]
    movdqa      xmm4,       [rdi]

    psubw       xmm3,       xmm4
    pand        xmm3,       xmm1
    pmaddwd     xmm3,       xmm3

    ; coefficients 8..15 of this group
    movdqa      xmm4,       [rsi+16]
    movdqa      xmm5,       [rdi+16]

    psubw       xmm4,       xmm5
    pmaddwd     xmm4,       xmm4

    paddd       xmm2,       xmm4
    paddd       xmm2,       xmm3

    add         rsi,        32
    add         rdi,        32

    sub         rcx,        1
    jnz         mberror_loop

    ; Horizontal add of the four 32-bit partial sums.
    movdqa      xmm3,       xmm2
    punpckldq   xmm3,       xmm0
    punpckhdq   xmm2,       xmm0
    paddd       xmm3,       xmm2

    movdqa      xmm1,       xmm3
    psrldq      xmm3,       8
    paddd       xmm3,       xmm1

    movq        rax,        xmm3

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
; Sum of squared differences over 128 shorts from each pointer
; (16 iterations x 16 bytes).
global sym(vp8_mbuverror_mmx_impl)
sym(vp8_mbuverror_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)      ;s_ptr
    mov         rdi,        arg(1)      ;d_ptr

    mov         rcx,        16
    pxor        mm7,        mm7         ; accumulator

mbuverror_loop_mmx:
    movq        mm1,        [rsi]
    movq        mm2,        [rdi]
    movq        mm3,        [rsi+8]
    movq        mm4,        [rdi+8]

    psubw       mm1,        mm2
    psubw       mm3,        mm4

    pmaddwd     mm1,        mm1
    pmaddwd     mm3,        mm3

    paddd       mm7,        mm1
    paddd       mm7,        mm3

    add         rsi,        16
    add         rdi,        16

    dec         rcx
    jnz         mbuverror_loop_mmx

    ; Fold the high dword onto the low dword.
    movq        mm0,        mm7
    psrlq       mm7,        32
    paddd       mm0,        mm7

    movq        rax,        mm0

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
; Sum of squared differences over 128 shorts from each pointer
; (16 iterations x 16 bytes).  Both pointers are read with movdqa and must
; be 16-byte aligned.
global sym(vp8_mbuverror_xmm_impl)
sym(vp8_mbuverror_xmm_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)      ;s_ptr
    mov         rdi,        arg(1)      ;d_ptr

    mov         rcx,        16
    pxor        xmm0,       xmm0        ; accumulator

mbuverror_loop:
    movdqa      xmm1,       [rsi]
    movdqa      xmm2,       [rdi]

    psubw       xmm1,       xmm2
    pmaddwd     xmm1,       xmm1

    paddd       xmm0,       xmm1

    add         rsi,        16
    add         rdi,        16

    dec         rcx
    jnz         mbuverror_loop

    ; Horizontal add of the four 32-bit partial sums.
    pxor        xmm3,       xmm3
    movdqa      xmm1,       xmm0
    punpckldq   xmm0,       xmm3
    punpckhdq   xmm1,       xmm3
    paddd       xmm0,       xmm1

    movdqa      xmm1,       xmm0
    psrldq      xmm0,       8
    paddd       xmm0,       xmm1

    movq        rax,        xmm0

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret