;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; /****************************************************************************
; * Notes:
; *
; * This implementation makes use of 16 bit fixed point versions of two
; * multiply constants:
; *        1.   sqrt(2) * cos (pi/8)
; *        2.   sqrt(2) * sin (pi/8)
; * Because the first constant is bigger than 1, to maintain the same 16 bit
; * fixed point precision as the second one, we use a trick of
; *        x * a = x + x*(a-1)
; * so
; *        x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
; *
; * For the second constant, because the 16 bit version is 35468, which
; * is bigger than 32768, in a signed 16 bit multiply it becomes a negative
; * number.
; *        (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x
; *
; * In the code below both constants are therefore applied as
; *        pmulhw  reg, [constant]
; *        paddw   reg, x
; * (x_c1sqr2less1 holds constant 1 minus one, x_s1sqr2 holds constant 2).
; *
; **************************************************************************/


;void short_idct4x4llm_mmx(short *input, short *output, int pitch)
;
; Full 4x4 inverse transform.
;   input  - sixteen 16-bit coefficients, read as four 8-byte rows at byte
;            offsets 0, 8, 16 and 24.
;   output - four 8-byte rows are written at byte offsets 0, pitch, 2*pitch
;            and 3*pitch.
;   pitch  - 32-bit signed value, sign-extended and used as a byte offset.
;
; The same butterfly is run twice, each time followed by a 4x4 word
; transpose. The second run additionally adds 4 to every lane and shifts
; right arithmetically by 3 before the transpose.
; No emms is issued by this routine.
; NOTE(review): presumably the caller is responsible for clearing MMX
; state -- confirm against the call sites.
global sym(vp8_short_idct4x4llm_mmx)
sym(vp8_short_idct4x4llm_mmx):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 3
    GET_GOT     rbx
    ; end prolog

    mov         rax,            arg(0)              ;input
    mov         rdx,            arg(1)              ;output

    movq        mm0,            [rax]               ; row 0
    movq        mm1,            [rax+8]             ; row 1

    movq        mm2,            [rax+16]            ; row 2
    movq        mm3,            [rax+24]            ; row 3

    movsxd      rax,            dword ptr arg(2)    ;pitch (rax no longer points at input)

    ; ---- first butterfly -------------------------------------------------
    psubw       mm0,            mm2                 ; b1 = 0-2
    paddw       mm2,            mm2                 ; 2 * row 2

    movq        mm5,            mm1
    paddw       mm2,            mm0                 ; a1 = 0+2  (b1 + 2*row2)

    pmulhw      mm5,            [GLOBAL(x_s1sqr2)]
    paddw       mm5,            mm1                 ; ip1 * sin(pi/8) * sqrt(2)

    movq        mm7,            mm3
    pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]

    paddw       mm7,            mm3                 ; ip3 * cos(pi/8) * sqrt(2)
    psubw       mm7,            mm5                 ; mm7 = ip3*cos - ip1*sin

    movq        mm5,            mm1
    movq        mm4,            mm3                 ; keep ip3 for the add-back below

    pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
    paddw       mm5,            mm1                 ; ip1 * cos(pi/8) * sqrt(2)

    pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
    paddw       mm3,            mm4                 ; ip3 * sin(pi/8) * sqrt(2)

    paddw       mm3,            mm5                 ; d1 = ip1*cos + ip3*sin
    movq        mm6,            mm2                 ; a1

    movq        mm4,            mm0                 ; b1
    paddw       mm2,            mm3                 ; a1 + d1  -> used as row 0

    paddw       mm4,            mm7                 ; b1 + mm7 -> used as row 2
    psubw       mm0,            mm7                 ; b1 - mm7 -> used as row 1

    psubw       mm6,            mm3                 ; a1 - d1  -> used as row 3

    ; ---- first 4x4 word transpose ----------------------------------------
    movq        mm1,            mm2                 ; 03 02 01 00
    movq        mm3,            mm4                 ; 23 22 21 20

    punpcklwd   mm1,            mm0                 ; 11 01 10 00
    punpckhwd   mm2,            mm0                 ; 13 03 12 02

    punpcklwd   mm3,            mm6                 ; 31 21 30 20
    punpckhwd   mm4,            mm6                 ; 33 23 32 22

    movq        mm0,            mm1                 ; 11 01 10 00
    movq        mm5,            mm2                 ; 13 03 12 02

    punpckldq   mm0,            mm3                 ; 30 20 10 00
    punpckhdq   mm1,            mm3                 ; 31 21 11 01

    punpckldq   mm2,            mm4                 ; 32 22 12 02
    punpckhdq   mm5,            mm4                 ; 33 23 13 03

    movq        mm3,            mm5                 ; 33 23 13 03

    ; ---- second butterfly (same dataflow, plus rounding) -----------------
    psubw       mm0,            mm2                 ; b1 = 0-2
    paddw       mm2,            mm2                 ; 2 * line 2

    movq        mm5,            mm1
    paddw       mm2,            mm0                 ; a1 = 0+2

    pmulhw      mm5,            [GLOBAL(x_s1sqr2)]
    paddw       mm5,            mm1                 ; ip1 * sin(pi/8) * sqrt(2)

    movq        mm7,            mm3
    pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]

    paddw       mm7,            mm3                 ; ip3 * cos(pi/8) * sqrt(2)
    psubw       mm7,            mm5                 ; mm7 = ip3*cos - ip1*sin

    movq        mm5,            mm1
    movq        mm4,            mm3

    pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
    paddw       mm5,            mm1                 ; ip1 * cos(pi/8) * sqrt(2)

    pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
    paddw       mm3,            mm4                 ; ip3 * sin(pi/8) * sqrt(2)

    paddw       mm3,            mm5                 ; d1
    paddw       mm0,            [GLOBAL(fours)]     ; rounding term folded into b1

    paddw       mm2,            [GLOBAL(fours)]     ; rounding term folded into a1
    movq        mm6,            mm2                 ; a1

    movq        mm4,            mm0                 ; b1
    paddw       mm2,            mm3                 ; a1 + d1  -> used as row 0

    paddw       mm4,            mm7                 ; b1 + mm7 -> used as row 2
    psubw       mm0,            mm7                 ; b1 - mm7 -> used as row 1

    psubw       mm6,            mm3                 ; a1 - d1  -> used as row 3
    psraw       mm2,            3                   ; (x + 4) >> 3 on every lane

    psraw       mm0,            3
    psraw       mm4,            3

    psraw       mm6,            3

    ; ---- second 4x4 word transpose ---------------------------------------
    movq        mm1,            mm2                 ; 03 02 01 00
    movq        mm3,            mm4                 ; 23 22 21 20

    punpcklwd   mm1,            mm0                 ; 11 01 10 00
    punpckhwd   mm2,            mm0                 ; 13 03 12 02

    punpcklwd   mm3,            mm6                 ; 31 21 30 20
    punpckhwd   mm4,            mm6                 ; 33 23 32 22

    movq        mm0,            mm1                 ; 11 01 10 00
    movq        mm5,            mm2                 ; 13 03 12 02

    punpckldq   mm0,            mm3                 ; 30 20 10 00
    punpckhdq   mm1,            mm3                 ; 31 21 11 01

    punpckldq   mm2,            mm4                 ; 32 22 12 02
    punpckhdq   mm5,            mm4                 ; 33 23 13 03

    ; ---- store four rows, pitch bytes apart ------------------------------
    movq        [rdx],          mm0

    movq        [rdx+rax],      mm1
    movq        [rdx+rax*2],    mm2

    add         rdx,            rax                 ; rdx = output + pitch
    movq        [rdx+rax*2],    mm5                 ; output + 3*pitch

    ; begin epilog
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;void short_idct4x4llm_1_mmx(short *input, short *output, int pitch)
;
; DC-only variant: computes (input[0] + 4) >> 3 (arithmetic shift),
; replicates that 16-bit value into all four lanes, and stores the same
; 8-byte row at output byte offsets 0, pitch, 2*pitch and 3*pitch.
; The movd load reads 4 bytes from input, but only the low word survives
; the punpcklwd/punpckldq broadcast.
global sym(vp8_short_idct4x4llm_1_mmx)
sym(vp8_short_idct4x4llm_1_mmx):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 3
    GET_GOT     rbx
    ; end prolog

    mov         rax,            arg(0)              ;input
    movd        mm0,            [rax]

    paddw       mm0,            [GLOBAL(fours)]     ; + 4 (rounding)
    mov         rdx,            arg(1)              ;output

    psraw       mm0,            3                   ; >> 3
    movsxd      rax,            dword ptr arg(2)    ;pitch

    punpcklwd   mm0,            mm0                 ; low word -> words 0 and 1
    punpckldq   mm0,            mm0                 ; low dword -> all four words

    movq        [rdx],          mm0
    movq        [rdx+rax],      mm0

    movq        [rdx+rax*2],    mm0
    add         rdx,            rax                 ; rdx = output + pitch

    movq        [rdx+rax*2],    mm0                 ; output + 3*pitch


    ; begin epilog
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
;
; Adds the rounded DC term (input_dc + 4) >> 3 to a 4x4 block of bytes.
;   pred_ptr - source bytes; rows are pitch bytes apart, 4 bytes read per row.
;   dst_ptr  - destination; rows are stride bytes apart, 4 bytes written
;              per row.
; Each row is widened to 16-bit lanes, added with signed saturation
; (paddsw), then narrowed with unsigned saturation (packuswb), so results
; are clamped to 0..255.
; rsi and rdi are saved and restored around the body.
global sym(vp8_dc_only_idct_add_mmx)
sym(vp8_dc_only_idct_add_mmx):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,            arg(1)              ;s -- prediction
    mov         rdi,            arg(2)              ;d -- destination
    movsxd      rax,            dword ptr arg(4)    ;stride
    movsxd      rdx,            dword ptr arg(3)    ;pitch
    pxor        mm0,            mm0                 ; zero, used for unpack/pack

    movd        mm5,            arg(0)              ;input_dc

    paddw       mm5,            [GLOBAL(fours)]     ; + 4 (rounding)

    psraw       mm5,            3                   ; >> 3

    punpcklwd   mm5,            mm5                 ; broadcast the low word
    punpckldq   mm5,            mm5                 ; ... into all four lanes

    ; row 0
    movd        mm1,            [rsi]
    punpcklbw   mm1,            mm0                 ; bytes -> words
    paddsw      mm1,            mm5
    packuswb    mm1,            mm0                 ; pack and unpack to saturate
    movd        [rdi],          mm1

    ; row 1
    movd        mm2,            [rsi+rdx]
    punpcklbw   mm2,            mm0
    paddsw      mm2,            mm5
    packuswb    mm2,            mm0                 ; pack and unpack to saturate
    movd        [rdi+rax],      mm2

    ; row 2
    movd        mm3,            [rsi+2*rdx]
    punpcklbw   mm3,            mm0
    paddsw      mm3,            mm5
    packuswb    mm3,            mm0                 ; pack and unpack to saturate
    movd        [rdi+2*rax],    mm3

    ; row 3 (advance both pointers by one row, then index by two rows)
    add         rdi,            rax
    add         rsi,            rdx
    movd        mm4,            [rsi+2*rdx]
    punpcklbw   mm4,            mm0
    paddsw      mm4,            mm5
    packuswb    mm4,            mm0                 ; pack and unpack to saturate
    movd        [rdi+2*rax],    mm4

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
; 0x8A8C = 35468: 16 bit fixed point sqrt(2) * sin(pi/8); negative when read
; as a signed word, hence the paddw add-back after every pmulhw that uses it.
align 16
x_s1sqr2:
    times 4 dw 0x8A8C
; 0x4E7B = 20091: 16 bit fixed point sqrt(2) * cos(pi/8) - 1.
align 16
x_c1sqr2less1:
    times 4 dw 0x4E7B
; Rounding term added before the final arithmetic shift right by 3.
align 16
fours:
    times 4 dw 0x0004