;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;void vp8_short_inv_walsh4x4_1_mmx(short *input, short *output)
;
; Fills all 16 words of output with (input[0] + 3) >> 3.
; Only input[0] contributes to the result; the sum wraps at 16 bits and the
; shift is arithmetic (psraw).
global sym(vp8_short_inv_walsh4x4_1_mmx)
sym(vp8_short_inv_walsh4x4_1_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ; rsi = input
    mov         rdi, arg(1)             ; rdi = output

    ; The load below is wider than one short, but only the low word of the
    ; sum survives the broadcast that follows.
    mov         rax, 3
    add         rax, [rsi]              ; low word = input[0] + 3
    movd        mm0, eax

    punpcklwd   mm0, mm0                ; low dword = val val
    punpckldq   mm0, mm0                ; val val val val
    psraw       mm0, 3                  ; (input[0] + 3) >> 3 in every lane

    ; 4 stores x 4 words = 16 output values
    movq        [rdi + 0], mm0
    movq        [rdi + 8], mm0
    movq        [rdi + 16], mm0
    movq        [rdi + 24], mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
global sym(vp8_short_inv_walsh4x4_mmx)
sym(vp8_short_inv_walsh4x4_mmx):
    ; Reads 16 words from input (arg 0) as four 4-word rows and writes 16
    ; words to output (arg 1).
    ;
    ; Structure: butterfly pass, 4x4 word transpose, the same butterfly pass
    ; again, a second transpose, then (x + 3) >> 3 on every word.
    ; One butterfly pass, applied to four lanes at once:
    ;     a1 = r0 + r3      b1 = r1 + r2
    ;     d1 = r0 - r3      c1 = r1 - r2
    ;     out0 = a1 + b1    out1 = d1 + c1
    ;     out2 = a1 - b1    out3 = d1 - c1
    ; All arithmetic is 16-bit wrapping (paddw / psubw).
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ; rsi = input
    mov         rdi, arg(1)             ; rdi = output

    ; Rounding constant: the word 3 replicated into all four lanes of mm7.
    ; NOTE(review): movq from a general-purpose register is kept exactly as
    ; found; confirm it assembles for every target x86_abi_support.asm covers.
    mov         rax, 0x00030003         ; 00030003h
    movq        mm7, rax
    punpcklwd   mm7, mm7                ; 0003000300030003h

    movq        mm0, [rsi + 0]          ; r0 = ip[0..3]
    movq        mm1, [rsi + 8]          ; r1 = ip[4..7]
    movq        mm2, [rsi + 16]         ; r2 = ip[8..11]
    movq        mm3, [rsi + 24]         ; r3 = ip[12..15]

    ; ---- first butterfly pass --------------------------------------------
    movq        mm4, mm0
    movq        mm5, mm1

    paddw       mm4, mm3                ; a1 = r0 + r3
    paddw       mm5, mm2                ; b1 = r1 + r2

    movq        mm6, mm4                ; keep a copy of a1

    paddw       mm4, mm5                ; mm4 = a1 + b1   (out0)
    psubw       mm6, mm5                ; mm6 = a1 - b1   (out2)

    psubw       mm0, mm3                ; d1 = r0 - r3
    psubw       mm1, mm2                ; c1 = r1 - r2

    movq        mm5, mm0                ; keep a copy of d1

    paddw       mm0, mm1                ; mm0 = d1 + c1   (out1)
    psubw       mm5, mm1                ; mm5 = d1 - c1   (out3)

    ; ---- first transpose -------------------------------------------------
    ; Element "rc" is row r, column c; the highest word is listed first.
    ;   mm4 = 03 02 01 00
    ;   mm0 = 13 12 11 10
    ;   mm6 = 23 22 21 20
    ;   mm5 = 33 32 31 30

    movq        mm3, mm4                ; 03 02 01 00
    punpcklwd   mm4, mm0                ; 11 01 10 00
    punpckhwd   mm3, mm0                ; 13 03 12 02

    movq        mm1, mm6                ; 23 22 21 20
    punpcklwd   mm6, mm5                ; 31 21 30 20
    punpckhwd   mm1, mm5                ; 33 23 32 22

    movq        mm0, mm4                ; 11 01 10 00
    movq        mm2, mm3                ; 13 03 12 02

    punpckldq   mm0, mm6                ; 30 20 10 00 -> new r0
    punpckhdq   mm4, mm6                ; 31 21 11 01 -> new r1

    punpckldq   mm2, mm1                ; 32 22 12 02 -> new r2
    punpckhdq   mm3, mm1                ; 33 23 13 03 -> new r3

    ; ---- second butterfly pass (r0..r3 = mm0, mm4, mm2, mm3) -------------
    movq        mm1, mm0
    movq        mm5, mm4

    paddw       mm1, mm3                ; a1 = r0 + r3
    paddw       mm5, mm2                ; b1 = r1 + r2

    movq        mm6, mm1                ; keep a copy of a1

    paddw       mm1, mm5                ; mm1 = a1 + b1   (out0)
    psubw       mm6, mm5                ; mm6 = a1 - b1   (out2)

    psubw       mm0, mm3                ; d1 = r0 - r3
    psubw       mm4, mm2                ; c1 = r1 - r2

    movq        mm5, mm0                ; keep a copy of d1

    paddw       mm0, mm4                ; mm0 = d1 + c1   (out1)
    psubw       mm5, mm4                ; mm5 = d1 - c1   (out3)

    ; ---- second transpose (rows in mm1, mm0, mm6, mm5) -------------------
    movq        mm3, mm1                ; 03 02 01 00
    punpcklwd   mm1, mm0                ; 11 01 10 00
    punpckhwd   mm3, mm0                ; 13 03 12 02

    movq        mm4, mm6                ; 23 22 21 20
    punpcklwd   mm6, mm5                ; 31 21 30 20
    punpckhwd   mm4, mm5                ; 33 23 32 22

    movq        mm0, mm1                ; 11 01 10 00
    movq        mm2, mm3                ; 13 03 12 02

    punpckldq   mm0, mm6                ; 30 20 10 00
    punpckhdq   mm1, mm6                ; 31 21 11 01

    punpckldq   mm2, mm4                ; 32 22 12 02
    punpckhdq   mm3, mm4                ; 33 23 13 03

    ; ---- round and scale: (x + 3) >> 3, arithmetic shift -----------------
    paddw       mm0, mm7
    paddw       mm1, mm7
    paddw       mm2, mm7
    paddw       mm3, mm7

    psraw       mm0, 3
    psraw       mm1, 3
    psraw       mm2, 3
    psraw       mm3, 3

    ; ---- store the four result quadwords ---------------------------------
    movq        [rdi + 0], mm0
    movq        [rdi + 8], mm1
    movq        [rdi + 16], mm2
    movq        [rdi + 24], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
