;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"
extern sym(vp8_bilinear_filters_x86_8)


%define BLOCK_HEIGHT_WIDTH 4
%define vp8_filter_weight 128
%define VP8_FILTER_SHIFT  7


;-----------------------------------------------------------------------------
;void vp8_filter_block1d_h6_mmx
;(
;    unsigned char   *src_ptr,
;    unsigned short  *output_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned int    pixel_step,
;    unsigned int    output_height,
;    unsigned int    output_width,
;    short           * vp8_filter
;)
;
; Horizontal 6-tap filter.  Every loop iteration filters four adjacent pixels
; of one source row and stores the four results as 16-bit words at output_ptr.
;
;   src_ptr              address of p0; bytes src_ptr-2 .. src_ptr+6 are read
;   output_ptr           destination; advanced by output_width BYTES per row
;   src_pixels_per_line  added to the source pointer after every row
;   pixel_step           not read by this routine
;   output_height        row count; the loop tests at the bottom, so >= 1
;   output_width         only used as the destination advance (see above)
;   vp8_filter           six taps, one every 16 bytes; the first four words
;                        of each tap are multiplied against the four pixels
;
; Register roles inside the loop:
;   rsi = source row       rdi = destination row    rcx = rows remaining
;   rax = dest advance     rdx = vp8_filter         r8  = source advance
;                                                         (64-bit builds only)
;   mm0 = zero             mm1/mm2/mm6/mm7 = taps 1/2/3/4
;   mm3 = accumulator      mm4, mm5 = scratch
;
; Arguments are fetched with arg(n) after SHADOW_ARGS_TO_STACK; these macros
; are presumably supplied by the %include at the top of the file.
;-----------------------------------------------------------------------------
global sym(vp8_filter_block1d_h6_mmx) PRIVATE
sym(vp8_filter_block1d_h6_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

        mov         rdx,    arg(6) ;vp8_filter

        ; Taps 1..4 stay resident in registers; taps 0 and 5 are used
        ; straight from memory inside the loop.
        movq        mm1,    [rdx + 16]       ; mm1 = tap 1
        movq        mm2,    [rdx + 32]       ; mm2 = tap 2
        movq        mm6,    [rdx + 48]       ; mm6 = tap 3
        movq        mm7,    [rdx + 64]       ; mm7 = tap 4

        mov         rdi,    arg(1) ;output_ptr
        mov         rsi,    arg(0) ;src_ptr
        movsxd      rcx,    dword ptr arg(4) ;output_height
        movsxd      rax,    dword ptr arg(5) ;output_width (byte advance of rdi)
        pxor        mm0,    mm0              ; mm0 = 0, used to widen bytes to words

%if ABI_IS_32BIT
        ; 32-bit build: the source advance is added from the stack in the loop.
%else
        movsxd      r8,     dword ptr arg(2) ;src_pixels_per_line (loop-invariant)
%endif

.nextrow:
        movq        mm3,    [rsi-2]          ; mm3 = p-2..p5 (bytes)
        movq        mm4,    mm3              ; mm4 = p-2..p5
        psrlq       mm3,    8                ; drop p-2
        punpcklbw   mm3,    mm0              ; mm3 = p-1..p2 (words)
        pmullw      mm3,    mm1              ; mm3 = p-1..p2 * tap 1

        movq        mm5,    mm4              ; mm5 = p-2..p5
        punpckhbw   mm4,    mm0              ; mm4 = p2..p5 (words)
        pmullw      mm4,    mm7              ; mm4 *= tap 4
        paddsw      mm3,    mm4              ; mm3 += mm4

        movq        mm4,    mm5              ; mm4 = p-2..p5
        psrlq       mm5,    16               ; drop p-2, p-1
        punpcklbw   mm5,    mm0              ; mm5 = p0..p3 (words)
        pmullw      mm5,    mm2              ; mm5 *= tap 2
        paddsw      mm3,    mm5              ; mm3 += mm5

        movq        mm5,    mm4              ; mm5 = p-2..p5
        psrlq       mm4,    24               ; drop p-2..p0
        punpcklbw   mm4,    mm0              ; mm4 = p1..p4 (words)
        pmullw      mm4,    mm6              ; mm4 *= tap 3
        paddsw      mm3,    mm4              ; mm3 += mm4

        ; outer taps (0 and 5)
        movd        mm4,    [rsi+3]          ; mm4 = p3..p6 (bytes)
        punpcklbw   mm4,    mm0              ; mm4 = p3..p6 (words)
        pmullw      mm4,    [rdx+80]         ; mm4 *= tap 5
        paddsw      mm3,    mm4              ; mm3 += mm4

        punpcklbw   mm5,    mm0              ; mm5 = p-2..p1 (words)
        pmullw      mm5,    [rdx]            ; mm5 *= tap 0
        paddsw      mm3,    mm5              ; mm3 += mm5

        paddsw      mm3,    [GLOBAL(rd)]     ; mm3 += round value
        psraw       mm3,    VP8_FILTER_SHIFT ; mm3 >>= 7 (arithmetic)
        packuswb    mm3,    mm0              ; pack to bytes: clamps to 0..255
        punpcklbw   mm3,    mm0              ; ...and widen back to four words

        movq        [rdi],  mm3              ; store four 16-bit results

%if ABI_IS_32BIT
        add         rsi,    dword ptr arg(2) ;src_pixels_per_line ; next line
%else
        add         rsi,    r8               ; next line
%endif
        add         rdi,    rax              ; next output row

        dec         rcx                      ; decrement count
        jnz         .nextrow                 ; next row

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
;void vp8_filter_block1dc_v6_mmx
;(
;   short *src_ptr,
;   unsigned char *output_ptr,
;    int output_pitch,
;   unsigned int pixels_per_line,
;   unsigned int pixel_step,
;   unsigned int output_height,
;   unsigned int output_width,
;   short * vp8_filter
;)
;
; Vertical 6-tap filter.  Every loop iteration combines four 16-bit samples
; from each of six input rows (row -2 .. row 3 relative to the current row)
; and stores the four results as bytes at output_ptr.
;
;   src_ptr          address of row 0 of the 16-bit input
;   output_ptr       destination; advanced by output_pitch bytes per row
;   pixels_per_line  byte distance between consecutive input rows
;   pixel_step       not read by this routine
;   output_height    row count; the loop tests at the bottom, so >= 1
;   output_width     not read by this routine
;   vp8_filter       six taps, one every 16 bytes (first four words used)
;
; Register roles inside the loop:
;   rsi = input row -2     rdi = destination row    rcx = rows remaining
;   rdx = input row pitch  rax = output pitch       rbx = vp8_filter
;   mm0 = zero             mm1/mm2/mm6/mm7 = taps 1/2/3/4
;   mm3 = accumulator      mm4 = scratch            mm5 = round value
;-----------------------------------------------------------------------------
global sym(vp8_filter_block1dc_v6_mmx) PRIVATE
sym(vp8_filter_block1dc_v6_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

        ; The round constant is fetched before rbx is repurposed as the filter
        ; pointer -- presumably because GLOBAL() can depend on the rbx value
        ; set up by GET_GOT.  NOTE(review): confirm against the macro source.
        movq      mm5, [GLOBAL(rd)]
        push        rbx
        mov         rbx, arg(7) ;vp8_filter
        movq      mm1, [rbx + 16]         ; mm1 = tap 1
        movq      mm2, [rbx + 32]         ; mm2 = tap 2
        movq      mm6, [rbx + 48]         ; mm6 = tap 3
        movq      mm7, [rbx + 64]         ; mm7 = tap 4

        movsxd      rdx, dword ptr arg(3) ;pixels_per_line
        mov         rdi, arg(1) ;output_ptr
        mov         rsi, arg(0) ;src_ptr
        sub         rsi, rdx
        sub         rsi, rdx              ; rsi = src_ptr - 2 * pitch (row -2)
        movsxd      rcx, DWORD PTR arg(5) ;output_height
        movsxd      rax, DWORD PTR arg(2) ;output_pitch
        pxor        mm0, mm0              ; mm0 = 0, second operand of the final pack


.nextrow_cv:
        movq        mm3, [rsi+rdx]        ; mm3 = p0..p3  = row -1
        pmullw      mm3, mm1              ; mm3 *= tap 1


        movq        mm4, [rsi + 4*rdx]    ; mm4 = p0..p3  = row 2
        pmullw      mm4, mm7              ; mm4 *= tap 4
        paddsw      mm3, mm4              ; mm3 += mm4

        movq        mm4, [rsi + 2*rdx]    ; mm4 = p0..p3  = row 0
        pmullw      mm4, mm2              ; mm4 *= tap 2
        paddsw      mm3, mm4              ; mm3 += mm4

        movq        mm4, [rsi]            ; mm4 = p0..p3  = row -2
        pmullw      mm4, [rbx]            ; mm4 *= tap 0
        paddsw      mm3, mm4              ; mm3 += mm4


        ; Step the source one row now: rows 1 and 3 become reachable with
        ; *2 / *4 scaling (no 3 * pitch needed), and this is also the only
        ; per-iteration advance of rsi.
        add         rsi, rdx
        movq        mm4, [rsi + 2*rdx]    ; mm4 = p0..p3  = row 1
        pmullw      mm4, mm6              ; mm4 *= tap 3
        paddsw      mm3, mm4              ; mm3 += mm4

        movq        mm4, [rsi + 4*rdx]    ; mm4 = p0..p3  = row 3
        pmullw      mm4, [rbx +80]        ; mm4 *= tap 5
        paddsw      mm3, mm4              ; mm3 += mm4


        paddsw      mm3, mm5              ; mm3 += round value
        psraw       mm3, VP8_FILTER_SHIFT ; mm3 >>= 7 (arithmetic)
        packuswb    mm3, mm0              ; pack to bytes: clamps to 0..255

        movd        [rdi],mm3             ; store four result bytes
        ; Each iteration re-reads five of the six rows used by the previous
        ; one.  The original author judged this cheap because the data is
        ; expected to be in cache, while noting that it is avoidable.
        lea         rdi,  [rdi+rax] ;
        dec         rcx                   ; decrement count
        jnz         .nextrow_cv           ; next row

        pop         rbx

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------------
;void bilinear_predict8x8_mmx
;(
;    unsigned char  *src_ptr,
;    int   src_pixels_per_line,
;    int  xoffset,
;    int  yoffset,
;   unsigned char *dst_ptr,
;    int dst_pitch
;)
;
; 8-wide, 8-row bilinear prediction: a 2-tap horizontal pass on every source
; row, followed by a 2-tap vertical pass between consecutive filtered rows.
;
;   src_ptr              first source pixel; nine rows of nine bytes are read
;                        (one row before the loop plus one per output row,
;                        each row read at [rsi] and [rsi+1])
;   src_pixels_per_line  byte distance between source rows
;   xoffset, yoffset     indices into vp8_bilinear_filters_x86_8; each entry
;                        is 32 bytes, with tap 0 at +0 and tap 1 at +16
;   dst_ptr, dst_pitch   destination and its row advance in bytes; the loop
;                        ends when rdi reaches dst_ptr + 8 * dst_pitch
;
; Register roles inside the loop:
;   rsi = source row       rdi = destination row    rcx = end of destination
;   rdx = source pitch     rax = VFilter            r8  = dst_pitch
;                                                         (64-bit builds only)
;   mm0 = zero             mm1/mm2 = HFilter taps 0/1
;   mm7 = previous horizontally filtered row, packed to bytes
;   mm3..mm6 = scratch
;-----------------------------------------------------------------------------
global sym(vp8_bilinear_predict8x8_mmx) PRIVATE
sym(vp8_bilinear_predict8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
    ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];

        movsxd      rax,        dword ptr arg(2) ;xoffset
        mov         rdi,        arg(4) ;dst_ptr

        shl         rax,        5                   ; xoffset * 32 bytes per entry
        lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_x86_8))]

        add         rax,        rcx                 ; rax = HFilter
        mov         rsi,        arg(0) ;src_ptr

        movsxd      rdx,        dword ptr arg(5) ;dst_pitch
        movq        mm1,        [rax]               ; mm1 = HFilter tap 0

        movq        mm2,        [rax+16]            ; mm2 = HFilter tap 1
        movsxd      rax,        dword ptr arg(3) ;yoffset

        pxor        mm0,        mm0                 ; mm0 = 0, used to widen bytes

        shl         rax,        5                   ; yoffset * 32 bytes per entry
        add         rax,        rcx                 ; rax = VFilter

        lea         rcx,        [rdi+rdx*8]         ; rcx = dst_ptr + 8 * dst_pitch
        movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line



        ; Horizontal pass over the first source row; the result seeds mm7.
        movq        mm3,        [rsi]               ; pixels 0..7
        movq        mm4,        mm3                 ; make a copy of current line

        punpcklbw   mm3,        mm0                 ; pixels 0..3 as words
        punpckhbw   mm4,        mm0                 ; pixels 4..7 as words

        pmullw      mm3,        mm1                 ; * HFilter tap 0
        pmullw      mm4,        mm1                 ;

        movq        mm5,        [rsi+1]             ; pixels 1..8
        movq        mm6,        mm5                 ;

        punpcklbw   mm5,        mm0                 ; pixels 1..4 as words
        punpckhbw   mm6,        mm0                 ; pixels 5..8 as words

        pmullw      mm5,        mm2                 ; * HFilter tap 1
        pmullw      mm6,        mm2                 ;

        paddw       mm3,        mm5                 ;
        paddw       mm4,        mm6                 ;

        paddw       mm3,        [GLOBAL(rd)]        ; mm3 += round value
        psraw       mm3,        VP8_FILTER_SHIFT    ; mm3 >>= 7

        paddw       mm4,        [GLOBAL(rd)]        ; mm4 += round value
        psraw       mm4,        VP8_FILTER_SHIFT    ; mm4 >>= 7

        movq        mm7,        mm3                 ;
        packuswb    mm7,        mm4                 ; mm7 = filtered row as 8 bytes

        add         rsi,        rdx                 ; next line

%if ABI_IS_32BIT
        ; 32-bit build: dst_pitch is added from the stack inside the loop.
%else
        movsxd      r8,         dword ptr arg(5) ;dst_pitch (loop-invariant)
%endif

.next_row_8x8:
        ; Horizontal pass over the current source row.
        movq        mm3,        [rsi]               ; pixels 0..7
        movq        mm4,        mm3                 ; make a copy of current line

        punpcklbw   mm3,        mm0                 ; pixels 0..3 as words
        punpckhbw   mm4,        mm0                 ; pixels 4..7 as words

        pmullw      mm3,        mm1                 ; * HFilter tap 0
        pmullw      mm4,        mm1                 ;

        movq        mm5,        [rsi+1]             ; pixels 1..8
        movq        mm6,        mm5                 ;

        punpcklbw   mm5,        mm0                 ; pixels 1..4 as words
        punpckhbw   mm6,        mm0                 ; pixels 5..8 as words

        pmullw      mm5,        mm2                 ; * HFilter tap 1
        pmullw      mm6,        mm2                 ;

        paddw       mm3,        mm5                 ;
        paddw       mm4,        mm6                 ;

        ; Previous filtered row, widened and weighted by VFilter tap 0.
        movq        mm5,        mm7                 ;
        movq        mm6,        mm7                 ;

        punpcklbw   mm5,        mm0                 ;
        punpckhbw   mm6,        mm0

        pmullw      mm5,        [rax]               ;
        pmullw      mm6,        [rax]               ;

        paddw       mm3,        [GLOBAL(rd)]        ; mm3 += round value
        psraw       mm3,        VP8_FILTER_SHIFT    ; mm3 >>= 7

        paddw       mm4,        [GLOBAL(rd)]        ; mm4 += round value
        psraw       mm4,        VP8_FILTER_SHIFT    ; mm4 >>= 7

        ; Keep the current filtered row (packed) for the next iteration.
        movq        mm7,        mm3                 ;
        packuswb    mm7,        mm4                 ;


        ; Vertical pass: current row * VFilter tap 1 + previous row * tap 0.
        pmullw      mm3,        [rax+16]            ;
        pmullw      mm4,        [rax+16]            ;

        paddw       mm3,        mm5                 ;
        paddw       mm4,        mm6                 ;


        paddw       mm3,        [GLOBAL(rd)]        ; mm3 += round value
        psraw       mm3,        VP8_FILTER_SHIFT    ; mm3 >>= 7

        paddw       mm4,        [GLOBAL(rd)]        ; mm4 += round value
        psraw       mm4,        VP8_FILTER_SHIFT    ; mm4 >>= 7

        packuswb    mm3,        mm4                 ; eight result bytes

        movq        [rdi],      mm3                 ; store the results in the destination

        add         rsi,        rdx                 ; next line
%if ABI_IS_32BIT
        add         rdi,        dword ptr arg(5) ;dst_pitch
%else
        add         rdi,        r8                  ;dst_pitch
%endif
        cmp         rdi,        rcx                 ; all eight rows written?
        jne         .next_row_8x8

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;void bilinear_predict8x4_mmx
;(
;    unsigned char  *src_ptr,
int src_pixels_per_line, 36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int xoffset, 36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int yoffset, 37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; unsigned char *dst_ptr, 37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int dst_pitch 37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;) 3731b362b15af34006e6a11974088a46d42b903418eJohannglobal sym(vp8_bilinear_predict8x4_mmx) PRIVATE 37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersym(vp8_bilinear_predict8x4_mmx): 37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push rbp 37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov rbp, rsp 37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber SHADOW_ARGS_TO_STACK 6 37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber GET_GOT rbx 37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push rsi 38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push rdi 38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; end prolog 38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3831b362b15af34006e6a11974088a46d42b903418eJohann ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; 3841b362b15af34006e6a11974088a46d42b903418eJohann ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; 38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rax, dword ptr arg(2) ;xoffset 38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov rdi, arg(4) ;dst_ptr ; 38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3891b362b15af34006e6a11974088a46d42b903418eJohann lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] 39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl rax, 5 39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov rsi, arg(0) ;src_ptr ; 39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add 
rax, rcx 39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rdx, dword ptr arg(5) ;dst_pitch 39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [rax] ; 39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, [rax+16] ; 39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rax, dword ptr arg(3) ;yoffset 40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm0, mm0 ; 40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl rax, 5 40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rax, rcx 40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea rcx, [rdi+rdx*4] ; 40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; 40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; get the first horizontal line done ; 41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm3 ; make a copy of current line 41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm4, mm0 ; 41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm1 ; 41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm1 ; 41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, [rsi+1] ; 
42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm5 ; 42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm5, mm0 ; 42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm6, mm0 ; 42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm2 ; 42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, mm2 ; 42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 ; 42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, mm6 ; 43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 431538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm3, [GLOBAL(rd)] ; xmm3 += round value 43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 434538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm4, [GLOBAL(rd)] ; 43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm4, VP8_FILTER_SHIFT ; 43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm3 ; 43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm7, mm4 ; 43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rsi, rdx ; next line 4411b362b15af34006e6a11974088a46d42b903418eJohann.next_row_8x4: 44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm3 ; make a copy of current line 44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 
44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm4, mm0 ; 44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm1 ; 44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm1 ; 45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, [rsi+1] ; 45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm5 ; 45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm5, mm0 ; 45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm6, mm0 ; 45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm2 ; 45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, mm2 ; 45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 ; 46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, mm6 ; 46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm7 ; 46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm7 ; 46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm5, mm0 ; 46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm6, mm0 46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, [rax] ; 47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, [rax] ; 47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 472538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm3, [GLOBAL(rd)] ; xmm3 += round value 47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 
128 47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 475538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm4, [GLOBAL(rd)] ; 47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm4, VP8_FILTER_SHIFT ; 47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm3 ; 47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm7, mm4 ; 48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, [rax+16] ; 48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, [rax+16] ; 48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 ; 48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, mm6 ; 48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 489538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm3, [GLOBAL(rd)] ; xmm3 += round value 49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 492538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm4, [GLOBAL(rd)] ; 49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm4, VP8_FILTER_SHIFT ; 49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm3, mm4 49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [rdi], mm3 ; store the results in the destination 49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%if ABI_IS_32BIT 50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rsi, rdx ; next line 
50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rdi, dword ptr arg(5) ;dst_pitch ; 50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%else 50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd r8, dword ptr arg(5) ;dst_pitch 50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rsi, rdx ; next line 50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rdi, r8 50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%endif 50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp rdi, rcx ; 5081b362b15af34006e6a11974088a46d42b903418eJohann jne .next_row_8x4 50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; begin epilog 51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop rdi 51290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop rsi 51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber RESTORE_GOT 51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber UNSHADOW_ARGS 51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop rbp 51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ret 51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;void bilinear_predict4x4_mmx 52090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;( 52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; unsigned char *src_ptr, 52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int src_pixels_per_line, 52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int xoffset, 52490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int yoffset, 52590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; unsigned char *dst_ptr, 52690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; int dst_pitch 52790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;) 5281b362b15af34006e6a11974088a46d42b903418eJohannglobal 
sym(vp8_bilinear_predict4x4_mmx) PRIVATE 52990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersym(vp8_bilinear_predict4x4_mmx): 53090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push rbp 53190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov rbp, rsp 53290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber SHADOW_ARGS_TO_STACK 6 53390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber GET_GOT rbx 53490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push rsi 53590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push rdi 53690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; end prolog 53790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5381b362b15af34006e6a11974088a46d42b903418eJohann ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; 5391b362b15af34006e6a11974088a46d42b903418eJohann ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; 54090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rax, dword ptr arg(2) ;xoffset 54290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov rdi, arg(4) ;dst_ptr ; 54390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5441b362b15af34006e6a11974088a46d42b903418eJohann lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] 54590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl rax, 5 54690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rax, rcx ; HFilter 54890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov rsi, arg(0) ;src_ptr ; 54990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rdx, dword ptr arg(5) ;ldst_pitch 55190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [rax] ; 55290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, [rax+16] ; 55490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber movsxd rax, dword ptr arg(3) ;yoffset 55590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm0, mm0 ; 55790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl rax, 5 55890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rax, rcx 56090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea rcx, [rdi+rdx*4] ; 56190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; 56390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; get the first horizontal line done ; 56590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 56690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 56790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm1 ; 56990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm5, [rsi+1] ; 57090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm5, mm0 ; 57290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm2 ; 57390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 ; 575538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm3, [GLOBAL(rd)] ; xmm3 += round value 57690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 57890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm3 ; 58090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb 
mm7, mm0 ; 58190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rsi, rdx ; next line 5831b362b15af34006e6a11974088a46d42b903418eJohann.next_row_4x4: 58490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 58590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 58690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm1 ; 58890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm5, [rsi+1] ; 58990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm5, mm0 ; 59190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm2 ; 59290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 ; 59490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm7 ; 59690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm5, mm0 ; 59790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, [rax] ; 599538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm3, [GLOBAL(rd)] ; xmm3 += round value 60090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 60290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm3 ; 60390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm7, mm0 ; 60590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, [rax+16] ; 60790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw 
mm3, mm5 ; 60890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 610538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber paddw mm3, [GLOBAL(rd)] ; xmm3 += round value 61190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 61290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm3, mm0 61490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd [rdi], mm3 ; store the results in the destination 61590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%if ABI_IS_32BIT 61790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rsi, rdx ; next line 61890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rdi, dword ptr arg(5) ;dst_pitch ; 61990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%else 62090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movsxd r8, dword ptr arg(5) ;dst_pitch ; 62190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rsi, rdx ; next line 62290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add rdi, r8 62390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%endif 62490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp rdi, rcx ; 6261b362b15af34006e6a11974088a46d42b903418eJohann jne .next_row_4x4 62790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; begin epilog 62990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop rdi 63090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop rsi 63190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber RESTORE_GOT 63290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber UNSHADOW_ARGS 63390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop rbp 63490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ret 
; ---------------------------------------------------------------------------
; Read-only constants.
; ---------------------------------------------------------------------------


SECTION_RODATA
align 16
; Rounding term added before every ">> VP8_FILTER_SHIFT": four words of 0x40.
rd:
    dw 0x40, 0x40, 0x40, 0x40

; SIX_TAP_ROW t0, t1, t2, t3, t4, t5
;   Emits one filter: each of the six taps is written as eight identical
;   words (16 bytes per tap, 96 bytes per filter).
%macro SIX_TAP_ROW 6
  %rep 6
    times 8 dw %1
    %rotate 1
  %endrep
%endmacro

; Eight six-tap filters; the taps of every row add up to 128.
; NOTE(review): presumably one row per sub-pixel position -- confirm against
; the code that indexes this table.
align 16
global HIDDEN_DATA(sym(vp8_six_tap_mmx))
sym(vp8_six_tap_mmx):
    SIX_TAP_ROW   0,   0, 128,   0,   0,   0
    SIX_TAP_ROW   0,  -6, 123,  12,  -1,   0
    SIX_TAP_ROW   2, -11, 108,  36,  -8,   1
    SIX_TAP_ROW   0,  -9,  93,  50,  -6,   0
    SIX_TAP_ROW   3, -16,  77,  77, -16,   3
    SIX_TAP_ROW   0,  -6,  50,  93,  -9,   0
    SIX_TAP_ROW   1,  -8,  36, 108, -11,   2
    SIX_TAP_ROW   0,  -1,  12, 123,  -6,   0

