;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

%define VP8_FILTER_WEIGHT 128
%define VP8_FILTER_SHIFT  7

;-----------------------------------------------------------------------
;void vp8_mbpost_proc_down_mmx(unsigned char *dst,
;                             int pitch, int rows, int cols,int flimit)
;
; Vertical ("down") post-processing filter, handled 4 columns at a time.
; For every 4-pixel-wide strip:
;   * the first row is replicated into the 8 rows above the image and the
;     last row into the 8 rows below it (the buffer must therefore have
;     at least 8 rows of writable border on both sides);
;   * a running sum (mm5) and running sum of squares (mm6/mm7) are kept
;     over a 15-row window centred on the current row;
;   * where 15*sumsq - sum*sum < flimit the pixel is replaced by
;     (pixel + sum + vp8_rv[row & 127 ...]) >> 4, otherwise it is kept;
;   * results go through a 16-entry ring buffer on the stack and are
;     written back 8 rows late, so the window only ever reads unfiltered
;     pixels.
;
; Register roles inside the row loop:
;   rax = pitch            rdx = row counter
;   rsi = row leaving the window (current row - 8)
;   rdi = row entering the window (current row + 7)
;   mm0 = 0                mm5 = sum (4 x i16)
;   mm6 = sumsq cols 0-1   mm7 = sumsq cols 2-3 (i32 each)
;
; The argument slots for dst, rows and cols are modified in place.
; No emms is executed in this function.
;-----------------------------------------------------------------------
extern sym(vp8_rv)
global sym(vp8_mbpost_proc_down_mmx) PRIVATE
sym(vp8_mbpost_proc_down_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 136

    ; ring buffer d[] of 16 four-byte entries at [rsp] (128 bytes reserved)
    ; create flimit2 at [rsp+128]: flimit duplicated into both dwords
    mov         eax, dword ptr arg(4) ;flimit
    mov         [rsp+128], eax
    mov         [rsp+128+4], eax
%define flimit2 [rsp+128]

%if ABI_IS_32BIT=0
    lea         r8,       [GLOBAL(sym(vp8_rv))]
%endif

    ;rows +=8;  (the row loop runs 8 extra iterations to flush the ring buffer)
    add         dword ptr arg(2), 8

    ;for(c=0; c<cols; c+=4)
.loop_col:
            mov         rsi,        arg(0)  ;s
            pxor        mm0,        mm0     ;

            movsxd      rax,        dword ptr arg(1) ;pitch       ;

            ; this copies the last row down into the border 8 rows
            mov         rdi,        rsi
            ; rows is a 32-bit int: load only the low dword, sign-extended.
            ; The upper half of the argument slot is not guaranteed to be
            ; zero and is not updated by the "rows += 8" above.
            movsxd      rdx,        dword ptr arg(2)
            sub         rdx,        9                           ; (rows+8)-9 = index of last row
            imul        rdx,        rax
            lea         rdi,        [rdi+rdx]
            movq        mm1,        QWORD ptr[rdi]              ; last row
            mov         rcx,        8
.init_borderd:                                                  ; initialize bottom border
            lea         rdi,        [rdi + rax]
            movq        [rdi],      mm1

            dec         rcx
            jne         .init_borderd

            neg         rax                                     ; rax = -pitch

            ; this copies the first row up into the border 8 rows
            mov         rdi,        rsi
            movq        mm1,        QWORD ptr[rdi]              ; first row
            mov         rcx,        8
.init_border:                                                   ; initialize top border
            lea         rdi,        [rdi + rax]
            movq        [rdi],      mm1

            dec         rcx
            jne         .init_border


            lea         rsi,        [rsi + rax*8];              ; rsi = &s[-pitch*8]
            neg         rax                                     ; rax = pitch again


            pxor        mm5,        mm5
            pxor        mm6,        mm6     ;

            pxor        mm7,        mm7     ;
            mov         rdi,        rsi

            mov         rcx,        15          ;

            ; prime sum / sumsq with the 15 rows s[-8*pitch] .. s[6*pitch]
.loop_initvar:
            movd        mm1,        DWORD PTR [rdi];
            punpcklbw   mm1,        mm0     ;

            paddw       mm5,        mm1     ;
            pmullw      mm1,        mm1     ;

            movq        mm2,        mm1     ;
            punpcklwd   mm1,        mm0     ;

            punpckhwd   mm2,        mm0     ;
            paddd       mm6,        mm1     ;

            paddd       mm7,        mm2     ;
            lea         rdi,        [rdi+rax]   ;

            dec         rcx
            jne         .loop_initvar
            ;save the var and sum
            xor         rdx,        rdx
.loop_row:
            movd        mm1,        DWORD PTR [rsi]     ; [s-pitch*8]  row leaving the window
            movd        mm2,        DWORD PTR [rdi]     ; [s+pitch*7]  row entering the window

            punpcklbw   mm1,        mm0
            punpcklbw   mm2,        mm0

            paddw       mm5,        mm2                 ; sum += entering - leaving
            psubw       mm5,        mm1

            pmullw      mm2,        mm2
            movq        mm4,        mm2

            punpcklwd   mm2,        mm0
            punpckhwd   mm4,        mm0

            paddd       mm6,        mm2                 ; sumsq += entering^2
            paddd       mm7,        mm4

            pmullw      mm1,        mm1
            movq        mm2,        mm1

            punpcklwd   mm1,        mm0
            psubd       mm6,        mm1                 ; sumsq -= leaving^2

            punpckhwd   mm2,        mm0
            psubd       mm7,        mm2


            movq        mm3,        mm6
            pslld       mm3,        4

            psubd       mm3,        mm6                 ; mm3 = 15*sumsq (cols 0-1)
            movq        mm1,        mm5

            movq        mm4,        mm5
            pmullw      mm1,        mm1                 ; low  16 bits of sum*sum

            pmulhw      mm4,        mm4                 ; high 16 bits of sum*sum
            movq        mm2,        mm1

            punpcklwd   mm1,        mm4                 ; sum*sum as i32 (cols 0-1)
            punpckhwd   mm2,        mm4                 ; sum*sum as i32 (cols 2-3)

            movq        mm4,        mm7
            pslld       mm4,        4

            psubd       mm4,        mm7                 ; mm4 = 15*sumsq (cols 2-3)

            psubd       mm3,        mm1
            psubd       mm4,        mm2

            psubd       mm3,        flimit2
            psubd       mm4,        flimit2

            psrad       mm3,        31                  ; all-ones where 15*sumsq - sum^2 < flimit
            psrad       mm4,        31

            packssdw    mm3,        mm4
            packsswb    mm3,        mm0                 ; mm3 = per-byte select mask

            movd        mm1,        DWORD PTR [rsi+rax*8]   ; current row

            movq        mm2,        mm1                 ; keep the unfiltered pixels
            punpcklbw   mm1,        mm0

            paddw       mm1,        mm5
            mov         rcx,        rdx

            and         rcx,        127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
            push        rax
            lea         rax,        [GLOBAL(sym(vp8_rv))]
            movq        mm4,        [rax + rcx*2] ;vp8_rv[rcx*2]
            pop         rax
%elif ABI_IS_32BIT=0
            movq        mm4,        [r8 + rcx*2] ;vp8_rv[rcx*2]
%else
            movq        mm4,        [sym(vp8_rv) + rcx*2]
%endif
            paddw       mm1,        mm4
            psraw       mm1,        4                   ; (pixel + sum + rv) >> 4

            packuswb    mm1,        mm0
            pand        mm1,        mm3                 ; filtered where mask is set

            pandn       mm3,        mm2                 ; original elsewhere
            por         mm1,        mm3

            and         rcx,        15
            movd        DWORD PTR [rsp+rcx*4], mm1 ;d[rcx*4]

            ; nothing to write back until 8 rows have been queued
            cmp         edx,        8
            jl          .skip_assignment

            mov         rcx,        rdx
            sub         rcx,        8
            and         rcx,        15
            movd        mm1,        DWORD PTR [rsp+rcx*4] ;d[rcx*4]
            movd        [rsi],      mm1                 ; store result for row (rdx - 8)

.skip_assignment:
            lea         rsi,        [rsi+rax]

            lea         rdi,        [rdi+rax]
            add         rdx,        1

            cmp         edx,        dword arg(2) ;rows
            jl          .loop_row


        ; s += 4 -- done at full pointer width; a dword add on the
        ; argument slot would drop the carry into the upper half of a
        ; 64-bit pointer.  rsi is reloaded at the top of .loop_col.
        mov         rsi,        arg(0)
        add         rsi,        4
        mov         arg(0),     rsi
        sub         dword arg(3), 4 ; cols -= 4
        cmp         dword arg(3), 0
        jg          .loop_col

    add         rsp, 136
    pop         rsp

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit2


;-----------------------------------------------------------------------
;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
;                            unsigned char blackclamp[16],
;                            unsigned char whiteclamp[16],
;                            unsigned char bothclamp[16],
;                            unsigned int Width, unsigned int Height, int Pitch)
;
; For each row: picks an offset of 0..255 into noise[] from the low byte
; of LIBVPX_RAND(), then for every 8 pixels clamps the source
; (-blackclamp, +bothclamp, -whiteclamp, all saturating) and adds the
; noise bytes with a wrapping byte add.
;
; Only arg(2) is read for the clamp tables; whiteclamp and bothclamp are
; addressed at +16 and +32 from blackclamp.
; Pixels are processed in groups of 8 while the column offset is below
; Width, and the row loop body always runs at least once.
; The argument slots for Start and Height are modified in place.
; No emms is executed in this function.
;-----------------------------------------------------------------------
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.addnoise_loop:
    call sym(LIBVPX_RAND) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff                   ; offset into noise = rand() & 0xff
    add     rcx, rax

    ; we rely on the fact that the clamping vectors are stored contiguously
    ; in black/white/both order. Note that we have to reload this here because
    ; rdx could be trashed by rand()
    mov     rdx, arg(2) ; blackclamp


            mov     rdi, rcx                    ; rdi = noise + offset
            movsxd  rcx, dword arg(5) ;[Width]
            mov     rsi, arg(0) ;Pos
            xor         rax,rax                 ; rax = byte offset within the row

.addnoise_nextset:
            movq        mm1,[rsi+rax]         ; get the source

            psubusb     mm1, [rdx]    ;blackclamp        ; clamp both sides so we don't outrange adding noise
            paddusb     mm1, [rdx+32] ;bothclamp
            psubusb     mm1, [rdx+16] ;whiteclamp

            movq        mm2,[rdi+rax]         ; get the noise for this line
            paddb       mm1,mm2              ; add it in
            movq        [rsi+rax],mm1         ; store the result

            add         rax,8                 ; move to the next 8 pixels

            cmp         rax, rcx
            jl          .addnoise_nextset

    movsxd  rax, dword arg(7) ; Pitch
    add     arg(0), rax ; Start += Pitch
    sub     dword arg(6), 1   ; Height -= 1
    jg      .addnoise_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


SECTION_RODATA
align 16
; Constant tables; neither is referenced by the two functions above.
Blur:
    times 16 dw 16
    times  8 dw 64
    times 16 dw 16
    times  8 dw  0

rd:
    times 4 dw 0x40