;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------------
;void vp9_post_proc_down_and_across_xmm
;(
;    unsigned char *src_ptr,
;    unsigned char *dst_ptr,
;    int src_pixels_per_line,
;    int dst_pixels_per_line,
;    int rows,
;    int cols,
;    int flimit
;)
;
; For every row, two passes are made over the row, 8 pixels at a time:
;   1. "down":   each pixel p of src is combined with the pixels one and two
;                rows above and below it:
;                    (4*p + r-2 + r-1 + r1 + r2 + RD42) >> 3
;                and written to dst.
;   2. "across": the row just written to dst is filtered in place with the
;                same kernel applied to the horizontal neighbours p-2..p2.
; In both passes a pixel keeps its unfiltered value when the absolute
; difference to any of its four neighbours is greater than flimit (signed
; 16-bit compare against the low 16 bits of flimit).
;
; Register roles:
;   rsi = src pointer, rdi = dst pointer, rax = src pitch (sign-extended),
;   rcx = rows remaining, rdx = column offset within the row,
;   xmm0 = zero, xmm2 = flimit broadcast to 8 words.
;
; The column loops are do/while loops stepping by 8, so at least 8 columns are
; processed per row and cols is effectively rounded up to a multiple of 8.
; The code reads two rows above/below the current row, reads up to 10 bytes
; past the 8-pixel group horizontally, and reads/rewrites the 8 bytes at
; dst[-8]; the caller must provide that border.
;
; RD42 is the rounding constant `rd42`, defined outside this excerpt.
;
; NOTE(review): mm0 is used as a one-group delay register in the across pass
; and no emms is issued here; presumably the caller clears the MMX state --
; confirm.
;-----------------------------------------------------------------------------
global sym(vp9_post_proc_down_and_across_xmm) PRIVATE
sym(vp9_post_proc_down_and_across_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    ALIGN_STACK 16, rax
    ; move the global rd onto the stack, since we don't have enough registers
    ; to do PIC addressing
    movdqa      xmm0, [GLOBAL(rd42)]
    sub         rsp, 16
    movdqa      [rsp], xmm0
%define RD42 [rsp]
%else
%define RD42 [GLOBAL(rd42)]
%endif


    ; broadcast the low word of flimit to all 8 words of xmm2
    movd        xmm2, dword ptr arg(6)      ;flimit
    punpcklwd   xmm2, xmm2
    punpckldq   xmm2, xmm2
    punpcklqdq  xmm2, xmm2

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(1)                 ;dst_ptr

    movsxd      rcx, DWORD PTR arg(4)       ;rows
    movsxd      rax, DWORD PTR arg(2)       ;src_pixels_per_line (source pitch)
    pxor        xmm0, xmm0                  ; xmm0 = 0, used to widen bytes to words

.nextrow:

    xor         rdx, rdx                    ; clear out rdx for use as loop counter
.nextcol:
    movq        xmm3, QWORD PTR [rsi]       ; xmm3 = r0 p0..p7 (bytes)
    punpcklbw   xmm3, xmm0                  ; xmm3 = r0 p0..p7 (words)
    movdqa      xmm1, xmm3                  ; xmm1 = r0 p0..p7, kept as the unfiltered source
    psllw       xmm3, 2                     ; xmm3 = 4 * r0

    movq        xmm5, QWORD PTR [rsi + rax] ; xmm5 = r1 p0..p7 (bytes)
    punpcklbw   xmm5, xmm0                  ; xmm5 = r1 p0..p7 (words)
    paddusw     xmm3, xmm5                  ; xmm3 += r1

    ; thresholding
    movdqa      xmm7, xmm1                  ; xmm7 = r0
    psubusw     xmm7, xmm5                  ; xmm7 = sat(r0 - r1)
    psubusw     xmm5, xmm1                  ; xmm5 = sat(r1 - r0)
    paddusw     xmm7, xmm5                  ; xmm7 = abs(r0 - r1)
    pcmpgtw     xmm7, xmm2                  ; xmm7 = mask(abs > flimit)

    movq        xmm5, QWORD PTR [rsi + 2*rax] ; xmm5 = r2 p0..p7 (bytes)
    punpcklbw   xmm5, xmm0                  ; xmm5 = r2 p0..p7 (words)
    paddusw     xmm3, xmm5                  ; xmm3 += r2

    ; thresholding
    movdqa      xmm6, xmm1                  ; xmm6 = r0
    psubusw     xmm6, xmm5                  ; xmm6 = sat(r0 - r2)
    psubusw     xmm5, xmm1                  ; xmm5 = sat(r2 - r0)
    paddusw     xmm6, xmm5                  ; xmm6 = abs(r0 - r2)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6                  ; accumulate thresholds


    neg         rax                         ; rax = -pitch to reach the rows above
    movq        xmm5, QWORD PTR [rsi+2*rax] ; xmm5 = r-2 p0..p7 (bytes)
    punpcklbw   xmm5, xmm0                  ; xmm5 = r-2 p0..p7 (words)
    paddusw     xmm3, xmm5                  ; xmm3 += r-2

    ; thresholding
    movdqa      xmm6, xmm1                  ; xmm6 = r0
    psubusw     xmm6, xmm5                  ; xmm6 = sat(r0 - r-2)
    psubusw     xmm5, xmm1                  ; xmm5 = sat(r-2 - r0)
    paddusw     xmm6, xmm5                  ; xmm6 = abs(r0 - r-2)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6                  ; accumulate thresholds

    movq        xmm4, QWORD PTR [rsi+rax]   ; xmm4 = r-1 p0..p7 (bytes)
    punpcklbw   xmm4, xmm0                  ; xmm4 = r-1 p0..p7 (words)
    paddusw     xmm3, xmm4                  ; xmm3 += r-1

    ; thresholding
    movdqa      xmm6, xmm1                  ; xmm6 = r0
    psubusw     xmm6, xmm4                  ; xmm6 = sat(r0 - r-1)
    psubusw     xmm4, xmm1                  ; xmm4 = sat(r-1 - r0)
    paddusw     xmm6, xmm4                  ; xmm6 = abs(r0 - r-1)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6                  ; accumulate thresholds


    paddusw     xmm3, RD42                  ; xmm3 += round value
    psraw       xmm3, 3                     ; xmm3 /= 8

    pand        xmm1, xmm7                  ; xmm1 = source where any diff > flimit
    pandn       xmm7, xmm3                  ; xmm7 = blurred result elsewhere
    paddusw     xmm1, xmm7                  ; combination

    packuswb    xmm1, xmm0                  ; pack to bytes
    movq        QWORD PTR [rdi], xmm1       ; store 8 filtered pixels

    neg         rax                         ; pitch is positive again
    add         rsi, 8
    add         rdi, 8

    add         rdx, 8
    cmp         edx, dword arg(5)           ;cols

    jl          .nextcol

    ; done with all the cols, start the across filtering in place
    sub         rsi, rdx                    ; rewind both pointers to the row start
    sub         rdi, rdx

    xor         rdx, rdx
    movq        mm0, QWORD PTR [rdi-8]      ; prime the delay register with dst[-8..-1]

.acrossnextcol:
    movq        xmm7, QWORD PTR [rdi +rdx -2] ; xmm7 = p-2..p5
    movd        xmm4, DWORD PTR [rdi +rdx +6] ; xmm4 = p6..p9

    pslldq      xmm4, 8
    por         xmm4, xmm7                  ; xmm4 = p-2..p9 (12 bytes)

    movdqa      xmm3, xmm4
    psrldq      xmm3, 2
    punpcklbw   xmm3, xmm0                  ; xmm3 = p0..p7 (words)
    movdqa      xmm1, xmm3                  ; xmm1 = p0..p7, kept as the unfiltered source
    psllw       xmm3, 2                     ; xmm3 = 4 * p0..p7


    movdqa      xmm5, xmm4
    psrldq      xmm5, 3
    punpcklbw   xmm5, xmm0                  ; xmm5 = p1..p8
    paddusw     xmm3, xmm5                  ; xmm3 += p1..p8

    ; thresholding
    movdqa      xmm7, xmm1                  ; xmm7 = p0..p7
    psubusw     xmm7, xmm5                  ; xmm7 = sat(p0..p7 - p1..p8)
    psubusw     xmm5, xmm1                  ; xmm5 = sat(p1..p8 - p0..p7)
    paddusw     xmm7, xmm5                  ; xmm7 = abs(p0..p7 - p1..p8)
    pcmpgtw     xmm7, xmm2

    movdqa      xmm5, xmm4
    psrldq      xmm5, 4
    punpcklbw   xmm5, xmm0                  ; xmm5 = p2..p9
    paddusw     xmm3, xmm5                  ; xmm3 += p2..p9

    ; thresholding
    movdqa      xmm6, xmm1                  ; xmm6 = p0..p7
    psubusw     xmm6, xmm5                  ; xmm6 = sat(p0..p7 - p2..p9)
    psubusw     xmm5, xmm1                  ; xmm5 = sat(p2..p9 - p0..p7)
    paddusw     xmm6, xmm5                  ; xmm6 = abs(p0..p7 - p2..p9)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6                  ; accumulate thresholds


    movdqa      xmm5, xmm4                  ; xmm5 = p-2..p9
    punpcklbw   xmm5, xmm0                  ; xmm5 = p-2..p5
    paddusw     xmm3, xmm5                  ; xmm3 += p-2..p5

    ; thresholding
    movdqa      xmm6, xmm1                  ; xmm6 = p0..p7
    psubusw     xmm6, xmm5                  ; xmm6 = sat(p0..p7 - p-2..p5)
    psubusw     xmm5, xmm1                  ; xmm5 = sat(p-2..p5 - p0..p7)
    paddusw     xmm6, xmm5                  ; xmm6 = abs(p0..p7 - p-2..p5)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6                  ; accumulate thresholds

    psrldq      xmm4, 1                     ; xmm4 = p-1..p9
    punpcklbw   xmm4, xmm0                  ; xmm4 = p-1..p6
    paddusw     xmm3, xmm4                  ; xmm3 += p-1..p6

    ; thresholding
    movdqa      xmm6, xmm1                  ; xmm6 = p0..p7
    psubusw     xmm6, xmm4                  ; xmm6 = sat(p0..p7 - p-1..p6)
    psubusw     xmm4, xmm1                  ; xmm4 = sat(p-1..p6 - p0..p7)
    paddusw     xmm6, xmm4                  ; xmm6 = abs(p0..p7 - p-1..p6)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6                  ; accumulate thresholds

    paddusw     xmm3, RD42                  ; xmm3 += round value
    psraw       xmm3, 3                     ; xmm3 /= 8

    pand        xmm1, xmm7                  ; xmm1 = source where any diff > flimit
    pandn       xmm7, xmm3                  ; xmm7 = blurred result elsewhere
    paddusw     xmm1, xmm7                  ; combination

    packuswb    xmm1, xmm0                  ; pack to bytes
    ; The store is delayed by one group: the next group still needs the
    ; unfiltered p-2/p-1 of this one, so write the previous result now and
    ; keep the current one in mm0.
    movq        QWORD PTR [rdi+rdx-8], mm0  ; store previous eight bytes
    movdq2q     mm0, xmm1

    add         rdx, 8
    cmp         edx, dword arg(5)           ;cols
    jl          .acrossnextcol;

    ; last 8 pixels
    movq        QWORD PTR [rdi+rdx-8], mm0

    ; done with this row
    add         rsi, rax                    ; next source line
    ; Reload the strides sign-extended, exactly like the initial load of the
    ; source pitch: a plain 32-bit mov would zero-extend on 64-bit targets and
    ; turn a negative stride into a ~4 GiB forward jump.
    movsxd      rax, dword ptr arg(3)       ;dst_pixels_per_line (destination pitch)
    add         rdi, rax                    ; next destination line
    movsxd      rax, dword ptr arg(2)       ;src_pixels_per_line (source pitch)

    dec         rcx                         ; decrement count
    jnz         .nextrow                    ; next row

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    add         rsp, 16
    pop         rsp
%endif
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef RD42


;-----------------------------------------------------------------------------
;void vp9_mbpost_proc_down_xmm(unsigned char *dst,
;                              int pitch, int rows, int cols,int flimit)
;
; Processes the image in columns of 8 pixels. For each column a running sum
; (xmm5, 8 words) and running sum of squares (xmm6/xmm7, 2x4 dwords) are kept
; over a 15-row vertical window. For each row the pixel is replaced by
;     (pixel + sum + vp9_rv[row & 127 ...]) >> 4
; when  15*sumsq - sum*sum < flimit,  and left unchanged otherwise.
;
; Results are written back 8 rows late through the ring buffer
; d[16][8] at [rsp], so the window always reads unfiltered pixels; this is why
; rows is bumped by 8 and why the write goes to [rsi] (8 rows above the pixel
; being filtered). The code therefore reads and writes rows -8..-1 above dst
; and reads rows below the image; the caller must provide that border.
;
; Stack locals: [rsp .. rsp+127] = d[16][8], [rsp+128 .. rsp+143] = flimit x4.
; The shadowed copies of arg(0), arg(2) and arg(3) are modified in place.
;
; NOTE(review): mm0 is used for the delayed write-back and no emms is issued
; here; presumably the caller clears the MMX state -- confirm.
;-----------------------------------------------------------------------------
extern sym(vp9_rv)
global sym(vp9_mbpost_proc_down_xmm) PRIVATE
sym(vp9_mbpost_proc_down_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 128+16

    ; unsigned char d[16][8] at [rsp]
    ; create flimit4 at [rsp+128]
    mov         eax, dword ptr arg(4)       ;flimit
    mov         [rsp+128], eax
    mov         [rsp+128+4], eax
    mov         [rsp+128+8], eax
    mov         [rsp+128+12], eax
%define flimit4 [rsp+128]

%if ABI_IS_32BIT=0
    lea         r8, [GLOBAL(sym(vp9_rv))]
%endif

    ;rows +=8;
    add         dword arg(2), 8

    ;for(c=0; c<cols; c+=8)
.loop_col:
    mov         rsi, arg(0)                 ; s
    pxor        xmm0, xmm0                  ; xmm0 = 0, used to widen

    movsxd      rax, dword ptr arg(1)       ;pitch
    neg         rax                         ; rax = -pitch

    lea         rsi, [rsi + rax*8]          ; rsi = s - pitch*8
    neg         rax                         ; rax = pitch


    pxor        xmm5, xmm5                  ; sum   (8 words)
    pxor        xmm6, xmm6                  ; sumsq (pixels 0..3, dwords)

    pxor        xmm7, xmm7                  ; sumsq (pixels 4..7, dwords)
    mov         rdi, rsi

    mov         rcx, 15                     ; accumulate rows -8..6

.loop_initvar:
    movq        xmm1, QWORD PTR [rdi]
    punpcklbw   xmm1, xmm0                  ; 8 pixels as words

    paddw       xmm5, xmm1                  ; sum += p
    pmullw      xmm1, xmm1                  ; p*p (fits in 16 bits)

    movdqa      xmm2, xmm1
    punpcklwd   xmm1, xmm0                  ; low four squares as dwords

    punpckhwd   xmm2, xmm0                  ; high four squares as dwords
    paddd       xmm6, xmm1

    paddd       xmm7, xmm2
    lea         rdi, [rdi+rax]              ; next row; ends at s + pitch*7

    dec         rcx
    jne         .loop_initvar
    ;save the var and sum
    xor         rdx, rdx                    ; rdx = row counter
.loop_row:
    movq        xmm1, QWORD PTR [rsi]       ; [s-pitch*8], row leaving the window
    movq        xmm2, QWORD PTR [rdi]       ; [s+pitch*7], row entering the window

    punpcklbw   xmm1, xmm0
    punpcklbw   xmm2, xmm0

    paddw       xmm5, xmm2                  ; sum += entering - leaving
    psubw       xmm5, xmm1

    pmullw      xmm2, xmm2
    movdqa      xmm4, xmm2

    punpcklwd   xmm2, xmm0
    punpckhwd   xmm4, xmm0

    paddd       xmm6, xmm2                  ; sumsq += entering^2
    paddd       xmm7, xmm4

    pmullw      xmm1, xmm1
    movdqa      xmm2, xmm1

    punpcklwd   xmm1, xmm0
    psubd       xmm6, xmm1                  ; sumsq -= leaving^2

    punpckhwd   xmm2, xmm0
    psubd       xmm7, xmm2


    movdqa      xmm3, xmm6
    pslld       xmm3, 4

    psubd       xmm3, xmm6                  ; xmm3 = 15 * sumsq (low four)
    movdqa      xmm1, xmm5

    movdqa      xmm4, xmm5
    pmullw      xmm1, xmm1                  ; low 16 bits of sum*sum

    pmulhw      xmm4, xmm4                  ; high 16 bits of sum*sum
    movdqa      xmm2, xmm1

    punpcklwd   xmm1, xmm4                  ; sum*sum as dwords (low four)
    punpckhwd   xmm2, xmm4                  ; sum*sum as dwords (high four)

    movdqa      xmm4, xmm7
    pslld       xmm4, 4

    psubd       xmm4, xmm7                  ; xmm4 = 15 * sumsq (high four)

    psubd       xmm3, xmm1                  ; 15*sumsq - sum*sum
    psubd       xmm4, xmm2

    psubd       xmm3, flimit4
    psubd       xmm4, flimit4

    psrad       xmm3, 31                    ; all ones where the value is < flimit
    psrad       xmm4, 31

    packssdw    xmm3, xmm4
    packsswb    xmm3, xmm0                  ; xmm3 = per-byte filter mask

    movq        xmm1, QWORD PTR [rsi+rax*8] ; the row being filtered (rsi + 8 rows)

    movq        xmm2, xmm1                  ; xmm2 = unfiltered pixels
    punpcklbw   xmm1, xmm0

    paddw       xmm1, xmm5                  ; pixel + sum
    mov         rcx, rdx

    and         rcx, 127                    ; index into vp9_rv
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    push        rax
    lea         rax, [GLOBAL(sym(vp9_rv))]
    movdqu      xmm4, [rax + rcx*2]         ;vp9_rv[rcx*2]
    pop         rax
%elif ABI_IS_32BIT=0
    movdqu      xmm4, [r8 + rcx*2]          ;vp9_rv[rcx*2]
%else
    movdqu      xmm4, [sym(vp9_rv) + rcx*2]
%endif

    paddw       xmm1, xmm4
    ;paddw     xmm1, eight8s
    psraw       xmm1, 4

    packuswb    xmm1, xmm0
    pand        xmm1, xmm3                  ; filtered value where the mask is set

    pandn       xmm3, xmm2                  ; original value elsewhere
    por         xmm1, xmm3

    and         rcx, 15
    movq        QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8]

    mov         rcx, rdx
    sub         rcx, 8

    and         rcx, 15
    movq        mm0, [rsp + rcx*8]          ;d[rcx*8], result from 8 rows ago

    movq        [rsi], mm0                  ; delayed write-back
    lea         rsi, [rsi+rax]

    lea         rdi, [rdi+rax]
    add         rdx, 1

    cmp         edx, dword arg(2)           ;rows
    jl          .loop_row

    ; s += 8. Done at full register width: a dword add would only update the
    ; low half of the shadowed 64-bit pointer and lose a carry out of bit 31.
    ; rsi is free here; it is reloaded from arg(0) at .loop_col.
    mov         rsi, arg(0)
    add         rsi, 8
    mov         arg(0), rsi
    sub         dword arg(3), 8             ; cols -= 8
    cmp         dword arg(3), 0
    jg          .loop_col

    add         rsp, 128+16
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit4


;void vp9_mbpost_proc_across_ip_xmm(unsigned char *src,
;                                int pitch, int rows, int cols,int flimit)
global sym(vp9_mbpost_proc_across_ip_xmm) PRIVATE
sym(vp9_mbpost_proc_across_ip_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16

    ; create flimit4 at [rsp]
    mov         eax, dword ptr arg(4)       ;flimit
    mov         [rsp], eax
    mov         [rsp+4], eax
    mov         [rsp+8], eax
    mov         [rsp+12], eax
%define flimit4 [rsp]


    ;for(r=0;r<rows;r++)
478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang.ip_row_loop: 479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang xor rdx, rdx ;sumsq=0; 481ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang xor rcx, rcx ;sum=0; 482ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang mov rsi, arg(0); s 483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang mov rdi, -8 484ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang.ip_var_loop: 485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;for(i=-8;i<=6;i++) 486ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;{ 487ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ; sumsq += s[i]*s[i]; 488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ; sum += s[i]; 489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;} 490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movzx eax, byte [rsi+rdi] 491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add ecx, eax 492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang mul al 493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add edx, eax 494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add rdi, 1 495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang cmp rdi, 6 496ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang jle .ip_var_loop 497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;mov rax, sumsq 500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;movd xmm7, rax 501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movd xmm7, edx 502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;mov rax, sum 504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;movd xmm6, rax 505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movd xmm6, ecx 506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang mov rsi, arg(0) ;s 508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang xor rcx, rcx 509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 
510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movsxd rdx, dword arg(3) ;cols 511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add rdx, 8 512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pxor mm0, mm0 513ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pxor mm1, mm1 514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pxor xmm0, xmm0 516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang.nextcol4: 517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movd xmm1, DWORD PTR [rsi+rcx-8] ; -8 -7 -6 -5 519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movd xmm2, DWORD PTR [rsi+rcx+7] ; +7 +8 +9 +10 520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang punpcklbw xmm1, xmm0 ; expanding 522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang punpcklbw xmm2, xmm0 ; expanding 523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 524ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang punpcklwd xmm1, xmm0 ; expanding to dwords 525ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang punpcklwd xmm2, xmm0 ; expanding to dwords 526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psubd xmm2, xmm1 ; 7--8 8--7 9--6 10--5 528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm1, xmm1 ; -8*2 -7*2 -6*2 -5*2 529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm1, xmm2 ; 7+-8 8+-7 9+-6 10+-5 531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pmaddwd xmm1, xmm2 ; squared of 7+-8 8+-7 9+-6 10+-5 532ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm6, xmm2 534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm7, xmm1 535ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm6, xmm6, 0 ; duplicate the last ones 537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 
pshufd xmm7, xmm7, 0 ; duplicate the last ones 538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 539ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psrldq xmm1, 4 ; 8--7 9--6 10--5 0000 540ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psrldq xmm2, 4 ; 8--7 9--6 10--5 0000 541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 542ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm3, xmm1, 3 ; 0000 8--7 8--7 8--7 squared 543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm4, xmm2, 3 ; 0000 8--7 8--7 8--7 squared 544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm6, xmm4 546ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm7, xmm3 547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm3, xmm1, 01011111b ; 0000 0000 9--6 9--6 squared 549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm4, xmm2, 01011111b ; 0000 0000 9--6 9--6 squared 550ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 551ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm7, xmm3 552ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm6, xmm4 553ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 554ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm3, xmm1, 10111111b ; 0000 0000 8--7 8--7 squared 555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pshufd xmm4, xmm2, 10111111b ; 0000 0000 8--7 8--7 squared 556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm7, xmm3 558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm6, xmm4 559ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 560ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movdqa xmm3, xmm6 561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pmaddwd xmm3, xmm3 562ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movdqa xmm5, xmm7 564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pslld xmm5, 4 
565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psubd xmm5, xmm7 567ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psubd xmm5, xmm3 568ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psubd xmm5, flimit4 570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psrad xmm5, 31 571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang packssdw xmm5, xmm0 573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang packsswb xmm5, xmm0 574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 575ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movd xmm1, DWORD PTR [rsi+rcx] 576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movq xmm2, xmm1 577ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang punpcklbw xmm1, xmm0 579ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang punpcklwd xmm1, xmm0 580ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 581ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm1, xmm6 582ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang paddd xmm1, [GLOBAL(four8s)] 583ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 584ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psrad xmm1, 4 585ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang packssdw xmm1, xmm0 586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang packuswb xmm1, xmm0 588ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pand xmm1, xmm5 589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pandn xmm5, xmm2 591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang por xmm5, xmm1 592ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movd [rsi+rcx-8], mm0 594ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movq mm0, mm1 595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 596ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movdq2q mm1, xmm5 
597ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psrldq xmm7, 12 598ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 599ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang psrldq xmm6, 12 600ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add rcx, 4 601ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 602ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang cmp rcx, rdx 603ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang jl .nextcol4 604ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ;s+=pitch; 606ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang movsxd rax, dword arg(1) 607ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add arg(0), rax 608ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 609ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang sub dword arg(2), 1 ;rows-=1 610ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang cmp dword arg(2), 0 611ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang jg .ip_row_loop 612ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 613ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang add rsp, 16 614ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pop rsp 615ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 616ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ; begin epilog 617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pop rdi 618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pop rsi 619ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang RESTORE_GOT 620ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang RESTORE_XMM 621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang UNSHADOW_ARGS 622ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pop rbp 623ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ret 624ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang%undef flimit4 625ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 626ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 627ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise, 628ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang; unsigned char 
;                            blackclamp[16],
;                            unsigned char whiteclamp[16],
;                            unsigned char bothclamp[16],
;                            unsigned int width, unsigned int height, int pitch)
;
; vp9_plane_add_noise_wmt: adds noise to a plane in place, row by row.
;
; Per row:
;   * LIBVPX_RAND is called and the low 8 bits of its result pick a byte
;     offset (0..255) into the noise buffer.
;   * Pixels are processed 16 at a time: saturating-subtract blackclamp,
;     saturating-add bothclamp, saturating-subtract whiteclamp, then a
;     wrapping byte add of the noise.
;   * The inner loop always handles whole 16-byte groups and continues while
;     the byte offset is below width, so a width that is not a multiple of 16
;     touches bytes past width in both the row and the noise buffer.
;
; The three clamp vectors are read from blackclamp+0, +16 and +32, i.e. the
; caller must lay them out contiguously as black / white / both.  They are
; used as direct memory operands of psubusb/paddusb, which require 16-byte
; aligned addresses.
;
; The row loop is a do/while: the body runs once before height is tested.
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.addnoise_loop:
    call        sym(LIBVPX_RAND) WRT_PLT
    mov         rcx, arg(1)                 ; noise
    and         rax, 0xff                   ; random offset in 0..255
    add         rcx, rax                    ; rcx = noise + offset

    ; The clamp vectors sit back to back (black, white, both), so a single
    ; base pointer is enough.  It is loaded after the call because the
    ; callee may clobber rdx.
    mov         rdx, arg(2)                 ; blackclamp

    mov         rdi, rcx                    ; rdi = noise for this row
    movsxd      rcx, dword arg(5)           ; width
    mov         rsi, arg(0)                 ; start of the current row
    xor         rax, rax                    ; byte offset within the row

.addnoise_nextset:
    movdqu      xmm1, [rsi+rax]             ; 16 source pixels

    ; squeeze the range so the wrapping add below cannot leave 0..255
    psubusb     xmm1, [rdx]                 ; blackclamp
    paddusb     xmm1, [rdx+32]              ; bothclamp
    psubusb     xmm1, [rdx+16]              ; whiteclamp

    movdqu      xmm2, [rdi+rax]             ; 16 noise bytes
    paddb       xmm1, xmm2                  ; add the noise
    movdqu      [rsi+rax], xmm1             ; write back in place

    add         rax, 16                     ; next 16 bytes of this row

    cmp         rax, rcx
    jl          .addnoise_nextset

    movsxd      rax, dword arg(7)           ; pitch
    add         arg(0), rax                 ; start  += pitch
    sub         dword arg(6), 1             ; height -= 1
    jg          .addnoise_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


SECTION_RODATA
align 16
; eight words of 4; loaded through GLOBAL(rd42) earlier in this file
rd42:
    times 8 dw 0x04
; four dwords of 8: the rounding term added before the >> 4 in
; vp9_mbpost_proc_across_ip_xmm.  rd42 is exactly 16 bytes, so this label
; is 16-byte aligned as well.
four8s:
    times 4 dd 8