;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;void vp9_post_proc_down_and_across_xmm
;(
;    unsigned char *src_ptr,
;    unsigned char *dst_ptr,
;    int src_pixels_per_line,
;    int dst_pixels_per_line,
;    int rows,
;    int cols,
;    int flimit
;)
;
; Thresholded 5-tap smoothing filter, processed 8 pixels at a time.
;
; For every row two passes are made:
;   1. "down"   - vertical taps (r-2, r-1, r0, r1, r2) are read from src_ptr
;                 and the result is written to dst_ptr.
;   2. "across" - horizontal taps (p-2, p-1, p0, p1, p2) are read from the
;                 dst row just written and the result is stored back in place.
;
; In both passes the filtered value is
;       (n-2 + n-1 + 4*centre + n1 + n2 + RD42) >> 3
; and a pixel keeps its unfiltered value when the absolute difference between
; it and ANY of its four neighbours is greater than flimit.
;
; Arguments:
;   arg(0) src_ptr              source pixels
;   arg(1) dst_ptr              destination pixels
;   arg(2) src_pixels_per_line  source pitch (sign-extended before use)
;   arg(3) dst_pixels_per_line  destination pitch (sign-extended before use)
;   arg(4) rows                 row count
;   arg(5) cols                 column count; columns advance in steps of 8
;   arg(6) flimit               threshold; only the low 16 bits are broadcast
;
; Notes:
;   - All three loops test their condition at the bottom, so at least one row
;     and one group of 8 pixels is always processed.
;   - The down pass reads two rows above and below each source row; the across
;     pass reads [rdi-8], [rdi+rdx-2] and [rdi+rdx+6], i.e. bytes before the
;     start and past the end of the dst row.
;     NOTE(review): presumably the caller provides a border around both
;     buffers - confirm.
;   - rd42 is defined outside this routine.
;     NOTE(review): presumably the per-word rounding constant - confirm.
;   - mm0 is used as scratch and no emms is issued in this routine.
;     NOTE(review): presumably the caller resets the MMX/x87 state - confirm.
global sym(vp9_post_proc_down_and_across_xmm) PRIVATE
sym(vp9_post_proc_down_and_across_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    ALIGN_STACK 16, rax
    ; move the global rd onto the stack, since we don't have enough registers
    ; to do PIC addressing
    movdqa      xmm0, [GLOBAL(rd42)]
    sub         rsp, 16
    movdqa      [rsp], xmm0
%define RD42 [rsp]
%else
%define RD42 [GLOBAL(rd42)]
%endif


    ; xmm2 = low 16 bits of flimit replicated into all 8 words
    movd        xmm2, dword ptr arg(6) ;flimit
    punpcklwd   xmm2, xmm2
    punpckldq   xmm2, xmm2
    punpcklqdq  xmm2, xmm2

    mov         rsi, arg(0) ;src_ptr
    mov         rdi, arg(1) ;dst_ptr

    movsxd      rcx, DWORD PTR arg(4) ;rows
    movsxd      rax, DWORD PTR arg(2) ;src_pixels_per_line (source pitch)
    pxor        xmm0, xmm0              ; xmm0 = 0, used to widen/narrow pixels

.nextrow:

    xor         rdx, rdx                ; rdx = column counter

    ; ---- down (vertical) pass: src -> dst --------------------------------
.nextcol:
    movq        xmm3, QWORD PTR [rsi]   ; r0 p0..p7 (bytes)
    punpcklbw   xmm3, xmm0              ; xmm3 = r0 p0..p7 (words)
    movdqa      xmm1, xmm3              ; xmm1 = r0, kept as the unfiltered value
    psllw       xmm3, 2                 ; xmm3 = 4 * r0

    movq        xmm5, QWORD PTR [rsi + rax]   ; r1 p0..p7
    punpcklbw   xmm5, xmm0              ; xmm5 = r1 (words)
    paddusw     xmm3, xmm5              ; sum += r1

    ; thresholding
    movdqa      xmm7, xmm1              ; xmm7 = r0
    psubusw     xmm7, xmm5              ; xmm7 = sat(r0 - r1)
    psubusw     xmm5, xmm1              ; xmm5 = sat(r1 - r0)
    paddusw     xmm7, xmm5              ; xmm7 = abs(r0 - r1)
    pcmpgtw     xmm7, xmm2              ; xmm7 = mask(abs > flimit)

    movq        xmm5, QWORD PTR [rsi + 2*rax] ; r2 p0..p7
    punpcklbw   xmm5, xmm0              ; xmm5 = r2 (words)
    paddusw     xmm3, xmm5              ; sum += r2

    ; thresholding
    movdqa      xmm6, xmm1              ; xmm6 = r0
    psubusw     xmm6, xmm5              ; xmm6 = sat(r0 - r2)
    psubusw     xmm5, xmm1              ; xmm5 = sat(r2 - r0)
    paddusw     xmm6, xmm5              ; xmm6 = abs(r0 - r2)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6              ; accumulate thresholds


    neg         rax                     ; rax = -pitch for the rows above
    movq        xmm5, QWORD PTR [rsi+2*rax]   ; r-2 p0..p7
    punpcklbw   xmm5, xmm0              ; xmm5 = r-2 (words)
    paddusw     xmm3, xmm5              ; sum += r-2

    ; thresholding
    movdqa      xmm6, xmm1              ; xmm6 = r0
    psubusw     xmm6, xmm5              ; xmm6 = sat(r0 - r-2)
    psubusw     xmm5, xmm1              ; xmm5 = sat(r-2 - r0)
    paddusw     xmm6, xmm5              ; xmm6 = abs(r0 - r-2)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6              ; accumulate thresholds

    movq        xmm4, QWORD PTR [rsi+rax]     ; r-1 p0..p7
    punpcklbw   xmm4, xmm0              ; xmm4 = r-1 (words)
    paddusw     xmm3, xmm4              ; sum += r-1

    ; thresholding
    movdqa      xmm6, xmm1              ; xmm6 = r0
    psubusw     xmm6, xmm4              ; xmm6 = sat(r0 - r-1)
    psubusw     xmm4, xmm1              ; xmm4 = sat(r-1 - r0)
    paddusw     xmm6, xmm4              ; xmm6 = abs(r0 - r-1)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6              ; accumulate thresholds


    paddusw     xmm3, RD42              ; sum += round value
    psraw       xmm3, 3                 ; sum /= 8

    pand        xmm1, xmm7              ; keep source where any diff > flimit
    pandn       xmm7, xmm3              ; keep blurred result everywhere else
    paddusw     xmm1, xmm7              ; combination (the two are disjoint)

    packuswb    xmm1, xmm0              ; pack to bytes
    movq        QWORD PTR [rdi], xmm1   ; store 8 down-filtered pixels

    neg         rax                     ; pitch is positive again
    add         rsi, 8
    add         rdi, 8

    add         rdx, 8
    cmp         edx, dword arg(5) ;cols

    jl          .nextcol

    ; ---- across (horizontal) pass: dst row, in place ---------------------
    ; rewind both pointers to the start of the row
    sub         rsi, rdx
    sub         rdi, rdx

    xor         rdx, rdx
    ; Results are stored one iteration late (through mm0) so that the left
    ; taps of the next group are loaded before being overwritten. Seed mm0
    ; with the 8 bytes before the row so the first delayed store rewrites
    ; them unchanged.
    movq        mm0, QWORD PTR [rdi-8]

.acrossnextcol:
    movq        xmm7, QWORD PTR [rdi +rdx -2] ; p-2..p5
    movd        xmm4, DWORD PTR [rdi +rdx +6] ; p6..p9

    pslldq      xmm4, 8
    por         xmm4, xmm7              ; xmm4 = p-2..p9 (12 bytes)

    movdqa      xmm3, xmm4
    psrldq      xmm3, 2
    punpcklbw   xmm3, xmm0              ; xmm3 = p0..p7 (words)
    movdqa      xmm1, xmm3              ; xmm1 = p0..p7, kept as unfiltered value
    psllw       xmm3, 2                 ; xmm3 = 4 * p0..p7


    movdqa      xmm5, xmm4
    psrldq      xmm5, 3
    punpcklbw   xmm5, xmm0              ; xmm5 = p1..p8
    paddusw     xmm3, xmm5              ; sum += right neighbour

    ; thresholding
    movdqa      xmm7, xmm1              ; xmm7 = p0..p7
    psubusw     xmm7, xmm5              ; xmm7 = sat(p0..p7 - p1..p8)
    psubusw     xmm5, xmm1              ; xmm5 = sat(p1..p8 - p0..p7)
    paddusw     xmm7, xmm5              ; xmm7 = abs(p0..p7 - p1..p8)
    pcmpgtw     xmm7, xmm2

    movdqa      xmm5, xmm4
    psrldq      xmm5, 4
    punpcklbw   xmm5, xmm0              ; xmm5 = p2..p9
    paddusw     xmm3, xmm5              ; sum += second right neighbour

    ; thresholding
    movdqa      xmm6, xmm1              ; xmm6 = p0..p7
    psubusw     xmm6, xmm5              ; xmm6 = sat(p0..p7 - p2..p9)
    psubusw     xmm5, xmm1              ; xmm5 = sat(p2..p9 - p0..p7)
    paddusw     xmm6, xmm5              ; xmm6 = abs(p0..p7 - p2..p9)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6              ; accumulate thresholds


    movdqa      xmm5, xmm4              ; xmm5 = p-2..p9 (bytes)
    punpcklbw   xmm5, xmm0              ; xmm5 = p-2..p5
    paddusw     xmm3, xmm5              ; sum += second left neighbour

    ; thresholding
    movdqa      xmm6, xmm1              ; xmm6 = p0..p7
    psubusw     xmm6, xmm5              ; xmm6 = sat(p0..p7 - p-2..p5)
    psubusw     xmm5, xmm1              ; xmm5 = sat(p-2..p5 - p0..p7)
    paddusw     xmm6, xmm5              ; xmm6 = abs(p0..p7 - p-2..p5)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6              ; accumulate thresholds

    psrldq      xmm4, 1                 ; xmm4 = p-1..p9 (bytes)
    punpcklbw   xmm4, xmm0              ; xmm4 = p-1..p6
    paddusw     xmm3, xmm4              ; sum += left neighbour

    ; thresholding
    movdqa      xmm6, xmm1              ; xmm6 = p0..p7
    psubusw     xmm6, xmm4              ; xmm6 = sat(p0..p7 - p-1..p6)
    psubusw     xmm4, xmm1              ; xmm4 = sat(p-1..p6 - p0..p7)
    paddusw     xmm6, xmm4              ; xmm6 = abs(p0..p7 - p-1..p6)
    pcmpgtw     xmm6, xmm2
    por         xmm7, xmm6              ; accumulate thresholds

    paddusw     xmm3, RD42              ; sum += round value
    psraw       xmm3, 3                 ; sum /= 8

    pand        xmm1, xmm7              ; keep source where any diff > flimit
    pandn       xmm7, xmm3              ; keep blurred result everywhere else
    paddusw     xmm1, xmm7              ; combination (the two are disjoint)

    packuswb    xmm1, xmm0              ; pack to bytes
    movq        QWORD PTR [rdi+rdx-8], mm0 ; store the previous group's 8 bytes
    movdq2q     mm0, xmm1               ; hold this group's result for later

    add         rdx, 8
    cmp         edx, dword arg(5) ;cols
    jl          .acrossnextcol

    ; last 8 pixels
    movq        QWORD PTR [rdi+rdx-8], mm0

    ; done with this row
    add         rsi,rax                 ; next source line
    ; Sign-extend both pitches, matching the initial load of arg(2) above.
    ; A plain 32-bit mov would zero-extend into rax on x86-64 and turn a
    ; negative pitch into a large positive offset.
    movsxd      rax, dword ptr arg(3) ;dst_pixels_per_line (destination pitch)
    add         rdi,rax                 ; next destination line
    movsxd      rax, dword ptr arg(2) ;src_pixels_per_line (source pitch)

    dec         rcx                     ; decrement count
    jnz         .nextrow                ; next row

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    add         rsp,16                  ; drop the stack copy of rd42
    pop         rsp                     ; undo ALIGN_STACK
%endif
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef RD42


;void vp9_mbpost_proc_down_xmm(unsigned char *dst,
;                            int pitch, int rows, int cols,int flimit)
extern sym(vp9_rv)
254afc4a270e3f2ecbbb47aad63c6a6a77ca902d30efgalligan@chromium.orgglobal sym(vp9_mbpost_proc_down_xmm) PRIVATE 2556fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.orgsym(vp9_mbpost_proc_down_xmm): 2566fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org push rbp 2576fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov rbp, rsp 2586fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org SHADOW_ARGS_TO_STACK 5 2596fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org SAVE_XMM 7 2606fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org GET_GOT rbx 2616fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org push rsi 2626fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org push rdi 2636fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ; end prolog 2646fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2656fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ALIGN_STACK 16, rax 2666fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org sub rsp, 128+16 2676fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2686fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ; unsigned char d[16][8] at [rsp] 2696fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ; create flimit2 at [rsp+128] 2706fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov eax, dword ptr arg(4) ;flimit 2716fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov [rsp+128], eax 2726fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov [rsp+128+4], eax 2736fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov [rsp+128+8], eax 2746fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov [rsp+128+12], eax 2756fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%define flimit4 [rsp+128] 2766fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 
2776fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%if ABI_IS_32BIT=0 2786fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org lea r8, [GLOBAL(sym(vp9_rv))] 2796fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%endif 2806fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2816fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ;rows +=8; 2826fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org add dword arg(2), 8 2836fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2846fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ;for(c=0; c<cols; c+=8) 2856fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org.loop_col: 2866fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov rsi, arg(0) ; s 2876fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pxor xmm0, xmm0 ; 2886fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2896fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movsxd rax, dword ptr arg(1) ;pitch ; 2906fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org neg rax ; rax = -pitch 2916fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2926fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8] 2936fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org neg rax 2946fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2956fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2966fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pxor xmm5, xmm5 2976fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pxor xmm6, xmm6 ; 2986fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 2996fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pxor xmm7, xmm7 ; 3006fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov rdi, rsi 3016fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 
3026fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov rcx, 15 ; 3036fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3046fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org.loop_initvar: 3056fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq xmm1, QWORD PTR [rdi]; 3066fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklbw xmm1, xmm0 ; 3076fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3086fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddw xmm5, xmm1 ; 3096fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pmullw xmm1, xmm1 ; 3106fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3116fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm2, xmm1 ; 3126fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklwd xmm1, xmm0 ; 3136fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3146fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpckhwd xmm2, xmm0 ; 3156fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddd xmm6, xmm1 ; 3166fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3176fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddd xmm7, xmm2 ; 3186fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org lea rdi, [rdi+rax] ; 3196fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3206fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org dec rcx 3216fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org jne .loop_initvar 3226fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ;save the var and sum 3236fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org xor rdx, rdx 3246fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org.loop_row: 3256fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq xmm1, QWORD PTR [rsi] ; [s-pitch*8] 
3266fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq xmm2, QWORD PTR [rdi] ; [s+pitch*7] 3276fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3286fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklbw xmm1, xmm0 3296fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklbw xmm2, xmm0 3306fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3316fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddw xmm5, xmm2 3326fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubw xmm5, xmm1 3336fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3346fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pmullw xmm2, xmm2 3356fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm4, xmm2 3366fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3376fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklwd xmm2, xmm0 3386fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpckhwd xmm4, xmm0 3396fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3406fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddd xmm6, xmm2 3416fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddd xmm7, xmm4 3426fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3436fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pmullw xmm1, xmm1 3446fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm2, xmm1 3456fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3466fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklwd xmm1, xmm0 3476fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm6, xmm1 3486fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3496fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpckhwd xmm2, xmm0 3506fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm7, xmm2 
3516fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3526fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3536fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm3, xmm6 3546fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pslld xmm3, 4 3556fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3566fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm3, xmm6 3576fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm1, xmm5 3586fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3596fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm4, xmm5 3606fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pmullw xmm1, xmm1 3616fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3626fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pmulhw xmm4, xmm4 3636fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm2, xmm1 3646fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3656fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklwd xmm1, xmm4 3666fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpckhwd xmm2, xmm4 3676fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3686fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqa xmm4, xmm7 3696fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pslld xmm4, 4 3706fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3716fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm4, xmm7 3726fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3736fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm3, xmm1 3746fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm4, xmm2 3756fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3766fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm3, flimit4 
3776fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psubd xmm4, flimit4 3786fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3796fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psrad xmm3, 31 3806fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psrad xmm4, 31 3816fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3826fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org packssdw xmm3, xmm4 3836fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org packsswb xmm3, xmm0 3846fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3856fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq xmm1, QWORD PTR [rsi+rax*8] 3866fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3876fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq xmm2, xmm1 3886fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org punpcklbw xmm1, xmm0 3896fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3906fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddw xmm1, xmm5 3916fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov rcx, rdx 3926fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 3936fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org and rcx, 127 3946fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%if ABI_IS_32BIT=1 && CONFIG_PIC=1 3956fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org push rax 3966fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org lea rax, [GLOBAL(sym(vp9_rv))] 3976fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqu xmm4, [rax + rcx*2] ;vp9_rv[rcx*2] 3986fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pop rax 3996fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%elif ABI_IS_32BIT=0 4006fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqu xmm4, [r8 + rcx*2] ;vp9_rv[rcx*2] 
4016fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%else 4026fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movdqu xmm4, [sym(vp9_rv) + rcx*2] 4036fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%endif 4046fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4056fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org paddw xmm1, xmm4 4066fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ;paddw xmm1, eight8s 4076fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org psraw xmm1, 4 4086fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4096fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org packuswb xmm1, xmm0 4106fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pand xmm1, xmm3 4116fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4126fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pandn xmm3, xmm2 4136fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org por xmm1, xmm3 4146fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4156fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org and rcx, 15 4166fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8] 4176fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4186fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org mov rcx, rdx 4196fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org sub rcx, 8 4206fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4216fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org and rcx, 15 4226fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq mm0, [rsp + rcx*8] ;d[rcx*8] 4236fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4246fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org movq [rsi], mm0 4256fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org lea rsi, [rsi+rax] 
4266fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4276fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org lea rdi, [rdi+rax] 4286fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org add rdx, 1 4296fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4306fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org cmp edx, dword arg(2) ;rows 4316fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org jl .loop_row 4326fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4336fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org add dword arg(0), 8 ; s += 8 4346fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org sub dword arg(3), 8 ; cols -= 8 4356fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org cmp dword arg(3), 0 4366fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org jg .loop_col 4376fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4386fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org add rsp, 128+16 4396fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pop rsp 4406fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4416fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ; begin epilog 4426fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pop rdi 4436fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pop rsi 4446fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org RESTORE_GOT 4456fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org RESTORE_XMM 4466fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org UNSHADOW_ARGS 4476fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org pop rbp 4486fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org ret 4496fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org%undef flimit4 4506fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 4516fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 
;void vp9_mbpost_proc_across_ip_xmm(unsigned char *src,
;                                int pitch, int rows, int cols,int flimit)
;
; In-place horizontal post-processing filter.
;
; For every row, a running sum and sum of squares over the 15 pixels
; s[c-7 .. c+7] is maintained for each column c. Four columns are handled per
; inner-loop pass. A pixel is replaced by (s[c] + sum + 8) >> 4 when
;     15*sumsq - sum*sum < flimit
; and is left unchanged otherwise.
;
; Memory touched per row, as visible from the loop bounds:
;   - reads start at s[-8] and run past s[cols-1] (up to s[cols+14] when cols
;     is a multiple of 4), so the caller must provide readable borders;
;   - results are stored 8 columns late (through mm0/mm1) so that the window
;     always reads unfiltered pixels; as a side effect the first two stores
;     of each row write zeros to s[-8 .. -1].
;
; NOTE(review): mm0/mm1 are used and no emms is executed in this routine;
; presumably the caller resets the x87/MMX state -- confirm.
; NOTE(review): the row loop tests its counter at the bottom, so one row is
; processed even when rows <= 0 -- confirm callers never pass that.
global sym(vp9_mbpost_proc_across_ip_xmm) PRIVATE
sym(vp9_mbpost_proc_across_ip_xmm):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16                 ; 16-byte scratch slot for flimit4

    ; create flimit4 at [rsp]: four dword copies of flimit
    mov         eax, dword ptr arg(4) ;flimit
    mov         [rsp], eax
    mov         [rsp+4], eax
    mov         [rsp+8], eax
    mov         [rsp+12], eax
%define flimit4 [rsp]


    ;for(r=0;r<rows;r++)
.ip_row_loop:

        xor         rdx,    rdx ;sumsq=0;
        xor         rcx,    rcx ;sum=0;
        mov         rsi,    arg(0); s
        mov         rdi,    -8
.ip_var_loop:
        ;for(i=-8;i<=6;i++)
        ;{
        ;    sumsq += s[i]*s[i];
        ;    sum   += s[i];
        ;}
        movzx       eax, byte [rsi+rdi]
        add         ecx, eax
        mul         al                  ; ax = al*al; upper half of eax is already zero
        add         edx, eax
        add         rdi, 1
        cmp         rdi, 6
        jle         .ip_var_loop


            ; xmm7 lane 0 = sumsq, xmm6 lane 0 = sum (other lanes zero)
            ;mov         rax,    sumsq
            ;movd        xmm7,   rax
            movd        xmm7,   edx

            ;mov         rax,    sum
            ;movd        xmm6,   rax
            movd        xmm6,   ecx

            mov         rsi,    arg(0) ;s
            xor         rcx,    rcx     ; rcx = column index c

            movsxd      rdx,    dword arg(3) ;cols
            add         rdx,    8       ; 8 extra columns flush the delayed stores
            pxor        mm0,    mm0     ; mm0/mm1: two-stage delay line for results
            pxor        mm1,    mm1

            pxor        xmm0,   xmm0    ; zero, used for unpacking and packing
.nextcol4:

            movd        xmm1,   DWORD PTR [rsi+rcx-8]   ; a = s[c-8 .. c-5], leaving the window
            movd        xmm2,   DWORD PTR [rsi+rcx+7]   ; b = s[c+7 .. c+10], entering the window

            punpcklbw   xmm1,   xmm0                    ; expanding to words
            punpcklbw   xmm2,   xmm0                    ; expanding to words

            punpcklwd   xmm1,   xmm0                    ; expanding to dwords
            punpcklwd   xmm2,   xmm0                    ; expanding to dwords

            psubd       xmm2,   xmm1                    ; d = b - a   (d0 d1 d2 d3)
            paddd       xmm1,   xmm1                    ; 2*a

            paddd       xmm1,   xmm2                    ; a + b
            pmaddwd     xmm1,   xmm2                    ; q = (a+b)*(b-a) = b*b - a*a   (q0 q1 q2 q3)

            paddd       xmm6,   xmm2                    ; lane 0: sum   += d0
            paddd       xmm7,   xmm1                    ; lane 0: sumsq += q0

            pshufd      xmm6,   xmm6,   0               ; broadcast lane 0 to all four lanes
            pshufd      xmm7,   xmm7,   0               ; broadcast lane 0 to all four lanes

            psrldq      xmm1,       4                   ; q1 q2 q3 0
            psrldq      xmm2,       4                   ; d1 d2 d3 0

            pshufd      xmm3,   xmm1,   3               ; 0 q1 q1 q1
            pshufd      xmm4,   xmm2,   3               ; 0 d1 d1 d1

            paddd       xmm6,   xmm4
            paddd       xmm7,   xmm3

            pshufd      xmm3,   xmm1,   01011111b       ; 0 0 q2 q2
            pshufd      xmm4,   xmm2,   01011111b       ; 0 0 d2 d2

            paddd       xmm7,   xmm3
            paddd       xmm6,   xmm4

            pshufd      xmm3,   xmm1,   10111111b       ; 0 0 0 q3
            pshufd      xmm4,   xmm2,   10111111b       ; 0 0 0 d3

            paddd       xmm7,   xmm3                    ; xmm7 = sumsq for columns c .. c+3
            paddd       xmm6,   xmm4                    ; xmm6 = sum   for columns c .. c+3

            movdqa      xmm3,   xmm6
            pmaddwd     xmm3,   xmm3                    ; sum*sum

            movdqa      xmm5,   xmm7
            pslld       xmm5,   4

            psubd       xmm5,   xmm7                    ; 15*sumsq
            psubd       xmm5,   xmm3                    ; 15*sumsq - sum*sum

            psubd       xmm5,   flimit4
            psrad       xmm5,   31                      ; all ones where 15*sumsq - sum*sum < flimit

            packssdw    xmm5,   xmm0
            packsswb    xmm5,   xmm0                    ; per-pixel byte mask in the low dword

            movd        xmm1,   DWORD PTR [rsi+rcx]     ; s[c .. c+3]
            movq        xmm2,   xmm1                    ; keep the unfiltered bytes

            punpcklbw   xmm1,   xmm0
            punpcklwd   xmm1,   xmm0

            paddd       xmm1,   xmm6
            paddd       xmm1,   [GLOBAL(four8s)]        ; rounding term for the shift by 4

            psrad       xmm1,   4                       ; (s[c] + sum + 8) >> 4
            packssdw    xmm1,   xmm0

            packuswb    xmm1,   xmm0
            pand        xmm1,   xmm5                    ; filtered value where mask is set

            pandn       xmm5,   xmm2                    ; original value elsewhere
            por         xmm5,   xmm1

            movd        [rsi+rcx-8],  mm0               ; store the result computed two passes ago
            movq        mm0,    mm1

            movdq2q     mm1,    xmm5
            psrldq      xmm7,   12                      ; carry last lane's sumsq into lane 0

            psrldq      xmm6,   12                      ; carry last lane's sum into lane 0
            add         rcx,    4

            cmp         rcx,    rdx
            jl          .nextcol4

        ;s+=pitch;
        movsxd rax, dword arg(1)
        add    arg(0), rax

        sub dword arg(2), 1 ;rows-=1
        cmp dword arg(2), 0
        jg .ip_row_loop

    add         rsp, 16                 ; release the flimit4 slot
    pop         rsp                     ; undo ALIGN_STACK

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit4


;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
;                            unsigned char blackclamp[16],
;                            unsigned char whiteclamp[16],
;                            unsigned char bothclamp[16],
;                            unsigned int width, unsigned int height, int pitch)
;
; Adds noise to an image plane in place, one row at a time.
;
; For each row LIBVPX_RAND is called and the low 8 bits of its result are used
; as a byte offset into `noise`. The row is then processed 16 bytes at a time:
; each source vector is range-limited with saturating ops (subtract [rdx],
; add [rdx+32], subtract [rdx+16]) and the noise bytes are added with
; wrap-around arithmetic.
;
; Only arg(2) (blackclamp) is loaded; whiteclamp and bothclamp are reached as
; blackclamp+16 and blackclamp+32, so the three vectors must be contiguous in
; that order. They are used as direct memory operands of psubusb/paddusb, which
; require 16-byte alignment.
;
; The inner loop advances in whole 16-byte steps and tests at the bottom, so a
; width that is not a multiple of 16 is rounded up and at least 16 bytes of the
; row (and 16 bytes of noise at the chosen offset) are always touched.
; NOTE(review): the row loop also runs once before testing height -- confirm
; callers never pass height == 0.
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.addnoise_loop:
    call sym(LIBVPX_RAND) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff                   ; random offset 0..255 into the noise buffer
    add     rcx, rax

    ; we rely on the fact that the clamping vectors are stored contiguously
    ; in black/white/both order. Note that we have to reload this here because
    ; rdx could be trashed by rand()
    mov     rdx, arg(2) ; blackclamp


            mov     rdi, rcx            ; rdi = noise + offset
            movsxd  rcx, dword arg(5) ;[Width]
            mov     rsi, arg(0) ;Pos
            xor         rax,rax         ; rax = byte offset within the row

.addnoise_nextset:
            movdqu      xmm1,[rsi+rax]         ; get the source

            psubusb     xmm1, [rdx]    ;blackclamp        ; clamp both sides so we don't outrange adding noise
            paddusb     xmm1, [rdx+32] ;bothclamp
            psubusb     xmm1, [rdx+16] ;whiteclamp

            movdqu      xmm2,[rdi+rax]         ; get the noise for this line
            paddb       xmm1,xmm2              ; add it in
            movdqu      [rsi+rax],xmm1         ; store the result

            add         rax,16                 ; move to the next 16 bytes of the row

            cmp         rax, rcx
            jl          .addnoise_nextset

    movsxd  rax, dword arg(7) ; Pitch
    add     arg(0), rax ; Start += Pitch
    sub     dword arg(6), 1   ; Height -= 1
    jg      .addnoise_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


SECTION_RODATA
align 16
; eight words of 4 (referenced through the RD42 define earlier in the file)
rd42:
    times 8 dw 0x04
; four dwords of 8: rounding term added before the arithmetic shift right by 4
four8s:
    times 4 dd 8