;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; ABI helper macros used throughout this file (sym, arg, SHADOW_ARGS_TO_STACK,
; GET_GOT, ALIGN_STACK, ...) come from this include.
%include "vpx_ports/x86_abi_support.asm"

; Filter constants. Neither is referenced by the code in the part of the
; file reviewed here.
%define VP8_FILTER_WEIGHT 128
%define VP8_FILTER_SHIFT  7

;-----------------------------------------------------------------------
;void vp8_mbpost_proc_down_mmx(unsigned char *dst,
;                             int pitch, int rows, int cols,int flimit)
;
; Vertical ("down") post-processing filter, applied in place to dst one
; 4-pixel-wide column strip at a time.
;
; For every strip the routine
;   1. copies the strip's last row into the 8 rows below the image and its
;      first row into the 8 rows above it (8 bytes are stored per border
;      row, so those rows must be writable),
;   2. maintains a running sum and sum of squares over a 15-row window
;      centred on the current row,
;   3. replaces a pixel by (pixel + sum + vp8_rv word) >> 4 when
;      15*sumsq - sum*sum < flimit, and keeps the original byte otherwise,
;   4. delays the write-back by 8 rows through a 16-slot ring buffer on the
;      stack, so the window only ever reads unfiltered pixels.
;
; The row loop runs rows+8 times; its look-ahead pointer therefore reads
; rows up to index rows+14 of the strip.
;
; Arguments are accessed through arg(n); the slots holding dst, rows and
; cols are modified while the routine runs. mm0-mm7 are used and no emms
; is issued here.
;-----------------------------------------------------------------------
extern sym(vp8_rv)
global sym(vp8_mbpost_proc_down_mmx) PRIVATE
sym(vp8_mbpost_proc_down_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 136

    ; Scratch area:
    ;   [rsp]      ring buffer d: 16 slots, accessed as 4 bytes per slot
    ;              (index*4); 128 bytes are reserved for it
    ;   [rsp+128]  flimit2: flimit replicated into two dwords
    mov         eax, dword ptr arg(4) ;flimit
    mov         [rsp+128], eax
    mov         [rsp+128+4], eax
%define flimit2 [rsp+128]

%if ABI_IS_32BIT=0
    ; keep the table base in r8 for the whole routine
    lea         r8,       [GLOBAL(sym(vp8_rv))]
%endif

    ;rows +=8;
    add         dword ptr arg(2), 8

    ;for(c=0; c<cols; c+=4)
.loop_col:
            mov         rsi,        arg(0)  ;s
            pxor        mm0,        mm0     ; mm0 = 0, used for unpacking

            movsxd      rax,        dword ptr arg(1) ;pitch       ;

            ; this copies the last row down into the border 8 rows
            mov         rdi,        rsi
            ; rows is an int: load it as a sign-extended dword so stale
            ; upper bits of the argument slot cannot corrupt the offset
            movsxd      rdx,        dword ptr arg(2)
            sub         rdx,        9                           ; (rows+8)-9 = index of last row
            imul        rdx,        rax
            lea         rdi,        [rdi+rdx]
            movq        mm1,        QWORD ptr[rdi]              ; last row
            mov         rcx,        8
.init_borderd:                                                  ; initialize bottom border
            lea         rdi,        [rdi + rax]
            movq        [rdi],      mm1

            dec         rcx
            jne         .init_borderd

            neg         rax                                     ; rax = -pitch

            ; this copies the first row up into the border 8 rows
            mov         rdi,        rsi
            movq        mm1,        QWORD ptr[rdi]              ; first row
            mov         rcx,        8
.init_border:                                                   ; initialize top border
            lea         rdi,        [rdi + rax]
            movq        [rdi],      mm1

            dec         rcx
            jne         .init_border


            lea         rsi,        [rsi + rax*8];              ; rsi = s - pitch*8
            neg         rax                                     ; rax = +pitch again


            pxor        mm5,        mm5     ; mm5 = running sum (4 words)
            pxor        mm6,        mm6     ; mm6 = running sum of squares, pixels 0-1

            pxor        mm7,        mm7     ; mm7 = running sum of squares, pixels 2-3
            mov         rdi,        rsi

            mov         rcx,        15          ; prime the window with 15 rows

.loop_initvar:
            movd        mm1,        DWORD PTR [rdi];
            punpcklbw   mm1,        mm0     ; bytes -> words

            paddw       mm5,        mm1     ; sum += x
            pmullw      mm1,        mm1     ; x*x (fits in 16 bits)

            movq        mm2,        mm1     ;
            punpcklwd   mm1,        mm0     ;

            punpckhwd   mm2,        mm0     ;
            paddd       mm6,        mm1     ; sumsq += x*x

            paddd       mm7,        mm2     ;
            lea         rdi,        [rdi+rax]   ;

            dec         rcx
            jne         .loop_initvar
            ; here rsi = s - pitch*8 and rdi = s + pitch*7
            xor         rdx,        rdx     ; rdx = row counter
.loop_row:
            movd        mm1,        DWORD PTR [rsi]     ; [s-pitch*8], row leaving the window
            movd        mm2,        DWORD PTR [rdi]     ; [s+pitch*7], row entering the window

            punpcklbw   mm1,        mm0
            punpcklbw   mm2,        mm0

            paddw       mm5,        mm2
            psubw       mm5,        mm1                 ; sum += entering - leaving

            pmullw      mm2,        mm2
            movq        mm4,        mm2

            punpcklwd   mm2,        mm0
            punpckhwd   mm4,        mm0

            paddd       mm6,        mm2                 ; sumsq += entering^2
            paddd       mm7,        mm4

            pmullw      mm1,        mm1
            movq        mm2,        mm1

            punpcklwd   mm1,        mm0
            psubd       mm6,        mm1                 ; sumsq -= leaving^2

            punpckhwd   mm2,        mm0
            psubd       mm7,        mm2


            movq        mm3,        mm6
            pslld       mm3,        4

            psubd       mm3,        mm6                 ; mm3 = sumsq*15 (pixels 0-1)
            movq        mm1,        mm5

            movq        mm4,        mm5
            pmullw      mm1,        mm1                 ; low 16 bits of sum*sum

            pmulhw      mm4,        mm4                 ; high 16 bits of sum*sum
            movq        mm2,        mm1

            punpcklwd   mm1,        mm4                 ; 32-bit sum*sum, pixels 0-1
            punpckhwd   mm2,        mm4                 ; 32-bit sum*sum, pixels 2-3

            movq        mm4,        mm7
            pslld       mm4,        4

            psubd       mm4,        mm7                 ; mm4 = sumsq*15 (pixels 2-3)

            psubd       mm3,        mm1
            psubd       mm4,        mm2

            psubd       mm3,        flimit2
            psubd       mm4,        flimit2

            psrad       mm3,        31                  ; all ones where
            psrad       mm4,        31                  ; sumsq*15 - sum*sum < flimit

            packssdw    mm3,        mm4
            packsswb    mm3,        mm0                 ; per-byte mask in low 4 bytes

            movd        mm1,        DWORD PTR [rsi+rax*8] ; pixels of the current row

            movq        mm2,        mm1                 ; keep the unfiltered bytes
            punpcklbw   mm1,        mm0

            paddw       mm1,        mm5
            mov         rcx,        rdx

            and         rcx,        127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
            push        rax
            lea         rax,        [GLOBAL(sym(vp8_rv))]
            movq        mm4,        [rax + rcx*2] ;vp8_rv[rcx*2]
            pop         rax
%elif ABI_IS_32BIT=0
            movq        mm4,        [r8 + rcx*2] ;vp8_rv[rcx*2]
%else
            movq        mm4,        [sym(vp8_rv) + rcx*2]
%endif
            paddw       mm1,        mm4
            psraw       mm1,        4                   ; (pixel + sum + rv) >> 4

            packuswb    mm1,        mm0
            pand        mm1,        mm3                 ; filtered value where mask is set

            pandn       mm3,        mm2                 ; original value elsewhere
            por         mm1,        mm3

            and         rcx,        15
            movd        DWORD PTR   [rsp+rcx*4], mm1 ;d[rcx*4]

            ; nothing to write back until 8 rows have been filtered
            cmp         edx,        8
            jl          .skip_assignment

            mov         rcx,        rdx
            sub         rcx,        8
            and         rcx,        15
            movd        mm1,        DWORD PTR [rsp+rcx*4] ;d[rcx*4]
            movd        [rsi],      mm1                 ; rsi points at row (rdx - 8)

.skip_assignment:
            lea         rsi,        [rsi+rax]

            lea         rdi,        [rdi+rax]
            add         rdx,        1

            cmp         edx,        dword arg(2) ;rows
            jl          .loop_row


        ; s += 4, done at full pointer width so that a carry out of the
        ; low 32 bits is not lost on 64-bit targets
        mov         rcx,        arg(0)
        add         rcx,        4
        mov         arg(0),     rcx
        sub         dword arg(3), 4 ; cols -= 4
        cmp         dword arg(3), 0
        jg          .loop_col

    add         rsp, 136
    pop         rsp             ; undo ALIGN_STACK (presumably reloads the rsp it saved)

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit2


;-----------------------------------------------------------------------
;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
;                            unsigned char blackclamp[16],
;                            unsigned char whiteclamp[16],
;                            unsigned char bothclamp[16],
;                            unsigned int Width, unsigned int Height, int Pitch)
;
; Adds noise to a plane in place, one row per outer iteration.
;
; For each row, the low 8 bits of LIBVPX_RAND's return value select a byte
; offset into noise. The row is then processed 8 bytes at a time:
; saturating-subtract blackclamp, saturating-add bothclamp,
; saturating-subtract whiteclamp, then add the noise bytes with wrap-around
; and store the result back.
;
; The inner loop tests its condition after the body, so at least 8 bytes
; are processed per row and Width is effectively rounded up to a multiple
; of 8. whiteclamp and bothclamp are not read from their own argument
; slots; they are addressed at blackclamp+16 and blackclamp+32.
;
; The argument slots holding Start and Height are modified while running.
; mm1 and mm2 are used and no emms is issued here.
;
; NOTE(review): no extra stack space is reserved around the call beyond
; what the prolog macros set up - confirm this meets the target ABI's call
; requirements (e.g. Win64 home space).
;-----------------------------------------------------------------------
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.addnoise_loop:
    call sym(LIBVPX_RAND) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff           ; offset = return value & 0xff
    add     rcx, rax            ; rcx = noise + offset

    ; we rely on the fact that the clamping vectors are stored contiguously
    ; in black/white/both order. Note that we have to reload this here because
    ; rdx could be trashed by rand()
    mov     rdx, arg(2) ; blackclamp


            mov     rdi, rcx            ; rdi = noise source for this row
            movsxd  rcx, dword arg(5) ;[Width]
            mov     rsi, arg(0) ;Pos
            xor         rax,rax         ; rax = byte offset within the row

.addnoise_nextset:
            movq        mm1,[rsi+rax]         ; get the source

            psubusb     mm1, [rdx]    ;blackclamp        ; clamp both sides so we don't outrange adding noise
            paddusb     mm1, [rdx+32] ;bothclamp
            psubusb     mm1, [rdx+16] ;whiteclamp

            movq        mm2,[rdi+rax]         ; get the noise for this line
            paddb       mm1,mm2              ; add it in
            movq        [rsi+rax],mm1         ; store the result

            add         rax,8                 ; move to the next 8 bytes

            cmp         rax, rcx
            jl          .addnoise_nextset

    movsxd  rax, dword arg(7) ; Pitch
    add     arg(0), rax ; Start += Pitch
    sub     dword arg(6), 1   ; Height -= 1
    jg      .addnoise_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


; Read-only constant tables. Neither label is referenced by the two
; routines in the part of the file reviewed here.
SECTION_RODATA
align 16
; 48 words: 16 x 16, 8 x 64, 16 x 16, 8 x 0
Blur:
    times 16 dw 16
    times  8 dw 64
    times 16 dw 16
    times  8 dw  0

; 4 words of 0x40
rd:
    times 4 dw 0x40
316