;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;Note: tap3 and tap4 have to be applied and added after the other taps to
;avoid overflow.

;-----------------------------------------------------------------------
; GET_FILTERS_4
;
; Loads the eight 16-bit filter taps addressed by arg(5) and stores them,
; two taps per 128-bit slot, for the 4-pixel-wide filter path.
;
; Requires (provided by the invoking function):
;   - k0k1, k2k3, k5k4, k6k7, krd, zero %define'd as 16-byte memory slots
;     that are valid movdqa destinations.
;   - the filter pointer in arg(5) must be 16-byte aligned, because the
;     taps are fetched with movdqa.
;
; On exit:
;   k0k1 = low qword 4 x k0, high qword 4 x k1
;   k2k3 = low qword 4 x k2, high qword 4 x k3
;   k5k4 = low qword 4 x k5, high qword 4 x k4   (note the swapped order)
;   k6k7 = low qword 4 x k6, high qword 4 x k7
;   krd  = 0x0040 (64) in every word, the rounding term
;   zero = all bits clear
;
; Clobbers: rdx, rcx, xmm0-xmm7 (xmm7 is left zeroed).
;-----------------------------------------------------------------------
%macro GET_FILTERS_4 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rcx, 0x0400040              ;64 in each of the two low words

    movdqa      xmm7, [rdx]                 ;load filters
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    psrldq      xmm7, 8                     ;move k4..k7 into the low qword
    pshuflw     xmm4, xmm7, 0b              ;k4
    pshuflw     xmm5, xmm7, 01010101b       ;k5
    pshuflw     xmm6, xmm7, 10101010b       ;k6
    pshuflw     xmm7, xmm7, 11111111b       ;k7

    punpcklqdq  xmm0, xmm1                  ;[k0 x4 | k1 x4]
    punpcklqdq  xmm2, xmm3                  ;[k2 x4 | k3 x4]
    punpcklqdq  xmm5, xmm4                  ;[k5 x4 | k4 x4]
    punpcklqdq  xmm6, xmm7                  ;[k6 x4 | k7 x4]

    movdqa      k0k1, xmm0
    movdqa      k2k3, xmm2
    movdqa      k5k4, xmm5
    movdqa      k6k7, xmm6

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;broadcast dword 0 to all lanes
    movdqa      krd, xmm6

    pxor        xmm7, xmm7
    movdqa      zero, xmm7
%endm

;-----------------------------------------------------------------------
; APPLY_FILTER_4 avg
;
; Applies the 8-tap filter to one 4-pixel-wide output row and writes the
; 4 result bytes to [rdi].
;
; Parameter:
;   %1 (avg) - when non-zero, the filtered bytes are averaged (pavgb)
;              with the 4 bytes already at [rdi] before being stored.
;
; Inputs:
;   xmm0..xmm7 - source rows 0..7, 4 pixels each in the low dword.
;   rdi        - destination address.
;   k0k1, k2k3, k5k4, k6k7, krd, zero - slots filled by GET_FILTERS_4.
;
; Rows are paired two per register (row in the low qword, its partner in
; the high qword) so that one pmullw applies two taps. The partial sums
; are accumulated with saturating adds; the k2/k5 and then k3/k4 products
; are added last, per the overflow note at the top of this file.
;
; Result per pixel: pack_unsigned_saturate((sum + 64) >> 7).
;
; Clobbers: xmm0, xmm1, xmm2, xmm5, xmm6.
;-----------------------------------------------------------------------
%macro APPLY_FILTER_4 1
    punpckldq   xmm0, xmm1                  ;two row in one register
    punpckldq   xmm6, xmm7
    punpckldq   xmm2, xmm3
    punpckldq   xmm5, xmm4                  ;row 5 low, row 4 high (matches k5k4)

    punpcklbw   xmm0, zero                  ;unpack to word
    punpcklbw   xmm6, zero
    punpcklbw   xmm2, zero
    punpcklbw   xmm5, zero

    pmullw      xmm0, k0k1                  ;multiply the filter factors
    pmullw      xmm6, k6k7
    pmullw      xmm2, k2k3
    pmullw      xmm5, k5k4

    paddsw      xmm0, xmm6                  ;sum: low = t0+t6, high = t1+t7
    movdqa      xmm1, xmm0
    psrldq      xmm1, 8                     ;bring the high half down
    paddsw      xmm0, xmm1                  ;low = t0+t6+t1+t7
    paddsw      xmm0, xmm2                  ;+ t2
    psrldq      xmm2, 8                     ;xmm2 low = t3
    paddsw      xmm0, xmm5                  ;+ t5
    psrldq      xmm5, 8                     ;xmm5 low = t4
    paddsw      xmm0, xmm2                  ;+ t3
    paddsw      xmm0, xmm5                  ;+ t4

    paddsw      xmm0, krd                   ;rounding
    psraw       xmm0, 7                     ;shift
    packuswb    xmm0, xmm0                  ;pack to byte

%if %1
    movd        xmm1, [rdi]                 ;existing destination pixels
    pavgb       xmm0, xmm1
%endif
    movd        [rdi], xmm0
%endm

;-----------------------------------------------------------------------
; GET_FILTERS
;
; Loads the source and destination pointers and the eight 16-bit filter
; taps, storing each tap replicated across a full 128-bit slot for the
; 8-pixel-wide filter path.
;
; Requires (provided by the invoking function):
;   - k0..k7, krd, zero %define'd as 16-byte memory slots that are valid
;     movdqa destinations.
;   - the filter pointer in arg(5) must be 16-byte aligned, because the
;     taps are fetched with movdqa.
;
; On exit:
;   rsi  = arg(0) (src_ptr)
;   rdi  = arg(2) (output_ptr)
;   kN   = 8 x tap N, for N = 0..7
;   krd  = 0x0040 (64) in every word, the rounding term
;   zero = all bits clear
;
; Clobbers: rdx, rcx, rsi, rdi, xmm0-xmm7 (xmm7 is left zeroed).
;-----------------------------------------------------------------------
%macro GET_FILTERS 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    mov         rcx, 0x0400040              ;64 in each of the two low words

    movdqa      xmm7, [rdx]                 ;load filters
    pshuflw     xmm0, xmm7, 0b              ;k0
    pshuflw     xmm1, xmm7, 01010101b       ;k1
    pshuflw     xmm2, xmm7, 10101010b       ;k2
    pshuflw     xmm3, xmm7, 11111111b       ;k3
    pshufhw     xmm4, xmm7, 0b              ;k4
    pshufhw     xmm5, xmm7, 01010101b       ;k5
    pshufhw     xmm6, xmm7, 10101010b       ;k6
    pshufhw     xmm7, xmm7, 11111111b       ;k7

    punpcklwd   xmm0, xmm0                  ;low 4 copies -> 8 copies
    punpcklwd   xmm1, xmm1
    punpcklwd   xmm2, xmm2
    punpcklwd   xmm3, xmm3
    punpckhwd   xmm4, xmm4                  ;high 4 copies -> 8 copies
    punpckhwd   xmm5, xmm5
    punpckhwd   xmm6, xmm6
    punpckhwd   xmm7, xmm7

    movdqa      k0,   xmm0                  ;store filter factors on stack
    movdqa      k1,   xmm1
    movdqa      k2,   xmm2
    movdqa      k3,   xmm3
    movdqa      k4,   xmm4
    movdqa      k5,   xmm5
    movdqa      k6,   xmm6
    movdqa      k7,   xmm7

    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;broadcast dword 0 to all lanes
    movdqa      krd, xmm6                   ;rounding

    pxor        xmm7, xmm7
    movdqa      zero, xmm7
%endm

;-----------------------------------------------------------------------
; LOAD_VERT_8 offset
;
; Loads 8 bytes from each of 8 consecutive source rows into xmm0..xmm7
; (row N ends up in xmmN).
;
; Parameter:
;   %1 (offset) - byte offset added to every row address.
;
; Inputs:
;   rsi - address of row 0.
;   rax - distance in bytes between rows.
;   rdx - must equal 3 * rax (the functions in this file set it with
;         lea rdx, [rax + rax * 2]).
;
; Side effect: rsi is advanced by one row (rsi += rax), so a following
; invocation starts one row further down.
;
; Clobbers: rsi, xmm0-xmm7.
;-----------------------------------------------------------------------
%macro LOAD_VERT_8 1
    movq        xmm0, [rsi + %1]            ;0
    movq        xmm1, [rsi + rax + %1]      ;1
    movq        xmm6, [rsi + rdx * 2 + %1]  ;6
    lea         rsi,  [rsi + rax]           ;rsi now addresses row 1
    movq        xmm7, [rsi + rdx * 2 + %1]  ;7
    movq        xmm2, [rsi + rax + %1]      ;2
    movq        xmm3, [rsi + rax * 2 + %1]  ;3
    movq        xmm4, [rsi + rdx + %1]      ;4
    movq        xmm5, [rsi + rax * 4 + %1]  ;5
%endm

;-----------------------------------------------------------------------
; APPLY_FILTER_8 avg, offset
;
; Applies the 8-tap filter to one 8-pixel-wide output row and writes the
; 8 result bytes to [rdi + offset].
;
; Parameters:
;   %1 (avg)    - when non-zero, the filtered bytes are averaged (pavgb)
;                 with the 8 bytes already at the destination before
;                 being stored.
;   %2 (offset) - byte offset added to rdi for the destination access.
;
; Inputs:
;   xmm0..xmm7 - source rows 0..7, 8 pixels each in the low qword
;                (as produced by LOAD_VERT_8).
;   rdi        - destination base address.
;   k0..k7, krd, zero - slots filled by GET_FILTERS.
;
; The products are accumulated with saturating adds in the order
; 0, 1, 6, 7, 2, 5, 3, 4 so that taps 3 and 4 are added last, per the
; overflow note at the top of this file.
;
; Result per pixel: pack_unsigned_saturate((sum + 64) >> 7).
;
; Clobbers: xmm0-xmm7.
;-----------------------------------------------------------------------
%macro APPLY_FILTER_8 2
    punpcklbw   xmm0, zero                  ;unpack bytes to words
    punpcklbw   xmm1, zero
    punpcklbw   xmm6, zero
    punpcklbw   xmm7, zero
    punpcklbw   xmm2, zero
    punpcklbw   xmm5, zero
    punpcklbw   xmm3, zero
    punpcklbw   xmm4, zero

    pmullw      xmm0, k0                    ;multiply each row by its tap
    pmullw      xmm1, k1
    pmullw      xmm6, k6
    pmullw      xmm7, k7
    pmullw      xmm2, k2
    pmullw      xmm5, k5
    pmullw      xmm3, k3
    pmullw      xmm4, k4

    paddsw      xmm0, xmm1                  ;saturating sum, taps 3/4 last
    paddsw      xmm0, xmm6
    paddsw      xmm0, xmm7
    paddsw      xmm0, xmm2
    paddsw      xmm0, xmm5
    paddsw      xmm0, xmm3
    paddsw      xmm0, xmm4

    paddsw      xmm0, krd                   ;rounding
    psraw       xmm0, 7                     ;shift
    packuswb    xmm0, xmm0                  ;pack back to byte
%if %1
    movq        xmm1, [rdi + %2]            ;existing destination pixels
    pavgb       xmm0, xmm1
%endif
    movq        [rdi + %2], xmm0
%endm

;void vp9_filter_block1d4_v8_sse2
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    short *filter
;)
;
; Vertical 8-tap filter over a 4-pixel-wide column.
;
; Each output row is computed from 8 consecutive source rows beginning at
; the current source pointer (src_ptr addresses the row used with tap 0).
; After every output row the source pointer advances by src_pitch and the
; destination pointer by out_pitch. Results are stored without averaging
; (APPLY_FILTER_4 0).
;
; Constraints visible in this code:
;   - filter must be 16-byte aligned (loaded with movdqa).
;   - output_height must be non-zero: the loop body runs before the
;     counter is tested.
;
; Register roles inside the loop:
;   rsi = source row 0, rdi = destination, rax = src_pitch,
;   rdx = 3 * src_pitch, rbx = out_pitch, rcx = rows remaining.
;
; NOTE(review): arg(), sym(), PRIVATE, SHADOW_ARGS_TO_STACK, SAVE_XMM,
; ALIGN_STACK, RESTORE_XMM and UNSHADOW_ARGS are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm. Confirm there.
global sym(vp9_filter_block1d4_v8_sse2) PRIVATE
sym(vp9_filter_block1d4_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 6                 ;six 16-byte scratch slots
    %define k0k1 [rsp + 16 * 0]
    %define k2k3 [rsp + 16 * 1]
    %define k5k4 [rsp + 16 * 2]
    %define k6k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define zero [rsp + 16 * 5]

    GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    movsxd      rax, DWORD PTR arg(1)       ;src_pitch
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rdx, [rax + rax * 2]        ;3 * src_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    movd        xmm0, [rsi]                 ;load src: row 0
    movd        xmm1, [rsi + rax]           ;1
    movd        xmm6, [rsi + rdx * 2]       ;6
    lea         rsi,  [rsi + rax]           ;advance to next row; rest is relative to row 1
    movd        xmm7, [rsi + rdx * 2]       ;7
    movd        xmm2, [rsi + rax]           ;2
    movd        xmm3, [rsi + rax * 2]       ;3
    movd        xmm4, [rsi + rdx]           ;4
    movd        xmm5, [rsi + rax * 4]       ;5

    APPLY_FILTER_4 0

    lea         rdi, [rdi + rbx]            ;next output row
    dec         rcx
    jnz         .loop

    ; begin epilog
    add         rsp, 16 * 6                 ;release the scratch slots
    pop         rsp                         ;presumably undoes ALIGN_STACK - confirm
    pop         rbx
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp9_filter_block1d8_v8_sse2
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    short *filter
;)
;
; Vertical 8-tap filter over an 8-pixel-wide column.
;
; Each output row is computed from 8 consecutive source rows beginning at
; the current source pointer (src_ptr addresses the row used with tap 0).
; LOAD_VERT_8 advances the source pointer by src_pitch per output row; the
; destination pointer advances by out_pitch. Results are stored without
; averaging (APPLY_FILTER_8 0, 0).
;
; Constraints visible in this code:
;   - filter must be 16-byte aligned (loaded with movdqa).
;   - output_height must be non-zero: the loop body runs before the
;     counter is tested.
;
; Register roles inside the loop:
;   rsi = source row 0, rdi = destination (both set by GET_FILTERS),
;   rax = src_pitch, rdx = 3 * src_pitch, rbx = out_pitch,
;   rcx = rows remaining.
;
; NOTE(review): arg(), sym(), PRIVATE, SHADOW_ARGS_TO_STACK, SAVE_XMM,
; ALIGN_STACK, RESTORE_XMM and UNSHADOW_ARGS are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm. Confirm there.
global sym(vp9_filter_block1d8_v8_sse2) PRIVATE
sym(vp9_filter_block1d8_v8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10                ;ten 16-byte scratch slots
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS                             ;also loads rsi/rdi from arg(0)/arg(2)

    movsxd      rax, DWORD PTR arg(1)       ;src_pitch
    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    lea         rdx, [rax + rax * 2]        ;3 * src_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    LOAD_VERT_8 0                           ;rows 0..7 -> xmm0..xmm7, rsi += rax
    APPLY_FILTER_8 0, 0

    lea         rdi, [rdi + rbx]            ;next output row
    dec         rcx
    jnz         .loop

    ; begin epilog
    add         rsp, 16 * 10                ;release the scratch slots
    pop         rsp                         ;presumably undoes ALIGN_STACK - confirm
    pop         rbx
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

305ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;void vp9_filter_block1d16_v8_sse2
306ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;(
307ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned char *src_ptr,
308ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int   src_pitch,
309ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned char *output_ptr,
310ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int   out_pitch,
311ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int   output_height,
312ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    short *filter
313ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;)
314ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d16_v8_sse2) PRIVATE
315ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d16_v8_sse2):
316ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbp
317ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rbp, rsp
318ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 6
319ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SAVE_XMM 7
320ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rsi
321ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rdi
322ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbx
323ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; end prolog
324ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
325ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ALIGN_STACK 16, rax
326ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsp, 16 * 10
327ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k0 [rsp + 16 * 0]
328ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k1 [rsp + 16 * 1]
329ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k2 [rsp + 16 * 2]
330ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k3 [rsp + 16 * 3]
331ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k4 [rsp + 16 * 4]
332ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k5 [rsp + 16 * 5]
333ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k6 [rsp + 16 * 6]
334ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k7 [rsp + 16 * 7]
335ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define krd [rsp + 16 * 8]
336ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define zero [rsp + 16 * 9]
337ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
338ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    GET_FILTERS
339ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
340ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
341ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
342ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdx, [rax + rax * 2]
343ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
344ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
345ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop:
346ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    LOAD_VERT_8 0
347ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_8 0, 0
348ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsi, rax
349ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
350ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    LOAD_VERT_8 8
351ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_8 0, 8
352ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add         rdi, rbx
353ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
354ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    dec         rcx
355ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    jnz         .loop
356ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
357ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add rsp, 16 * 10
358ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsp
359ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rbx
360ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; begin epilog
361ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rdi
362ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsi
363ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    RESTORE_XMM
364ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    UNSHADOW_ARGS
365ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop         rbp
366ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ret
367ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
; vp9_filter_block1d4_v8_avg_sse2
;
; Each loop iteration loads 4 bytes (movd) from eight consecutive source rows,
; spaced pixels_per_line bytes apart, into xmm0..xmm7 and hands them to
; APPLY_FILTER_4. The source pointer then sits one row further down and the
; destination pointer is advanced by out_pitch.
;
; Arguments, as read below and by GET_FILTERS_4:
;   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;   arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
;
; NOTE(review): APPLY_FILTER_4 is invoked with 1 here and with 0 in
; vp9_filter_block1d4_h8_sse2; presumably 1 selects averaging with the bytes
; already at output_ptr. The macro body is outside this section -- confirm.
; The dec/jnz loop runs its body before testing the counter, so
; output_height is expected to be non-zero.
368ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d4_v8_avg_sse2) PRIVATE
369ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d4_v8_avg_sse2):
370ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbp
371ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rbp, rsp
372ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 6
373ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SAVE_XMM 7
    ; rsi, rdi and rbx are saved here and popped in reverse order below.
374ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rsi
375ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rdi
376ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbx
377ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; end prolog
378ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; NOTE(review): ALIGN_STACK is defined outside this section; the matching
    ; 'pop rsp' in the epilogue suggests it pushes the pre-alignment rsp.
379ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ALIGN_STACK 16, rax
    ; Reserve six 16-byte scratch slots; the defines below name them.
380ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsp, 16 * 6
381ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k0k1 [rsp + 16 * 0]
382ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k2k3 [rsp + 16 * 1]
383ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k5k4 [rsp + 16 * 2]
384ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k6k7 [rsp + 16 * 3]
385ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define krd [rsp + 16 * 4]
386ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define zero [rsp + 16 * 5]
387ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; GET_FILTERS_4 (defined at the top of the file) uses rdx and rcx as
    ; scratch, so the loop registers are only loaded after it.
388ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    GET_FILTERS_4
389ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
390ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rsi, arg(0)                 ;src_ptr
391ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rdi, arg(2)                 ;output_ptr
392ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
393ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
394ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    ; rdx = 3 * pixels_per_line, used to reach rows 3/4 and (doubled) 6/7.
395ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdx, [rax + rax * 2]
396ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
397ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
398ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop:
    ; Row n goes to xmm<n>. Rows 0, 1 and 6 are addressed from the current
    ; rsi; after the lea, rsi points at row 1 and the remaining offsets are
    ; relative to that row.
399ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm0, [rsi]                 ;load src: row 0
400ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm1, [rsi + rax]           ;1
401ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm6, [rsi + rdx * 2]       ;6
402ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rsi,  [rsi + rax]
403ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm7, [rsi + rdx * 2]       ;7
404ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm2, [rsi + rax]           ;2
405ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm3, [rsi + rax * 2]       ;3
406ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm4, [rsi + rdx]           ;4
407ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movd        xmm5, [rsi + rax * 4]       ;5
408ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
409ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_4 1
410ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Next output row; rsi was already moved down one row by the lea above.
411ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdi, [rdi + rbx]
412ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    dec         rcx
413ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    jnz         .loop
414ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Release the scratch slots, then undo ALIGN_STACK and the three pushes.
415ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add rsp, 16 * 6
416ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsp
417ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rbx
418ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; begin epilog
419ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rdi
420ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsi
421ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    RESTORE_XMM
422ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    UNSHADOW_ARGS
423ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop         rbp
424ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ret
425ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
; vp9_filter_block1d8_v8_avg_sse2
;
; Runs LOAD_VERT_8 / APPLY_FILTER_8 once per output row, output_height times,
; advancing the destination pointer (rdi) by out_pitch after each row.
;
; Arguments read directly below:
;   arg(1) pixels_per_line, arg(3) out_pitch, arg(4) output_height
;
; NOTE(review): rsi and rdi are never loaded in this routine, and the filter
; argument is never read here; presumably GET_FILTERS does both. Its
; definition is outside this section -- confirm.
; NOTE(review): APPLY_FILTER_8 gets 1 as its first argument in the _avg
; routines and 0 in vp9_filter_block1d8_h8_sse2; presumably 1 selects
; averaging with the existing destination bytes -- confirm against the macro.
; The dec/jnz loop runs its body before testing the counter, so
; output_height is expected to be non-zero.
426ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d8_v8_avg_sse2) PRIVATE
427ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d8_v8_avg_sse2):
428ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbp
429ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rbp, rsp
430ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 6
431ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SAVE_XMM 7
    ; rsi, rdi and rbx are saved here and popped in reverse order below.
432ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rsi
433ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rdi
434ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbx
435ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; end prolog
436ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; NOTE(review): ALIGN_STACK is defined outside this section; the matching
    ; 'pop rsp' in the epilogue suggests it pushes the pre-alignment rsp.
437ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ALIGN_STACK 16, rax
    ; Reserve ten 16-byte scratch slots; the defines below name them.
438ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsp, 16 * 10
439ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k0 [rsp + 16 * 0]
440ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k1 [rsp + 16 * 1]
441ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k2 [rsp + 16 * 2]
442ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k3 [rsp + 16 * 3]
443ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k4 [rsp + 16 * 4]
444ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k5 [rsp + 16 * 5]
445ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k6 [rsp + 16 * 6]
446ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k7 [rsp + 16 * 7]
447ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define krd [rsp + 16 * 8]
448ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define zero [rsp + 16 * 9]
449ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
450ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    GET_FILTERS
451ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
452ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
453ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    ; rdx = 3 * pixels_per_line.
454ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdx, [rax + rax * 2]
455ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
456ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop:
    ; NOTE(review): no instruction here advances rsi, so the step to the
    ; next source row presumably happens inside LOAD_VERT_8 -- confirm.
457ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    LOAD_VERT_8 0
458ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_8 1, 0
459ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Next output row.
460ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdi, [rdi + rbx]
461ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    dec         rcx
462ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    jnz         .loop
463ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Release the scratch slots, then undo ALIGN_STACK and the three pushes.
464ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add rsp, 16 * 10
465ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsp
466ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rbx
467ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; begin epilog
468ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rdi
469ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsi
470ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    RESTORE_XMM
471ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    UNSHADOW_ARGS
472ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop         rbp
473ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ret
474ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
; vp9_filter_block1d16_v8_avg_sse2
;
; Per output row, runs the LOAD_VERT_8 / APPLY_FILTER_8 pair twice: once with
; byte offset 0 and once with byte offset 8. The destination pointer (rdi) is
; advanced by out_pitch once per row; the loop runs output_height times.
;
; Arguments read directly below:
;   arg(1) pixels_per_line, arg(3) out_pitch, arg(4) output_height
;
; NOTE(review): rsi and rdi are never loaded in this routine, and the filter
; argument is never read here; presumably GET_FILTERS does both. Its
; definition is outside this section -- confirm.
; NOTE(review): APPLY_FILTER_8 gets 1 as its first argument in the _avg
; routines and 0 in vp9_filter_block1d8_h8_sse2; presumably 1 selects
; averaging with the existing destination bytes -- confirm against the macro.
; The dec/jnz loop runs its body before testing the counter, so
; output_height is expected to be non-zero.
475ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d16_v8_avg_sse2) PRIVATE
476ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d16_v8_avg_sse2):
477ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbp
478ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rbp, rsp
479ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 6
480ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SAVE_XMM 7
    ; rsi, rdi and rbx are saved here and popped in reverse order below.
481ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rsi
482ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rdi
483ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbx
484ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; end prolog
485ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; NOTE(review): ALIGN_STACK is defined outside this section; the matching
    ; 'pop rsp' in the epilogue suggests it pushes the pre-alignment rsp.
486ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ALIGN_STACK 16, rax
    ; Reserve ten 16-byte scratch slots; the defines below name them.
487ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsp, 16 * 10
488ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k0 [rsp + 16 * 0]
489ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k1 [rsp + 16 * 1]
490ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k2 [rsp + 16 * 2]
491ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k3 [rsp + 16 * 3]
492ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k4 [rsp + 16 * 4]
493ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k5 [rsp + 16 * 5]
494ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k6 [rsp + 16 * 6]
495ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k7 [rsp + 16 * 7]
496ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define krd [rsp + 16 * 8]
497ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define zero [rsp + 16 * 9]
498ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
499ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    GET_FILTERS
500ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
501ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
502ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rbx, DWORD PTR arg(3)       ;out_pitch
    ; rdx = 3 * pixels_per_line.
503ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdx, [rax + rax * 2]
504ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
505ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop:
    ; First half of the row (byte offset 0).
506ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    LOAD_VERT_8 0
507ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_8 1, 0
    ; Step rsi back by one source row. NOTE(review): presumably this undoes a
    ; one-row advance made inside LOAD_VERT_8 so that the second half reads
    ; the same rows -- confirm against the macro definition.
508ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsi, rax
509ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Second half of the row (byte offset 8).
510ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    LOAD_VERT_8 8
511ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_8 1, 8
    ; Next output row.
512ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add         rdi, rbx
513ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
514ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    dec         rcx
515ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    jnz         .loop
516ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Release the scratch slots, then undo ALIGN_STACK and the three pushes.
517ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add rsp, 16 * 10
518ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsp
519ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rbx
520ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; begin epilog
521ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rdi
522ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsi
523ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    RESTORE_XMM
524ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    UNSHADOW_ARGS
525ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop         rbp
526ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ret
527ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
528ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;void vp9_filter_block1d4_h8_sse2
529ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;(
530ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned char  *src_ptr,
531ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int    src_pixels_per_line,
532ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned char  *output_ptr,
533ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int    output_pitch,
534ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int    output_height,
535ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    short *filter
536ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;)
;
; Per output row: one unaligned 16-byte load starting 3 bytes before src_ptr,
; seven byte-shifted copies of it in xmm1..xmm7, then APPLY_FILTER_4 0.
; src_ptr advances by src_pixels_per_line and output_ptr by output_pitch
; after each row; the loop runs output_height times.
;
; Notes:
;  - The three unsigned int arguments are sign-extended (movsxd) below.
;  - The dec/jnz loop runs its body before testing the counter, so
;    output_height is expected to be non-zero.
;  - NOTE(review): APPLY_FILTER_4 is invoked with 0 here and with 1 in
;    vp9_filter_block1d4_v8_avg_sse2; presumably 0 means a plain store
;    without averaging. The macro body is outside this section -- confirm.
537ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d4_h8_sse2) PRIVATE
538ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d4_h8_sse2):
539ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbp
540ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rbp, rsp
541ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 6
542ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SAVE_XMM 7
    ; Only rsi and rdi are saved; rbx is not used by this routine.
543ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rsi
544ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rdi
545ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; end prolog
546ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; NOTE(review): ALIGN_STACK is defined outside this section; the matching
    ; 'pop rsp' in the epilogue suggests it pushes the pre-alignment rsp.
547ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ALIGN_STACK 16, rax
    ; Reserve six 16-byte scratch slots; the defines below name them.
548ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsp, 16 * 6
549ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k0k1 [rsp + 16 * 0]
550ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k2k3 [rsp + 16 * 1]
551ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k5k4 [rsp + 16 * 2]
552ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k6k7 [rsp + 16 * 3]
553ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define krd [rsp + 16 * 4]
554ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define zero [rsp + 16 * 5]
555ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; GET_FILTERS_4 (defined at the top of the file) uses rdx and rcx as
    ; scratch, so the loop registers are only loaded after it.
556ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    GET_FILTERS_4
557ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
558ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rsi, arg(0)                 ;src_ptr
559ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rdi, arg(2)                 ;output_ptr
560ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
561ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
562ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
563ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
564ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
565ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop:
    ; Unaligned 16-byte load whose byte 0 is src_ptr[-3].
566ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqu      xmm0,   [rsi - 3]           ;load src
567ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Duplicate the loaded bytes into xmm1..xmm7 ...
568ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm1, xmm0
569ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm6, xmm0
570ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm7, xmm0
571ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm2, xmm0
572ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm3, xmm0
573ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm5, xmm0
574ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm4, xmm0
575ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; ... and shift xmm<n> right by n bytes, so byte 0 of xmm<n> is
    ; src_ptr[n - 3].
576ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm1, 1
577ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm6, 6
578ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm7, 7
579ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm2, 2
580ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm3, 3
581ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm5, 5
582ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm4, 4
583ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
584ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_4 0
585ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Next source row and next output row.
586ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rsi, [rsi + rax]
587ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdi, [rdi + rdx]
588ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    dec         rcx
589ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    jnz         .loop
590ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Release the scratch slots, then undo ALIGN_STACK and the two pushes.
591ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add rsp, 16 * 6
592ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsp
593ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
594ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; begin epilog
595ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rdi
596ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsi
597ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    RESTORE_XMM
598ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    UNSHADOW_ARGS
599ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop         rbp
600ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ret
601ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
602ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;void vp9_filter_block1d8_h8_sse2
603ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;(
604ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned char  *src_ptr,
605ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int    src_pixels_per_line,
606ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned char  *output_ptr,
607ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int    output_pitch,
608ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    unsigned int    output_height,
609ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;    short *filter
610ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org;)
;
; Per output row: one unaligned 16-byte load starting 3 bytes before the
; source pointer (rsi), seven byte-shifted copies of it in xmm1..xmm7, then
; APPLY_FILTER_8 0, 0. rsi advances by src_pixels_per_line and rdi by
; output_pitch after each row; the loop runs output_height times.
;
; Notes:
;  - The three unsigned int arguments are sign-extended (movsxd) below.
;  - The dec/jnz loop runs its body before testing the counter, so
;    output_height is expected to be non-zero.
;  - NOTE(review): rsi and rdi are never loaded in this routine, and the
;    filter argument is never read here; presumably GET_FILTERS does both.
;    Its definition is outside this section -- confirm.
;  - NOTE(review): APPLY_FILTER_8 gets 0 as its first argument here and 1 in
;    the _avg routines; presumably 0 means a plain store without averaging
;    -- confirm against the macro definition.
611ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgglobal sym(vp9_filter_block1d8_h8_sse2) PRIVATE
612ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgsym(vp9_filter_block1d8_h8_sse2):
613ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rbp
614ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    mov         rbp, rsp
615ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SHADOW_ARGS_TO_STACK 6
616ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    SAVE_XMM 7
    ; Only rsi and rdi are saved; rbx is not used by this routine.
617ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rsi
618ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    push        rdi
619ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; end prolog
620ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; NOTE(review): ALIGN_STACK is defined outside this section; the matching
    ; 'pop rsp' in the epilogue suggests it pushes the pre-alignment rsp.
621ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ALIGN_STACK 16, rax
    ; Reserve ten 16-byte scratch slots; the defines below name them.
622ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    sub         rsp, 16 * 10
623ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k0 [rsp + 16 * 0]
624ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k1 [rsp + 16 * 1]
625ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k2 [rsp + 16 * 2]
626ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k3 [rsp + 16 * 3]
627ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k4 [rsp + 16 * 4]
628ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k5 [rsp + 16 * 5]
629ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k6 [rsp + 16 * 6]
630ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define k7 [rsp + 16 * 7]
631ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define krd [rsp + 16 * 8]
632ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    %define zero [rsp + 16 * 9]
633ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
634ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    GET_FILTERS
635ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
636ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
637ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
638ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movsxd      rcx, DWORD PTR arg(4)       ;output_height
639ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
640ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org.loop:
    ; Unaligned 16-byte load whose byte 0 is 3 bytes before rsi.
641ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqu      xmm0,   [rsi - 3]           ;load src
642ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Duplicate the loaded bytes into xmm1..xmm7 ...
643ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm1, xmm0
644ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm6, xmm0
645ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm7, xmm0
646ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm2, xmm0
647ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm5, xmm0
648ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm3, xmm0
649ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    movdqa      xmm4, xmm0
650ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; ... and shift xmm<n> right by n bytes, so byte 0 of xmm<n> is the
    ; source byte at offset n - 3 from rsi.
651ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm1, 1
652ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm6, 6
653ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm7, 7
654ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm2, 2
655ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm5, 5
656ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm3, 3
657ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    psrldq      xmm4, 4
658ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
659ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    APPLY_FILTER_8 0, 0
660ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Next source row and next output row.
661ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rsi, [rsi + rax]
662ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    lea         rdi, [rdi + rdx]
663ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    dec         rcx
664ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    jnz         .loop
665ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
    ; Release the scratch slots, then undo ALIGN_STACK and the two pushes.
666ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    add rsp, 16 * 10
667ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsp
668ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
669ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ; begin epilog
670ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rdi
671ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop rsi
672ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    RESTORE_XMM
673ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    UNSHADOW_ARGS
674ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    pop         rbp
675ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org    ret
676ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
;void vp9_filter_block1d16_h8_sse2
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    short *filter
;)
;
; Horizontal 8-tap filter, 16 pixels wide. Every row is handled as two
; 8-pixel halves; each half builds eight byte-shifted copies of the source
; window and hands them to APPLY_FILTER_8.
;
; Registers inside the row loop:
;   rsi        source row pointer, advanced by rax after each row
;   rdi        output row pointer, advanced by rdx after each row
;   rax        src_pixels_per_line (arg 1, sign-extended from 32 bits)
;   rdx        output_pitch        (arg 3, sign-extended from 32 bits)
;   rcx        output_height       (arg 4), number of rows still to do
;   xmm0-xmm7  source window and its byte-shifted copies
;
; NOTE(review): rsi/rdi are never loaded in this routine and the k0..k7,
; krd and zero stack slots are never written here. GET_FILTERS (defined
; outside this section) is presumably responsible for both - confirm.
; NOTE(review): output_height must be non-zero; the row counter is only
; tested after a row has been processed.
global sym(vp9_filter_block1d16_h8_sse2) PRIVATE
sym(vp9_filter_block1d16_h8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Reserve ten 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    ; movsxd sign-extends, so a 32-bit value with bit 31 set becomes negative.
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; First half: 16 source bytes starting 3 bytes left of the row pointer.
    movdqu      xmm0,   [rsi - 3]           ;load src

    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm0
    movdqa      xmm7, xmm0
    movdqa      xmm2, xmm0
    movdqa      xmm5, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm4, xmm0

    ; xmmN = window shifted right by N bytes, so its low byte is [rsi - 3 + N].
    psrldq      xmm1, 1
    psrldq      xmm6, 6
    psrldq      xmm7, 7
    psrldq      xmm2, 2
    psrldq      xmm5, 5
    psrldq      xmm3, 3
    psrldq      xmm4, 4

    ; NOTE(review): the second argument is presumably the byte offset into
    ; the output row (0 here, 8 for the second half) - confirm in the macro.
    APPLY_FILTER_8 0, 0

    ; Second half: same layout, 8 bytes further along the source row.
    movdqu      xmm0,   [rsi + 5]           ;load src

    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm0
    movdqa      xmm7, xmm0
    movdqa      xmm2, xmm0
    movdqa      xmm5, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm4, xmm0

    psrldq      xmm1, 1
    psrldq      xmm6, 6
    psrldq      xmm7, 7
    psrldq      xmm2, 2
    psrldq      xmm5, 5
    psrldq      xmm3, 3
    psrldq      xmm4, 4

    APPLY_FILTER_8 0, 8

    ; Step both pointers to the next row; lea leaves the flags untouched.
    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]
    dec         rcx
    jnz         .loop

    ; Drop the scratch slots, then reload the stack pointer that ALIGN_STACK
    ; is expected to have pushed before aligning.
    add         rsp, 16 * 10
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
771ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
; vp9_filter_block1d4_h8_avg_sse2
;
; Horizontal 8-tap filter for the 4-pixel-wide case, "avg" variant.
;
; Arguments, as read by this code:
;   arg(0)  src_ptr              -> rsi
;   arg(1)  src_pixels_per_line  -> rax (sign-extended from 32 bits)
;   arg(2)  output_ptr           -> rdi
;   arg(3)  output_pitch         -> rdx (sign-extended from 32 bits)
;   arg(4)  output_height        -> rcx, number of rows still to do
;   arg(5)  filter pointer, consumed by GET_FILTERS_4
;
; xmm0-xmm7 hold the source window and its byte-shifted copies.
;
; NOTE(review): the k0k1, k2k3, k5k4, k6k7, krd and zero stack slots are
; not written in this routine; GET_FILTERS_4 presumably fills them.
; NOTE(review): output_height must be non-zero; the row counter is only
; tested after a row has been processed.
global sym(vp9_filter_block1d4_h8_avg_sse2) PRIVATE
sym(vp9_filter_block1d4_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Reserve six 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 6
    %define k0k1 [rsp + 16 * 0]
    %define k2k3 [rsp + 16 * 1]
    %define k5k4 [rsp + 16 * 2]
    %define k6k7 [rsp + 16 * 3]
    %define krd [rsp + 16 * 4]
    %define zero [rsp + 16 * 5]

    ; Uses rdx, rcx and xmm7 as scratch, so the loop registers are set up
    ; only after it.
    GET_FILTERS_4

    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr

    ; movsxd sign-extends, so a 32-bit value with bit 31 set becomes negative.
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; 16 source bytes starting 3 bytes left of the row pointer.
    movdqu      xmm0,   [rsi - 3]           ;load src

    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm0
    movdqa      xmm7, xmm0
    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm5, xmm0
    movdqa      xmm4, xmm0

    ; xmmN = window shifted right by N bytes, so its low byte is [rsi - 3 + N].
    psrldq      xmm1, 1
    psrldq      xmm6, 6
    psrldq      xmm7, 7
    psrldq      xmm2, 2
    psrldq      xmm3, 3
    psrldq      xmm5, 5
    psrldq      xmm4, 4

    ; NOTE(review): argument 1 presumably selects averaging the result with
    ; the bytes already at the destination - confirm in APPLY_FILTER_4.
    APPLY_FILTER_4 1

    ; Step both pointers to the next row; lea leaves the flags untouched.
    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]
    dec         rcx
    jnz         .loop

    ; Drop the scratch slots, then reload the stack pointer that ALIGN_STACK
    ; is expected to have pushed before aligning.
    add         rsp, 16 * 6
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
836ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
; vp9_filter_block1d8_h8_avg_sse2
;
; Horizontal 8-tap filter for the 8-pixel-wide case, "avg" variant.
;
; Arguments read directly by this code:
;   arg(1)  src_pixels_per_line  -> rax (sign-extended from 32 bits)
;   arg(3)  output_pitch         -> rdx (sign-extended from 32 bits)
;   arg(4)  output_height        -> rcx, number of rows still to do
; Six arguments are shadowed in the prolog (SHADOW_ARGS_TO_STACK 6).
;
; Registers inside the row loop:
;   rsi        source row pointer, advanced by rax after each row
;   rdi        output row pointer, advanced by rdx after each row
;   xmm0-xmm7  source window and its byte-shifted copies
;
; NOTE(review): rsi/rdi are never loaded in this routine and the k0..k7,
; krd and zero stack slots are never written here. GET_FILTERS (defined
; outside this section) is presumably responsible for both - confirm.
; NOTE(review): output_height must be non-zero; the row counter is only
; tested after a row has been processed.
global sym(vp9_filter_block1d8_h8_avg_sse2) PRIVATE
sym(vp9_filter_block1d8_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Reserve ten 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    ; movsxd sign-extends, so a 32-bit value with bit 31 set becomes negative.
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; 16 source bytes starting 3 bytes left of the row pointer.
    movdqu      xmm0,   [rsi - 3]           ;load src

    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm0
    movdqa      xmm7, xmm0
    movdqa      xmm2, xmm0
    movdqa      xmm5, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm4, xmm0

    ; xmmN = window shifted right by N bytes, so its low byte is [rsi - 3 + N].
    psrldq      xmm1, 1
    psrldq      xmm6, 6
    psrldq      xmm7, 7
    psrldq      xmm2, 2
    psrldq      xmm5, 5
    psrldq      xmm3, 3
    psrldq      xmm4, 4

    ; NOTE(review): the first argument (1) presumably selects averaging with
    ; the bytes already at the destination, and the second is presumably the
    ; byte offset into the output row - confirm in APPLY_FILTER_8.
    APPLY_FILTER_8 1, 0

    ; Step both pointers to the next row; lea leaves the flags untouched.
    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]
    dec         rcx
    jnz         .loop

    ; Drop the scratch slots, then reload the stack pointer that ALIGN_STACK
    ; is expected to have pushed before aligning.
    add         rsp, 16 * 10
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
902ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org
; vp9_filter_block1d16_h8_avg_sse2
;
; Horizontal 8-tap filter for the 16-pixel-wide case, "avg" variant. Every
; row is handled as two 8-pixel halves; each half builds eight byte-shifted
; copies of the source window and hands them to APPLY_FILTER_8.
;
; Arguments read directly by this code:
;   arg(1)  src_pixels_per_line  -> rax (sign-extended from 32 bits)
;   arg(3)  output_pitch         -> rdx (sign-extended from 32 bits)
;   arg(4)  output_height        -> rcx, number of rows still to do
; Six arguments are shadowed in the prolog (SHADOW_ARGS_TO_STACK 6).
;
; Registers inside the row loop:
;   rsi        source row pointer, advanced by rax after each row
;   rdi        output row pointer, advanced by rdx after each row
;   xmm0-xmm7  source window and its byte-shifted copies
;
; NOTE(review): rsi/rdi are never loaded in this routine and the k0..k7,
; krd and zero stack slots are never written here. GET_FILTERS (defined
; outside this section) is presumably responsible for both - confirm.
; NOTE(review): output_height must be non-zero; the row counter is only
; tested after a row has been processed.
global sym(vp9_filter_block1d16_h8_avg_sse2) PRIVATE
sym(vp9_filter_block1d16_h8_avg_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    ; Reserve ten 16-byte scratch slots on a 16-byte aligned stack.
    ALIGN_STACK 16, rax
    sub         rsp, 16 * 10
    %define k0 [rsp + 16 * 0]
    %define k1 [rsp + 16 * 1]
    %define k2 [rsp + 16 * 2]
    %define k3 [rsp + 16 * 3]
    %define k4 [rsp + 16 * 4]
    %define k5 [rsp + 16 * 5]
    %define k6 [rsp + 16 * 6]
    %define k7 [rsp + 16 * 7]
    %define krd [rsp + 16 * 8]
    %define zero [rsp + 16 * 9]

    GET_FILTERS

    ; movsxd sign-extends, so a 32-bit value with bit 31 set becomes negative.
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height

.loop:
    ; First half: 16 source bytes starting 3 bytes left of the row pointer.
    movdqu      xmm0,   [rsi - 3]           ;load src

    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm0
    movdqa      xmm7, xmm0
    movdqa      xmm2, xmm0
    movdqa      xmm5, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm4, xmm0

    ; xmmN = window shifted right by N bytes, so its low byte is [rsi - 3 + N].
    psrldq      xmm1, 1
    psrldq      xmm6, 6
    psrldq      xmm7, 7
    psrldq      xmm2, 2
    psrldq      xmm5, 5
    psrldq      xmm3, 3
    psrldq      xmm4, 4

    ; NOTE(review): the first argument (1) presumably selects averaging with
    ; the bytes already at the destination, and the second is presumably the
    ; byte offset into the output row (0 here, 8 for the second half) -
    ; confirm in APPLY_FILTER_8.
    APPLY_FILTER_8 1, 0

    ; Second half: same layout, 8 bytes further along the source row.
    movdqu      xmm0,   [rsi + 5]           ;load src

    movdqa      xmm1, xmm0
    movdqa      xmm6, xmm0
    movdqa      xmm7, xmm0
    movdqa      xmm2, xmm0
    movdqa      xmm5, xmm0
    movdqa      xmm3, xmm0
    movdqa      xmm4, xmm0

    psrldq      xmm1, 1
    psrldq      xmm6, 6
    psrldq      xmm7, 7
    psrldq      xmm2, 2
    psrldq      xmm5, 5
    psrldq      xmm3, 3
    psrldq      xmm4, 4

    APPLY_FILTER_8 1, 8

    ; Step both pointers to the next row; lea leaves the flags untouched.
    lea         rsi, [rsi + rax]
    lea         rdi, [rdi + rdx]
    dec         rcx
    jnz         .loop

    ; Drop the scratch slots, then reload the stack pointer that ALIGN_STACK
    ; is expected to have pushed before aligning.
    add         rsp, 16 * 10
    pop         rsp

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
988