;
;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Supplies arg(), sym(), PRIVATE, SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS and
; SAVE_XMM / RESTORE_XMM, all of which are used below.
%include "vpx_ports/x86_abi_support.asm"

;------------------------------------------------------------------------------
; GET_PARAM_4
; Fetches the six arguments through arg(n) and builds the constants used by
; APPLY_FILTER_4 (4-pixel-wide kernels).
;
; Out:  rsi  = src_ptr          (arg 0)
;       rax  = pixels_per_line  (arg 1, dword sign-extended to 64 bits)
;       rdi  = output_ptr       (arg 2)
;       rdx  = out_pitch        (arg 3, dword sign-extended to 64 bits)
;       rcx  = output_height    (arg 4, dword sign-extended to 64 bits)
;       xmm3 = byte pair {k3, k4} in each of the low four 16-bit lanes;
;              the upper 8 bytes are not meaningful
;       xmm2 = 0x0100 in every 16-bit lane
;
; The filter (arg 5) is read as 16-bit taps with movdqa, so it must be
; 16-byte aligned. Only taps 3 and 4 are kept; packsswb narrows them to
; signed bytes with saturation.
;
; pmulhrsw by 0x0100 computes (x * 0x0100 + 0x4000) >> 15, which equals
; (x + 64) >> 7: the rounding and the shift in a single instruction.
;
; rdx and ecx serve as temporaries first and receive their final values
; last, so the statement order below matters.
;------------------------------------------------------------------------------
%macro GET_PARAM_4 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    mov         ecx, 0x01000100             ;two 16-bit lanes of 0x0100

    movdqa      xmm3, [rdx]                 ;load filters
    psrldq      xmm3, 6                     ;drop taps 0-2, tap 3 is now lane 0
    packsswb    xmm3, xmm3                  ;16-bit taps -> signed bytes
    pshuflw     xmm3, xmm3, 0b              ;k3_k4

    movd        xmm2, ecx                   ;rounding_shift
    pshufd      xmm2, xmm2, 0               ;broadcast to all lanes

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height
%endm

;------------------------------------------------------------------------------
; APPLY_FILTER_4 avg
; Filters one row of 4 pixels and steps to the next row.
;
; In:   xmm0 = first-tap pixels  (low 4 bytes are the ones that matter)
;       xmm1 = second-tap pixels (low 4 bytes are the ones that matter)
;       xmm3 = {k3, k4} byte pairs, xmm2 = 0x0100 lanes (see GET_PARAM_4)
;       rsi/rdi = source/destination row, rax/rdx = their strides,
;       rcx = rows remaining
; %1:   0 = store the filtered row,
;       1 = store pavgb(filtered row, bytes already at [rdi])
;
; Per pixel: clamp_u8((a * k3 + b * k4 + 64) >> 7), where a and b are the
; paired bytes of xmm0 and xmm1.
;
; Writes 4 bytes at [rdi], then rsi += rax, rdi += rdx, rcx -= 1.
; "dec rcx" is deliberately the last instruction: the caller branches on its
; ZF with jnz, and the preceding lea instructions leave the flags alone.
; Clobbers xmm0 (and xmm1 when %1 is set).
;------------------------------------------------------------------------------
%macro APPLY_FILTER_4 1
    punpcklbw   xmm0, xmm1                  ;interleave: a0 b0 a1 b1 ...
    pmaddubsw   xmm0, xmm3                  ;a*k3 + b*k4 per 16-bit lane

    pmulhrsw    xmm0, xmm2                  ;rounding(+64)+shift(>>7)
    packuswb    xmm0, xmm0                  ;pack to byte

%if %1
    movd        xmm1, [rdi]                 ;current destination pixels
    pavgb       xmm0, xmm1                  ;rounded average with them
%endif
    movd        [rdi], xmm0                 ;store the result
    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx                         ;sets ZF for the caller's jnz
%endm

;------------------------------------------------------------------------------
; GET_PARAM
; Fetches the six arguments through arg(n) and builds the constants used by
; APPLY_FILTER_8 and APPLY_FILTER_16.
;
; Out:  rsi  = src_ptr          (arg 0)
;       rax  = pixels_per_line  (arg 1, dword sign-extended to 64 bits)
;       rdi  = output_ptr       (arg 2)
;       rdx  = out_pitch        (arg 3, dword sign-extended to 64 bits)
;       rcx  = output_height    (arg 4, dword sign-extended to 64 bits)
;       xmm7 = byte pair {k3, k4} in all eight 16-bit lanes
;       xmm6 = 0x0100 in every 16-bit lane
;
; The filter (arg 5) is read as 16-bit taps with movdqa, so it must be
; 16-byte aligned. Only taps 3 and 4 are kept; packsswb narrows them to
; signed bytes with saturation.
;
; pmulhrsw by 0x0100 computes (x * 0x0100 + 0x4000) >> 15, which equals
; (x + 64) >> 7: the rounding and the shift in a single instruction.
;
; rdx and ecx serve as temporaries first and receive their final values
; last, so the statement order below matters.
;------------------------------------------------------------------------------
%macro GET_PARAM 0
    mov         rdx, arg(5)                 ;filter ptr
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    mov         ecx, 0x01000100             ;two 16-bit lanes of 0x0100

    movdqa      xmm7, [rdx]                 ;load filters
    psrldq      xmm7, 6                     ;drop taps 0-2, tap 3 is now lane 0
    packsswb    xmm7, xmm7                  ;16-bit taps -> signed bytes
    pshuflw     xmm7, xmm7, 0b              ;k3_k4
    punpcklwd   xmm7, xmm7                  ;replicate into the upper half too

    movd        xmm6, ecx                   ;rounding_shift
    pshufd      xmm6, xmm6, 0               ;broadcast to all lanes

    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height
%endm

;------------------------------------------------------------------------------
; APPLY_FILTER_8 avg
; Filters one row of 8 pixels and steps to the next row.
;
; In:   xmm0 = first-tap pixels  (low 8 bytes are used)
;       xmm1 = second-tap pixels (low 8 bytes are used)
;       xmm7 = {k3, k4} byte pairs, xmm6 = 0x0100 lanes (see GET_PARAM)
;       rsi/rdi = source/destination row, rax/rdx = their strides,
;       rcx = rows remaining
; %1:   0 = store the filtered row,
;       1 = store pavgb(filtered row, bytes already at [rdi])
;
; Per pixel: clamp_u8((a * k3 + b * k4 + 64) >> 7), where a and b are the
; paired bytes of xmm0 and xmm1.
;
; Writes 8 bytes at [rdi], then rsi += rax, rdi += rdx, rcx -= 1.
; "dec rcx" is deliberately the last instruction: the caller branches on its
; ZF with jnz, and the preceding lea instructions leave the flags alone.
; Clobbers xmm0 (and xmm1 when %1 is set).
;------------------------------------------------------------------------------
%macro APPLY_FILTER_8 1
    punpcklbw   xmm0, xmm1                  ;interleave: a0 b0 a1 b1 ...
    pmaddubsw   xmm0, xmm7                  ;a*k3 + b*k4 per 16-bit lane

    pmulhrsw    xmm0, xmm6                  ;rounding(+64)+shift(>>7)
    packuswb    xmm0, xmm0                  ;pack back to byte

%if %1
    movq        xmm1, [rdi]                 ;current destination pixels
    pavgb       xmm0, xmm1                  ;rounded average with them
%endif
    movq        [rdi], xmm0                 ;store the result

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx                         ;sets ZF for the caller's jnz
%endm

;------------------------------------------------------------------------------
; APPLY_FILTER_16 avg
; Filters one row of 16 pixels and steps to the next row.
;
; In:   xmm0 = 16 first-tap pixels
;       xmm2 = copy of xmm0 (supplies the upper 8 pixels)
;       xmm1 = 16 second-tap pixels
;       xmm7 = {k3, k4} byte pairs, xmm6 = 0x0100 lanes (see GET_PARAM)
;       rsi/rdi = source/destination row, rax/rdx = their strides,
;       rcx = rows remaining
; %1:   0 = store the filtered row,
;       1 = store pavgb(filtered row, bytes already at [rdi])
;
; Per pixel: clamp_u8((a * k3 + b * k4 + 64) >> 7), where a and b are the
; paired bytes of the first-tap and second-tap registers.
;
; Writes 16 bytes at [rdi] with an unaligned store, then rsi += rax,
; rdi += rdx, rcx -= 1.
; "dec rcx" is deliberately the last instruction: the caller branches on its
; ZF with jnz, and the preceding lea instructions leave the flags alone.
; Clobbers xmm0, xmm2 (and xmm1 when %1 is set).
;------------------------------------------------------------------------------
%macro APPLY_FILTER_16 1
    punpcklbw   xmm0, xmm1                  ;pixels 0-7:  a b a b ...
    punpckhbw   xmm2, xmm1                  ;pixels 8-15: a b a b ...
    pmaddubsw   xmm0, xmm7                  ;a*k3 + b*k4 per 16-bit lane
    pmaddubsw   xmm2, xmm7

    pmulhrsw    xmm0, xmm6                  ;rounding(+64)+shift(>>7)
    pmulhrsw    xmm2, xmm6
    packuswb    xmm0, xmm2                  ;pack back to byte

%if %1
    movdqu      xmm1, [rdi]                 ;current destination pixels
    pavgb       xmm0, xmm1                  ;rounded average with them
%endif
    movdqu      [rdi], xmm0                 ;store the result

    lea         rsi, [rsi + rax]            ;next source row
    lea         rdi, [rdi + rdx]            ;next destination row
    dec         rcx                         ;sets ZF for the caller's jnz
%endm

SECTION .text

;------------------------------------------------------------------------------
; vpx_filter_block1d4_v2_ssse3
; Vertical 2-tap filter, 4 pixels wide.
;
; Args (read by GET_PARAM_4): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                             3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   out[x] = clamp_u8((src[x] * k3 + src[x + pixels_per_line] * k4 + 64) >> 7)
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm3; rsi and rdi are saved/restored.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d4_v2_ssse3) PRIVATE
sym(vpx_filter_block1d4_v2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM_4
.loop:
    movd        xmm0, [rsi]                 ;current row, 4 pixels
    movd        xmm1, [rsi + rax]           ;row below,   4 pixels

    APPLY_FILTER_4 0
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d8_v2_ssse3
; Vertical 2-tap filter, 8 pixels wide.
;
; Args (read by GET_PARAM): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                           3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   out[x] = clamp_u8((src[x] * k3 + src[x + pixels_per_line] * k4 + 64) >> 7)
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0, xmm1, xmm6, xmm7; rsi and rdi are
; saved/restored, and SAVE_XMM 7 / RESTORE_XMM bracket the xmm6/xmm7 use.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE
sym(vpx_filter_block1d8_v2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM
.loop:
    movq        xmm0, [rsi]                 ;current row, 8 pixels
    movq        xmm1, [rsi + rax]           ;row below,   8 pixels

    APPLY_FILTER_8 0
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d16_v2_ssse3
; Vertical 2-tap filter, 16 pixels wide.
;
; Args (read by GET_PARAM): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                           3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   out[x] = clamp_u8((src[x] * k3 + src[x + pixels_per_line] * k4 + 64) >> 7)
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm2, xmm6, xmm7; rsi and rdi are
; saved/restored, and SAVE_XMM 7 / RESTORE_XMM bracket the xmm6/xmm7 use.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE
sym(vpx_filter_block1d16_v2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM
.loop:
    movdqu      xmm0, [rsi]                 ;current row, 16 pixels
    movdqu      xmm1, [rsi + rax]           ;row below,   16 pixels
    movdqa      xmm2, xmm0                  ;copy for the upper 8 pixels

    APPLY_FILTER_16 0
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d4_v2_avg_ssse3
; Vertical 2-tap filter, 4 pixels wide, averaged into the destination.
;
; Args (read by GET_PARAM_4): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                             3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   f      = clamp_u8((src[x] * k3 + src[x + pixels_per_line] * k4 + 64) >> 7)
;   out[x] = (out[x] + f + 1) >> 1                      (pavgb)
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm3; rsi and rdi are saved/restored.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE
sym(vpx_filter_block1d4_v2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM_4
.loop:
    movd        xmm0, [rsi]                 ;current row, 4 pixels
    movd        xmm1, [rsi + rax]           ;row below,   4 pixels

    APPLY_FILTER_4 1
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d8_v2_avg_ssse3
; Vertical 2-tap filter, 8 pixels wide, averaged into the destination.
;
; Args (read by GET_PARAM): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                           3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   f      = clamp_u8((src[x] * k3 + src[x + pixels_per_line] * k4 + 64) >> 7)
;   out[x] = (out[x] + f + 1) >> 1                      (pavgb)
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0, xmm1, xmm6, xmm7; rsi and rdi are
; saved/restored, and SAVE_XMM 7 / RESTORE_XMM bracket the xmm6/xmm7 use.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE
sym(vpx_filter_block1d8_v2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM
.loop:
    movq        xmm0, [rsi]                 ;current row, 8 pixels
    movq        xmm1, [rsi + rax]           ;row below,   8 pixels

    APPLY_FILTER_8 1
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d16_v2_avg_ssse3
; Vertical 2-tap filter, 16 pixels wide, averaged into the destination.
;
; Args (read by GET_PARAM): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                           3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   f      = clamp_u8((src[x] * k3 + src[x + pixels_per_line] * k4 + 64) >> 7)
;   out[x] = (out[x] + f + 1) >> 1                      (pavgb)
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm2, xmm6, xmm7; rsi and rdi are
; saved/restored, and SAVE_XMM 7 / RESTORE_XMM bracket the xmm6/xmm7 use.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE
sym(vpx_filter_block1d16_v2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM
.loop:
    movdqu      xmm0, [rsi]                 ;current row, 16 pixels
    movdqu      xmm1, [rsi + rax]           ;row below,   16 pixels
    movdqa      xmm2, xmm0                  ;copy for the upper 8 pixels

    APPLY_FILTER_16 1
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d4_h2_ssse3
; Horizontal 2-tap filter, 4 pixels wide.
;
; Args (read by GET_PARAM_4): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                             3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   out[x] = clamp_u8((src[x] * k3 + src[x + 1] * k4 + 64) >> 7),  x = 0..3
;
; Each row is fetched with two 4-byte loads, so exactly src[0..4] (5 bytes)
; is read per row and nothing beyond the pixels the filter consumes.
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm3; rsi and rdi are saved/restored.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE
sym(vpx_filter_block1d4_h2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM_4
.loop:
    movd        xmm0, [rsi]                 ;src[0..3]
    movd        xmm1, [rsi + 1]             ;src[1..4]

    APPLY_FILTER_4 0
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d8_h2_ssse3
; Horizontal 2-tap filter, 8 pixels wide.
;
; Args (read by GET_PARAM): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                           3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   out[x] = clamp_u8((src[x] * k3 + src[x + 1] * k4 + 64) >> 7),  x = 0..7
;
; Each row is fetched with two 8-byte loads, so exactly src[0..8] (9 bytes)
; is read per row and nothing beyond the pixels the filter consumes.
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0, xmm1, xmm6, xmm7; rsi and rdi are
; saved/restored, and SAVE_XMM 7 / RESTORE_XMM bracket the xmm6/xmm7 use.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE
sym(vpx_filter_block1d8_h2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM
.loop:
    movq        xmm0, [rsi]                 ;src[0..7]
    movq        xmm1, [rsi + 1]             ;src[1..8]

    APPLY_FILTER_8 0
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d16_h2_ssse3
; Horizontal 2-tap filter, 16 pixels wide.
;
; Args (read by GET_PARAM): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                           3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows:
;   out[x] = clamp_u8((src[x] * k3 + src[x + 1] * k4 + 64) >> 7),  x = 0..15
;
; Reads src[0..16] (17 bytes) per row through two unaligned 16-byte loads.
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm2, xmm6, xmm7; rsi and rdi are
; saved/restored, and SAVE_XMM 7 / RESTORE_XMM bracket the xmm6/xmm7 use.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE
sym(vpx_filter_block1d16_h2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM
.loop:
    movdqu      xmm0, [rsi]                 ;src[0..15]
    movdqu      xmm1, [rsi + 1]             ;src[1..16]
    movdqa      xmm2, xmm0                  ;copy for the upper 8 pixels

    APPLY_FILTER_16 0
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d4_h2_avg_ssse3
; Horizontal 2-tap filter, 4 pixels wide, averaged into the destination.
;
; Args (read by GET_PARAM_4): 0 src_ptr, 1 pixels_per_line, 2 output_ptr,
;                             3 out_pitch, 4 output_height, 5 filter ptr
; For each of output_height rows, x = 0..3:
;   f      = clamp_u8((src[x] * k3 + src[x + 1] * k4 + 64) >> 7)
;   out[x] = (out[x] + f + 1) >> 1                      (pavgb)
;
; Each row is fetched with two 4-byte loads, so exactly src[0..4] (5 bytes)
; is read per row and nothing beyond the pixels the filter consumes.
;
; The loop tests the row count after the body has run, so output_height
; must be at least 1.
; Uses rax, rcx, rdx, rsi, rdi, xmm0-xmm3; rsi and rdi are saved/restored.
;------------------------------------------------------------------------------
global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE
sym(vpx_filter_block1d4_h2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; end prolog

    GET_PARAM_4
.loop:
    movd        xmm0, [rsi]                 ;src[0..3]
    movd        xmm1, [rsi + 1]             ;src[1..4]

    APPLY_FILTER_4 1
    jnz         .loop                       ;ZF comes from dec rcx in the macro

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;------------------------------------------------------------------------------
; vpx_filter_block1d8_h2_avg_ssse3
;
; Row loop that pairs every source byte with its right-hand neighbour
; (xmm0 = src[i], xmm1 = src[i + 1]) and hands both registers to
; APPLY_FILTER_8 with parameter 1.
; NOTE(review): going by the name, this is the 8-pixel-wide horizontal 2-tap
; filter whose result is averaged into the destination; APPLY_FILTER_8 is
; defined outside this section, so confirm there.
;
; Takes six arguments (see SHADOW_ARGS_TO_STACK 6). GET_PARAM is defined
; outside this section; presumably it decodes them the same way GET_PARAM_4
; does (arg(0) src_ptr, arg(2) output_ptr, arg(5) filter ptr) -- verify.
;
; Each iteration reads 16 bytes at rsi with an unaligned load.
; rsi and rdi are saved and restored here because they are callee-saved under
; the Microsoft x64 ABI; SAVE_XMM 7 / RESTORE_XMM presumably do the same for
; xmm6-xmm7 (callee-saved on that ABI) -- confirm in x86_abi_support.asm.
;------------------------------------------------------------------------------
368da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianglobal sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE
369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramaniansym(vpx_filter_block1d8_h2_avg_ssse3):
370b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    push        rbp                         ;save caller's frame pointer
371b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    mov         rbp, rsp                    ;rbp = base of this frame
372b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    SHADOW_ARGS_TO_STACK 6                  ;six arguments; presumably makes them reachable through arg(n)
373b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    SAVE_XMM 7                              ;presumably preserves xmm6-xmm7 where the ABI requires it -- confirm
374b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    push        rsi                         ;used as the source pointer below, restored in the epilog
375b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    push        rdi                         ;restored in the epilog (presumably the output pointer)
376b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ; end prolog
377b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
378b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    GET_PARAM                               ;argument/filter setup; macro body is outside this section
; One pass through .loop per row. NOTE(review): pointer advance and the row
; counter are assumed to live inside APPLY_FILTER_8 -- confirm.
379b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian.loop:
380b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    movdqu      xmm0, [rsi]                 ;xmm0 = 16 bytes of the source row (unaligned load)
381b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    movdqa      xmm1, xmm0                  ;xmm1 = copy of the same bytes
382b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    psrldq      xmm1, 1                     ;shift right one byte: xmm1[i] = src[i + 1], top byte zero-filled
383b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
384b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    APPLY_FILTER_8 1                        ;parameter 1 presumably selects the averaging store -- confirm in the macro
385b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    jnz         .loop                       ;loops on the flags left by the macro's last instruction (presumably the row-counter decrement)
386b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
387b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ; begin epilog
388b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    pop         rdi                         ;epilog undoes the prolog steps in reverse order
389b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    pop         rsi
390b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    RESTORE_XMM                             ;counterpart of SAVE_XMM 7
391b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    UNSHADOW_ARGS                           ;counterpart of SHADOW_ARGS_TO_STACK
392b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    pop         rbp                         ;restore caller's frame pointer
393b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ret
394b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
;------------------------------------------------------------------------------
; vpx_filter_block1d16_h2_avg_ssse3
;
; Row loop that loads 16 source bytes and the 16 bytes starting one position
; to the right (xmm0 = src[0..15], xmm1 = src[1..16], xmm2 = copy of xmm0) and
; hands the three registers to APPLY_FILTER_16 with parameter 1.
; NOTE(review): going by the name, this is the 16-pixel-wide horizontal 2-tap
; filter whose result is averaged into the destination; APPLY_FILTER_16 is
; defined outside this section, so confirm there.
;
; Takes six arguments (see SHADOW_ARGS_TO_STACK 6). GET_PARAM is defined
; outside this section; presumably it decodes them the same way GET_PARAM_4
; does (arg(0) src_ptr, arg(2) output_ptr, arg(5) filter ptr) -- verify.
;
; Each iteration reads 17 bytes starting at rsi (two overlapping unaligned
; 16-byte loads).
; rsi and rdi are saved and restored here because they are callee-saved under
; the Microsoft x64 ABI; SAVE_XMM 7 / RESTORE_XMM presumably do the same for
; xmm6-xmm7 (callee-saved on that ABI) -- confirm in x86_abi_support.asm.
;------------------------------------------------------------------------------
395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianglobal sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE
396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramaniansym(vpx_filter_block1d16_h2_avg_ssse3):
397b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    push        rbp                         ;save caller's frame pointer
398b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    mov         rbp, rsp                    ;rbp = base of this frame
399b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    SHADOW_ARGS_TO_STACK 6                  ;six arguments; presumably makes them reachable through arg(n)
400b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    SAVE_XMM 7                              ;presumably preserves xmm6-xmm7 where the ABI requires it -- confirm
401b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    push        rsi                         ;used as the source pointer below, restored in the epilog
402b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    push        rdi                         ;restored in the epilog (presumably the output pointer)
403b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ; end prolog
404b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
405b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    GET_PARAM                               ;argument/filter setup; macro body is outside this section
; One pass through .loop per row. NOTE(review): pointer advance and the row
; counter are assumed to live inside APPLY_FILTER_16 -- confirm.
406b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian.loop:
407b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    movdqu      xmm0,   [rsi]               ;xmm0 = src[0..15] (unaligned load)
408b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    movdqu      xmm1,   [rsi + 1]           ;xmm1 = src[1..16], i.e. each byte's right-hand neighbour
409b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    movdqa      xmm2, xmm0                  ;second copy of src[0..15]; presumably for the upper half inside the macro -- confirm
410b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
411b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    APPLY_FILTER_16 1                       ;parameter 1 presumably selects the averaging store -- confirm in the macro
412b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    jnz         .loop                       ;loops on the flags left by the macro's last instruction (presumably the row-counter decrement)
413b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
414b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ; begin epilog
415b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    pop         rdi                         ;epilog undoes the prolog steps in reverse order
416b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    pop         rsi
417b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    RESTORE_XMM                             ;counterpart of SAVE_XMM 7
418b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    UNSHADOW_ARGS                           ;counterpart of SHADOW_ARGS_TO_STACK
419b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    pop         rbp                         ;restore caller's frame pointer
420b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ret
421