;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; NOTE(review): sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS, SAVE_XMM
; and RESTORE_XMM are not defined in this file; presumably they come from
; the include above -- confirm there for their exact behaviour.

;-----------------------------------------------------------------------
; RECON_ROW_8 xmm_tmp, s_addr, q_addr, d_addr
;
; Reconstructs one row of 8 pixels:
;   - loads 8 bytes from [s_addr] and widens them to 8 words (xmm0 == 0),
;   - adds the 8 words at [q_addr] with signed saturation,
;   - packs back to bytes with unsigned saturation,
;   - stores the low 8 bytes to [d_addr].
; Requires xmm0 to be zero on entry. Clobbers xmm_tmp.
;-----------------------------------------------------------------------
%macro RECON_ROW_8 4
        movq        %1,                 MMWORD PTR [%2]
        punpcklbw   %1,                 xmm0
        paddsw      %1,                 XMMWORD PTR [%3]
        packuswb    %1,                 xmm0        ; clamp each word to 0..255
        movq        MMWORD PTR [%4],    %1
%endmacro

;-----------------------------------------------------------------------
; RECON_ROW_16 xmm_lo, xmm_hi, s_addr, q_lo_addr, q_hi_addr, d_addr
;
; Reconstructs one row of 16 pixels:
;   - loads 16 bytes from [s_addr] (aligned load),
;   - widens the low/high 8 bytes into xmm_lo/xmm_hi (xmm0 == 0),
;   - adds the words at [q_lo_addr] / [q_hi_addr] with signed saturation,
;   - packs both halves back to 16 bytes with unsigned saturation,
;   - stores 16 bytes to [d_addr] (aligned store).
; Requires xmm0 to be zero on entry. Clobbers xmm_lo and xmm_hi.
;-----------------------------------------------------------------------
%macro RECON_ROW_16 6
        movdqa      %1,                 XMMWORD PTR [%3]
        movdqa      %2,                 %1
        punpcklbw   %1,                 xmm0
        punpckhbw   %2,                 xmm0
        paddsw      %1,                 XMMWORD PTR [%4]
        paddsw      %2,                 XMMWORD PTR [%5]
        packuswb    %1,                 %2          ; clamp each word to 0..255
        movdqa      XMMWORD PTR [%6],   %1
%endmacro

;-----------------------------------------------------------------------
; COPY_3_ROWS xmm_a, xmm_b, xmm_c
;
; Copies three 16-byte rows: reads [rsi], [rsi+rax], [rsi+rax*2] with
; unaligned loads, then writes [rdi], [rdi+rcx], [rdi+rcx*2] with aligned
; stores. All three loads are issued before the first store.
; Does not move rsi/rdi. Clobbers the three xmm registers given.
;-----------------------------------------------------------------------
%macro COPY_3_ROWS 3
        movdqu      %1,                 [rsi]
        movdqu      %2,                 [rsi+rax]
        movdqu      %3,                 [rsi+rax*2]

        movdqa      [rdi],              %1
        movdqa      [rdi+rcx],          %2
        movdqa      [rdi+rcx*2],        %3
%endmacro

;-----------------------------------------------------------------------
; ADVANCE_3_ROWS
;
; rsi += 3 * rax (source stride), rdi += 3 * rcx (destination stride).
; Clobbers flags.
;-----------------------------------------------------------------------
%macro ADVANCE_3_ROWS 0
        lea         rsi,                [rsi+rax*2]
        add         rsi,                rax
        lea         rdi,                [rdi+rcx*2]
        add         rdi,                rcx
%endmacro


;-----------------------------------------------------------------------
; void vp8_recon2b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)
;
; For each of 4 rows: d[row] = saturate_u8(s[row] + q[row]), 8 pixels wide.
;   s      : read at byte offsets 0, 8, 16, 24 (rows are packed, 8 bytes each)
;   q      : read at byte offsets 0, 16, 32, 48 (8 words per row)
;   d      : rows written at d, d+stride, d+2*stride, d+3*stride
;   stride : sign-extended from 32 bits
;
; Registers: rsi = s, rdx = q, rdi = d, rax = stride, xmm0 = 0,
;            xmm1..xmm4 = per-row scratch.
;-----------------------------------------------------------------------
global sym(vp8_recon2b_sse2)
sym(vp8_recon2b_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0)              ; s
        mov         rdi,        arg(2)              ; d
        mov         rdx,        arg(1)              ; q
        movsxd      rax,        dword ptr arg(3)    ; stride
        pxor        xmm0,       xmm0                ; zero, used for widening/packing

        RECON_ROW_8 xmm1, rsi,    rdx,    rdi           ; row 0
        RECON_ROW_8 xmm2, rsi+8,  rdx+16, rdi+rax       ; row 1
        RECON_ROW_8 xmm3, rsi+16, rdx+32, rdi+rax*2     ; row 2

        add         rdi,        rax                 ; so that rdi+rax*2 is row 3
        RECON_ROW_8 xmm4, rsi+24, rdx+48, rdi+rax*2     ; row 3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
; void vp8_recon4b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)
;
; For each of 4 rows: d[row] = saturate_u8(s[row] + q[row]), 16 pixels wide.
;   s      : read at byte offsets 0, 16, 32, 48 with aligned 16-byte loads
;   q      : read at byte offsets 0 .. 112 in 16-byte steps (16 words per row)
;   d      : rows written at d, d+stride, d+2*stride, d+3*stride with
;            aligned 16-byte stores
;   stride : sign-extended from 32 bits
;
; Registers: rsi = s, rdx = q, rdi = d, rax = stride, xmm0 = 0,
;            xmm1..xmm7 = per-row scratch.
; NOTE(review): xmm6/xmm7 are used below; SAVE_XMM / RESTORE_XMM presumably
; preserve them where the ABI requires it -- confirm in the macro definition.
;-----------------------------------------------------------------------
global sym(vp8_recon4b_sse2)
sym(vp8_recon4b_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    SAVE_XMM
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0)              ; s
        mov         rdi,        arg(2)              ; d
        mov         rdx,        arg(1)              ; q
        movsxd      rax,        dword ptr arg(3)    ; stride
        pxor        xmm0,       xmm0                ; zero, used for widening

        RECON_ROW_16 xmm1, xmm5, rsi,    rdx,    rdx+16,  rdi          ; row 0
        RECON_ROW_16 xmm2, xmm6, rsi+16, rdx+32, rdx+48,  rdi+rax      ; row 1
        RECON_ROW_16 xmm3, xmm7, rsi+32, rdx+64, rdx+80,  rdi+rax*2    ; row 2

        add         rdi,        rax                 ; so that rdi+rax*2 is row 3
        RECON_ROW_16 xmm4, xmm5, rsi+48, rdx+96, rdx+112, rdi+rax*2    ; row 3

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
; void vp8_copy_mem16x16_sse2(
;     unsigned char *src,
;     int src_stride,
;     unsigned char *dst,
;     int dst_stride
;     )
;
; Copies 16 rows of 16 bytes from src to dst.
;   src rows are read with unaligned loads (movdqu);
;   dst rows are written with aligned stores (movdqa).
; Both strides are sign-extended from 32 bits.
;
; Rows are handled as five groups of three (rows 0-14) followed by a
; single tail row (row 15).
;
; Registers: rsi = src cursor, rax = src_stride,
;            rdi = dst cursor, rcx = dst_stride,
;            xmm0..xmm5 = row data.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem16x16_sse2)
sym(vp8_copy_mem16x16_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0)              ; src
        movsxd      rax,        dword ptr arg(1)    ; src_stride
        mov         rdi,        arg(2)              ; dst
        movsxd      rcx,        dword ptr arg(3)    ; dst_stride

        COPY_3_ROWS xmm0, xmm1, xmm2                ; rows 0-2
        ADVANCE_3_ROWS

        COPY_3_ROWS xmm3, xmm4, xmm5                ; rows 3-5
        ADVANCE_3_ROWS

        COPY_3_ROWS xmm0, xmm1, xmm2                ; rows 6-8
        ADVANCE_3_ROWS

        COPY_3_ROWS xmm3, xmm4, xmm5                ; rows 9-11
        ADVANCE_3_ROWS

        COPY_3_ROWS xmm0, xmm1, xmm2                ; rows 12-14

        ; Step both cursors to row 14 only; row 15 is then reached with a
        ; single +stride displacement.
        lea         rsi,        [rsi+rax*2]
        lea         rdi,        [rdi+rcx*2]

        movdqu      xmm3,       [rsi+rax]           ; row 15
        movdqa      [rdi+rcx],  xmm3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret