;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
; File-local helper macros.
;
; Syntax: NASM/yasm (Intel operand order).  Arguments are fetched through
; the arg() macro after SHADOW_ARGS_TO_STACK; both come from
; x86_abi_support.asm.
;-----------------------------------------------------------------------

; SUBTRACT_WIDEN_16 src, pred, tmp
;   In:    %1 = 16 unsigned source bytes
;          %2 = 16 unsigned predictor bytes
;   Out:   %1 = (src - pred) of bytes 0..7  as signed 16-bit words
;          %3 = (src - pred) of bytes 8..15 as signed 16-bit words
;   Clobb: %2 (left holding the per-byte sign mask)
;   Needs: GLOBAL(t80) to be addressable at the point of use.
%macro SUBTRACT_WIDEN_16 3
    movdqa      %3, %1
    psubb       %1, %2                      ; low 8 bits of every difference

    ; pcmpgtb is a signed compare; flipping the top bit of both operands
    ; makes it order the original bytes as unsigned values.
    pxor        %2, [GLOBAL(t80)]
    pxor        %3, [GLOBAL(t80)]
    pcmpgtb     %2, %3                      ; 0xff where pred > src (diff < 0)

    ; Interleave difference (low byte) with sign mask (high byte).
    ; punpcklbw leaves its source untouched, so %2 serves both halves.
    movdqa      %3, %1
    punpcklbw   %1, %2
    punpckhbw   %3, %2
%endmacro

; SUBTRACT_B_ROW src_mem, pred_mem, diff_mem
;   Loads 4 bytes from src_mem and from pred_mem, widens them to words
;   and stores the 4 word differences (8 bytes) to diff_mem.
;   Needs: xmm2 == 0.   Clobb: xmm0, xmm1.
%macro SUBTRACT_B_ROW 3
    movd        xmm0, %1
    movd        xmm1, %2
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        MMWORD PTR %3, xmm0
%endmacro

; SUBTRACT_UV_ROW_PAIR src_row_a, src_row_b, pred_offset, diff_offset
;   Packs two 8-byte source rows into one register, subtracts the 16
;   predictor bytes at [rax + pred_offset] and stores 16 word differences
;   at [rdi + diff_offset].
;   Clobb: xmm0, xmm1, xmm2.
%macro SUBTRACT_UV_ROW_PAIR 4
    movq        xmm0, MMWORD PTR %1         ; src, first row of the pair
    movq        xmm2, MMWORD PTR %2         ; src, second row of the pair
    movdqa      xmm1, XMMWORD PTR [rax + %3] ; pred, both rows
    punpcklqdq  xmm0, xmm2

    SUBTRACT_WIDEN_16 xmm0, xmm1, xmm2

    movdqa      XMMWORD PTR [rdi + %4], xmm0
    movdqa      XMMWORD PTR [rdi + %4 + 16], xmm2
%endmacro

; SUBTRACT_UV_PLANE
;   Processes 8 rows of 8 pixels.
;   In:    rsi = source, rdx = stride, rcx = 3 * stride,
;          rax = predictor (rows 8 bytes apart), rdi = diff output
;   Out:   rsi advanced by 4 * stride
;   Clobb: xmm0, xmm1, xmm2.
%macro SUBTRACT_UV_PLANE 0
    SUBTRACT_UV_ROW_PAIR [rsi],         [rsi+rdx], 0,  0    ;line 0 1
    SUBTRACT_UV_ROW_PAIR [rsi+rdx*2],   [rsi+rcx], 16, 32   ;line 2 3

    lea         rsi, [rsi + rdx*4]

    SUBTRACT_UV_ROW_PAIR [rsi],         [rsi+rdx], 32, 64   ;line 4 5
    SUBTRACT_UV_ROW_PAIR [rsi+rdx*2],   [rsi+rcx], 48, 96   ;line 6 7
%endmacro


;-----------------------------------------------------------------------
;void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; Computes diff = z - Predictor for a block of 4 rows x 4 pixels.
;   z rows are src_stride bytes apart, Predictor rows are pitch bytes
;   apart and diff rows are pitch shorts (2 * pitch bytes) apart.
;
; Regs:  rsi = z, rax = Predictor, rdi = diff, rdx = src_stride,
;        rcx = pitch (3 * pitch for the last row), xmm2 = 0
; Clobb: rax, rcx, rdx, xmm0, xmm1, xmm2, flags
;
; Only XMM registers are used, so no MMX state is left behind and no
; emms is needed by the caller.  xmm6 and above are not touched.
;-----------------------------------------------------------------------
global sym(vp8_subtract_b_sse2_impl)
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                 ;diff
    mov         rax, arg(3)                 ;Predictor
    mov         rsi, arg(0)                 ;z
    movsxd      rdx, dword ptr arg(1)       ;src_stride
    movsxd      rcx, dword ptr arg(4)       ;pitch
    pxor        xmm2, xmm2                  ; zero, used to widen bytes to words

    SUBTRACT_B_ROW [rsi],       [rax],       [rdi]          ; row 0
    SUBTRACT_B_ROW [rsi+rdx],   [rax+rcx],   [rdi+rcx*2]    ; row 1
    SUBTRACT_B_ROW [rsi+rdx*2], [rax+rcx*2], [rdi+rcx*4]    ; row 2

    lea         rsi, [rsi+rdx*2]            ; rsi = z + 2 * src_stride
    lea         rcx, [rcx+rcx*2]            ; rcx = 3 * pitch

    SUBTRACT_B_ROW [rsi+rdx],   [rax+rcx],   [rdi+rcx*2]    ; row 3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;void vp8_subtract_mby_sse2(short *diff, unsigned char *src,
;                           unsigned char *pred, int stride)
;
; Computes diff = src - pred for 16 rows of 16 pixels.
;   src rows are stride bytes apart; pred rows are 16 bytes apart and
;   diff rows are 16 shorts (32 bytes) apart.
;   src, pred and diff are accessed with movdqa, so every row address
;   must be 16-byte aligned.
;
; Regs:  rsi = src, rax = pred, rdi = diff, rdx = stride,
;        rcx = row pairs remaining
; Clobb: rax, rcx, rdx, xmm0-xmm2, xmm4-xmm6, flags
;        (SAVE_XMM / RESTORE_XMM bracket the use of xmm6)
;-----------------------------------------------------------------------
global sym(vp8_subtract_mby_sse2)
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(1)                 ;src
    mov         rdi, arg(0)                 ;diff

    mov         rax, arg(2)                 ;pred
    movsxd      rdx, dword ptr arg(3)       ;stride

    mov         rcx, 8                      ; 16 rows, two per iteration

.submby_loop:
    ; first row of the pair
    movdqa      xmm0, XMMWORD PTR [rsi]     ; src
    movdqa      xmm1, XMMWORD PTR [rax]     ; pred

    SUBTRACT_WIDEN_16 xmm0, xmm1, xmm2

    movdqa      XMMWORD PTR [rdi], xmm0
    movdqa      XMMWORD PTR [rdi + 16], xmm2

    ; second row of the pair
    movdqa      xmm4, XMMWORD PTR [rsi + rdx]   ; src
    movdqa      xmm5, XMMWORD PTR [rax + 16]    ; pred

    SUBTRACT_WIDEN_16 xmm4, xmm5, xmm6

    movdqa      XMMWORD PTR [rdi + 32], xmm4
    movdqa      XMMWORD PTR [rdi + 48], xmm6

    add         rdi, 64                     ; two diff rows
    add         rax, 32                     ; two pred rows
    lea         rsi, [rsi+rdx*2]            ; two src rows

    sub         rcx, 1
    jnz         .submby_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc,
;                            unsigned char *vsrc, unsigned char *pred,
;                            int stride)
;
; Computes diff = src - pred for two planes of 8 rows x 8 pixels each.
;   U: source usrc, predictor at pred + 256, output at diff + 256 shorts
;   V: source vsrc, predictor at pred + 320, output at diff + 320 shorts
;   Source rows are stride bytes apart; pred rows are 8 bytes apart and
;   diff rows are 8 shorts apart.
;   pred and diff are accessed with movdqa and must be 16-byte aligned;
;   the source rows are read with movq.
;
; Regs:  rsi = current source plane, rax = pred, rdi = diff,
;        rdx = stride, rcx = 3 * stride
; Clobb: rax, rcx, rdx, xmm0, xmm1, xmm2, flags
;-----------------------------------------------------------------------
global sym(vp8_subtract_mbuv_sse2)
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(0)                 ;diff
    mov         rax, arg(3)                 ;pred
    mov         rsi, arg(1)                 ;z = usrc
    add         rdi, 256*2                  ;diff = diff + 256 (shorts)
    add         rax, 256                    ;Predictor = pred + 256
    movsxd      rdx, dword ptr arg(4)       ;stride
    lea         rcx, [rdx + rdx*2]          ; rcx = 3 * stride

    ;u
    SUBTRACT_UV_PLANE

    ;v
    mov         rsi, arg(2)                 ;z = vsrc
    add         rdi, 64*2                   ;diff = diff + 320 (shorts)
    add         rax, 64                     ;Predictor = pred + 320

    SUBTRACT_UV_PLANE

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
align 16
t80:
    times 16 db 0x80