;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; Sum-of-absolute-differences (SAD) kernels for MMX.
;
; Shared conventions for every function in this file:
;   * Arguments are read through arg(n); sym(), arg(), PRIVATE,
;     SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not defined here
;     (presumably by the include above -- confirm).
;   * rsi = current source row, rdi = current reference row,
;     rax = src_stride, rdx = ref_stride (both sign-extended from 32 bits).
;   * |a - b| per unsigned byte is built as
;     (a -sat b) | (b -sat a): one of the two saturating subtractions is
;     always zero, the other is the absolute difference.
;   * Bytes are widened to words by interleaving with an all-zero register,
;     then summed in four 16-bit lanes. The largest possible total
;     (16x16 block, every difference 255) is 65280, so the 16-bit adds
;     never wrap.
;   * The result is returned with "movq rax, mm?": the low 32 bits hold the
;     SAD; the upper bits still carry a partial sum and are not cleared.
;   * No emms is executed in this file.
;     NOTE(review): presumably the caller resets MMX state -- confirm.

global sym(vp8_sad16x16_mmx) PRIVATE
global sym(vp8_sad8x16_mmx) PRIVATE
global sym(vp8_sad8x8_mmx) PRIVATE
global sym(vp8_sad4x4_mmx) PRIVATE
global sym(vp8_sad16x8_mmx) PRIVATE

;-----------------------------------------------------------------------
;unsigned int vp8_sad16x16_mmx(
;    unsigned char *src_ptr,
;    int src_stride,
;    unsigned char *ref_ptr,
;    int ref_stride)
;
; SAD over a block 16 bytes wide and 16 rows tall.
; The loop ends when rsi reaches src_ptr + 16*src_stride.
; Uses:  rax, rcx, rdx, mm0-mm7, flags (rsi/rdi saved and restored)
;-----------------------------------------------------------------------
sym(vp8_sad16x16_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ;src_ptr
    mov         rdi,        arg(2)              ;ref_ptr

    movsxd      rax,        dword ptr arg(1)    ;src_stride
    movsxd      rdx,        dword ptr arg(3)    ;ref_stride

    lea         rcx,        [rsi+rax*8]

    lea         rcx,        [rcx+rax*8]         ; rcx = src_ptr + 16*src_stride (end marker)
    pxor        mm7,        mm7                 ; mm7 = 4 x 16-bit running sums

    pxor        mm6,        mm6                 ; mm6 = 0, used for zero-extension

.x16x16sad_mmx_loop:

    movq        mm0,        QWORD PTR [rsi]     ; src bytes 0..7
    movq        mm2,        QWORD PTR [rsi+8]   ; src bytes 8..15

    movq        mm1,        QWORD PTR [rdi]     ; ref bytes 0..7
    movq        mm3,        QWORD PTR [rdi+8]   ; ref bytes 8..15

    movq        mm4,        mm0                 ; keep src copies for the reverse subtract
    movq        mm5,        mm2

    psubusb     mm0,        mm1                 ; max(src - ref, 0)
    psubusb     mm1,        mm4                 ; max(ref - src, 0)

    psubusb     mm2,        mm3
    psubusb     mm3,        mm5

    por         mm0,        mm1                 ; |src - ref|, bytes 0..7
    por         mm2,        mm3                 ; |src - ref|, bytes 8..15

    movq        mm1,        mm0
    movq        mm3,        mm2

    punpcklbw   mm0,        mm6                 ; low 4 bytes  -> words
    punpcklbw   mm2,        mm6

    punpckhbw   mm1,        mm6                 ; high 4 bytes -> words
    punpckhbw   mm3,        mm6

    paddw       mm0,        mm2
    paddw       mm1,        mm3


    lea         rsi,        [rsi+rax]           ; next source row
    add         rdi,        rdx                 ; next reference row

    paddw       mm7,        mm0
    paddw       mm7,        mm1

    cmp         rsi,        rcx
    jne         .x16x16sad_mmx_loop


    ; Horizontal reduction of the four word lanes in mm7.
    movq        mm0,        mm7

    punpcklwd   mm0,        mm6                 ; lanes 0,1 -> dwords
    punpckhwd   mm7,        mm6                 ; lanes 2,3 -> dwords

    paddw       mm0,        mm7
    movq        mm7,        mm0


    psrlq       mm0,        32                  ; bring the upper dword down
    paddw       mm7,        mm0                 ; low word of mm7 = total SAD

    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad8x16_mmx(
;    unsigned char *src_ptr,
;    int src_stride,
;    unsigned char *ref_ptr,
;    int ref_stride)
;
; SAD over a block 8 bytes wide and 16 rows tall.
; The loop ends when rsi reaches src_ptr + 16*src_stride.
; Uses:  rax, rcx, rdx, mm0-mm2, mm6, mm7, flags (rsi/rdi saved and restored)
;-----------------------------------------------------------------------
sym(vp8_sad8x16_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ;src_ptr
    mov         rdi,        arg(2)              ;ref_ptr

    movsxd      rax,        dword ptr arg(1)    ;src_stride
    movsxd      rdx,        dword ptr arg(3)    ;ref_stride

    lea         rcx,        [rsi+rax*8]

    lea         rcx,        [rcx+rax*8]         ; rcx = src_ptr + 16*src_stride (end marker)
    pxor        mm7,        mm7                 ; mm7 = 4 x 16-bit running sums

    pxor        mm6,        mm6                 ; mm6 = 0, used for zero-extension

.x8x16sad_mmx_loop:

    movq        mm0,        QWORD PTR [rsi]     ; 8 source bytes
    movq        mm1,        QWORD PTR [rdi]     ; 8 reference bytes

    movq        mm2,        mm0                 ; keep src copy for the reverse subtract
    psubusb     mm0,        mm1                 ; max(src - ref, 0)

    psubusb     mm1,        mm2                 ; max(ref - src, 0)
    por         mm0,        mm1                 ; |src - ref| per byte

    movq        mm2,        mm0
    punpcklbw   mm0,        mm6                 ; low 4 bytes  -> words

    punpckhbw   mm2,        mm6                 ; high 4 bytes -> words
    lea         rsi,        [rsi+rax]           ; next source row

    add         rdi,        rdx                 ; next reference row
    paddw       mm7,        mm0

    paddw       mm7,        mm2
    cmp         rsi,        rcx

    jne         .x8x16sad_mmx_loop

    ; Horizontal reduction of the four word lanes in mm7.
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6                 ; lanes 0,1 -> dwords

    punpckhwd   mm7,        mm6                 ; lanes 2,3 -> dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32                  ; bring the upper dword down

    paddw       mm7,        mm0                 ; low word of mm7 = total SAD
    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad8x8_mmx(
;    unsigned char *src_ptr,
;    int src_stride,
;    unsigned char *ref_ptr,
;    int ref_stride)
;
; SAD over a block 8 bytes wide and 8 rows tall.
; The loop ends when rsi reaches src_ptr + 8*src_stride.
; Uses:  rax, rcx, rdx, mm0-mm2, mm6, mm7, flags (rsi/rdi saved and restored)
;-----------------------------------------------------------------------
sym(vp8_sad8x8_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ;src_ptr
    mov         rdi,        arg(2)              ;ref_ptr

    movsxd      rax,        dword ptr arg(1)    ;src_stride
    movsxd      rdx,        dword ptr arg(3)    ;ref_stride

    lea         rcx,        [rsi+rax*8]         ; rcx = src_ptr + 8*src_stride (end marker)
    pxor        mm7,        mm7                 ; mm7 = 4 x 16-bit running sums

    pxor        mm6,        mm6                 ; mm6 = 0, used for zero-extension

.x8x8sad_mmx_loop:

    movq        mm0,        QWORD PTR [rsi]     ; 8 source bytes
    movq        mm1,        QWORD PTR [rdi]     ; 8 reference bytes

    movq        mm2,        mm0                 ; keep src copy for the reverse subtract
    psubusb     mm0,        mm1                 ; max(src - ref, 0)

    psubusb     mm1,        mm2                 ; max(ref - src, 0)
    por         mm0,        mm1                 ; |src - ref| per byte

    movq        mm2,        mm0
    punpcklbw   mm0,        mm6                 ; low 4 bytes  -> words

    punpckhbw   mm2,        mm6                 ; high 4 bytes -> words
    paddw       mm0,        mm2

    lea         rsi,        [rsi+rax]           ; next source row
    add         rdi,        rdx                 ; next reference row

    paddw       mm7,        mm0
    cmp         rsi,        rcx

    jne         .x8x8sad_mmx_loop

    ; Horizontal reduction of the four word lanes in mm7.
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6                 ; lanes 0,1 -> dwords

    punpckhwd   mm7,        mm6                 ; lanes 2,3 -> dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32                  ; bring the upper dword down

    paddw       mm7,        mm0                 ; low word of mm7 = total SAD
    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad4x4_mmx(
;    unsigned char *src_ptr,
;    int src_stride,
;    unsigned char *ref_ptr,
;    int ref_stride)
;
; SAD over a block 4 bytes wide and 4 rows tall, fully unrolled.
; Two rows at a time are packed into one 8-byte register (their bytes
; interleaved), so each abs-diff pass covers two rows.
; Uses:  rax, rdx, mm0-mm7 (rsi/rdi saved and restored)
;-----------------------------------------------------------------------
sym(vp8_sad4x4_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ;src_ptr
    mov         rdi,        arg(2)              ;ref_ptr

    movsxd      rax,        dword ptr arg(1)    ;src_stride
    movsxd      rdx,        dword ptr arg(3)    ;ref_stride

    ; rows 0 and 1
    movd        mm0,        DWORD PTR [rsi]
    movd        mm1,        DWORD PTR [rdi]

    movd        mm2,        DWORD PTR [rsi+rax]
    movd        mm3,        DWORD PTR [rdi+rdx]

    punpcklbw   mm0,        mm2                 ; src rows 0/1 interleaved (8 bytes)
    punpcklbw   mm1,        mm3                 ; ref rows 0/1, same byte order

    movq        mm2,        mm0                 ; keep src copy for the reverse subtract
    psubusb     mm0,        mm1                 ; max(src - ref, 0)

    psubusb     mm1,        mm2                 ; max(ref - src, 0)
    por         mm0,        mm1                 ; |src - ref| per byte

    movq        mm2,        mm0
    pxor        mm3,        mm3                 ; mm3 = 0, used for zero-extension

    punpcklbw   mm0,        mm3                 ; low 4 bytes  -> words
    punpckhbw   mm2,        mm3                 ; high 4 bytes -> words

    paddw       mm0,        mm2                 ; mm0 = word sums for rows 0/1

    lea         rsi,        [rsi+rax*2]         ; advance both pointers by two rows
    lea         rdi,        [rdi+rdx*2]

    ; rows 2 and 3
    movd        mm4,        DWORD PTR [rsi]
    movd        mm5,        DWORD PTR [rdi]

    movd        mm6,        DWORD PTR [rsi+rax]
    movd        mm7,        DWORD PTR [rdi+rdx]

    punpcklbw   mm4,        mm6                 ; src rows 2/3 interleaved
    punpcklbw   mm5,        mm7                 ; ref rows 2/3, same byte order

    movq        mm6,        mm4                 ; keep src copy for the reverse subtract
    psubusb     mm4,        mm5

    psubusb     mm5,        mm6
    por         mm4,        mm5                 ; |src - ref| per byte

    movq        mm5,        mm4
    punpcklbw   mm4,        mm3

    punpckhbw   mm5,        mm3
    paddw       mm4,        mm5                 ; mm4 = word sums for rows 2/3

    paddw       mm0,        mm4                 ; combine both row pairs
    movq        mm1,        mm0

    ; Horizontal reduction of the four word lanes in mm0.
    punpcklwd   mm0,        mm3                 ; lanes 0,1 -> dwords
    punpckhwd   mm1,        mm3                 ; lanes 2,3 -> dwords

    paddw       mm0,        mm1
    movq        mm1,        mm0

    psrlq       mm0,        32                  ; bring the upper dword down
    paddw       mm0,        mm1                 ; low word of mm0 = total SAD

    movq        rax,        mm0

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp8_sad16x8_mmx(
;    unsigned char *src_ptr,
;    int src_stride,
;    unsigned char *ref_ptr,
;    int ref_stride)
;
; SAD over a block 16 bytes wide and 8 rows tall.
; The loop ends when rsi reaches src_ptr + 8*src_stride.
; Uses:  rax, rcx, rdx, mm0-mm7, flags (rsi/rdi saved and restored)
;-----------------------------------------------------------------------
sym(vp8_sad16x8_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ;src_ptr
    mov         rdi,        arg(2)              ;ref_ptr

    movsxd      rax,        dword ptr arg(1)    ;src_stride
    movsxd      rdx,        dword ptr arg(3)    ;ref_stride

    lea         rcx,        [rsi+rax*8]         ; rcx = src_ptr + 8*src_stride (end marker)
    pxor        mm7,        mm7                 ; mm7 = 4 x 16-bit running sums

    pxor        mm6,        mm6                 ; mm6 = 0, used for zero-extension

.x16x8sad_mmx_loop:

    movq        mm0,        QWORD PTR [rsi]     ; src bytes 0..7
    movq        mm1,        QWORD PTR [rdi]     ; ref bytes 0..7

    movq        mm2,        QWORD PTR [rsi+8]   ; src bytes 8..15
    movq        mm3,        QWORD PTR [rdi+8]   ; ref bytes 8..15

    movq        mm4,        mm0                 ; keep src copies for the reverse subtract
    movq        mm5,        mm2

    psubusb     mm0,        mm1                 ; max(src - ref, 0)
    psubusb     mm1,        mm4                 ; max(ref - src, 0)

    psubusb     mm2,        mm3
    psubusb     mm3,        mm5

    por         mm0,        mm1                 ; |src - ref|, bytes 0..7
    por         mm2,        mm3                 ; |src - ref|, bytes 8..15

    movq        mm1,        mm0
    movq        mm3,        mm2

    punpcklbw   mm0,        mm6                 ; low 4 bytes  -> words
    punpckhbw   mm1,        mm6                 ; high 4 bytes -> words

    punpcklbw   mm2,        mm6
    punpckhbw   mm3,        mm6


    paddw       mm0,        mm2
    paddw       mm1,        mm3

    paddw       mm0,        mm1                 ; mm0 = this row's four word sums
    lea         rsi,        [rsi+rax]           ; next source row

    add         rdi,        rdx                 ; next reference row
    paddw       mm7,        mm0

    cmp         rsi,        rcx
    jne         .x16x8sad_mmx_loop

    ; Horizontal reduction of the four word lanes in mm7.
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6                 ; lanes 0,1 -> dwords

    punpckhwd   mm7,        mm6                 ; lanes 2,3 -> dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32                  ; bring the upper dword down

    paddw       mm7,        mm0                 ; low word of mm7 = total SAD
    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret