;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

;-----------------------------------------------------------------------
; MMX sum-of-absolute-differences (SAD) kernels.
;
; Syntax: NASM/yasm, Intel operand order.
;
; Every routine takes (src_ptr, src_stride, ref_ptr, ref_stride) and
; returns the sum over the block of |src[i] - ref[i]| for 8-bit samples.
;
; Common technique:
;   * |a - b| per byte = (a -sat b) | (b -sat a), using two unsigned
;     saturating subtracts (psubusb) and an OR: one of them is always 0.
;   * Bytes are widened to 16-bit words by unpacking against a zero
;     register and accumulated with paddw.
;   * The four word lanes are finally folded into one value that is
;     moved to rax.
;
; Notes for maintainers:
;   * sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and PRIVATE are
;     macros from the included x86_abi_support.asm (not visible here);
;     they are presumed to hide the calling-convention differences --
;     confirm against that file.
;   * Only the low 32 bits of rax carry the result. After the final
;     fold the upper half of the MMX register still holds a partial
;     sum, and "movq rax, mm" copies it along. The declared return
;     type is unsigned int, so callers are expected to read eax only.
;   * No emms is issued in this file; the MMX/x87 state is left as-is
;     on return. Callers presumably clear it elsewhere -- verify.
;   * rsi and rdi are saved and restored by every routine; rax, rcx,
;     rdx and mm0-mm7 are overwritten.
;-----------------------------------------------------------------------

%include "vpx_ports/x86_abi_support.asm"

global sym(vp9_sad16x16_mmx) PRIVATE
global sym(vp9_sad8x16_mmx) PRIVATE
global sym(vp9_sad8x8_mmx) PRIVATE
global sym(vp9_sad4x4_mmx) PRIVATE
global sym(vp9_sad16x8_mmx) PRIVATE

;-----------------------------------------------------------------------
;unsigned int vp9_sad16x16_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; SAD of a 16-wide, 16-row block.
;
; Register roles inside the loop:
;   rsi = current src row          rdi = current ref row
;   rax = src_stride (sign-ext.)   rdx = ref_stride (sign-ext.)
;   rcx = src_ptr + 16*src_stride  (loop ends when rsi reaches it)
;   mm6 = 0 (unpack partner)       mm7 = 4 x 16-bit running sums
;
; Overflow: each word lane of mm7 gathers 4 bytes per row for 16 rows,
; i.e. at most 16*4*255 = 16320; the final total is at most
; 256*255 = 65280, which still fits the 16-bit adds used in the fold.
;-----------------------------------------------------------------------
sym(vp9_sad16x16_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    ; rcx = rsi + 16*src_stride, built as two *8 steps because the
    ; addressing-mode scale factor tops out at 8.
    lea         rcx,        [rsi+rax*8]

    lea         rcx,        [rcx+rax*8]
    pxor        mm7,        mm7             ; running sums = 0

    pxor        mm6,        mm6             ; constant zero

.x16x16sad_mmx_loop:

    ; Load one 16-byte row from each block as two 8-byte halves.
    movq        mm0,        QWORD PTR [rsi]
    movq        mm2,        QWORD PTR [rsi+8]

    movq        mm1,        QWORD PTR [rdi]
    movq        mm3,        QWORD PTR [rdi+8]

    ; Keep copies of the src halves for the reverse subtraction.
    movq        mm4,        mm0
    movq        mm5,        mm2

    ; mm0/mm2 = max(src-ref, 0), mm1/mm3 = max(ref-src, 0)
    psubusb     mm0,        mm1
    psubusb     mm1,        mm4

    psubusb     mm2,        mm3
    psubusb     mm3,        mm5

    ; mm0 / mm2 = |src - ref| for the left / right 8 bytes
    por         mm0,        mm1
    por         mm2,        mm3

    movq        mm1,        mm0
    movq        mm3,        mm2

    ; Widen bytes to words: low 4 bytes in mm0/mm2, high 4 in mm1/mm3.
    punpcklbw   mm0,        mm6
    punpcklbw   mm2,        mm6

    punpckhbw   mm1,        mm6
    punpckhbw   mm3,        mm6

    paddw       mm0,        mm2
    paddw       mm1,        mm3


    ; Advance both row pointers by their own strides.
    lea         rsi,        [rsi+rax]
    add         rdi,        rdx

    paddw       mm7,        mm0
    paddw       mm7,        mm1

    cmp         rsi,        rcx
    jne         .x16x16sad_mmx_loop


    ; Fold the four word lanes of mm7 into a single sum.
    movq        mm0,        mm7

    punpcklwd   mm0,        mm6             ; lanes 0,1 -> two dwords
    punpckhwd   mm7,        mm6             ; lanes 2,3 -> two dwords

    paddw       mm0,        mm7
    movq        mm7,        mm0


    psrlq       mm0,        32              ; bring high dword down
    paddw       mm7,        mm0             ; low dword = total

    movq        rax,        mm7             ; result in eax (see file notes)

    ; begin epilog
    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp9_sad8x16_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; SAD of an 8-wide, 16-row block.
;
; Register roles inside the loop:
;   rsi = current src row          rdi = current ref row
;   rax = src_stride (sign-ext.)   rdx = ref_stride (sign-ext.)
;   rcx = src_ptr + 16*src_stride  (loop ends when rsi reaches it)
;   mm6 = 0 (unpack partner)       mm7 = 4 x 16-bit running sums
;
; Overflow: each word lane gathers 2 bytes per row for 16 rows
; (max 8160); the total is at most 128*255 = 32640.
;-----------------------------------------------------------------------
sym(vp9_sad8x16_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    ; rcx = rsi + 16*src_stride (two *8 steps)
    lea         rcx,        [rsi+rax*8]

    lea         rcx,        [rcx+rax*8]
    pxor        mm7,        mm7             ; running sums = 0

    pxor        mm6,        mm6             ; constant zero

.x8x16sad_mmx_loop:

    movq        mm0,        QWORD PTR [rsi] ; 8 src bytes
    movq        mm1,        QWORD PTR [rdi] ; 8 ref bytes

    movq        mm2,        mm0             ; src copy for ref-src
    psubusb     mm0,        mm1             ; max(src-ref, 0)

    psubusb     mm1,        mm2             ; max(ref-src, 0)
    por         mm0,        mm1             ; |src - ref|

    movq        mm2,        mm0
    punpcklbw   mm0,        mm6             ; low 4 bytes -> words

    punpckhbw   mm2,        mm6             ; high 4 bytes -> words
    lea         rsi,        [rsi+rax]       ; next src row

    add         rdi,        rdx             ; next ref row
    paddw       mm7,        mm0

    paddw       mm7,        mm2
    cmp         rsi,        rcx

    jne         .x8x16sad_mmx_loop

    ; Fold the four word lanes of mm7 into a single sum.
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6             ; lanes 0,1 -> two dwords

    punpckhwd   mm7,        mm6             ; lanes 2,3 -> two dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32              ; bring high dword down

    paddw       mm7,        mm0             ; low dword = total
    movq        rax,        mm7             ; result in eax (see file notes)

    ; begin epilog
    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp9_sad8x8_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; SAD of an 8-wide, 8-row block.
;
; Register roles inside the loop:
;   rsi = current src row          rdi = current ref row
;   rax = src_stride (sign-ext.)   rdx = ref_stride (sign-ext.)
;   rcx = src_ptr + 8*src_stride   (loop ends when rsi reaches it)
;   mm6 = 0 (unpack partner)       mm7 = 4 x 16-bit running sums
;
; Overflow: the total is at most 64*255 = 16320.
;-----------------------------------------------------------------------
sym(vp9_sad8x8_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    lea         rcx,        [rsi+rax*8]     ; end sentinel: 8 rows
    pxor        mm7,        mm7             ; running sums = 0

    pxor        mm6,        mm6             ; constant zero

.x8x8sad_mmx_loop:

    movq        mm0,        QWORD PTR [rsi] ; 8 src bytes
    movq        mm1,        QWORD PTR [rdi] ; 8 ref bytes

    movq        mm2,        mm0             ; src copy for ref-src
    psubusb     mm0,        mm1             ; max(src-ref, 0)

    psubusb     mm1,        mm2             ; max(ref-src, 0)
    por         mm0,        mm1             ; |src - ref|

    movq        mm2,        mm0
    punpcklbw   mm0,        mm6             ; low 4 bytes -> words

    punpckhbw   mm2,        mm6             ; high 4 bytes -> words
    paddw       mm0,        mm2             ; row partial sums

    lea         rsi,        [rsi+rax]       ; next src row
    add         rdi,        rdx             ; next ref row

    paddw       mm7,        mm0
    cmp         rsi,        rcx

    jne         .x8x8sad_mmx_loop

    ; Fold the four word lanes of mm7 into a single sum.
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6             ; lanes 0,1 -> two dwords

    punpckhwd   mm7,        mm6             ; lanes 2,3 -> two dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32              ; bring high dword down

    paddw       mm7,        mm0             ; low dword = total
    movq        rax,        mm7             ; result in eax (see file notes)

    ; begin epilog
    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp9_sad4x4_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; SAD of a 4-wide, 4-row block, fully unrolled (no loop).
;
; Rows are processed two at a time: the 4 bytes of row N and row N+1
; are interleaved into one 8-byte register. src and ref are interleaved
; the same way, so byte i of one still lines up with byte i of the
; other and the per-byte absolute difference is unaffected.
;
; Register roles:
;   rsi / rdi = src / ref row pointers (advanced by two rows mid-way)
;   rax / rdx = src_stride / ref_stride (sign-extended)
;   mm3 = 0 (unpack partner once cleared)   mm0 = running sums
;
; Overflow: the total is at most 16*255 = 4080.
;-----------------------------------------------------------------------
sym(vp9_sad4x4_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    ; --- rows 0 and 1 ---
    movd        mm0,        DWORD PTR [rsi]
    movd        mm1,        DWORD PTR [rdi]

    movd        mm2,        DWORD PTR [rsi+rax]
    movd        mm3,        DWORD PTR [rdi+rdx]

    punpcklbw   mm0,        mm2             ; src rows 0/1 interleaved
    punpcklbw   mm1,        mm3             ; ref rows 0/1 interleaved

    movq        mm2,        mm0             ; src copy for ref-src
    psubusb     mm0,        mm1             ; max(src-ref, 0)

    psubusb     mm1,        mm2             ; max(ref-src, 0)
    por         mm0,        mm1             ; |src - ref|

    movq        mm2,        mm0
    pxor        mm3,        mm3             ; mm3 = 0 from here on

    punpcklbw   mm0,        mm3             ; low 4 bytes -> words
    punpckhbw   mm2,        mm3             ; high 4 bytes -> words

    paddw       mm0,        mm2             ; sums for rows 0/1

    ; --- rows 2 and 3 ---
    lea         rsi,        [rsi+rax*2]
    lea         rdi,        [rdi+rdx*2]

    movd        mm4,        DWORD PTR [rsi]
    movd        mm5,        DWORD PTR [rdi]

    movd        mm6,        DWORD PTR [rsi+rax]
    movd        mm7,        DWORD PTR [rdi+rdx]

    punpcklbw   mm4,        mm6             ; src rows 2/3 interleaved
    punpcklbw   mm5,        mm7             ; ref rows 2/3 interleaved

    movq        mm6,        mm4             ; src copy for ref-src
    psubusb     mm4,        mm5             ; max(src-ref, 0)

    psubusb     mm5,        mm6             ; max(ref-src, 0)
    por         mm4,        mm5             ; |src - ref|

    movq        mm5,        mm4
    punpcklbw   mm4,        mm3             ; low 4 bytes -> words

    punpckhbw   mm5,        mm3             ; high 4 bytes -> words
    paddw       mm4,        mm5             ; sums for rows 2/3

    ; Combine both row pairs, then fold the four word lanes.
    paddw       mm0,        mm4
    movq        mm1,        mm0

    punpcklwd   mm0,        mm3             ; lanes 0,1 -> two dwords
    punpckhwd   mm1,        mm3             ; lanes 2,3 -> two dwords

    paddw       mm0,        mm1
    movq        mm1,        mm0

    psrlq       mm0,        32              ; bring high dword down
    paddw       mm0,        mm1             ; low dword = total

    movq        rax,        mm0             ; result in eax (see file notes)

    ; begin epilog
    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp9_sad16x8_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; SAD of a 16-wide, 8-row block.
;
; Register roles inside the loop:
;   rsi = current src row          rdi = current ref row
;   rax = src_stride (sign-ext.)   rdx = ref_stride (sign-ext.)
;   rcx = src_ptr + 8*src_stride   (loop ends when rsi reaches it)
;   mm6 = 0 (unpack partner)       mm7 = 4 x 16-bit running sums
;
; Overflow: each word lane gathers 4 bytes per row for 8 rows
; (max 8160); the total is at most 128*255 = 32640.
;-----------------------------------------------------------------------
sym(vp9_sad16x8_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    lea         rcx,        [rsi+rax*8]     ; end sentinel: 8 rows
    pxor        mm7,        mm7             ; running sums = 0

    pxor        mm6,        mm6             ; constant zero

.x16x8sad_mmx_loop:

    ; Load one 16-byte row from each block as two 8-byte halves.
    movq        mm0,        [rsi]
    movq        mm1,        [rdi]

    movq        mm2,        [rsi+8]
    movq        mm3,        [rdi+8]

    ; Keep copies of the src halves for the reverse subtraction.
    movq        mm4,        mm0
    movq        mm5,        mm2

    ; mm0/mm2 = max(src-ref, 0), mm1/mm3 = max(ref-src, 0)
    psubusb     mm0,        mm1
    psubusb     mm1,        mm4

    psubusb     mm2,        mm3
    psubusb     mm3,        mm5

    ; mm0 / mm2 = |src - ref| for the left / right 8 bytes
    por         mm0,        mm1
    por         mm2,        mm3

    movq        mm1,        mm0
    movq        mm3,        mm2

    ; Widen bytes to words: low 4 bytes in mm0/mm2, high 4 in mm1/mm3.
    punpcklbw   mm0,        mm6
    punpckhbw   mm1,        mm6

    punpcklbw   mm2,        mm6
    punpckhbw   mm3,        mm6


    paddw       mm0,        mm2
    paddw       mm1,        mm3

    paddw       mm0,        mm1             ; row partial sums
    lea         rsi,        [rsi+rax]       ; next src row

    add         rdi,        rdx             ; next ref row
    paddw       mm7,        mm0

    cmp         rsi,        rcx
    jne         .x16x8sad_mmx_loop

    ; Fold the four word lanes of mm7 into a single sum.
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6             ; lanes 0,1 -> two dwords

    punpckhwd   mm7,        mm6             ; lanes 2,3 -> two dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32              ; bring high dword down

    paddw       mm7,        mm0             ; low dword = total
    movq        rax,        mm7             ; result in eax (see file notes)

    ; begin epilog
    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    UNSHADOW_ARGS
    pop         rbp
    ret