;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
;                           short *qcoeff_ptr,short *dequant_ptr,
;                           short *scan_mask, short *round_ptr,
;                           short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes 16 16-bit coefficients, four words per MMX step.
;
; For every word lane i in 0..15 the code computes:
;   s          = coeff[i] >> 15                    (0 or -1, the sign mask)
;   a          = (coeff[i] ^ s) - s                (absolute value)
;   a          = (zbin[i] > a) ? 0 : a             (signed word compare)
;   q          = ((a + round[i]) * quant[i]) >> 16 (16-bit wrapping add,
;                                                   unsigned high multiply)
;   qcoeff[i]  = (q ^ s) - s                       (sign restored)
;   dqcoeff[i] = low 16 bits of qcoeff[i] * dequant[i]
;
; Afterwards qcoeff[] is read back; each non-zero lane contributes
; scan_mask[i] to a sum (pmaddwd with a 0/1 flag per lane). Only the low
; 16 bits of that sum are used.
;
; Arguments (each array is accessed as 16 shorts = 32 bytes):
;   coeff_ptr, zbin_ptr, round_ptr, quant_ptr, dequant_ptr, scan_mask : read
;   qcoeff_ptr, dqcoeff_ptr                                           : written
;
; Returns (rax): 0 when the 16-bit mask sum is zero, otherwise the index of
; its highest set bit plus one. The original comments call this value "eob".
;
; NOTE(review): presumably every scan_mask entry carries a single bit that
; identifies the coefficient's scan position - confirm against the caller.
; NOTE(review): pmulhuw is not part of the baseline MMX instruction set even
; though the symbol is suffixed _mmx - confirm the dispatch requirements.
; NOTE(review): no emms is issued here; presumably the caller clears the MMX
; state before using x87 code - confirm.
global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
sym(vp8_fast_quantize_b_impl_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    push rsi
    push rdi
    ; end prolog


    ; ---- coefficients 0..3 (bytes 0..7) ----
    mov             rsi,        arg(0) ;coeff_ptr
    movq            mm0,        [rsi]

    mov             rax,        arg(1) ;zbin_ptr
    movq            mm1,        [rax]

    movq            mm3,        mm0
    psraw           mm0,        15          ; mm0 = per-word sign mask

    pxor            mm3,        mm0
    psubw           mm3,        mm0         ; mm3 = abs(coeff)

    movq            mm2,        mm3
    pcmpgtw         mm1,        mm2         ; mm1 = all-ones where zbin > abs

    pandn           mm1,        mm2         ; keep abs only where zbin <= abs
    movq            mm3,        mm1

    mov             rdx,        arg(6) ;quant_ptr
    movq            mm1,        [rdx]

    mov             rcx,        arg(5) ;round_ptr
    movq            mm2,        [rcx]

    paddw           mm3,        mm2
    pmulhuw         mm3,        mm1         ; ((abs + round) * quant) >> 16

    pxor            mm3,        mm0
    psubw           mm3,        mm0         ; gain the sign back

    ; rdi keeps qcoeff_ptr for all four quantization steps below
    mov             rdi,        arg(2) ;qcoeff_ptr
    movq            [rdi],      mm3

    mov             rax,        arg(3) ;dequant_ptr
    movq            mm2,        [rax]

    pmullw          mm3,        mm2         ; low 16 bits of qcoeff * dequant
    mov             rax,        arg(7) ;dqcoeff_ptr

    movq            [rax],      mm3

    ; ---- coefficients 4..7 (bytes 8..15) ----
    movq            mm4,        [rsi+8]

    mov             rax,        arg(1) ;zbin_ptr
    movq            mm5,        [rax+8]

    movq            mm7,        mm4
    psraw           mm4,        15          ; mm4 = per-word sign mask

    pxor            mm7,        mm4
    psubw           mm7,        mm4         ; abs

    movq            mm6,        mm7
    pcmpgtw         mm5,        mm6

    pandn           mm5,        mm6
    movq            mm7,        mm5

    movq            mm5,        [rdx+8]     ; quant
    movq            mm6,        [rcx+8]     ; round

    paddw           mm7,        mm6
    pmulhuw         mm7,        mm5

    pxor            mm7,        mm4
    psubw           mm7,        mm4         ; gain the sign back

    movq            [rdi+8],    mm7

    mov             rax,        arg(3) ;dequant_ptr
    movq            mm6,        [rax+8]

    pmullw          mm7,        mm6
    mov             rax,        arg(7) ;dqcoeff_ptr

    movq            [rax+8],    mm7


    ; ---- coefficients 8..11 (bytes 16..23) ----
    movq            mm4,        [rsi+16]

    mov             rax,        arg(1) ;zbin_ptr
    movq            mm5,        [rax+16]

    movq            mm7,        mm4
    psraw           mm4,        15

    pxor            mm7,        mm4
    psubw           mm7,        mm4         ; abs

    movq            mm6,        mm7
    pcmpgtw         mm5,        mm6

    pandn           mm5,        mm6
    movq            mm7,        mm5

    movq            mm5,        [rdx+16]    ; quant
    movq            mm6,        [rcx+16]    ; round

    paddw           mm7,        mm6
    pmulhuw         mm7,        mm5

    pxor            mm7,        mm4
    psubw           mm7,        mm4         ; gain the sign back

    movq            [rdi+16],   mm7

    mov             rax,        arg(3) ;dequant_ptr
    movq            mm6,        [rax+16]

    pmullw          mm7,        mm6
    mov             rax,        arg(7) ;dqcoeff_ptr

    movq            [rax+16],   mm7


    ; ---- coefficients 12..15 (bytes 24..31) ----
    movq            mm4,        [rsi+24]

    mov             rax,        arg(1) ;zbin_ptr
    movq            mm5,        [rax+24]

    movq            mm7,        mm4
    psraw           mm4,        15

    pxor            mm7,        mm4
    psubw           mm7,        mm4         ; abs

    movq            mm6,        mm7
    pcmpgtw         mm5,        mm6

    pandn           mm5,        mm6
    movq            mm7,        mm5

    movq            mm5,        [rdx+24]    ; quant
    movq            mm6,        [rcx+24]    ; round

    paddw           mm7,        mm6
    pmulhuw         mm7,        mm5

    pxor            mm7,        mm4
    psubw           mm7,        mm4         ; gain the sign back

    movq            [rdi+24],   mm7

    mov             rax,        arg(3) ;dequant_ptr
    movq            mm6,        [rax+24]

    pmullw          mm7,        mm6
    mov             rax,        arg(7) ;dqcoeff_ptr

    movq            [rax+24],   mm7



    ; ---- accumulate scan_mask over the non-zero quantized coefficients ----
    mov             rdi,        arg(4) ;scan_mask
    mov             rsi,        arg(2) ;qcoeff_ptr

    pxor            mm7,        mm7         ; mm7 = 0, compare reference

    movq            mm0,        [rsi]
    movq            mm1,        [rsi+8]

    movq            mm2,        [rdi]
    movq            mm3,        [rdi+8]

    pcmpeqw         mm0,        mm7         ; all-ones where qcoeff == 0
    pcmpeqw         mm1,        mm7

    pcmpeqw         mm6,        mm6         ; mm6 = all-ones (reused below)
    pxor            mm0,        mm6         ; invert: all-ones where != 0

    pxor            mm1,        mm6
    psrlw           mm0,        15          ; 1 where qcoeff != 0, else 0

    psrlw           mm1,        15
    pmaddwd         mm0,        mm2         ; flag * scan_mask, pairs summed

    pmaddwd         mm1,        mm3
    movq            mm5,        mm0         ; mm5 = running dword sums

    paddd           mm5,        mm1

    movq            mm0,        [rsi+16]
    movq            mm1,        [rsi+24]

    movq            mm2,        [rdi+16]
    movq            mm3,        [rdi+24]

    pcmpeqw         mm0,        mm7
    pcmpeqw         mm1,        mm7

    pxor            mm0,        mm6         ; mm6 is still all-ones here

    pxor            mm1,        mm6
    psrlw           mm0,        15

    psrlw           mm1,        15
    pmaddwd         mm0,        mm2

    pmaddwd         mm1,        mm3
    paddd           mm5,        mm0

    paddd           mm5,        mm1
    movq            mm0,        mm5

    psrlq           mm5,        32
    paddd           mm0,        mm5         ; low dword = sum of both halves

    ; eob adjustment begins here
    ; Branch-free: rax = (mask == 0) ? 0 : bsr(mask) + 1.
    movq            rcx,        mm0
    and             rcx,        0xffff      ; only the low 16 bits are used

    xor             rdx,        rdx
    sub             rdx,        rcx         ; rdx=-rcx

    bsr             rax,        rcx         ; result unspecified when rcx == 0
    inc             rax

    sar             rdx,        31          ; all-ones if mask != 0, else 0
    and             rax,        rdx         ; forces 0 when the mask was 0
    ; The sequence above replaces the old mmx mixed assembly/C. The
    ; following is kept as reference
    ;     movq            rcx,        mm0
    ;     bsr             rax,        rcx
    ;
    ;     mov             eob,        rax
    ;     mov             eee,        rcx
    ;
    ;if(eee==0)
    ;{
    ;    eob=-1;
    ;}
    ;else if(eee<0)
    ;{
    ;    eob=15;
    ;}
    ;d->eob = eob+1;

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop rbp
    ret