;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


;-----------------------------------------------------------------------
;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
;                                   short *qcoeff_ptr, short *dequant_ptr,
;                                   short *round_ptr,
;                                   short *quant_ptr, short *dqcoeff_ptr);
;
; Syntax: Intel operand order (NASM/YASM); requires SSSE3 (pabsw, pshufb).
;
; Quantizes one block of 16 signed 16-bit coefficients.  Per element:
;     sz      = z >> 15                        (0 or -1, sign of z)
;     x       = abs(z)
;     y       = ((x + round) * quant) >> 16    (pmulhw: signed high word)
;     qcoeff  = (y ^ sz) - sz                  (re-apply the sign of z)
;     dqcoeff = qcoeff * dequant               (pmullw: low 16 bits)
;
; In:    arg(0) coeff_ptr    16 words, read
;        arg(1) qcoeff_ptr   16 words, written
;        arg(2) dequant_ptr  16 words, read
;        arg(3) round_ptr    16 words, read
;        arg(4) quant_ptr    16 words, read
;        arg(5) dqcoeff_ptr  16 words, written
;        Every pointer is accessed with movdqa or as a legacy-SSE memory
;        operand, so each one must be 16-byte aligned.
; Out:   eax = 1 + index (in zz_shuf order) of the last non-zero qcoeff,
;              or 0 when every qcoeff is zero.
; Clobb: rax, rcx, rdx, xmm0-xmm5, flags.  rsi and rdi are saved and
;        restored here.
; NOTE(review): arg(), sym(), GLOBAL(), SHADOW_ARGS_TO_STACK, GET_GOT,
;        RESTORE_GOT, UNSHADOW_ARGS and SECTION_RODATA are macros from
;        x86_abi_support.asm, which is not visible in this file; any
;        additional registers they touch (e.g. rbx via GET_GOT) should be
;        confirmed there.
;-----------------------------------------------------------------------
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx,            arg(0)          ;coeff_ptr
    mov         rdi,            arg(3)          ;round_ptr
    mov         rsi,            arg(4)          ;quant_ptr

    movdqa      xmm0,           [rdx]           ;z, coefficients 0..7
    movdqa      xmm4,           [rdx + 16]      ;z, coefficients 8..15

    movdqa      xmm2,           [rdi]           ;round lo
    movdqa      xmm3,           [rdi + 16]      ;round hi

    movdqa      xmm1,           xmm0            ;keep a copy of z for abs()
    movdqa      xmm5,           xmm4

    psraw       xmm0,           15              ;sign of z (aka sz)
    psraw       xmm4,           15              ;sign of z (aka sz)

    pabsw       xmm1,           xmm1            ;x = abs(z)
    pabsw       xmm5,           xmm5

    paddw       xmm1,           xmm2            ;x + round
    paddw       xmm5,           xmm3

    pmulhw      xmm1,           [rsi]           ;y = ((x + round) * quant) >> 16
    pmulhw      xmm5,           [rsi + 16]

    ; round_ptr / quant_ptr are no longer needed: reuse rdi and rsi for
    ; the two output pointers.
    mov         rdi,            arg(1)          ;qcoeff_ptr
    mov         rcx,            arg(2)          ;dequant_ptr
    mov         rsi,            arg(5)          ;dqcoeff_ptr

    pxor        xmm1,           xmm0            ;(y ^ sz) - sz gives y the
    pxor        xmm5,           xmm4            ;sign of the original z
    psubw       xmm1,           xmm0
    psubw       xmm5,           xmm4

    movdqa      [rdi],          xmm1            ;store qcoeff
    movdqa      [rdi + 16],     xmm5            ;store qcoeff

    movdqa      xmm2,           [rcx]           ;dequant lo
    movdqa      xmm3,           [rcx + 16]      ;dequant hi

    pxor        xmm4,           xmm4            ;all-zero compare operand
    pmullw      xmm2,           xmm1            ;dqcoeff = qcoeff * dequant
    pmullw      xmm3,           xmm5

    ; Build one byte per coefficient that is 0xFF where qcoeff == 0 and
    ; 0x00 where it is non-zero, then permute the 16 bytes with zz_shuf so
    ; that byte i describes the coefficient at raster index zz_shuf[i].
    pcmpeqw     xmm1,           xmm4            ;0xFFFF where qcoeff == 0
    pcmpeqw     xmm5,           xmm4            ;0xFFFF where qcoeff == 0
    packsswb    xmm1,           xmm5            ;16 words -> 16 bytes
    pshufb      xmm1,           [GLOBAL(zz_shuf)]

    pmovmskb    edx,            xmm1            ;bit i set <=> entry i is zero

    ; Earlier scalar search for the last clear bit, kept for reference.
    ; It leaves eax = index of the last non-zero entry, or -1 if none.
;    xor         ecx,            ecx
;    mov         eax,            -1
;find_eob_loop:
;    shr         edx,            1
;    jc          fq_skip
;    mov         eax,            ecx
;fq_skip:
;    inc         ecx
;    cmp         ecx,            16
;    jne         find_eob_loop
    xor         rdi,            rdi             ;rdi = 0 (qcoeff_ptr is done with)
    mov         eax,            -1
    xor         dx,             ax              ;flip the bits for bsr:
                                                ;bit i set <=> entry i non-zero
    bsr         eax,            edx             ;index of the highest set bit;
                                                ;not meaningful when edx == 0,
                                                ;which is masked off below

    movdqa      [rsi],          xmm2            ;store dqcoeff
    movdqa      [rsi + 16],     xmm3            ;store dqcoeff

    sub         edi,            edx             ;check for all zeros in bit mask
    sar         edi,            31              ;0 or -1
    add         eax,            1               ;last index -> count (eob)
    and         eax,            edi             ;if the bit mask was all zero,
                                                ;then eob = 0
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
align 16
; pshufb control: output byte i is taken from input byte zz_shuf[i].
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15