;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%define private_prefix vp9

%include "third_party/x86inc/x86inc.asm"
%include "vpx_dsp/x86/bitdepth_conversion_sse2.asm"

SECTION_RODATA
pw_1: times 8 dw 1

SECTION .text

;------------------------------------------------------------------------------
; QUANTIZE_FP name_suffix, num_gprs
;
; Emits the function quantize_<name_suffix> (prefixed by x86inc according to
; private_prefix and the active INIT_XMM instruction set).
;
;   %1  name suffix; the value fp_32x32 additionally enables the 32x32
;       variant: round is halved (rounding up), quant is doubled, the
;       dequantized magnitude is halved and the AC skip threshold becomes
;       dequant >> 2 instead of dequant >> 1.
;   %2  number of general purpose registers requested from cglobal.
;
; Named arguments, in order: coeff, ncoeff, skip, round, quant, qcoeff,
; dqcoeff, dequant, eob, scan, iscan. cglobal is asked to auto-load zero of
; them, so every argument is fetched explicitly through its *mp / *m form.
; `scan` is never read by this code.
;
; Behaviour:
;   * skip != 0: qcoeff and dqcoeff are zero-filled and *eob is set to 0.
;   * otherwise, coefficients are processed 16 at a time (mmsize == 16 with
;     INIT_XMM, one LOAD_TRAN_LOW per 8 coefficients):
;       q       = sign(c) * (((abs(c) + round) * quant) >> 16)
;       dq      = q * dequant          (32x32: sign(c) * ((abs(q)*dequant) >> 1))
;       *eob    = max over non-zero outputs of (iscan[i] + 1)
;     The first group uses the DC values in the low half of round / quant /
;     dequant and the AC values (high half) for its second eight lanes; every
;     later group uses the AC values only and is skipped (zero-filled) when
;     no abs(c) in the group exceeds the threshold derived from dequant.
;
; The element size of coeff/qcoeff/dqcoeff is hidden behind the *_TRAN_LOW
; macros from bitdepth_conversion_sse2.asm, so all pointer arithmetic on
; those buffers must go through INCREMENT_ELEMENTS_TRAN_LOW.
; NOTE(review): ncoeff is assumed to be a non-zero multiple of 16 - the loops
; have no tail handling; confirm against callers.
;------------------------------------------------------------------------------
%macro QUANTIZE_FP 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, round, quant, \
                                qcoeff, dqcoeff, dequant, \
                                eob, scan, iscan
  cmp                    dword skipm, 0
  jne .blank

  ; actual quantize loop - setup pointers, rounders, etc.
  movifnidn                   coeffq, coeffmp
  movifnidn                  ncoeffq, ncoeffmp
  mov                             r2, dequantmp
  movifnidn                   roundq, roundmp
  movifnidn                   quantq, quantmp
  mova                            m1, [roundq]             ; m1 = round
  mova                            m2, [quantq]             ; m2 = quant
%ifidn %1, fp_32x32
  pcmpeqw                         m5, m5                   ; m5 = all ones
  psrlw                           m5, 15                   ; m5 = 1 in every word
  paddw                           m1, m5
  psrlw                           m1, 1                    ; m1 = (m1 + 1) / 2
%endif
  mova                            m3, [r2q]                ; m3 = dequant
  ; round/quant have been consumed above, so r3/r4 can now be reused.
  mov                             r3, qcoeffmp
  mov                             r4, dqcoeffmp
  mov                             r5, iscanmp
%ifidn %1, fp_32x32
  psllw                           m2, 1                    ; m2 = quant * 2
%endif
  pxor                            m5, m5                   ; m5 = dedicated zero

  ; Point every buffer at its end and count ncoeff up from -n to 0, so the
  ; loop index doubles as the termination condition.
  INCREMENT_ELEMENTS_TRAN_LOW coeffq, ncoeffq
  lea                            r5q, [r5q+ncoeffq*2]      ; iscan holds 16-bit entries
  INCREMENT_ELEMENTS_TRAN_LOW    r3q, ncoeffq
  INCREMENT_ELEMENTS_TRAN_LOW    r4q, ncoeffq
  neg                        ncoeffq

  ; get DC and first 15 AC coeffs
  LOAD_TRAN_LOW  9, coeffq, ncoeffq                        ; m9 = c[i]
  LOAD_TRAN_LOW 10, coeffq, ncoeffq + 8                    ; m10 = c[i]
  pabsw                           m6, m9                   ; m6 = abs(m9)
  pabsw                          m11, m10                  ; m11 = abs(m10)
  pcmpeqw                         m7, m7                   ; m7 = -1 in every word

  paddsw                          m6, m1                   ; m6 += round
  punpckhqdq                      m1, m1                   ; m1 = AC round in all lanes
  paddsw                         m11, m1                   ; m11 += round
  pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
  punpckhqdq                      m2, m2                   ; m2 = AC quant in all lanes
  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
  psignw                          m8, m9                   ; m8 = reinsert sign
  psignw                         m13, m10                  ; m13 = reinsert sign
  ; m6/m11/m12 are passed as the macro's trailing register arguments
  ; (presumably scratch - see bitdepth_conversion_sse2.asm); their previous
  ; contents are dead at this point.
  STORE_TRAN_LOW  8, r3q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r3q, ncoeffq + 8, 6, 11, 12
%ifidn %1, fp_32x32
  pabsw                           m8, m8
  pabsw                          m13, m13
%endif
  pmullw                          m8, m3                   ; dqcoeff[i] = qcoeff[i] * dequant
  punpckhqdq                      m3, m3                   ; m3 = AC dequant in all lanes
  pmullw                         m13, m3                   ; dqcoeff[i] = qcoeff[i] * dequant
%ifidn %1, fp_32x32
  psrlw                           m8, 1
  psrlw                          m13, 1
  psignw                          m8, m9
  psignw                         m13, m10
  psrlw                           m0, m3, 2                ; m0 = AC skip threshold
%else
  psrlw                           m0, m3, 1                ; m0 = AC skip threshold
%endif
  STORE_TRAN_LOW  8, r4q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r4q, ncoeffq + 8, 6, 11, 12
  pcmpeqw                         m8, m5                   ; m8 = c[i] == 0
  pcmpeqw                        m13, m5                   ; m13 = c[i] == 0
  mova                            m6, [  r5q+ncoeffq*2+ 0] ; m6 = iscan[i]
  mova                           m11, [  r5q+ncoeffq*2+16] ; m11 = iscan[i]
  psubw                           m6, m7                   ; m6 = iscan[i] + 1
  psubw                          m11, m7                   ; m11 = iscan[i] + 1
  pandn                           m8, m6                   ; m8 = max(eob)
  pandn                          m13, m11                  ; m13 = max(eob)
  pmaxsw                          m8, m13
  add                        ncoeffq, mmsize
  jz .accumulate_eob

.ac_only_loop:
  LOAD_TRAN_LOW  9, coeffq, ncoeffq                        ; m9 = c[i]
  LOAD_TRAN_LOW 10, coeffq, ncoeffq + 8                    ; m10 = c[i]
  pabsw                           m6, m9                   ; m6 = abs(m9)
  pabsw                          m11, m10                  ; m11 = abs(m10)

  ; If no magnitude in this group of 16 exceeds the threshold in m0, the
  ; whole group is written as zero without doing the multiply.
  pcmpgtw                         m7, m6,  m0
  pcmpgtw                        m12, m11, m0
  pmovmskb                       r6d, m7
  pmovmskb                       r2d, m12

  or                              r6, r2
  jz .skip_iter

  pcmpeqw                         m7, m7                   ; restore m7 = -1 (clobbered above)

  paddsw                          m6, m1                   ; m6 += round
  paddsw                         m11, m1                   ; m11 += round
  pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
  psignw                         m14, m9                   ; m14 = reinsert sign
  psignw                         m13, m10                  ; m13 = reinsert sign
  STORE_TRAN_LOW 14, r3q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r3q, ncoeffq + 8, 6, 11, 12
%ifidn %1, fp_32x32
  pabsw                          m14, m14
  pabsw                          m13, m13
%endif
  pmullw                         m14, m3                   ; dqcoeff[i] = qcoeff[i] * dequant
  pmullw                         m13, m3                   ; dqcoeff[i] = qcoeff[i] * dequant
%ifidn %1, fp_32x32
  psrlw                          m14, 1
  psrlw                          m13, 1
  psignw                         m14, m9
  psignw                         m13, m10
%endif
  STORE_TRAN_LOW 14, r4q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r4q, ncoeffq + 8, 6, 11, 12
  pcmpeqw                        m14, m5                   ; m14 = c[i] == 0
  pcmpeqw                        m13, m5                   ; m13 = c[i] == 0
  mova                            m6, [  r5q+ncoeffq*2+ 0] ; m6 = iscan[i]
  mova                           m11, [  r5q+ncoeffq*2+16] ; m11 = iscan[i]
  psubw                           m6, m7                   ; m6 = iscan[i] + 1
  psubw                          m11, m7                   ; m11 = iscan[i] + 1
  pandn                          m14, m6                   ; m14 = max(eob)
  pandn                          m13, m11                  ; m13 = max(eob)
  pmaxsw                          m8, m14
  pmaxsw                          m8, m13
  add                        ncoeffq, mmsize
  jl .ac_only_loop

  jmp .accumulate_eob
.skip_iter:
  STORE_ZERO_TRAN_LOW 5, r3q, ncoeffq
  STORE_ZERO_TRAN_LOW 5, r3q, ncoeffq + 8
  STORE_ZERO_TRAN_LOW 5, r4q, ncoeffq
  STORE_ZERO_TRAN_LOW 5, r4q, ncoeffq + 8
  add                        ncoeffq, mmsize
  jl .ac_only_loop

.accumulate_eob:
  ; horizontally accumulate/max eobs and write into [eob] memory pointer
  mov                             r2, eobmp
  pshufd                          m7, m8, 0xe
  pmaxsw                          m8, m7
  pshuflw                         m7, m8, 0xe
  pmaxsw                          m8, m7
  pshuflw                         m7, m8, 0x1
  pmaxsw                          m8, m7
  pextrw                          r6, m8, 0
  ; eob is a 16-bit slot (the .blank path writes it with `mov word`), so
  ; store only the low word; a full-register store would overwrite the
  ; bytes that follow it.
  mov                           [r2], r6w
  RET

  ; skip-block, i.e. just write all zeroes
.blank:
  mov                             r0, dqcoeffmp
  movifnidn                  ncoeffq, ncoeffmp
  mov                             r2, qcoeffmp
  mov                             r3, eobmp

  ; Advance to the end of both buffers using the same element-size-aware
  ; macro as the main path, so STORE_ZERO_TRAN_LOW below addresses exactly
  ; as it does in .skip_iter.
  INCREMENT_ELEMENTS_TRAN_LOW    r0q, ncoeffq
  INCREMENT_ELEMENTS_TRAN_LOW    r2q, ncoeffq
  neg                        ncoeffq
  pxor                            m7, m7
.blank_loop:
  STORE_ZERO_TRAN_LOW 7, r0q, ncoeffq
  STORE_ZERO_TRAN_LOW 7, r0q, ncoeffq + 8
  STORE_ZERO_TRAN_LOW 7, r2q, ncoeffq
  STORE_ZERO_TRAN_LOW 7, r2q, ncoeffq + 8
  add                        ncoeffq, mmsize
  jl .blank_loop
  mov                     word [r3q], 0
  RET
%endmacro

INIT_XMM ssse3
QUANTIZE_FP fp, 7
QUANTIZE_FP fp_32x32, 7