;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Prefix handed to x86inc.asm for the symbols declared with cglobal below.
%define private_prefix vp9

%include "third_party/x86inc/x86inc.asm"
; Supplies the *_TRAN_LOW load/store/increment macros used by QUANTIZE_FP.
%include "vpx_dsp/x86/bitdepth_conversion_sse2.asm"

SECTION_RODATA
; Eight 16-bit words, each equal to 1.
pw_1: times 8 dw 1

SECTION .text
20ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
;-----------------------------------------------------------------------------
; QUANTIZE_FP suffix, num_gprs
;
; Emits the function quantize_<suffix> through x86inc's cglobal.
;   %1  name suffix; the literal value fp_32x32 additionally enables the
;       halved-round / doubled-quant / halved-dequant variant of the math.
;   %2  number of general purpose registers requested from cglobal.
;
; Named arguments (none are auto-loaded; each is fetched explicitly):
;   coeff    input coefficients, read through LOAD_TRAN_LOW
;   ncoeff   number of coefficients
;            NOTE(review): the loops step 16 elements at a time and the first
;            16 are processed unconditionally, so this looks like it must be
;            a positive multiple of 16 - confirm against callers.
;   skip     non-zero selects the .blank path (outputs zeroed, *eob = 0)
;   round, quant, dequant
;            8-word vectors read with mova, so they must be 16-byte aligned.
;            The low qword is used for the first 8 coefficients only; the
;            high qword is used for everything after.
;   qcoeff   quantized output, written through STORE_TRAN_LOW
;   dqcoeff  dequantized output, written through STORE_TRAN_LOW
;   eob      receives max(iscan[i] + 1) over the coefficients whose
;            dequantized value is non-zero, as a 16-bit word
;   scan     declared for the argument layout but never read here
;   iscan    16-bit words, read with mova
;
; Register roles on the main path:
;   m0 = skip threshold, m1 = round, m2 = quant, m3 = dequant,
;   m5 = zero, m7 = all ones, m8 = running eob maximum,
;   r3 = qcoeff, r4 = dqcoeff, r5 = iscan (all pointing at the buffer end),
;   ncoeffq = negative element index counting up to zero.
;-----------------------------------------------------------------------------
%macro QUANTIZE_FP 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, round, quant, \
                                qcoeff, dqcoeff, dequant, \
                                eob, scan, iscan
  cmp                    dword skipm, 0
  jne .blank

  ; actual quantize loop - setup pointers, rounders, etc.
  movifnidn                   coeffq, coeffmp
  movifnidn                  ncoeffq, ncoeffmp
  mov                             r2, dequantmp
  movifnidn                   roundq, roundmp
  movifnidn                   quantq, quantmp
  mova                            m1, [roundq]             ; m1 = round
  mova                            m2, [quantq]             ; m2 = quant
%ifidn %1, fp_32x32
  pcmpeqw                         m5, m5
  psrlw                           m5, 15                   ; m5 = 1 in every word
  paddw                           m1, m5
  psrlw                           m1, 1                    ; m1 = (m1 + 1) / 2
%endif
  mova                            m3, [r2q]                ; m3 = dequant
  mov                             r3, qcoeffmp
  mov                             r4, dqcoeffmp
  mov                             r5, iscanmp
%ifidn %1, fp_32x32
  psllw                           m2, 1                    ; m2 = quant * 2
%endif
  pxor                            m5, m5                   ; m5 = dedicated zero

  ; Point every buffer at its end and negate the count so that a single
  ; index register runs from -ncoeff up to zero.
  INCREMENT_ELEMENTS_TRAN_LOW coeffq, ncoeffq
  lea                            r5q, [r5q+ncoeffq*2]
  INCREMENT_ELEMENTS_TRAN_LOW    r3q, ncoeffq
  INCREMENT_ELEMENTS_TRAN_LOW    r4q, ncoeffq
  neg                        ncoeffq

  ; get DC and first 15 AC coeffs
  LOAD_TRAN_LOW  9, coeffq, ncoeffq                        ; m9 = c[i]
  LOAD_TRAN_LOW 10, coeffq, ncoeffq + 8                    ; m10 = c[i]
  pabsw                           m6, m9                   ; m6 = abs(m9)
  pabsw                          m11, m10                  ; m11 = abs(m10)
  pcmpeqw                         m7, m7                   ; m7 = all ones (-1)

  paddsw                          m6, m1                   ; m6 += round
  punpckhqdq                      m1, m1                   ; keep only the high-qword round
  paddsw                         m11, m1                   ; m11 += round
  pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
  punpckhqdq                      m2, m2                   ; keep only the high-qword quant
  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
  psignw                          m8, m9                   ; m8 = reinsert sign
  psignw                         m13, m10                  ; m13 = reinsert sign
  STORE_TRAN_LOW  8, r3q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r3q, ncoeffq + 8, 6, 11, 12
%ifidn %1, fp_32x32
  ; Work on magnitudes so the logical shift below halves correctly.
  pabsw                           m8, m8
  pabsw                          m13, m13
%endif
  pmullw                          m8, m3                   ; r4[i] = r3[i] * q
  punpckhqdq                      m3, m3                   ; keep only the high-qword dequant
  pmullw                         m13, m3                   ; r4[i] = r3[i] * q
%ifidn %1, fp_32x32
  psrlw                           m8, 1
  psrlw                          m13, 1
  psignw                          m8, m9
  psignw                         m13, m10
  psrlw                           m0, m3, 2                ; m0 = dequant >> 2 (skip threshold)
%else
  psrlw                           m0, m3, 1                ; m0 = dequant >> 1 (skip threshold)
%endif
  STORE_TRAN_LOW  8, r4q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r4q, ncoeffq + 8, 6, 11, 12
  pcmpeqw                         m8, m5                   ; m8 = (dqcoeff[i] == 0)
  pcmpeqw                        m13, m5                   ; m13 = (dqcoeff[i] == 0)
  mova                            m6, [  r5q+ncoeffq*2+ 0] ; m6 = iscan[i]
  mova                           m11, [  r5q+ncoeffq*2+16] ; m11 = iscan[i]
  psubw                           m6, m7                   ; m6 = iscan[i] + 1
  psubw                          m11, m7                   ; m11 = iscan[i] + 1
  pandn                           m8, m6                   ; m8 = iscan[i] + 1 where non-zero
  pandn                          m13, m11                  ; m13 = iscan[i] + 1 where non-zero
  pmaxsw                          m8, m13                  ; m8 = running eob maximum
  add                        ncoeffq, mmsize               ; 16 elements consumed
  jz .accumulate_eob

.ac_only_loop:
  LOAD_TRAN_LOW  9, coeffq, ncoeffq                        ; m9 = c[i]
  LOAD_TRAN_LOW 10, coeffq, ncoeffq + 8                    ; m10 = c[i]
  pabsw                           m6, m9                   ; m6 = abs(m9)
  pabsw                          m11, m10                  ; m11 = abs(m10)

  ; If no magnitude in this group of 16 exceeds the threshold in m0, take
  ; the cheap path that just stores zeros.
  pcmpgtw                         m7, m6,  m0
  pcmpgtw                        m12, m11, m0
  pmovmskb                       r6d, m7
  pmovmskb                       r2d, m12

  or                              r6, r2
  jz .skip_iter

  pcmpeqw                         m7, m7                   ; m7 = all ones again

  paddsw                          m6, m1                   ; m6 += round
  paddsw                         m11, m1                   ; m11 += round
  pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
  psignw                         m14, m9                   ; m14 = reinsert sign
  psignw                         m13, m10                  ; m13 = reinsert sign
  STORE_TRAN_LOW 14, r3q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r3q, ncoeffq + 8, 6, 11, 12
%ifidn %1, fp_32x32
  pabsw                          m14, m14
  pabsw                          m13, m13
%endif
  pmullw                         m14, m3                   ; r4[i] = r3[i] * q
  pmullw                         m13, m3                   ; r4[i] = r3[i] * q
%ifidn %1, fp_32x32
  psrlw                          m14, 1
  psrlw                          m13, 1
  psignw                         m14, m9
  psignw                         m13, m10
%endif
  STORE_TRAN_LOW 14, r4q, ncoeffq,     6, 11, 12
  STORE_TRAN_LOW 13, r4q, ncoeffq + 8, 6, 11, 12
  pcmpeqw                        m14, m5                   ; m14 = (dqcoeff[i] == 0)
  pcmpeqw                        m13, m5                   ; m13 = (dqcoeff[i] == 0)
  mova                            m6, [  r5q+ncoeffq*2+ 0] ; m6 = iscan[i]
  mova                           m11, [  r5q+ncoeffq*2+16] ; m11 = iscan[i]
  psubw                           m6, m7                   ; m6 = iscan[i] + 1
  psubw                          m11, m7                   ; m11 = iscan[i] + 1
  pandn                          m14, m6                   ; m14 = iscan[i] + 1 where non-zero
  pandn                          m13, m11                  ; m13 = iscan[i] + 1 where non-zero
  pmaxsw                          m8, m14
  pmaxsw                          m8, m13
  add                        ncoeffq, mmsize
  jl .ac_only_loop

  jmp .accumulate_eob
.skip_iter:
  ; Whole group is below threshold: outputs are zero, eob is unaffected.
  STORE_ZERO_TRAN_LOW 5, r3q, ncoeffq
  STORE_ZERO_TRAN_LOW 5, r3q, ncoeffq + 8
  STORE_ZERO_TRAN_LOW 5, r4q, ncoeffq
  STORE_ZERO_TRAN_LOW 5, r4q, ncoeffq + 8
  add                        ncoeffq, mmsize
  jl .ac_only_loop

.accumulate_eob:
  ; horizontally accumulate/max eobs and write into [eob] memory pointer
  mov                             r2, eobmp
  pshufd                          m7, m8, 0xe              ; high qword -> low qword
  pmaxsw                          m8, m7
  pshuflw                         m7, m8, 0xe              ; words 2,3 -> words 0,1
  pmaxsw                          m8, m7
  pshuflw                         m7, m8, 0x1              ; word 1 -> word 0
  pmaxsw                          m8, m7
  pextrw                          r6, m8, 0
  ; Store exactly one word: pextrw produces a 16-bit value and the .blank
  ; path below also writes *eob as a word, so a full-register store would
  ; write past the end of that slot.
  mov                           [r2], r6w
  RET

  ; skip-block, i.e. just write all zeroes
.blank:
  mov                             r0, dqcoeffmp
  movifnidn                  ncoeffq, ncoeffmp
  mov                             r2, qcoeffmp
  mov                             r3, eobmp

  ; Advance with the same macro the main path uses for these buffers so the
  ; end pointers agree with the element indexing of STORE_ZERO_TRAN_LOW
  ; (a hard-coded *2 only matches when an element is two bytes wide).
  INCREMENT_ELEMENTS_TRAN_LOW    r0q, ncoeffq
  INCREMENT_ELEMENTS_TRAN_LOW    r2q, ncoeffq
  neg                        ncoeffq
  pxor                            m7, m7                   ; m7 = zero
.blank_loop:
  STORE_ZERO_TRAN_LOW 7, r0q, ncoeffq
  STORE_ZERO_TRAN_LOW 7, r0q, ncoeffq + 8
  STORE_ZERO_TRAN_LOW 7, r2q, ncoeffq
  STORE_ZERO_TRAN_LOW 7, r2q, ncoeffq + 8
  add                        ncoeffq, mmsize
  jl .blank_loop
  mov                     word [r3q], 0
  RET
%endmacro
198ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
; Instantiate both SSSE3 variants; each requests 7 general purpose registers.
INIT_XMM ssse3
QUANTIZE_FP fp, 7
QUANTIZE_FP fp_32x32, 7
202