;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


12233d2500723e5594f3e7c70896ffeeef32b9c950ywan    EXPORT  |vp8_fast_quantize_b_neon|
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan    EXPORT  |vp8_fast_quantize_b_pair_neon|
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan    INCLUDE vp8_asm_enc_offsets.asm
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan    ARM
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan    REQUIRE8
19233d2500723e5594f3e7c70896ffeeef32b9c950ywan    PRESERVE8
20233d2500723e5594f3e7c70896ffeeef32b9c950ywan
21233d2500723e5594f3e7c70896ffeeef32b9c950ywan    AREA ||.text||, CODE, READONLY, ALIGN=4
22233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;-----------------------------------------------------------------------
;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
;
; Fast-quantizes two blocks of 16 signed 16-bit coefficients at once.
; For every coefficient z of each block:
;     x       = abs(z)
;     y       = ((x + round) * quant_fast) >> 16
;     qcoeff  = y with the sign of z restored, i.e. (y ^ sz) - sz
;     dqcoeff = qcoeff * dequant
; and for each block the byte addressed by its eob pointer receives the
; largest inv_zig_zag entry belonging to a non-zero qcoeff (0 if none).
;
; In:    r0 = b1, r1 = b2, r2 = d1, r3 = d2
; Note:  round and quant_fast are read through b1 only and dequant
;        through d1 only; the same tables are applied to both blocks.
;        b2 contributes only its coeff pointer, d2 only its qcoeff,
;        dqcoeff and eob pointers.
; Saved: r4-r9 and q4-q7 are pushed on entry and restored on exit.
; Clobb: r0 (ends up holding the address of inv_zig_zag),
;        q0-q3, q8-q15.
; Align: coeff, round, quant_fast, dequant and inv_zig_zag are loaded
;        with @128 hints and must therefore be 16-byte aligned. The
;        qcoeff/dqcoeff stores carry no alignment hint.
;
; The two blocks are processed in interleaved fashion: block 1 lives in
; q4/q5 (sign mask q2/q3) and block 2 in q10/q11 (sign mask q12/q13).
; The instruction order is hand scheduled; keep it when editing.
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_pair_neon| PROC

    stmfd           sp!, {r4-r9}        ; save core registers used as pointers
    vstmdb          sp!, {q4-q7}        ; save q4-q7, used as temporaries below

    ldr             r4, [r0, #vp8_block_coeff]
    ldr             r5, [r0, #vp8_block_quant_fast]
    ldr             r6, [r0, #vp8_block_round]

    vld1.16         {q0, q1}, [r4@128]  ; load z

    ldr             r7, [r2, #vp8_blockd_qcoeff]

    vabs.s16        q4, q0              ; calculate x = abs(z)
    vabs.s16        q5, q1

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15         ; sz
    vshr.s16        q3, q1, #15

    vld1.s16        {q6, q7}, [r6@128]  ; load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r5@128]  ; load quant_ptr [0-15]

    ldr             r4, [r1, #vp8_block_coeff]

    vadd.s16        q4, q6              ; x + Round
    vadd.s16        q5, q7

    vld1.16         {q0, q1}, [r4@128]  ; load z2

    ; vqdmulh yields (2 * a * b) >> 16; the later ">> 1" removes the doubling.
    vqdmulh.s16     q4, q8              ; y = ((Round+abs(z)) * Quant) >> 16
    vqdmulh.s16     q5, q9

    vabs.s16        q10, q0             ; calculate x2 = abs(z_2)
    vabs.s16        q11, q1
    vshr.s16        q12, q0, #15        ; sz2
    vshr.s16        q13, q1, #15

    ;modify data to have its original sign
    ; For block 1 the XOR is issued before the ">> 1". That is equivalent
    ; to shifting first: an arithmetic shift commutes with XOR by an
    ; all-zeros / all-ones mask.
    veor.s16        q4, q2              ; y^sz
    veor.s16        q5, q3

    vadd.s16        q10, q6             ; x2 + Round
    vadd.s16        q11, q7

    ldr             r8, [r2, #vp8_blockd_dequant]

    vqdmulh.s16     q10, q8             ; y2 = ((Round+abs(z2)) * Quant) >> 16
    vqdmulh.s16     q11, q9

    vshr.s16        q4, #1              ; right shift 1 after vqdmulh
    vshr.s16        q5, #1

    ; q6/q7 (round) are no longer needed and are reused for dequant.
    vld1.s16        {q6, q7}, [r8@128]  ;load dequant_ptr[i]

    vsub.s16        q4, q2              ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
    vsub.s16        q5, q3

    vshr.s16        q10, #1             ; right shift 1 after vqdmulh
    vshr.s16        q11, #1

    ldr             r9, [r2, #vp8_blockd_dqcoeff]

    veor.s16        q10, q12            ; y2^sz2
    veor.s16        q11, q13

    vst1.s16        {q4, q5}, [r7]      ; store: qcoeff = x1


    vsub.s16        q10, q12            ; x2=(y2^sz2)-sz2 = (y2^sz2)-(-1) (2's complement)
    vsub.s16        q11, q13

    ldr             r6, [r3, #vp8_blockd_qcoeff]

    vmul.s16        q2, q6, q4          ; x * Dequant
    vmul.s16        q3, q7, q5

    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table

    ; q8 (quant) is dead here; comparing it with itself sets every bit.
    vceq.s16        q8, q8              ; set q8 to all 1

    vst1.s16        {q10, q11}, [r6]    ; store: qcoeff = x2

    vmul.s16        q12, q6, q10        ; x2 * Dequant
    vmul.s16        q13, q7, q11

    vld1.16         {q6, q7}, [r0@128]  ; load inverse scan order

    vtst.16         q14, q4, q8         ; now find eob
    vtst.16         q15, q5, q8         ; non-zero element is set to all 1

    vst1.s16        {q2, q3}, [r9]      ; store dqcoeff = x * Dequant

    ldr             r7, [r3, #vp8_blockd_dqcoeff]

    vand            q0, q6, q14         ; get all valid numbers from scan array
    vand            q1, q7, q15

    vst1.s16        {q12, q13}, [r7]    ; store dqcoeff = x2 * Dequant

    vtst.16         q2, q10, q8         ; now find eob (block 2)
    vtst.16         q3, q11, q8         ; non-zero element is set to all 1

    vmax.u16        q0, q0, q1          ; find maximum value in q0, q1

    vand            q10, q6, q2         ; get all valid numbers from scan array
    vand            q11, q7, q3
    vmax.u16        q10, q10, q11       ; find maximum value in q10, q11

    ; Horizontal maximum: 8 lanes -> 4 lanes (u16), widen to u32,
    ; 4 -> 2 lanes, then a pairwise max leaves the result in lane 0.
    vmax.u16        d0, d0, d1
    vmax.u16        d20, d20, d21
    vmovl.u16       q0, d0
    vmovl.u16       q10, d20

    vmax.u32        d0, d0, d1
    vmax.u32        d20, d20, d21
    vpmax.u32       d0, d0, d0
    vpmax.u32       d20, d20, d20

    ; The eob field holds a pointer; one byte is written through it.
    ldr             r4, [r2, #vp8_blockd_eob]
    ldr             r5, [r3, #vp8_blockd_eob]

    vst1.8          {d0[0]}, [r4]       ; store eob
    vst1.8          {d20[0]}, [r5]      ; store eob

    vldmia          sp!, {q4-q7}        ; restore in reverse order of the saves
    ldmfd           sp!, {r4-r9}
    bx              lr

    ENDP

;-----------------------------------------------------------------------
;void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
; (same prototype the original comment gave for vp8_fast_quantize_b_c)
;
; Fast-quantizes one block of 16 signed 16-bit coefficients:
;     x       = abs(z)
;     y       = ((x + round) * quant_fast) >> 16
;     qcoeff  = (y ^ sz) - sz          ; y with the sign of z restored
;     dqcoeff = qcoeff * dequant
; The byte addressed by the eob pointer receives the largest
; inv_zig_zag entry belonging to a non-zero qcoeff.
;
; If all 16 input coefficients are zero, the routine branches to
; zero_output, which stores eob = 0 and zero-fills qcoeff and dqcoeff.
;
; In:    r0 = b, r1 = d
; Saved: r4-r7 are pushed on entry and restored on both exit paths.
; Clobb: r0 (address of inv_zig_zag), r2, r3, flags,
;        q0-q3, q8-q15 (q4-q7 are not touched).
; Align: coeff, round, quant_fast, dequant, inv_zig_zag, qcoeff and
;        dqcoeff are all accessed with @128 hints and must be 16-byte
;        aligned.
;
; The all-zero test (steps 1-6) is interleaved with the quantizer
; arithmetic; the instruction order is hand scheduled.
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_neon| PROC

    stmfd           sp!, {r4-r7}

    ldr             r3, [r0, #vp8_block_coeff]
    ldr             r4, [r0, #vp8_block_quant_fast]
    ldr             r5, [r0, #vp8_block_round]

    vld1.16         {q0, q1}, [r3@128]  ; load z
    vorr.s16        q14, q0, q1         ; check if all zero (step 1)
    ldr             r6, [r1, #vp8_blockd_qcoeff]
    ldr             r7, [r1, #vp8_blockd_dqcoeff]
    vorr.s16        d28, d28, d29       ; check if all zero (step 2)

    vabs.s16        q12, q0             ; calculate x = abs(z)
    vabs.s16        q13, q1

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15         ; sz
    vmov            r2, r3, d28         ; check if all zero (step 3)
    vshr.s16        q3, q1, #15

    ; d28 has been copied to r2:r3, so q14 can be reused for round.
    vld1.s16        {q14, q15}, [r5@128]; load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r4@128]  ; load quant_ptr [0-15]

    vadd.s16        q12, q14            ; x + Round
    vadd.s16        q13, q15

    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table

    ; vqdmulh yields (2 * a * b) >> 16; the later ">> 1" removes the doubling.
    vqdmulh.s16     q12, q8             ; y = ((Round+abs(z)) * Quant) >> 16
    vqdmulh.s16     q13, q9

    vld1.16         {q10, q11}, [r0@128]; load inverse scan order

    ; q8 (quant) is dead here; comparing it with itself sets every bit.
    vceq.s16        q8, q8              ; set q8 to all 1

    ldr             r4, [r1, #vp8_blockd_dequant]

    vshr.s16        q12, #1             ; right shift 1 after vqdmulh
    vshr.s16        q13, #1

    ldr             r5, [r1, #vp8_blockd_eob]

    orr             r2, r2, r3          ; check if all zero (step 4)
    cmp             r2, #0              ; check if all zero (step 5)
    beq             zero_output         ; check if all zero (step 6)

    ;modify data to have its original sign
    veor.s16        q12, q2             ; y^sz
    veor.s16        q13, q3

    vsub.s16        q12, q2             ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
    vsub.s16        q13, q3

    ; The sign masks in q2/q3 are no longer needed; reuse them for dequant.
    vld1.s16        {q2, q3}, [r4@128]  ; load dequant_ptr[i]

    vtst.16         q14, q12, q8        ; now find eob
    vtst.16         q15, q13, q8        ; non-zero element is set to all 1

    vst1.s16        {q12, q13}, [r6@128]; store: qcoeff = x1

    vand            q10, q10, q14       ; get all valid numbers from scan array
    vand            q11, q11, q15


    ; Horizontal maximum: 16 -> 8 -> 4 lanes (u16), widen to u32,
    ; 4 -> 2 lanes, then a pairwise max leaves the result in lane 0.
    vmax.u16        q0, q10, q11        ; find maximum value in q10, q11
    vmax.u16        d0, d0, d1
    vmovl.u16       q0, d0

    vmul.s16        q2, q12             ; x * Dequant
    vmul.s16        q3, q13

    vmax.u32        d0, d0, d1
    vpmax.u32       d0, d0, d0

    vst1.s16        {q2, q3}, [r7@128]  ; store dqcoeff = x * Dequant

    vst1.8          {d0[0]}, [r5]       ; store eob

    ldmfd           sp!, {r4-r7}
    bx              lr

; All-zero input: r2 == 0 here (that is what the branch tested) and
; q0/q1 still hold the all-zero coefficients loaded at the top.
zero_output
    strb            r2, [r5]            ; store eob
    vst1.s16        {q0, q1}, [r6@128]  ; qcoeff = 0
    vst1.s16        {q0, q1}, [r7@128]  ; dqcoeff = 0

    ldmfd           sp!, {r4-r7}
    bx              lr

    ENDP

; default inverse zigzag table is defined in vp8/common/entropy.c
;
; Sixteen 16-bit entries, one per coefficient in storage order, with
; values 1..16. The quantizers mask this table with the "coefficient is
; non-zero" lanes and take the maximum, which becomes eob (0 when every
; quantized coefficient is zero).
    ALIGN 16    ; enable use of @128 bit aligned loads
inv_zig_zag
    DCW 0x0001, 0x0002, 0x0006, 0x0007
    DCW 0x0003, 0x0005, 0x0008, 0x000d
    DCW 0x0004, 0x0009, 0x000c, 0x000e
    DCW 0x000a, 0x000b, 0x000f, 0x0010

    END

259