;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Entry points exported from this file.
    EXPORT  |vp8_fast_quantize_b_neon|
    EXPORT  |vp8_fast_quantize_b_pair_neon|

; NOTE(review): presumably the source of the vp8_block_* / vp8_blockd_*
; structure offsets used by the routines below - they are not defined here.
    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

; The inv_zig_zag table at the end of this area is read with @128 loads.
    AREA ||.text||, CODE, READONLY, ALIGN=4

;-----------------------------------------------------------------------
; vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
;
; Quantizes two blocks of 16 coefficients in one interleaved pass.
;
; In:     r0 = b1, r1 = b2, r2 = d1, r3 = d2
; Out:    16 halfwords written through each of the qcoeff and dqcoeff
;         pointers of d1 and d2, and one byte written through each eob
;         pointer.
; Saved:  r4-r9 and q4-q7 are pushed on entry and restored on exit.
; Clobb:  r0, q0-q3, q8-q15
;
; Per coefficient z:
;     sz      = z >> 15                        (0 or -1)
;     y       = ((abs(z) + Round) * Quant) >> 16
;     qcoeff  = (y ^ sz) - sz                  (y with the sign of z)
;     dqcoeff = qcoeff * Dequant
; eob is the largest inv_zig_zag entry whose qcoeff is non-zero, or 0
; when every qcoeff is zero.
;
; NOTE: round and quant_fast are loaded from b1 only, and dequant from d1
;       only; the second block is processed with those same tables.
;       NOTE(review): presumably callers guarantee both blocks share
;       them - confirm against the call sites.
;
; The instruction order interleaves the two blocks and the scalar loads
; between NEON operations; keep it when editing.
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_pair_neon| PROC

    stmfd           sp!, {r4-r9}
    vstmdb          sp!, {q4-q7}

    ldr             r4, [r0, #vp8_block_coeff]
    ldr             r5, [r0, #vp8_block_quant_fast]
    ldr             r6, [r0, #vp8_block_round]

    vld1.16         {q0, q1}, [r4@128]  ; load z

    ldr             r7, [r2, #vp8_blockd_qcoeff]

    vabs.s16        q4, q0              ; calculate x = abs(z)
    vabs.s16        q5, q1

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15         ; sz
    vshr.s16        q3, q1, #15

    vld1.s16        {q6, q7}, [r6@128]  ; load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r5@128]  ; load quant_ptr [0-15]

    ldr             r4, [r1, #vp8_block_coeff]

    vadd.s16        q4, q6              ; x + Round
    vadd.s16        q5, q7

    vld1.16         {q0, q1}, [r4@128]  ; load z2 (q0/q1 are free again)

    ; vqdmulh produces (2 * (x + Round) * Quant) >> 16; the vshr #1 further
    ; down removes the doubling, giving ((x + Round) * Quant) >> 16.
    vqdmulh.s16     q4, q8
    vqdmulh.s16     q5, q9

    vabs.s16        q10, q0             ; calculate x2 = abs(z2)
    vabs.s16        q11, q1
    vshr.s16        q12, q0, #15        ; sz2
    vshr.s16        q13, q1, #15

    ;modify data to have its original sign
    ; For block 1 the xor is issued before the >> 1. An arithmetic shift
    ; commutes with bitwise NOT, so the result equals (y >> 1) ^ sz.
    veor.s16        q4, q2              ; y^sz
    veor.s16        q5, q3

    vadd.s16        q10, q6             ; x2 + Round
    vadd.s16        q11, q7

    ldr             r8, [r2, #vp8_blockd_dequant]

    vqdmulh.s16     q10, q8             ; 2 * y2, y2 = ((Round+abs(z2)) * Quant) >> 16
    vqdmulh.s16     q11, q9

    vshr.s16        q4, #1              ; right shift 1 after vqdmulh
    vshr.s16        q5, #1

    vld1.s16        {q6, q7}, [r8@128]  ; load dequant_ptr[i] (round no longer needed)

    ; (y^sz)-sz leaves y unchanged when sz = 0 and yields ~y + 1 = -y
    ; when sz = -1.
    vsub.s16        q4, q2              ; x1 = (y^sz) - sz
    vsub.s16        q5, q3

    vshr.s16        q10, #1             ; right shift 1 after vqdmulh
    vshr.s16        q11, #1

    ldr             r9, [r2, #vp8_blockd_dqcoeff]

    veor.s16        q10, q12            ; y2^sz2
    veor.s16        q11, q13

    vst1.s16        {q4, q5}, [r7]      ; store: d1 qcoeff = x1


    vsub.s16        q10, q12            ; x2 = (y2^sz2) - sz2
    vsub.s16        q11, q13

    ldr             r6, [r3, #vp8_blockd_qcoeff]

    vmul.s16        q2, q6, q4          ; x1 * Dequant
    vmul.s16        q3, q7, q5

    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table

    vceq.s16        q8, q8              ; set q8 to all 1

    vst1.s16        {q10, q11}, [r6]    ; store: d2 qcoeff = x2

    vmul.s16        q12, q6, q10        ; x2 * Dequant
    vmul.s16        q13, q7, q11

    vld1.16         {q6, q7}, [r0@128]  ; load inverse scan order

    vtst.16         q14, q4, q8         ; now find eob
    vtst.16         q15, q5, q8         ; non-zero element is set to all 1

    vst1.s16        {q2, q3}, [r9]      ; store d1 dqcoeff = x1 * Dequant

    ldr             r7, [r3, #vp8_blockd_dqcoeff]

    vand            q0, q6, q14         ; get all valid numbers from scan array
    vand            q1, q7, q15

    vst1.s16        {q12, q13}, [r7]    ; store d2 dqcoeff = x2 * Dequant

    vtst.16         q2, q10, q8         ; now find eob for block 2
    vtst.16         q3, q11, q8         ; non-zero element is set to all 1

    vmax.u16        q0, q0, q1          ; find maximum value in q0, q1

    vand            q10, q6, q2         ; get all valid numbers from scan array
    vand            q11, q7, q3
    vmax.u16        q10, q10, q11       ; find maximum value in q10, q11

    ; Horizontal max, both blocks in step: 8 halfwords -> 4 halfwords,
    ; widen to 4 words, -> 2 words, then pairwise max leaves the overall
    ; maximum in lane 0 of d0 (block 1) and d20 (block 2).
    vmax.u16        d0, d0, d1
    vmax.u16        d20, d20, d21
    vmovl.u16       q0, d0
    vmovl.u16       q10, d20

    vmax.u32        d0, d0, d1
    vmax.u32        d20, d20, d21
    vpmax.u32       d0, d0, d0
    vpmax.u32       d20, d20, d20

    ldr             r4, [r2, #vp8_blockd_eob]
    ldr             r5, [r3, #vp8_blockd_eob]

    ; Only the low byte of each maximum is written.
    vst1.8          {d0[0]}, [r4]       ; store eob for d1
    vst1.8          {d20[0]}, [r5]      ; store eob for d2

    vldmia          sp!, {q4-q7}
    ldmfd           sp!, {r4-r9}
    bx              lr

    ENDP

;-----------------------------------------------------------------------
; void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
;
; Quantizes one block of 16 coefficients.
;
; In:     r0 = b, r1 = d
; Out:    16 halfwords written through each of d's qcoeff and dqcoeff
;         pointers, and one byte written through its eob pointer.
; Saved:  r4-r7 are pushed on entry and restored on exit.
; Clobb:  r0, r2, r3, q0-q3, q8-q15, flags
;
; Per coefficient z:
;     sz      = z >> 15                        (0 or -1)
;     y       = ((abs(z) + Round) * Quant) >> 16
;     qcoeff  = (y ^ sz) - sz                  (y with the sign of z)
;     dqcoeff = qcoeff * Dequant
; eob is the largest inv_zig_zag entry whose qcoeff is non-zero, or 0
; when every qcoeff is zero.
;
; Early out: when all 16 input coefficients are zero the routine
; branches to zero_output, which stores zeros to qcoeff, dqcoeff and
; eob. The "check if all zero" steps are interleaved with the main
; computation; keep the instruction order when editing.
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_neon| PROC

    stmfd           sp!, {r4-r7}

    ldr             r3, [r0, #vp8_block_coeff]
    ldr             r4, [r0, #vp8_block_quant_fast]
    ldr             r5, [r0, #vp8_block_round]

    vld1.16         {q0, q1}, [r3@128]  ; load z
    vorr.s16        q14, q0, q1         ; check if all zero (step 1)
    ldr             r6, [r1, #vp8_blockd_qcoeff]
    ldr             r7, [r1, #vp8_blockd_dqcoeff]
    vorr.s16        d28, d28, d29       ; check if all zero (step 2)

    vabs.s16        q12, q0             ; calculate x = abs(z)
    vabs.s16        q13, q1

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15         ; sz
    ; r3 (the coeff pointer) has already been consumed, so it is reused
    ; here as the upper half of the OR-reduced input.
    vmov            r2, r3, d28         ; check if all zero (step 3)
    vshr.s16        q3, q1, #15

    vld1.s16        {q14, q15}, [r5@128]; load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r4@128]  ; load quant_ptr [0-15]

    vadd.s16        q12, q14            ; x + Round
    vadd.s16        q13, q15

    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table

    ; vqdmulh produces (2 * (x + Round) * Quant) >> 16; the vshr #1 below
    ; removes the doubling, giving y = ((x + Round) * Quant) >> 16.
    vqdmulh.s16     q12, q8
    vqdmulh.s16     q13, q9

    vld1.16         {q10, q11}, [r0@128]; load inverse scan order

    vceq.s16        q8, q8              ; set q8 to all 1

    ldr             r4, [r1, #vp8_blockd_dequant]

    vshr.s16        q12, #1             ; right shift 1 after vqdmulh
    vshr.s16        q13, #1

    ldr             r5, [r1, #vp8_blockd_eob]

    orr             r2, r2, r3          ; check if all zero (step 4)
    cmp             r2, #0              ; check if all zero (step 5)
    beq             zero_output         ; check if all zero (step 6)

    ;modify data to have its original sign
    veor.s16        q12, q2             ; y^sz
    veor.s16        q13, q3

    ; (y^sz)-sz leaves y unchanged when sz = 0 and yields ~y + 1 = -y
    ; when sz = -1.
    vsub.s16        q12, q2             ; x1 = (y^sz) - sz
    vsub.s16        q13, q3

    vld1.s16        {q2, q3}, [r4@128]  ; load dequant_ptr[i] (sz no longer needed)

    vtst.16         q14, q12, q8        ; now find eob
    vtst.16         q15, q13, q8        ; non-zero element is set to all 1

    vst1.s16        {q12, q13}, [r6@128]; store: qcoeff = x1

    vand            q10, q10, q14       ; get all valid numbers from scan array
    vand            q11, q11, q15


    ; Horizontal max: 16 halfwords -> 8 -> 4, widen to 4 words, -> 2
    ; words, then pairwise max leaves the overall maximum in lane 0 of d0.
    vmax.u16        q0, q10, q11        ; find maximum value in q10, q11
    vmax.u16        d0, d0, d1
    vmovl.u16       q0, d0

    vmul.s16        q2, q12             ; x1 * Dequant
    vmul.s16        q3, q13

    vmax.u32        d0, d0, d1
    vpmax.u32       d0, d0, d0

    vst1.s16        {q2, q3}, [r7@128]  ; store dqcoeff = x1 * Dequant

    ; Only the low byte of the maximum is written.
    vst1.8          {d0[0]}, [r5]       ; store eob

    ldmfd           sp!, {r4-r7}
    bx              lr

; Reached only via the beq above, so r2 == 0 and q0/q1 still hold the
; all-zero input coefficients.
zero_output
    strb            r2, [r5]            ; store eob = 0
    vst1.s16        {q0, q1}, [r6@128]  ; qcoeff = 0
    vst1.s16        {q0, q1}, [r7@128]  ; dqcoeff = 0

    ldmfd           sp!, {r4-r7}
    bx              lr

    ENDP

; default inverse zigzag table is defined in vp8/common/entropy.c
; 16 halfword entries holding the values 1..16. Both routines above mask
; this table with their "coefficient is non-zero" lanes and take the
; maximum, so a result of 0 means no non-zero coefficient.
    ALIGN 16    ; enable use of @128 bit aligned loads
inv_zig_zag
    DCW 0x0001, 0x0002, 0x0006, 0x0007
    DCW 0x0003, 0x0005, 0x0008, 0x000d
    DCW 0x0004, 0x0009, 0x000c, 0x000e
    DCW 0x000a, 0x000b, 0x000f, 0x0010

    END