1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12    EXPORT  |vp8_fast_quantize_b_neon_func|
13
14    ARM
15    REQUIRE8
16    PRESERVE8
17
18    AREA ||.text||, CODE, READONLY, ALIGN=2
19
;-----------------------------------------------------------------------
; vp8_fast_quantize_b_neon_func
;
; Quantizes one block of 16 signed 16-bit coefficients, writes the
; quantized and the dequantized values, and returns the end-of-block
; value derived from the scan table.
;
; ABI:    ARM AAPCS, ARM (A32) state, leaf function, no stack usage
;
; In:     r0          short *coeff_ptr     (16 halfwords, read)
;         r1          short *zbin_ptr      (16 halfwords, read)
;         r2          short *qcoeff_ptr    (16 halfwords, written)
;         r3          short *dqcoeff_ptr   (16 halfwords, written)
;         [sp, #0]    short *dequant_ptr   (16 halfwords, read)
;         [sp, #4]    short *scan_mask     (16 halfwords, read; the code
;                                           refers to it as rvsplus1_scan_order)
;         [sp, #8]    short *round_ptr     (16 halfwords, read)
;         [sp, #12]   short *quant_ptr     (16 halfwords, read)
;
; Out:    r0 = eob: the largest scan_mask[i] (unsigned) over all i whose
;              quantized coefficient is non-zero, or 0 if there is none.
;
; Clobb:  r0, r1, r12, flags, q0-q3, q8-q15
;
; Only caller-saved NEON registers are used. q4-q7 (d8-d15) are
; callee-saved under AAPCS and are deliberately left untouched, so
; nothing has to be spilled and the stack argument offsets above are
; valid for the whole function.
;
; Register roles:
;         q0,  q1     z (coefficients), later the scan table / eob reduction
;         q2,  q3     sz (sign mask of z), later dequant and x * dequant
;         q8,  q9     quant; q8 is later reused as an all-ones constant
;         q10, q11    zbin, then the per-lane (x >= zbin) mask
;         q12, q13    x = abs(z), then y, then the signed result x1
;         q14, q15    scratch: mask reduction, round, non-zero lane mask
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_neon_func| PROC
    vld1.16         {q0, q1}, [r0]              ; z = coeff_ptr[0-15]
    vld1.16         {q10, q11}, [r1]            ; zbin_ptr[0-15]

    vabs.s16        q12, q0                     ; x = abs(z)
    vabs.s16        q13, q1

    vcge.s16        q10, q12, q10               ; mask lane = all 1 if x >= zbin, else 0
    vcge.s16        q11, q13, q11

    ; OR all 16 mask lanes together. If the result is zero, every
    ; coefficient is below its zbin and the outputs are all zero.
    vorr.s16        q14, q10, q11
    vorr.s16        d28, d28, d29
    vmov            r0, r1, d28
    orrs            r0, r0, r1                  ; Z set <=> no lane passed the zbin test
    beq             zero_output

    ldr             r0, [sp, #8]                ; round_ptr
    ldr             r12, [sp, #12]              ; quant_ptr

    ; arithmetic shift right by 15: all 0 for positive z, all 1 for negative z
    vshr.s16        q2, q0, #15                 ; sz
    vshr.s16        q3, q1, #15

    vld1.s16        {q14, q15}, [r0]            ; round_ptr[0-15]
    vld1.s16        {q8, q9}, [r12]             ; quant_ptr[0-15]

    vadd.s16        q12, q12, q14               ; x + round
    vadd.s16        q13, q13, q15

    ldr             r0, [sp, #4]                ; scan_mask (rvsplus1_scan_order)

    ; vqdmulh yields (2 * a * b) >> 16; the extra >> 1 below turns that
    ; into y = ((x + round) * quant) >> 16
    vqdmulh.s16     q12, q12, q8
    vqdmulh.s16     q13, q13, q9

    vld1.16         {q0, q1}, [r0]              ; scan table (z is no longer needed)
    vceq.s16        q8, q8, q8                  ; q8 = all 1 (quant in q8 is no longer needed)

    vshr.s16        q12, q12, #1                ; finish the >> 16
    vshr.s16        q13, q13, #1

    ; give y the sign of z: x1 = (y ^ sz) - sz
    ; (identity for sz == 0, two's complement negate for sz == -1)
    veor.s16        q12, q12, q2
    veor.s16        q13, q13, q3

    ldr             r12, [sp]                   ; dequant_ptr

    vsub.s16        q12, q12, q2
    vsub.s16        q13, q13, q3

    vand.s16        q12, q12, q10               ; zero the lanes that failed x >= zbin
    vand.s16        q13, q13, q11

    vld1.s16        {q2, q3}, [r12]             ; dequant_ptr[0-15] (sz is no longer needed)

    ; eob search: lanes of q14/q15 become all 1 where x1 != 0
    vtst.16         q14, q12, q8
    vtst.16         q15, q13, q8

    vst1.s16        {q12, q13}, [r2]            ; qcoeff = x1

    vand            q0, q0, q14                 ; keep scan entries of non-zero lanes only
    vand            q1, q1, q15

    vmax.u16        q0, q0, q1                  ; 16 -> 8 candidates
    vmax.u16        d0, d0, d1                  ; 8 -> 4 candidates
    vmovl.u16       q0, d0                      ; widen the 4 candidates to 32 bits

    vmul.s16        q2, q2, q12                 ; x1 * dequant
    vmul.s16        q3, q3, q13

    vmax.u32        d0, d0, d1                  ; 4 -> 2 candidates
    vpmax.u32       d0, d0, d0                  ; 2 -> 1: maximum ends up in d0[0]

    vst1.s16        {q2, q3}, [r3]              ; dqcoeff = x1 * dequant

    vmov.32         r0, d0[0]                   ; return eob
    bx              lr

zero_output
    ; q10/q11 hold the (x >= zbin) mask, which is all zero on this path,
    ; so they double as the zero vectors to store.
    vst1.s16        {q10, q11}, [r2]            ; qcoeff = 0
    vst1.s16        {q10, q11}, [r3]            ; dqcoeff = 0
    mov             r0, #0                      ; eob = 0

    bx              lr

    ENDP
117
118    END
119