190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
;-----------------------------------------------------------------------
; Assembler setup (armasm syntax, ARM instruction set).
; REQUIRE8 / PRESERVE8 declare the 8-byte stack alignment contract.
;-----------------------------------------------------------------------

    EXPORT  |vp8_fast_quantize_b_neon_func|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_fast_quantize_b_neon_func
;
; Quantizes one block of 16 signed 16-bit coefficients with NEON, writes
; the quantized and dequantized results, and returns the end-of-block
; value.
;
; In:
;   r0         short *coeff_ptr    16 input coefficients z[i]
;   r1         short *zbin_ptr     16 zero-bin thresholds
;   r2         short *qcoeff_ptr   out: 16 quantized coefficients
;   r3         short *dqcoeff_ptr  out: 16 dequantized coefficients
;   [sp, #0]   short *dequant_ptr  16 dequantization factors
;   [sp, #4]   short *scan_mask    16 per-position values; the largest one
;                                  whose coefficient quantizes to non-zero
;                                  is the return value (presumably
;                                  "scan position + 1" - confirm with caller)
;   [sp, #8]   short *round_ptr    16 rounding offsets
;   [sp, #12]  short *quant_ptr    16 quantizer multipliers
;
; Out:
;   r0         eob, returned by value (not through a pointer);
;              0 when every |z[i]| is below its zbin threshold
;
; Clobbers: r0, r1, r12, flags, q0-q3, q8-q15.
;
; Only caller-saved NEON registers are used. q4-q7 (d8-d15) are
; callee-saved under the AAPCS and are deliberately left untouched, which
; lets this routine stay a leaf that never adjusts sp.
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_neon_func| PROC
    vld1.16         {q0, q1}, [r0]              ; q0:q1   = z
    vld1.16         {q10, q11}, [r1]            ; q10:q11 = zbin

    vabs.s16        q12, q0                     ; x = abs(z)
    vabs.s16        q13, q1

    vcge.s16        q10, q12, q10               ; mask = (x >= zbin) ? 0xffff : 0
    vcge.s16        q11, q13, q11

    ; Fold the 256-bit mask down to one word. If no bit survives, every
    ; x is below zbin and both outputs are simply zero.
    vorr            q14, q10, q11
    vorr            d28, d28, d29
    vmov            r0, r1, d28
    orrs            r0, r0, r1
    beq             zero_output

    ldr             r0, [sp, #8]                ; round_ptr
    ldr             r12, [sp, #12]              ; quant_ptr

    ; Arithmetic shift by 15 replicates the sign bit:
    ; sz = 0 for z >= 0, sz = -1 (all ones) for z < 0.
    vshr.s16        q2, q0, #15
    vshr.s16        q3, q1, #15

    vld1.16         {q14, q15}, [r0]            ; round[0-15]
    vld1.16         {q8, q9}, [r12]             ; quant[0-15]

    vadd.i16        q12, q12, q14               ; x + round
    vadd.i16        q13, q13, q15

    ldr             r0, [sp, #4]                ; scan_mask

    ; vqdmulh yields (2 * a * b) >> 16; the extra >> 1 further down turns
    ; that into y = ((x + round) * quant) >> 16.
    vqdmulh.s16     q12, q12, q8
    vqdmulh.s16     q13, q13, q9

    vld1.16         {q0, q1}, [r0]              ; scan values (z is dead now)
    vceq.i16        q8, q8, q8                  ; q8 = all ones (quant is dead now)

    vshr.s16        q12, q12, #1
    vshr.s16        q13, q13, #1

    ; Restore the original sign: x1 = (y ^ sz) - sz.
    veor            q12, q12, q2
    veor            q13, q13, q3

    ldr             r12, [sp]                   ; dequant_ptr

    vsub.i16        q12, q12, q2
    vsub.i16        q13, q13, q3

    vand            q12, q12, q10               ; zero the lanes where x < zbin
    vand            q13, q13, q11

    vld1.16         {q2, q3}, [r12]             ; dequant[0-15] (sz is dead now)

    ; q14:q15 = 0xffff in every lane whose quantized coefficient is non-zero.
    vtst.16         q14, q12, q8
    vtst.16         q15, q13, q8

    vst1.16         {q12, q13}, [r2]            ; qcoeff = x1

    vand            q0, q0, q14                 ; keep scan values of non-zero lanes
    vand            q1, q1, q15

    ; Horizontal maximum of the 16 surviving scan values -> eob.
    vmax.u16        q0, q0, q1
    vmax.u16        d0, d0, d1
    vmovl.u16       q0, d0

    vmul.i16        q2, q2, q12                 ; x1 * dequant
    vmul.i16        q3, q3, q13

    vmax.u32        d0, d0, d1
    vpmax.u32       d0, d0, d0

    vst1.16         {q2, q3}, [r3]              ; dqcoeff = x1 * dequant

    vmov.32         r0, d0[0]                   ; return eob
    bx              lr

zero_output
    ; q10:q11 hold the comparison mask, which is all zero on this path,
    ; so they double as the zero source for both output blocks.
    vst1.16         {q10, q11}, [r2]            ; qcoeff = 0
    vst1.16         {q10, q11}, [r3]            ; dqcoeff = 0
    mov             r0, #0                      ; eob = 0

    bx              lr

    ENDP

    END