;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_fast_quantize_b_neon_func|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_fast_quantize_b_neon_func
;
; Quantizes one block of 16 signed 16-bit coefficients, writes the
; quantized and the dequantized values, and returns the end-of-block
; position derived from the scan mask.
;
; In:
;   r0          short *coeff_ptr     16 input coefficients (z)
;   r1          short *zbin_ptr      16 zero-bin thresholds
;   r2          short *qcoeff_ptr    16 quantized outputs
;   r3          short *dqcoeff_ptr   16 dequantized outputs
;   [sp, #0]    short *dequant_ptr   16 dequantization factors
;   [sp, #4]    short *scan_mask     16 entries (rvsplus1_scan_order)
;   [sp, #8]    short *round_ptr     16 rounding terms
;   [sp, #12]   short *quant_ptr     16 quantization factors
;
; Out:
;   r0          int eob, returned by value: the largest scan_mask entry
;               (compared as unsigned 16-bit) among the lanes whose
;               quantized value is non-zero, or 0 when every lane is
;               below its zbin threshold.
;
; Clobbers: r0, r1, r12, q0-q3, q8-q15, flags.
;
; Only caller-saved NEON registers are used. d8-d15 (q4-q7) are
; callee-saved under the AAPCS and are deliberately left untouched, so
; no save/restore is needed and the stack argument offsets above are
; relative to the entry sp for the whole routine.
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_neon_func| PROC
    vld1.16         {q0, q1}, [r0]              ;load z
    vld1.16         {q10, q11}, [r1]            ;load zbin

    vabs.s16        q12, q0                     ;calculate x = abs(z)
    vabs.s16        q13, q1

    vcge.s16        q10, q12, q10               ;mask = (x >= zbin), all 1 per passing lane
    vcge.s16        q11, q13, q11

    ;if x<zbin in every lane (q10 & q11 are all 0), go to zero_output
    vorr.s16        q14, q10, q11               ;fold 256 mask bits down to 64 ...
    vorr.s16        d28, d28, d29
    vmov            r0, r1, d28                 ;... then to two core registers
    orrs            r0, r0, r1                  ;Z set when no lane passed
    beq             zero_output

    ldr             r0, [sp, #8]                ;load round_ptr
    ldr             r12, [sp, #12]              ;load quant_ptr

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15                 ; sz
    vshr.s16        q3, q1, #15

    vld1.s16        {q14, q15}, [r0]            ;load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r12]             ;load quant_ptr [0-15]

    vadd.s16        q12, q12, q14               ;x + Round
    vadd.s16        q13, q13, q15

    ldr             r0, [sp, #4]                ;load rvsplus1_scan_order ptr

    vqdmulh.s16     q12, q12, q8                ;y = ((Round + abs(z)) * Quant) >> 16
    vqdmulh.s16     q13, q13, q9                ;(vqdmulh doubles the product, undone below)

    vld1.16         {q0, q1}, [r0]              ;load rvsplus1_scan_order (z is dead now)
    vceq.s16        q8, q8, q8                  ;set q8 to all 1

    vshr.s16        q12, q12, #1                ;right shift 1 after vqdmulh
    vshr.s16        q13, q13, #1

    ;modify data to have its original sign
    veor.s16        q12, q12, q2                ; y^sz
    veor.s16        q13, q13, q3

    ldr             r12, [sp]                   ;load dequant_ptr

    vsub.s16        q12, q12, q2                ; x1 = (y^sz) - sz = (y^sz) - (-1) (two's complement)
    vsub.s16        q13, q13, q3

    vand.s16        q12, q12, q10               ;mask off x1 elements that were below zbin
    vand.s16        q13, q13, q11

    vld1.s16        {q2, q3}, [r12]             ;load dequant_ptr[i] (sz is dead now)

    vtst.16         q14, q12, q8                ;now find eob
    vtst.16         q15, q13, q8                ;non-zero element is set to all 1 in q14, q15

    vst1.s16        {q12, q13}, [r2]            ;store: qcoeff = x1

    vand            q0, q0, q14                 ;get all valid number from rvsplus1_scan_order array
    vand            q1, q1, q15

    vmax.u16        q0, q0, q1                  ;find maximum value in q0, q1
    vmax.u16        d0, d0, d1
    vmovl.u16       q0, d0                      ;widen the 4 remaining candidates to 32 bits

    vmul.s16        q2, q2, q12                 ;x * Dequant
    vmul.s16        q3, q3, q13

    vmax.u32        d0, d0, d1
    vpmax.u32       d0, d0, d0                  ;d0[0] = overall maximum

    vst1.s16        {q2, q3}, [r3]              ;store dqcoeff = x * Dequant

    vmov.32         r0, d0[0]                   ;return eob
    bx              lr

zero_output
    ;q10, q11 hold the all-zero compare masks here, reuse them as zeros
    vst1.s16        {q10, q11}, [r2]            ; qcoeff = 0
    vst1.s16        {q10, q11}, [r3]            ; dqcoeff = 0
    mov             r0, #0                      ; eob = 0

    bx              lr

    ENDP

    END