@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.

#include "webrtc/system_wrappers/interface/asm_defines.h"

@-----------------------------------------------------------------------------
@ WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
@
@ For i in [0, length):
@   out_vector[i] = (int16_t)((in_vector1[i] * in_vector1_scale
@                              + in_vector2[i] * in_vector2_scale
@                              + round) >> right_shifts)
@ where round = (1 << right_shifts) >> 1. Products and the sum are 32-bit;
@ the result is truncated (not saturated) to 16 bits.
@
@ In:    r0             = in_vector1 (16-bit elements)
@        r1             = in_vector1_scale (low 16 bits used)
@        r2             = in_vector2 (16-bit elements)
@        r3             = in_vector2_scale (low 16 bits used)
@        [sp, #0]       = right_shifts (read as a signed halfword)
@        [sp, #4]       = out_vector (16-bit elements)
@        [sp, #8]       = length; nothing is written when length <= 0
@        (stack offsets are relative to sp on entry, i.e. before the push)
@ Clobb: r0, r2 (advanced past the consumed input), flags,
@        q0-q3 and q12-q15 (only when length >= 8).
@        r4-r9 are saved and restored.
@ NOTE(review): r0 is never set to a return value here. If the C prototype
@ returns a status code, confirm callers do not rely on it.
@-----------------------------------------------------------------------------
GLOBAL_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
.align  2
DEFINE_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
  push {r4-r9}                @ 24 bytes: stack arguments now start at sp + 24.

  ldr r4, [sp, #32]           @ length
  ldr r5, [sp, #28]           @ out_vector
  ldrsh r6, [sp, #24]         @ right_shifts

  cmp r4, #0
  ble END                     @ Return if length <= 0.

  cmp r4, #8
  blt SET_ROUND_VALUE         @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1             @ in_vector1_scale
  vdup.16 d27, r3             @ in_vector2_scale

  @ Neon instructions can only right shift by an immediate value. To shift
  @ right by a register value, we have to do a left shift by the negative
  @ value. The shift vector is consumed by vrshl.s32, so it is replicated per
  @ 32-bit lane (vrshl only reads the least significant byte of each lane).
  rsb r7, r6, #0
  vdup.32 q12, r7             @ -right_shifts

  bic r7, r4, #7              @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!   @ in_vector1[]
  vld1.16 {d30, d31}, [r2]!   @ in_vector2[]
  vmull.s16 q0, d28, d26      @ in_vector1[0..3] * in_vector1_scale
  vmull.s16 q1, d29, d26      @ in_vector1[4..7] * in_vector1_scale
  vmull.s16 q2, d30, d27      @ in_vector2[0..3] * in_vector2_scale
  vmull.s16 q3, d31, d27      @ in_vector2[4..7] * in_vector2_scale
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12           @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0            @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!     @ Does not touch the flags set by subs.
  bgt LOOP_UNROLLED_BY_8

  @ Only the length % 8 leftover elements remain. The mask must be 7: a wider
  @ mask would make the scalar loop reprocess elements already handled above
  @ and run past the end of the buffers.
  ands r4, #7                 @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                  @ r9 = (1 << right_shifts) >> 1, rounding term.

LOOP_NO_UNROLLING:
  ldrh r7, [r0], #2           @ in_vector1[i]; smulbb sign-interprets bits 0-15.
  ldrh r8, [r2], #2           @ in_vector2[i]
  smulbb r7, r7, r1           @ in_vector1[i] * in_vector1_scale
  smulbb r8, r8, r3           @ in_vector2[i] * in_vector2_scale
  subs r4, #1                 @ Flags stay live until bne: no flag-setters below.
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2           @ Store the low 16 bits to out_vector[i].
  bne LOOP_NO_UNROLLING

END:
  pop {r4-r9}
  bx  lr