@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.
#include "webrtc/system_wrappers/interface/asm_defines.h"

@ ----------------------------------------------------------------------------
@ WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
@
@ For every i in [0, length) this routine stores a 16-bit result:
@
@   out_vector[i] = (in_vector1[i] * in_vector1_scale
@                    + in_vector2[i] * in_vector2_scale
@                    + round) >> right_shifts
@
@ where round = (1 << right_shifts) >> 1 and only the low 16 bits of the
@ shifted sum are kept.
@
@ Arguments (32-bit ARM procedure call standard):
@   r0          in_vector1        pointer to 16-bit input samples
@   r1          in_vector1_scale  16-bit multiplier for in_vector1
@   r2          in_vector2        pointer to 16-bit input samples
@   r3          in_vector2_scale  16-bit multiplier for in_vector2
@   [sp, #0]    right_shifts      read as a signed halfword
@   [sp, #4]    out_vector        pointer to 16-bit output samples
@   [sp, #8]    length            number of elements; <= 0 returns at once
@ (stack offsets are relative to sp at function entry)
@
@ NOTE(review): presumably mirrors the C prototype of
@ WebRtcSpl_ScaleAndAddVectorsWithRound in vector_scaling_operations.c;
@ confirm against the header. Assumes 0 <= right_shifts < 32 - TODO confirm.
@
@ Register use inside the function:
@   r4  remaining length / tail counter     r7  scratch, vector loop counter
@   r5  out_vector write pointer            r8  scratch
@   r6  right_shifts                        r9  rounding constant (tail loop)
@   q13 (d26, d27)  the two scale factors, one per half
@   q12             -right_shifts, used as the per-lane shift count
@   q14, q15        loaded input samples;   q0-q3  32-bit products and sums
@
@ Clobbers: r0, r2 (advanced past the data), q0-q3, q12-q15, flags.
@ r4-r9 are saved on entry and restored on exit. No callee-saved NEON
@ register (d8-d15) is touched.
@ ----------------------------------------------------------------------------
GLOBAL_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
.align  2
DEFINE_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
  push {r4-r9}                @ 6 registers = 24 bytes; stack args now at +24.

  ldr r4, [sp, #32]           @ length
  ldr r5, [sp, #28]           @ out_vector
  ldrsh r6, [sp, #24]         @ right_shifts

  cmp r4, #0
  ble END                     @ Return if length <= 0.

  cmp r4, #8
  blt SET_ROUND_VALUE         @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1             @ in_vector1_scale
  vdup.16 d27, r3             @ in_vector2_scale

  @ Neon instructions can only shift right by an immediate value. To shift
  @ right by a register value, we do a left shift by the negative value.
  @ The shift count is taken from the least significant byte of each lane,
  @ so duplicating the 16-bit value still yields the right count for the
  @ 32-bit lanes used below.
  rsb r7, r6, #0
  vdup.16 q12, r7             @ -right_shifts

  bic r7, r4, #7              @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!   @ in_vector1[], 8 samples
  vld1.16 {d30, d31}, [r2]!   @ in_vector2[], 8 samples
  vmull.s16 q0, d28, d26      @ 32-bit products in_vector1[i] * scale1
  vmull.s16 q1, d29, d26
  vmull.s16 q2, d30, d27      @ 32-bit products in_vector2[i] * scale2
  vmull.s16 q3, d31, d27
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12           @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0            @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!     @ Store does not alter the flags set by subs.
  bgt LOOP_UNROLLED_BY_8

  @ Only the low three bits of length are left to process. The mask must be
  @ 7: a wider mask would make the scalar loop run again over elements that
  @ the vector loop already handled and continue past the end of the buffers.
  ands r4, #7                 @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                  @ round = (1 << right_shifts) >> 1

LOOP_NO_UNROLLING:
  ldrh r7, [r0], #2           @ in_vector1[i]
  ldrh r8, [r2], #2           @ in_vector2[i]
  smulbb r7, r7, r1           @ Signed 16 x 16 multiply of the low halfwords.
  smulbb r8, r8, r3
  subs r4, #1                 @ Flags from here are consumed by bne below.
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2           @ Keep the low 16 bits.
  bne LOOP_NO_UNROLLING

END:
  pop {r4-r9}
  bx  lr