@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.

#include "webrtc/system_wrappers/interface/asm_defines.h"

@ ----------------------------------------------------------------------------
@ WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
@
@ For every index i in [0, length) computes, on signed 16-bit elements,
@
@   out_vector[i] = (in_vector1[i] * in_vector1_scale
@                    + in_vector2[i] * in_vector2_scale
@                    + ((1 << right_shifts) >> 1)) >> right_shifts
@
@ using 32-bit intermediates and storing only the low 16 bits of each result.
@
@ Syntax: GNU as, ARM/Thumb-2 unified style, passed through the C preprocessor.
@
@ Arguments as read by this code:
@   r0        in_vector1        pointer to 16-bit input elements
@   r1        in_vector1_scale  only the low 16 bits are used
@   r2        in_vector2        pointer to 16-bit input elements
@   r3        in_vector2_scale  only the low 16 bits are used
@   [sp, #0]  right_shifts      read as a signed halfword
@   [sp, #4]  out_vector        pointer to 16-bit output elements
@   [sp, #8]  length            number of elements, read as a 32-bit word
@ (stack offsets are relative to sp on entry, before the push below)
@
@ Register roles:
@   r4  length, later the remaining element count for the scalar tail loop
@   r5  out_vector write pointer
@   r6  right_shifts
@   r7  unrolled loop counter, later scalar temporary
@   r8  scalar temporary
@   r9  rounding constant for the scalar loop
@   q12 -right_shifts, q13 (d26, d27) the two scale factors
@   q14, q15 input data, q0-q3 products and sums
@
@ Clobbers: r0 and r2 (advanced past the consumed elements), flags,
@           q0-q3 and q12-q15. r4-r9 are saved and restored.
@ No return value is loaded into r0.
@
@ NOTE(review): the scalar path shifts by the register value in r6, so this
@ code presumably expects right_shifts >= 0 -- confirm against the callers.
@ ----------------------------------------------------------------------------
GLOBAL_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
.align  2
DEFINE_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
  push {r4-r9}                @ 6 registers = 24 bytes: stack args are now at sp + 24.

  ldr r4, [sp, #32]           @ length
  ldr r5, [sp, #28]           @ out_vector
  ldrsh r6, [sp, #24]         @ right_shifts

  cmp r4, #0
  ble END                     @ Return if length <= 0.

  cmp r4, #8
  blt SET_ROUND_VALUE         @ Fewer than 8 elements: scalar loop handles all of them.

  vdup.16 d26, r1             @ in_vector1_scale
  vdup.16 d27, r3             @ in_vector2_scale

  @ Neon instructions can only right shift by an immediate value. To shift right
  @ by a register value, we have to do a left shift by the negative value.
  rsb r7, r6, #0
  vdup.16 q12, r7             @ -right_shifts

  bic r7, r4, #7              @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  @ Invariant: r7 = elements still to be processed by this loop, a positive
  @ multiple of 8.
  vld1.16 {d28, d29}, [r0]!   @ in_vector1[]
  vld1.16 {d30, d31}, [r2]!   @ in_vector2[]
  vmull.s16 q0, d28, d26      @ in_vector1[0..3] * in_vector1_scale
  vmull.s16 q1, d29, d26      @ in_vector1[4..7] * in_vector1_scale
  vmull.s16 q2, d30, d27      @ in_vector2[0..3] * in_vector2_scale
  vmull.s16 q3, d31, d27      @ in_vector2[4..7] * in_vector2_scale
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12           @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0            @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!     @ Vector store leaves the flags from subs intact.
  bgt LOOP_UNROLLED_BY_8

  @ The unrolled loop consumed length & ~7 elements, so only the low three
  @ bits of length remain. Masking with a wider constant would make the
  @ scalar loop run past the end of all three buffers.
  ands r4, #7                 @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                  @ r9 = (1 << right_shifts) >> 1, i.e. 0 when right_shifts == 0.

LOOP_NO_UNROLLING:
  @ Invariant: r4 = elements still to be processed, always > 0 here.
  ldrh  r7, [r0], #2          @ in_vector1[i]; smulbb reads the low half as signed.
  ldrh  r8, [r2], #2          @ in_vector2[i]
  smulbb r7, r7, r1           @ in_vector1[i] * in_vector1_scale
  smulbb r8, r8, r3           @ in_vector2[i] * in_vector2_scale
  subs r4, #1                 @ Flags are consumed by the bne below.
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2           @ Store the low 16 bits.
  bne LOOP_NO_UNROLLING

END:
  pop {r4-r9}
  bx  lr
83