@ cor_h_vec_neon.s revision f3664ae9369a861ffbc2354e8e93e48983802062
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@-----------------------------------------------------------------------
@ cor_h_vec_012_asm
@
@ ARM (A32, GNU as syntax) version of the C reference routine:
@
@   static void cor_h_vec_012(
@       Word16 h[],                /* (i) scaled impulse response                  */
@       Word16 vec[],              /* (i) scaled vector (/8) to correlate with h[] */
@       Word16 track,              /* (i) track to use                             */
@       Word16 sign[],             /* (i) sign vector                              */
@       Word16 rrixix[][NB_POS],   /* (i) correlation of h[x] with h[x]            */
@       Word16 cor_1[],            /* (o) result of correlation (NB_POS elements)  */
@       Word16 cor_2[]             /* (o) result of correlation (NB_POS elements)  */
@   )
@
@ Arguments on entry:
@   r0        h[]
@   r1        vec[]
@   r2        track
@   r3        sign[]
@   [sp, #0]  rrixix[][NB_POS]
@   [sp, #4]  cor_1[]   (following the prototype order)
@   [sp, #8]  cor_2[]   (following the prototype order)
@ After the prologue pushes 10 registers (40 bytes) these three stack
@ arguments are read at [sp, #40], [sp, #44] and [sp, #48].
@
@ For i = 0 .. NB_POS-1, with pos = track + i*STEP, the routine stores
@   cor_1[i] = ((round(L_sum1 << 2) * sign[pos])     >> 15) + rrixix[track][i]
@   cor_2[i] = ((round(L_sum2 << 2) * sign[pos + 1]) >> 15) + rrixix[track + 1][i]
@ where round(x) = (x + 0x8000) >> 16, and
@   L_sum1 = sum(k = 0 .. 63-pos) h[k] * vec[pos + k]
@   L_sum2 = sum(k = 0 .. 62-pos) h[k] * vec[pos + k + 1]
@ All arithmetic is plain 32-bit (MLA/MUL/LSL/ADD); nothing saturates and
@ only the low halfword of each result is stored.
@
@ Registers inside the loop:
@   r2 = pos, r4 = i, r7 = p0 (walks rrixix[track]), r5/r6 = L_sum1/L_sum2,
@   r8-r12 and r14 are scratch.
@
@ Saves/restores r4-r12 and lr; returns by popping the saved lr into pc.
@
@ NOTE(review): the inner loop tests its counter at the bottom, so it always
@ runs at least once. That matches the formulas above only while
@ 62 - pos >= 0, i.e. track <= 2 (largest pos is track + 60). The "_012"
@ in the name suggests callers only pass tracks 0..2 -- confirm.
@-----------------------------------------------------------------------

        .equ    NB_POS, 16                      @ outputs written to each of cor_1[] / cor_2[]
        .equ    STEP, 4                         @ pos advance per output
        .equ    ROW_BYTES, 32                   @ one rrixix row: NB_POS halfwords
        .equ    ARG_RRIXIX, 40                  @ stack args, offsets valid after the 40-byte push
        .equ    ARG_COR_1, 44
        .equ    ARG_COR_2, 48

@ One output position: computes cor_1[i] and cor_2[i], then i++ and pos += STEP.
@ In:      r0 = h, r1 = vec, r2 = pos, r3 = sign, r4 = i, r7 = p0
@ Out:     r2 = pos + STEP, r4 = i + 1, r7 = p0 + 1 element
@ Clobbers r5, r6, r8-r12, r14 and the flags.
        .macro  cor_h_vec_step
        MOV     r5, #0                          @ L_sum1 = 0
        MOV     r6, #0                          @ L_sum2 = 0
        ADD     r9, r1, r2, LSL #1              @ p2 = &vec[pos]
        MOV     r10, r0                         @ p1 = h
        RSB     r11, r2, #62                    @ j = 62 - pos
1:
        LDRSH   r12, [r10], #2                  @ r12 = *p1++
        LDRSH   r8, [r9], #2                    @ r8  = *p2++
        LDRSH   r14, [r9]                       @ r14 = *p2 (p2 not advanced)
        SUBS    r11, r11, #1                    @ j--, sets the flags tested by BGE
        MLA     r5, r12, r8, r5                 @ L_sum1 += h[k] * vec[pos + k]
        MLA     r6, r12, r14, r6                @ L_sum2 += h[k] * vec[pos + k + 1]
        BGE     1b                              @ MLA without S leaves the SUBS flags intact

        LDRSH   r12, [r10], #2                  @ *p1++
        MOV     r6, r6, LSL #2                  @ L_sum2 = (L_sum2 << 2)
        MLA     r5, r12, r14, r5                @ tail term: r14 still holds the last *p2
        MOV     r14, #0x8000                    @ rounding constant
        MOV     r5, r5, LSL #2                  @ L_sum1 = (L_sum1 << 2)
        ADD     r10, r6, r14
        ADD     r9, r5, r14
        MOV     r5, r9, ASR #16                 @ r5 = (L_sum1 + 0x8000) >> 16
        MOV     r6, r10, ASR #16                @ r6 = (L_sum2 + 0x8000) >> 16
        ADD     r9, r3, r2, LSL #1              @ address of sign[pos]
        ADD     r8, r7, #ROW_BYTES              @ p3 = p0 + one row, i.e. &rrixix[track + 1][i]
        LDRSH   r10, [r9], #2                   @ sign[pos]
        LDRSH   r11, [r9]                       @ sign[pos + 1]
        MUL     r12, r5, r10
        MUL     r14, r6, r11
        MOV     r5, r12, ASR #15                @ (corr1 * sign[pos]) >> 15
        MOV     r6, r14, ASR #15                @ (corr2 * sign[pos + 1]) >> 15
        LDR     r9, [r13, #ARG_COR_1]           @ output bases are reloaded from the stack each time
        LDR     r12, [r13, #ARG_COR_2]
        LDRSH   r10, [r7], #2                   @ *p0++
        LDRSH   r11, [r8]                       @ *p3 (r8 is recomputed from r7 each step)
        ADD     r9, r9, r4, LSL #1              @ address of element i in the [sp, #44] array
        ADD     r12, r12, r4, LSL #1            @ address of element i in the [sp, #48] array
        ADD     r5, r5, r10
        ADD     r6, r6, r11
        STRH    r5, [r9]                        @ store low halfword
        STRH    r6, [r12]
        ADD     r4, r4, #1                      @ i++
        ADD     r2, r2, #STEP                   @ pos += STEP
        .endm

        .section .text
        .global cor_h_vec_012_asm
        .type   cor_h_vec_012_asm, %function    @ mark the symbol as a function in the ELF symbol table

cor_h_vec_012_asm:

        STMFD   r13!, {r4 - r12, r14}           @ 10 registers = 40 bytes
        LDR     r4, [r13, #ARG_RRIXIX]          @ load rrixix[][NB_POS]
        ADD     r7, r4, r2, LSL #5              @ r7 --- p0 = rrixix[track] (row = 32 bytes)
        MOV     r4, #0                          @ i = 0

        @ Two outputs per pass, so the exit test runs on even i only.
.Lnext_pair:
        cor_h_vec_step
        cor_h_vec_step
        CMP     r4, #NB_POS
        BLT     .Lnext_pair

        LDMFD   r13!, {r4 - r12, r15}           @ restore and return (saved lr -> pc)
        .size   cor_h_vec_012_asm, . - cor_h_vec_012_asm

        .end
147
148
149
150
151
152