cor_h_vec_opt.s revision 41050cdb033641ddf26831d9272c0930f7b40a2d
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
@-----------------------------------------------------------------------
@ cor_h_vec_012_asm -- ARM (A32, GNU as syntax) version of:
@
@   static void cor_h_vec_012(
@       Word16 h[],               /* (i) scaled impulse response                  */
@       Word16 vec[],             /* (i) scaled vector (/8) to correlate with h[] */
@       Word16 track,             /* (i) track to use                             */
@       Word16 sign[],            /* (i) sign vector                              */
@       Word16 rrixix[][NB_POS],  /* (i) correlation of h[x] with h[x]            */
@       Word16 cor_1[],           /* (o) result of correlation (NB_POS elements)  */
@       Word16 cor_2[]            /* (o) result of correlation (NB_POS elements)  */
@   )
@
@ Arguments at entry:
@   r0       = h
@   r1       = vec
@   r2       = track
@   r3       = sign
@   [sp, #0] = rrixix   (read as [sp, #40] after the 40-byte register push)
@   [sp, #4] = cor_1    (read as [sp, #44])
@   [sp, #8] = cor_2    (read as [sp, #48])
@
@ Register roles inside the function:
@   r0  h (never written)          r1  vec (never written)
@   r2  pos, starts at track and advances by STEP per output element
@   r3  sign (never written)       r4  i, output index 0 .. NB_POS-1
@   r5  L_sum1 / cor_1 value       r6  L_sum2 / cor_2 value
@   r7  p0 = &rrixix[track][i]; the element ROW_BYTES further on is
@       rrixix[track + 1][i]
@   r8 - r12, r14  scratch
@
@ For each i the code computes, with pos = track + STEP * i:
@   L_sum1 = sum(k = 0 .. 63 - pos) h[k] * vec[pos + k]
@   L_sum2 = sum(k = 0 .. 62 - pos) h[k] * vec[pos + k + 1]
@   cor_1[i] = ((((L_sum1 << 2) + 0x8000) >> 16) * sign[pos]     >> 15)
@              + rrixix[track][i]
@   cor_2[i] = ((((L_sum2 << 2) + 0x8000) >> 16) * sign[pos + 1] >> 15)
@              + rrixix[track + 1][i]
@ All arithmetic is plain wrapping 32-bit (no saturating instructions are
@ used); only the low 16 bits of each result are stored.
@
@ Saved/restored: r4 - r12 and the return address. On return r2 and the
@ condition flags have been modified.
@
@ NOTE(review): the inner loop is bottom-tested, so it always executes at
@ least once. That matches a "for (j = 62 - pos; j >= 0; j--)" loop only
@ while pos <= 62, i.e. track <= 2 -- presumably guaranteed by the callers
@ (the C name ends in _012); confirm.
@-----------------------------------------------------------------------

        .equ    NB_POS,     16                  @ outputs per call; halfwords per rrixix row
        .equ    STEP,       4                   @ pos increment per output element
        .equ    ROW_SHIFT,  5                   @ log2(ROW_BYTES)
        .equ    ROW_BYTES,  32                  @ NB_POS * 2 bytes: one rrixix row
        .equ    J_START,    62                  @ inner-loop counter starts at 62 - pos
        .equ    ROUND_HALF, 0x8000              @ added before >> 16 to round
        .equ    ARG_RRIXIX, 40                  @ stack-argument offsets, valid after
        .equ    ARG_COR_1,  44                  @   the 10-register (40-byte) push
        .equ    ARG_COR_2,  48

@ One output element: writes cor_1[i] and cor_2[i], then i += 1 and
@ pos += STEP.  Uses r5, r6, r8 - r12, r14 as scratch; sp is not moved.
        .macro  COR_H_VEC_STEP
        MOV     r5, #0                          @ L_sum1 = 0
        MOV     r6, #0                          @ L_sum2 = 0
        MOV     r10, r0                         @ p1 = h
        ADD     r9, r1, r2, LSL #1              @ p2 = &vec[pos]
        RSB     r11, r2, #J_START               @ j = 62 - pos
1:
        LDRSH   r12, [r10], #2                  @ r12 = *p1++
        LDRSH   r8, [r9], #2                    @ r8  = *p2++
        LDRSH   r14, [r9]                       @ r14 = *p2 (the following vec sample)
        SUBS    r11, r11, #1                    @ j--
        MLA     r5, r12, r8, r5                 @ L_sum1 += r12 * r8
        MLA     r6, r12, r14, r6                @ L_sum2 += r12 * r14
        BGE     1b                              @ repeat while the decremented j >= 0

        LDRSH   r12, [r10]                      @ one extra term for L_sum1 only:
        MLA     r5, r12, r14, r5                @   L_sum1 += *p1 * *p2
        MOV     r14, #ROUND_HALF
        ADD     r5, r14, r5, LSL #2             @ (L_sum1 << 2) + 0x8000
        ADD     r6, r14, r6, LSL #2             @ (L_sum2 << 2) + 0x8000
        MOV     r5, r5, ASR #16                 @ corr1 = rounded high half
        MOV     r6, r6, ASR #16                 @ corr2 = rounded high half

        ADD     r9, r3, r2, LSL #1              @ r9 = &sign[pos]
        LDRSH   r10, [r9]                       @ sign[pos]
        LDRSH   r11, [r9, #2]                   @ sign[pos + 1]
        MUL     r12, r5, r10                    @ corr1 * sign[pos]
        MUL     r14, r6, r11                    @ corr2 * sign[pos + 1]

        LDRSH   r11, [r7, #ROW_BYTES]           @ *p3 = rrixix[track + 1][i]
        LDRSH   r10, [r7], #2                   @ *p0++ = rrixix[track][i]
        ADD     r5, r10, r12, ASR #15           @ cor_1 value
        ADD     r6, r11, r14, ASR #15           @ cor_2 value

        LDR     r9, [sp, #ARG_COR_1]            @ output bases are reloaded from the
        LDR     r8, [sp, #ARG_COR_2]            @   stack on every step
        ADD     r9, r9, r4, LSL #1              @ &cor_1[i]
        ADD     r8, r8, r4, LSL #1              @ &cor_2[i]
        STRH    r5, [r9]
        STRH    r6, [r8]

        ADD     r4, r4, #1                      @ i++
        ADD     r2, r2, #STEP                   @ pos += STEP
        .endm

        .text
        .arm                                    @ body uses ARM-state (A32) encodings
        .p2align 2
        .global cor_h_vec_012_asm
        .type   cor_h_vec_012_asm, %function    @ typed as a function symbol so the
                                                @   linker can treat calls to it as
                                                @   function calls (interworking)

cor_h_vec_012_asm:
        STMFD   sp!, {r4-r12, lr}               @ 10 registers = 40 bytes
        LDR     r4, [sp, #ARG_RRIXIX]           @ r4 = rrixix
        ADD     r7, r4, r2, LSL #ROW_SHIFT      @ p0 = rrixix[track]
        MOV     r4, #0                          @ i = 0

        @ The loop is unrolled by two: each pass produces two elements of
        @ cor_1[] and cor_2[].
.Lcor_pair_loop:
        COR_H_VEC_STEP
        COR_H_VEC_STEP
        CMP     r4, #NB_POS
        BLT     .Lcor_pair_loop

        LDMFD   sp!, {r4-r12, pc}               @ restore registers and return
        .size   cor_h_vec_012_asm, . - cor_h_vec_012_asm

        @ENDFUNC
        .end
147
148
149
150
151
152