convolve_opt.s revision f3664ae9369a861ffbc2354e8e93e48983802062
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16
17
18@*void Convolve (
19@*    Word16 x[],        /* (i)     : input vector                           */
20@*    Word16 h[],        /* (i)     : impulse response                       */
21@*    Word16 y[],        /* (o)     : output vector                          */
22@*    Word16 L           /* (i)     : vector size                            */
23@*)
24@  r0 --- x[]
25@  r1 --- h[]
26@  r2 --- y[]
@  r3 --- L  (not read: r3 is reused as the output index n, and exactly 64 outputs are always produced)
28
@-----------------------------------------------------------------------
@ void Convolve_asm(Word16 x[], Word16 h[], Word16 y[], Word16 L)
@
@ Syntax: GNU as, ARM (32-bit) instruction set.
@
@ For n = 0 .. CONV_LEN-1:
@       s    = x[0]*h[n] + x[1]*h[n-1] + ... + x[n]*h[0]
@       y[n] = low 16 bits of (((s << 1) + CONV_ROUND) >> 16)
@
@ The products are accumulated with MUL/MLA in a 32-bit register, so the
@ sum wraps modulo 2^32; no saturation is performed anywhere.
@
@ In:     r0 = x (16-bit signed samples), r1 = h, r2 = y, r3 = L
@         NOTE: the incoming r3 is never read. It is overwritten with
@         n = 0 right after the prologue and the routine always writes
@         CONV_LEN (64) outputs.
@ Out:    y[0 .. 63] written; no return value is set up.
@ Clobb:  r2 (advanced past y[63]), r3, flags.
@         r4-r12 and r14 are saved on entry and restored on exit
@         (10 words of stack).
@
@ Register roles inside the routine:
@         r3      n, index of the output being computed
@         r4      tmpH, walks h[] downwards from h[n]
@         r5      number of taps still to accumulate (also reused as the
@                 scratch for the rounded result of outputs 0 and 3 of
@                 each group)
@         r6      tmpX, walks x[] upwards from x[0]
@         r8      s, the 32-bit accumulator
@         r9,r10  first  x/h operand pair
@         r12,r14 second x/h operand pair
@         r11     CONV_ROUND
@
@ The main loop is unrolled four outputs deep. Output n needs n+1 taps;
@ each group first peels (n+1) mod 4 taps (1, 2, 3, 0 respectively) so
@ that the shared inner loop can always consume exactly four taps per
@ iteration.
@-----------------------------------------------------------------------

	.section  .text
        .global   Convolve_asm
        .type     Convolve_asm, %function           @ mark as code so the linker/tools treat it as a function

        .equ      CONV_LEN,   64                    @ number of outputs produced
        .equ      CONV_ROUND, 0x8000                @ rounding term added before taking the high half

@ Accumulate the remaining taps four at a time.
@ On entry r5 = taps left (a multiple of 4, possibly 0), r4/r6 point at
@ the next h/x samples, r8 holds the partial sum.  Falls out at \done.
        .macro    CONV_MAC4_LOOP top, done
\top:
        CMP            r5, #0
        BLE            \done
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        MLA            r8, r12, r14, r8
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        SUBS           r5, r5, #4                        @ four taps consumed
        MLA            r8, r12, r14, r8
        B              \top
\done:
        .endm

@ y[n] = ((s << 1) + CONV_ROUND) >> 16, then n++.
@ \tmp is the scratch register that receives the rounded value; STRH
@ stores only its low 16 bits.
        .macro    CONV_ROUND_STORE tmp
        ADD            \tmp, r11, r8, LSL #1
        MOV            \tmp, \tmp, LSR #16               @ extract_h(s)
        ADD            r3, r3, #1                        @ n++
        STRH           \tmp, [r2], #2                    @ y[n]
        .endm

Convolve_asm:

        STMFD          r13!, {r4 - r12, r14}
        MOV            r3,  #0                           @ n = 0 (discards the caller's L)
        MOV            r11, #CONV_ROUND

.Lconv_group:
        @ ---- output n, n % 4 == 0: peel one tap -----------------------
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0                            @ tmpX = x
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        SUB            r5, r5, #1                        @ one tap peeled
        MUL            r8,  r9, r10                      @ s = x[0] * h[n]

        CONV_MAC4_LOOP .Lconv_mac0, .Lconv_store0
        CONV_ROUND_STORE r5

        @ ---- output n, n % 4 == 1: peel two taps ----------------------
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0                            @ tmpX = x
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--

        MUL            r8, r9, r10
        SUB            r5, r5, #2                        @ two taps peeled
        MLA            r8, r12, r14, r8

        CONV_MAC4_LOOP .Lconv_mac1, .Lconv_store1
        CONV_ROUND_STORE r8

        @ ---- output n, n % 4 == 2: peel three taps --------------------
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0                            @ tmpX = x
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MUL            r8, r9, r10
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        MLA            r8, r12, r14, r8
        SUB            r5, r5, #3                        @ three taps peeled
        MLA            r8, r9, r10, r8

        CONV_MAC4_LOOP .Lconv_mac2, .Lconv_store2
        CONV_ROUND_STORE r8

        @ ---- output n, n % 4 == 3: n + 1 is a multiple of 4, no peel --
        ADD            r5, r3, #1                        @ i = n + 1
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        MOV            r6, r0                            @ tmpX = x
        MOV            r8, #0                            @ s = 0

        CONV_MAC4_LOOP .Lconv_mac3, .Lconv_store3
        CONV_ROUND_STORE r5

        CMP            r3, #CONV_LEN
        BLT            .Lconv_group

Convolve_asm_end:

        LDMFD      r13!, {r4 - r12, r15}                 @ restore and return (saved r14 is loaded into pc)

        .size     Convolve_asm, . - Convolve_asm

        @ENDFUNC
        .END
185
186
187