residu_asm_opt.s revision 956c553ab0ce72f8074ad0fda2ffd66a0305700c
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16@
@-----------------------------------------------------------------------
@void Residu(
@	    Word16 a[],                           /* (i) Q12 : prediction coefficients a[0..16] */
@	    Word16 x[],                           /* (i)     : speech (values x[-16..-1] are needed) */
@	    Word16 y[],                           /* (o) x2  : residual signal       */
@	    Word16 lg                             /* (i)     : size of filtering     */
@	   )
@
@ Exported as Residu_opt. Computes, for each output sample,
@       acc  = a[0]*x[n] + a[1]*x[n-1] + ... + a[16]*x[n-16]
@       y[n] = (sat(sat(acc << 5) + 0x8000)) >> 16
@
@ In:    r0 = a[], r1 = x[], r2 = y[], r3 = lg
@ Out:   y[0 .. 4*(lg/4) - 1] written; no return value is set
@ Saved: r4-r12 and r14 are pushed on entry and restored on return
@ Stack: 40 bytes of saved registers + 20 bytes of spilled coefficients
@
@ Notes:
@  * One loop pass produces four outputs and the pass count is lg >> 2,
@    so when lg is not a multiple of 4 the trailing lg & 3 outputs are
@    not written.
@  * lg < 4 (including zero and negative values) returns immediately
@    without touching y[]. Without this guard the loop would start with
@    a pass count of 0, decrement it to -1 and keep running.
@  * x[] is read with 32-bit LDR at x+4, x, x-4, ... x-32.
@    NOTE(review): this presumably requires x to be 4-byte aligned and the
@    data to be little-endian (low halfword = lower index) - confirm
@    against the callers / target configuration.
@  * Uses the SMULxy / SMLAxy / QADD / QDADD DSP instructions.
@
@ Register roles inside the loop:
@    r0        y write pointer
@    r1        x read pointer (walks backwards, rewound at the end of a pass)
@    r2        current pair of x samples / scratch
@    r3,r4     accumulators i1 (y[n+0]) and i2 (y[n+1])
@    r11,r12   accumulators i3 (y[n+2]) and i4 (y[n+3])
@    r5,r6,r7  a0|a1, a2|a3, a4|a5   (top half | bottom half)
@    r8,r9,r10 rotating coefficient pairs, reloaded from the stack spill
@    r14       loop counter (top half) | a16 (bottom half)
@-----------------------------------------------------------------------

        .section  .text
        .global   Residu_opt

Residu_opt:

        CMP     r3, #4                               @ fewer than 4 samples -> zero loop passes
        BXLT    lr                                   @ nothing to do; nothing has been pushed yet

        STMFD   r13!, {r4 - r12, r14}

        @ Pack the 16-bit coefficients two per register: even index in the
        @ top half, odd index in the bottom half. LDRH zero-extends, so the
        @ ORR cannot disturb the top half.
        LDRH    r5, [r0], #2
        LDRH    r6, [r0], #2
        ORR     r5, r6, r5, LSL #16                  @ r5  --- a0,  a1

        LDRH    r6, [r0], #2
        LDRH    r7, [r0], #2
        ORR     r6, r7, r6, LSL #16                  @ r6  --- a2,  a3

        LDRH    r7, [r0], #2
        LDRH    r8, [r0], #2
        ORR     r7, r8, r7, LSL #16                  @ r7  --- a4,  a5

        LDRH    r8, [r0], #2
        LDRH    r9, [r0], #2
        ORR     r8, r9, r8, LSL #16                  @ r8  --- a6,  a7

        LDRH    r9, [r0], #2
        LDRH    r10, [r0], #2
        ORR     r9, r10, r9, LSL #16                 @ r9  --- a8,  a9

        LDRH    r10, [r0], #2
        LDRH    r11, [r0], #2
        ORR     r10, r11, r10, LSL #16               @ r10 --- a10, a11

        LDRH    r11, [r0], #2
        LDRH    r12, [r0], #2
        ORR     r11, r12, r11, LSL #16               @ r11 --- a12, a13

        LDRH    r12, [r0], #2
        LDRH    r4, [r0], #2
        ORR     r12, r4, r12, LSL #16                @ r12 --- a14, a15

        @ Spill a6..a15 so r8-r12 can be reused inside the loop:
        @   [sp, #0]  = a6 |a7     [sp, #4]  = a8 |a9     [sp, #8] = a10|a11
        @   [sp, #12] = a12|a13    [sp, #16] = a14|a15
        STMFD   r13!, {r8 - r12}
        LDRH    r4, [r0], #2                         @ r4 = a16
        MOV     r14, r3, ASR #2                      @ one loop pass produces 4 outputs
        ADD     r1, r1, #4                           @ r1 -> x[2], first word read is x[3]|x[2]
        MOV     r0, r2                               @ r0 = y write pointer (r2 becomes scratch)
        ORR     r14, r4, r14, LSL #16                @ r14 --- loopnum, a16

residu_loop:

        LDR     r10, [r1], #-4                       @ r10 --- x[3], x[2]
        LDR     r2,  [r1], #-4                       @ r2  --- x[1], x[0]

        SMULTB  r3, r5, r2                           @ i1(0)  --- r3  = x[0] * a0
        SMULTT  r4, r5, r2                           @ i2(0)  --- r4  = x[1] * a0
        SMULTB  r11, r5, r10                         @ i3(0)  --- r11 = x[2] * a0
        SMULTT  r12, r5, r10                         @ i4(0)  --- r12 = x[3] * a0

        SMLABB  r4, r5, r2, r4                       @ i2(1)  --- r4  += x[0] * a1
        SMLABT  r11, r5, r2, r11                     @ i3(1)  --- r11 += x[1] * a1
        SMLABB  r12, r5, r10, r12                    @ i4(1)  --- r12 += x[2] * a1

        SMLATB  r11, r6, r2, r11                     @ i3(2)  --- r11 += x[0] * a2
        SMLATT  r12, r6, r2, r12                     @ i4(2)  --- r12 += x[1] * a2
        SMLABB  r12, r6, r2, r12                     @ i4(3)  --- r12 += x[0] * a3

        LDR     r2, [r1], #-4                        @ r2 --- x[-1], x[-2]

        SMLABT  r3, r5, r2, r3                       @ i1(1)  --- r3  += x[-1] * a1
        SMLATT  r4, r6, r2, r4                       @ i2(2)  --- r4  += x[-1] * a2
        SMLABT  r11, r6, r2, r11                     @ i3(3)  --- r11 += x[-1] * a3
        SMLATT  r12, r7, r2, r12                     @ i4(4)  --- r12 += x[-1] * a4
        SMLATB  r3, r6, r2, r3                       @ i1(2)  --- r3  += x[-2] * a2

        SMLABB  r4, r6, r2, r4                       @ i2(3)  --- r4  += x[-2] * a3
        SMLATB  r11, r7, r2, r11                     @ i3(4)  --- r11 += x[-2] * a4
        SMLABB  r12, r7, r2, r12                     @ i4(5)  --- r12 += x[-2] * a5

        LDR     r2, [r1], #-4                        @ r2 --- x[-3], x[-4]
        SMLABT  r3, r6, r2, r3                       @ i1(3)  --- r3  += x[-3] * a3
        SMLATT  r4, r7, r2, r4                       @ i2(4)  --- r4  += x[-3] * a4
        SMLABT  r11, r7, r2, r11                     @ i3(5)  --- r11 += x[-3] * a5
        SMLATT  r12, r8, r2, r12                     @ i4(6)  --- r12 += x[-3] * a6
        SMLATB  r3, r7, r2, r3                       @ i1(4)  --- r3  += x[-4] * a4
        SMLABB  r4, r7, r2, r4                       @ i2(5)  --- r4  += x[-4] * a5
        SMLATB  r11, r8, r2, r11                     @ i3(6)  --- r11 += x[-4] * a6
        SMLABB  r12, r8, r2, r12                     @ i4(7)  --- r12 += x[-4] * a7

        LDR     r2, [r1], #-4                        @ r2 --- x[-5], x[-6]
        SMLABT  r3, r7, r2, r3                       @ i1(5)  --- r3  += x[-5] * a5
        SMLATT  r4, r8, r2, r4                       @ i2(6)  --- r4  += x[-5] * a6
        SMLABT  r11, r8, r2, r11                     @ i3(7)  --- r11 += x[-5] * a7
        SMLATT  r12, r9, r2, r12                     @ i4(8)  --- r12 += x[-5] * a8
        SMLATB  r3, r8, r2, r3                       @ i1(6)  --- r3  += x[-6] * a6
        SMLABB  r4, r8, r2, r4                       @ i2(7)  --- r4  += x[-6] * a7
        SMLATB  r11, r9, r2, r11                     @ i3(8)  --- r11 += x[-6] * a8
        SMLABB  r12, r9, r2, r12                     @ i4(9)  --- r12 += x[-6] * a9
        LDR     r10, [r13, #8]                       @ r10 --- a10, a11 (x[3]|x[2] no longer needed)

        LDR     r2, [r1], #-4                        @ r2 --- x[-7], x[-8]
        SMLABT  r3, r8, r2, r3                       @ i1(7)  --- r3  += x[-7] * a7
        SMLATT  r4, r9, r2, r4                       @ i2(8)  --- r4  += x[-7] * a8
        SMLABT  r11, r9, r2, r11                     @ i3(9)  --- r11 += x[-7] * a9
        SMLATT  r12, r10, r2, r12                    @ i4(10) --- r12 += x[-7] * a10
        SMLATB  r3, r9, r2, r3                       @ i1(8)  --- r3  += x[-8] * a8
        SMLABB  r4, r9, r2, r4                       @ i2(9)  --- r4  += x[-8] * a9
        SMLATB  r11, r10, r2, r11                    @ i3(10) --- r11 += x[-8] * a10
        SMLABB  r12, r10, r2, r12                    @ i4(11) --- r12 += x[-8] * a11
        LDR     r8, [r13, #12]                       @ r8 --- a12, a13 (a6|a7 fully consumed)

        LDR     r2, [r1], #-4                        @ r2 --- x[-9], x[-10]
        SMLABT  r3, r9, r2, r3                       @ i1(9)  --- r3  += x[-9]  * a9
        SMLATT  r4, r10, r2, r4                      @ i2(10) --- r4  += x[-9]  * a10
        SMLABT  r11, r10, r2, r11                    @ i3(11) --- r11 += x[-9]  * a11
        SMLATT  r12, r8, r2, r12                     @ i4(12) --- r12 += x[-9]  * a12
        SMLATB  r3, r10, r2, r3                      @ i1(10) --- r3  += x[-10] * a10
        SMLABB  r4, r10, r2, r4                      @ i2(11) --- r4  += x[-10] * a11
        SMLATB  r11, r8, r2, r11                     @ i3(12) --- r11 += x[-10] * a12
        SMLABB  r12, r8, r2, r12                     @ i4(13) --- r12 += x[-10] * a13
        LDR     r9, [r13, #16]                       @ r9 --- a14, a15 (a8|a9 fully consumed)

        LDR     r2, [r1], #-4                        @ r2 --- x[-11], x[-12]
        SMLABT  r3, r10, r2, r3                      @ i1(11) --- r3  += x[-11] * a11
        SMLATT  r4, r8, r2, r4                       @ i2(12) --- r4  += x[-11] * a12
        SMLABT  r11, r8, r2, r11                     @ i3(13) --- r11 += x[-11] * a13
        SMLATT  r12, r9, r2, r12                     @ i4(14) --- r12 += x[-11] * a14
        SMLATB  r3, r8, r2, r3                       @ i1(12) --- r3  += x[-12] * a12
        SMLABB  r4, r8, r2, r4                       @ i2(13) --- r4  += x[-12] * a13
        SMLATB  r11, r9, r2, r11                     @ i3(14) --- r11 += x[-12] * a14
        SMLABB  r12, r9, r2, r12                     @ i4(15) --- r12 += x[-12] * a15

        LDR     r2, [r1], #-4                        @ r2 --- x[-13], x[-14]
        SMLABT  r3, r8, r2, r3                       @ i1(13) --- r3  += x[-13] * a13
        SMLATT  r4, r9, r2, r4                       @ i2(14) --- r4  += x[-13] * a14
        SMLABT  r11, r9, r2, r11                     @ i3(15) --- r11 += x[-13] * a15
        SMLABT  r12, r14, r2, r12                    @ i4(16) --- r12 += x[-13] * a16 (a16 = bottom of r14)
        SMLATB  r3, r9, r2, r3                       @ i1(14) --- r3  += x[-14] * a14
        SMLABB  r4, r9, r2, r4                       @ i2(15) --- r4  += x[-14] * a15
        SMLABB  r11, r14, r2, r11                    @ i3(16) --- r11 += x[-14] * a16
        LDR     r8, [r13]                            @ r8 --- a6, a7 (restored for the next pass)

        @ Last read of this pass is at x-32 bytes; +44 leaves r1 at x+12,
        @ i.e. x[6], which is "x[2]" of the next group of four outputs.
        LDR     r2, [r1], #44                        @ r2 --- x[-15], x[-16]
        SMLABT  r3, r9, r2, r3                       @ i1(15) --- r3  += x[-15] * a15
        SMLABB  r3, r14, r2, r3                      @ i1(16) --- r3  += x[-16] * a16
        SMLABT  r4, r14, r2, r4                      @ i2(16) --- r4  += x[-15] * a16
        LDR     r9, [r13, #4]                        @ r9 --- a8, a9 (restored for the next pass)

        @ Saturating left shift by 4: each QADD doubles with saturation.
        QADD    r3, r3, r3
        QADD    r4, r4, r4
        QADD    r11, r11, r11
        QADD    r12, r12, r12

        QADD    r3, r3, r3
        QADD    r4, r4, r4
        QADD    r11, r11, r11
        QADD    r12, r12, r12

        QADD    r3, r3, r3
        QADD    r4, r4, r4
        QADD    r11, r11, r11
        QADD    r12, r12, r12

        QADD    r3, r3, r3
        QADD    r4, r4, r4
        QADD    r11, r11, r11
        QADD    r12, r12, r12

        MOV     r2, #32768                           @ 0x8000: rounding constant for the >> 16 below

        @ QDADD Rd, Rm, Rn = sat(Rm + sat(2 * Rn)): the fifth doubling plus rounding.
        QDADD   r3, r2, r3
        QDADD   r4, r2, r4
        QDADD   r11, r2, r11
        QDADD   r12, r2, r12

        MOV     r3, r3, asr #16                      @ keep the high halfword of each result
        MOV     r4, r4, asr #16
        MOV     r11, r11, asr #16
        MOV     r12, r12, asr #16

        STRH    r3, [r0], #2                         @ y[n+0]
        STRH    r4, [r0], #2                         @ y[n+1]
        STRH    r11, [r0], #2                        @ y[n+2]
        STRH    r12, [r0], #2                        @ y[n+3]

        MOV     r2, r14, asr #16                     @ r2 = passes remaining, including this one
        SUB     r14, r14, #0x10000                   @ decrement the counter in the top half; a16 untouched
        SUBS    r2, r2, #1
        BNE     residu_loop
end:
        LDMFD   r13!, {r8 - r12}                     @ drop the coefficient spill area
        LDMFD   r13!, {r4 - r12, pc}                 @ restore saved registers and return

        @ENDFUNC
        .END
224
225
226
227
228
229