pred_lt4_1_opt.s revision c40e9b86092e67f245ba8f05244f578b26e49fe2
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16@
17@void Pred_lt4(
18@		  Word16 exc[],                         /* in/out: excitation buffer */
19@		  Word16 T0,                            /* input : integer pitch lag */
20@		  Word16 frac,                          /* input : fraction of lag   */
21@		  Word16 L_subfr                        /* input : subframe size     */
22@	      )
23
24@******************************
25@       ARM Register
26@******************************
27@ r0  ---  exc[]
28@ r1  ---  T0
29@ r2  ---  frac
30@ r3  ---  L_subfr
31
@-----------------------------------------------------------------------
@ pred_lt4_asm -- ARM implementation of Pred_lt4() (see the C prototype
@                 in the file header).
@
@ Writes 65 interpolated samples to exc[0..64].  Each output sample is a
@ 32-tap FIR of the past excitation:
@
@       exc[j] = round16( sat32( 4 * sum_{i=0..31} x[j+i] * h[i] ) )
@
@ where x = exc - T0 - 15 (one sample earlier when frac > 0) and h is
@ one 32-coefficient (64-byte) row of the external table inter4_2.
@ round16(v) = sat32(v + 0x8000) >> 16.
@
@ Syntax : GNU as, ARM state.  Uses the DSP multiply/saturate
@          instructions SMULxy / SMLAxy / QADD.
@ In     : r0 = exc     (Word16 *, also the output pointer)
@          r1 = T0      (integer pitch lag)
@          r2 = frac    (fraction of lag)
@          r3 = L_subfr -- NOT read: the sample count is hard-wired to
@                          21 * 3 + 2 = 65; r3 is reused as scratch.
@ Out    : no return value is set up.
@ Clobber: r0-r3 and flags.  r4-r12 and r14 are saved on entry and
@          restored on exit.
@ Stack  : 40 bytes (10 registers).
@ NOTE(review): coefficients are fetched two at a time with word LDRs,
@ h[even] taken from the bottom halfword and h[odd] from the top one;
@ this presumably relies on little-endian data and word-aligned
@ inter4_2 rows -- confirm against the table definition.
@-----------------------------------------------------------------------

        .section  .text
        .global   pred_lt4_asm
        .extern   inter4_2

pred_lt4_asm:

        STMFD     r13!, {r4 - r12, r14}
        RSB       r4, r1, #0                    @ -T0
        RSB       r2, r2, #0                    @ frac = -frac
        ADD       r5, r0, r4, LSL #1            @ x = exc - T0
        CMP       r2, #0
        ADDLT     r2, r2, #4                    @ if (frac < 0) frac += UP_SAMP
        SUBLT     r5, r5, #2                    @               x--
        SUB       r5, r5, #30                   @ x -= 15 (15 halfwords)
        RSB       r4, r2, #3                    @ k = 3 - frac

        @ Position-independent address of inter4_2: Table holds the
        @ link-time offset (inter4_2 - Table); add it to Table's
        @ run-time address.
        ADRL      r8, Table
        LDR       r6, [r8]
        ADD       r6, r8                        @ r6 = &inter4_2[0]
        MOV       r8, r4, LSL #6                @ k * 64 bytes per row
        ADD       r8, r6, r8                    @ ptr2 = &(inter4_2[k][0])

        MOV       r1, r5                        @ r1 = x (running input pointer)
        MOV       r5, #0x8000                   @ rounding constant
        MOV       r14, #21                      @ 21 iterations * 3 samples

@ Register roles from here on:
@   r0  = output pointer (exc)        r1  = input pointer (x)
@   r2  = coefficient pointer (ptr)   r3  = current coefficient pair
@   r4, r6, r9 = rotating window of input samples
@   r5  = 0x8000                      r8  = ptr2 (start of coefficient row)
@   r10, r11, r12 = accumulators for outputs j, j+1, j+2
@   r14 = remaining main-loop iterations

@ Main loop: three output samples per iteration.  In the comments x[]
@ is indexed relative to the value of r1 at the top of the iteration.
THREE_LOOP:

        MOV       r2, r8                        @ ptr = ptr2
        LDR       r3, [r2], #4                  @ h[0], h[1]
        LDRSH     r4, [r1], #2                  @ x[0]
        LDRSH     r6, [r1], #2                  @ x[1]
        LDRSH     r9, [r1], #2                  @ x[2]

        SMULBB    r10, r4, r3                   @ x[0] * h[0]
        SMULBB    r11, r6, r3                   @ x[1] * h[0]
        SMULBB    r12, r9, r3                   @ x[2] * h[0]

        LDRSH     r4, [r1], #2                  @ x[3]
        SMLABT    r10, r6, r3, r10              @ x[1] * h[1]
        SMLABT    r11, r9, r3, r11              @ x[2] * h[1]
        SMLABT    r12, r4, r3, r12              @ x[3] * h[1]

        LDR       r3, [r2], #4                  @ h[2], h[3]
        LDRSH     r6, [r1], #2                  @ x[4]
        SMLABB    r10, r9, r3, r10              @ x[2] * h[2]
        SMLABB    r11, r4, r3, r11              @ x[3] * h[2]
        SMLABB    r12, r6, r3, r12              @ x[4] * h[2]

        LDRSH     r9, [r1], #2                  @ x[5]
        SMLABT    r10, r4, r3, r10              @ x[3] * h[3]
        SMLABT    r11, r6, r3, r11              @ x[4] * h[3]
        SMLABT    r12, r9, r3, r12              @ x[5] * h[3]

        LDR       r3, [r2], #4                  @ h[4], h[5]
        LDRSH     r4, [r1], #2                  @ x[6]
        SMLABB    r10, r6, r3, r10              @ x[4] * h[4]
        SMLABB    r11, r9, r3, r11              @ x[5] * h[4]
        SMLABB    r12, r4, r3, r12              @ x[6] * h[4]

        LDRSH     r6, [r1], #2                  @ x[7]
        SMLABT    r10, r9, r3, r10              @ x[5] * h[5]
        SMLABT    r11, r4, r3, r11              @ x[6] * h[5]
        SMLABT    r12, r6, r3, r12              @ x[7] * h[5]

        LDR       r3, [r2], #4                  @ h[6], h[7]
        LDRSH     r9, [r1], #2                  @ x[8]
        SMLABB    r10, r4, r3, r10              @ x[6] * h[6]
        SMLABB    r11, r6, r3, r11              @ x[7] * h[6]
        SMLABB    r12, r9, r3, r12              @ x[8] * h[6]

        LDRSH     r4, [r1], #2                  @ x[9]
        SMLABT    r10, r6, r3, r10              @ x[7] * h[7]
        SMLABT    r11, r9, r3, r11              @ x[8] * h[7]
        SMLABT    r12, r4, r3, r12              @ x[9] * h[7]

        LDR       r3, [r2], #4                  @ h[8], h[9]
        LDRSH     r6, [r1], #2                  @ x[10]
        SMLABB    r10, r9, r3, r10              @ x[8] * h[8]
        SMLABB    r11, r4, r3, r11              @ x[9] * h[8]
        SMLABB    r12, r6, r3, r12              @ x[10] * h[8]

        LDRSH     r9, [r1], #2                  @ x[11]
        SMLABT    r10, r4, r3, r10              @ x[9] * h[9]
        SMLABT    r11, r6, r3, r11              @ x[10] * h[9]
        SMLABT    r12, r9, r3, r12              @ x[11] * h[9]

        LDR       r3, [r2], #4                  @ h[10], h[11]
        LDRSH     r4, [r1], #2                  @ x[12]
        SMLABB    r10, r6, r3, r10              @ x[10] * h[10]
        SMLABB    r11, r9, r3, r11              @ x[11] * h[10]
        SMLABB    r12, r4, r3, r12              @ x[12] * h[10]

        LDRSH     r6, [r1], #2                  @ x[13]
        SMLABT    r10, r9, r3, r10              @ x[11] * h[11]
        SMLABT    r11, r4, r3, r11              @ x[12] * h[11]
        SMLABT    r12, r6, r3, r12              @ x[13] * h[11]

        LDR       r3, [r2], #4                  @ h[12], h[13]
        LDRSH     r9, [r1], #2                  @ x[14]
        SMLABB    r10, r4, r3, r10              @ x[12] * h[12]
        SMLABB    r11, r6, r3, r11              @ x[13] * h[12]
        SMLABB    r12, r9, r3, r12              @ x[14] * h[12]

        LDRSH     r4, [r1], #2                  @ x[15]
        SMLABT    r10, r6, r3, r10              @ x[13] * h[13]
        SMLABT    r11, r9, r3, r11              @ x[14] * h[13]
        SMLABT    r12, r4, r3, r12              @ x[15] * h[13]

        LDR       r3, [r2], #4                  @ h[14], h[15]
        LDRSH     r6, [r1], #2                  @ x[16]
        SMLABB    r10, r9, r3, r10              @ x[14] * h[14]
        SMLABB    r11, r4, r3, r11              @ x[15] * h[14]
        SMLABB    r12, r6, r3, r12              @ x[16] * h[14]

        LDRSH     r9, [r1], #2                  @ x[17]
        SMLABT    r10, r4, r3, r10              @ x[15] * h[15]
        SMLABT    r11, r6, r3, r11              @ x[16] * h[15]
        SMLABT    r12, r9, r3, r12              @ x[17] * h[15]

        LDR       r3, [r2], #4                  @ h[16], h[17]
        LDRSH     r4, [r1], #2                  @ x[18]
        SMLABB    r10, r6, r3, r10              @ x[16] * h[16]
        SMLABB    r11, r9, r3, r11              @ x[17] * h[16]
        SMLABB    r12, r4, r3, r12              @ x[18] * h[16]

        LDRSH     r6, [r1], #2                  @ x[19]
        SMLABT    r10, r9, r3, r10              @ x[17] * h[17]
        SMLABT    r11, r4, r3, r11              @ x[18] * h[17]
        SMLABT    r12, r6, r3, r12              @ x[19] * h[17]

        LDR       r3, [r2], #4                  @ h[18], h[19]
        LDRSH     r9, [r1], #2                  @ x[20]
        SMLABB    r10, r4, r3, r10              @ x[18] * h[18]
        SMLABB    r11, r6, r3, r11              @ x[19] * h[18]
        SMLABB    r12, r9, r3, r12              @ x[20] * h[18]

        LDRSH     r4, [r1], #2                  @ x[21]
        SMLABT    r10, r6, r3, r10              @ x[19] * h[19]
        SMLABT    r11, r9, r3, r11              @ x[20] * h[19]
        SMLABT    r12, r4, r3, r12              @ x[21] * h[19]

        LDR       r3, [r2], #4                  @ h[20], h[21]
        LDRSH     r6, [r1], #2                  @ x[22]
        SMLABB    r10, r9, r3, r10              @ x[20] * h[20]
        SMLABB    r11, r4, r3, r11              @ x[21] * h[20]
        SMLABB    r12, r6, r3, r12              @ x[22] * h[20]

        LDRSH     r9, [r1], #2                  @ x[23]
        SMLABT    r10, r4, r3, r10              @ x[21] * h[21]
        SMLABT    r11, r6, r3, r11              @ x[22] * h[21]
        SMLABT    r12, r9, r3, r12              @ x[23] * h[21]

        LDR       r3, [r2], #4                  @ h[22], h[23]
        LDRSH     r4, [r1], #2                  @ x[24]
        SMLABB    r10, r6, r3, r10              @ x[22] * h[22]
        SMLABB    r11, r9, r3, r11              @ x[23] * h[22]
        SMLABB    r12, r4, r3, r12              @ x[24] * h[22]

        LDRSH     r6, [r1], #2                  @ x[25]
        SMLABT    r10, r9, r3, r10              @ x[23] * h[23]
        SMLABT    r11, r4, r3, r11              @ x[24] * h[23]
        SMLABT    r12, r6, r3, r12              @ x[25] * h[23]

        LDR       r3, [r2], #4                  @ h[24], h[25]
        LDRSH     r9, [r1], #2                  @ x[26]
        SMLABB    r10, r4, r3, r10              @ x[24] * h[24]
        SMLABB    r11, r6, r3, r11              @ x[25] * h[24]
        SMLABB    r12, r9, r3, r12              @ x[26] * h[24]

        LDRSH     r4, [r1], #2                  @ x[27]
        SMLABT    r10, r6, r3, r10              @ x[25] * h[25]
        SMLABT    r11, r9, r3, r11              @ x[26] * h[25]
        SMLABT    r12, r4, r3, r12              @ x[27] * h[25]

        LDR       r3, [r2], #4                  @ h[26], h[27]
        LDRSH     r6, [r1], #2                  @ x[28]
        SMLABB    r10, r9, r3, r10              @ x[26] * h[26]
        SMLABB    r11, r4, r3, r11              @ x[27] * h[26]
        SMLABB    r12, r6, r3, r12              @ x[28] * h[26]

        LDRSH     r9, [r1], #2                  @ x[29]
        SMLABT    r10, r4, r3, r10              @ x[27] * h[27]
        SMLABT    r11, r6, r3, r11              @ x[28] * h[27]
        SMLABT    r12, r9, r3, r12              @ x[29] * h[27]

        LDR       r3, [r2], #4                  @ h[28], h[29]
        LDRSH     r4, [r1], #2                  @ x[30]
        SMLABB    r10, r6, r3, r10              @ x[28] * h[28]
        SMLABB    r11, r9, r3, r11              @ x[29] * h[28]
        SMLABB    r12, r4, r3, r12              @ x[30] * h[28]

        LDRSH     r6, [r1], #2                  @ x[31]
        SMLABT    r10, r9, r3, r10              @ x[29] * h[29]
        SMLABT    r11, r4, r3, r11              @ x[30] * h[29]
        SMLABT    r12, r6, r3, r12              @ x[31] * h[29]

        LDR       r3, [r2], #4                  @ h[30], h[31]
        LDRSH     r9, [r1], #2                  @ x[32]
        SMLABB    r10, r4, r3, r10              @ x[30] * h[30]
        SMLABB    r11, r6, r3, r11              @ x[31] * h[30]
        SMLABB    r12, r9, r3, r12              @ x[32] * h[30]

        @ Last sample of the window: the -60 byte post-index rewinds r1
        @ so that it ends up 3 samples past this iteration's x[0].
        LDRSH     r4, [r1], #-60                @ x[33]
        SMLABT    r10, r6, r3, r10              @ x[31] * h[31]
        SMLABT    r11, r9, r3, r11              @ x[32] * h[31]
        SMLABT    r12, r4, r3, r12              @ x[33] * h[31]

        @ sum * 4 with saturation, done as two saturating doublings.
        @ (A plain LSL for the first doubling would wrap, and flip the
        @ sign of the result, whenever |sum| >= 2^30.)
        QADD      r10, r10, r10
        QADD      r11, r11, r11
        QADD      r12, r12, r12

        QADD      r10, r10, r10
        QADD      r11, r11, r11
        QADD      r12, r12, r12

        QADD      r10, r10, r5                  @ + 0x8000 (round), saturating
        QADD      r11, r11, r5
        QADD      r12, r12, r5

        SUBS      r14, r14, #1                  @ flags consumed by BNE below

        MOV       r10, r10, ASR #16             @ keep the high halfword
        MOV       r11, r11, ASR #16
        MOV       r12, r12, ASR #16

        STRH      r10, [r0], #2
        STRH      r11, [r0], #2
        STRH      r12, [r0], #2
        BNE       THREE_LOOP

        MOV       r2, r8                        @ ptr = ptr2

@ Tail: the remaining two output samples (63 and 64), straight-line.
@ r1 continues from where the main loop left it.
Last2LOOP:

        LDR       r3, [r2], #4                  @ h[0], h[1]
        LDRSH     r4, [r1], #2                  @ x[0]
        LDRSH     r6, [r1], #2                  @ x[1]
        LDRSH     r9, [r1], #2                  @ x[2]

        SMULBB    r10, r4, r3                   @ x[0] * h[0]
        SMULBB    r11, r6, r3                   @ x[1] * h[0]

        SMLABT    r10, r6, r3, r10              @ x[1] * h[1]
        SMLABT    r11, r9, r3, r11              @ x[2] * h[1]

        LDR       r3, [r2], #4                  @ h[2], h[3]
        LDRSH     r4, [r1], #2                  @ x[3]
        LDRSH     r6, [r1], #2                  @ x[4]

        SMLABB    r10, r9, r3, r10              @ x[2] * h[2]
        SMLABB    r11, r4, r3, r11              @ x[3] * h[2]

        SMLABT    r10, r4, r3, r10              @ x[3] * h[3]
        SMLABT    r11, r6, r3, r11              @ x[4] * h[3]

        LDR       r3, [r2], #4                  @ h[4], h[5]
        LDRSH     r9, [r1], #2                  @ x[5]
        LDRSH     r4, [r1], #2                  @ x[6]

        SMLABB    r10, r6, r3, r10              @ x[4] * h[4]
        SMLABB    r11, r9, r3, r11              @ x[5] * h[4]

        SMLABT    r10, r9, r3, r10              @ x[5] * h[5]
        SMLABT    r11, r4, r3, r11              @ x[6] * h[5]

        LDR       r3, [r2], #4                  @ h[6], h[7]
        LDRSH     r6, [r1], #2                  @ x[7]
        LDRSH     r9, [r1], #2                  @ x[8]

        SMLABB    r10, r4, r3, r10              @ x[6] * h[6]
        SMLABB    r11, r6, r3, r11              @ x[7] * h[6]

        SMLABT    r10, r6, r3, r10              @ x[7] * h[7]
        SMLABT    r11, r9, r3, r11              @ x[8] * h[7]

        LDR       r3, [r2], #4                  @ h[8], h[9]
        LDRSH     r4, [r1], #2                  @ x[9]
        LDRSH     r6, [r1], #2                  @ x[10]

        SMLABB    r10, r9, r3, r10              @ x[8] * h[8]
        SMLABB    r11, r4, r3, r11              @ x[9] * h[8]

        SMLABT    r10, r4, r3, r10              @ x[9] * h[9]
        SMLABT    r11, r6, r3, r11              @ x[10] * h[9]

        LDR       r3, [r2], #4                  @ h[10], h[11]
        LDRSH     r9, [r1], #2                  @ x[11]
        LDRSH     r4, [r1], #2                  @ x[12]

        SMLABB    r10, r6, r3, r10              @ x[10] * h[10]
        SMLABB    r11, r9, r3, r11              @ x[11] * h[10]

        SMLABT    r10, r9, r3, r10              @ x[11] * h[11]
        SMLABT    r11, r4, r3, r11              @ x[12] * h[11]

        LDR       r3, [r2], #4                  @ h[12], h[13]
        LDRSH     r6, [r1], #2                  @ x[13]
        LDRSH     r9, [r1], #2                  @ x[14]

        SMLABB    r10, r4, r3, r10              @ x[12] * h[12]
        SMLABB    r11, r6, r3, r11              @ x[13] * h[12]

        SMLABT    r10, r6, r3, r10              @ x[13] * h[13]
        SMLABT    r11, r9, r3, r11              @ x[14] * h[13]

        LDR       r3, [r2], #4                  @ h[14], h[15]
        LDRSH     r4, [r1], #2                  @ x[15]
        LDRSH     r6, [r1], #2                  @ x[16]

        SMLABB    r10, r9, r3, r10              @ x[14] * h[14]
        SMLABB    r11, r4, r3, r11              @ x[15] * h[14]

        SMLABT    r10, r4, r3, r10              @ x[15] * h[15]
        SMLABT    r11, r6, r3, r11              @ x[16] * h[15]

        LDR       r3, [r2], #4                  @ h[16], h[17]
        LDRSH     r9, [r1], #2                  @ x[17]
        LDRSH     r4, [r1], #2                  @ x[18]

        SMLABB    r10, r6, r3, r10              @ x[16] * h[16]
        SMLABB    r11, r9, r3, r11              @ x[17] * h[16]

        SMLABT    r10, r9, r3, r10              @ x[17] * h[17]
        SMLABT    r11, r4, r3, r11              @ x[18] * h[17]

        LDR       r3, [r2], #4                  @ h[18], h[19]
        LDRSH     r6, [r1], #2                  @ x[19]
        LDRSH     r9, [r1], #2                  @ x[20]

        SMLABB    r10, r4, r3, r10              @ x[18] * h[18]
        SMLABB    r11, r6, r3, r11              @ x[19] * h[18]

        SMLABT    r10, r6, r3, r10              @ x[19] * h[19]
        SMLABT    r11, r9, r3, r11              @ x[20] * h[19]

        LDR       r3, [r2], #4                  @ h[20], h[21]
        LDRSH     r4, [r1], #2                  @ x[21]
        LDRSH     r6, [r1], #2                  @ x[22]

        SMLABB    r10, r9, r3, r10              @ x[20] * h[20]
        SMLABB    r11, r4, r3, r11              @ x[21] * h[20]

        SMLABT    r10, r4, r3, r10              @ x[21] * h[21]
        SMLABT    r11, r6, r3, r11              @ x[22] * h[21]

        LDR       r3, [r2], #4                  @ h[22], h[23]
        LDRSH     r9, [r1], #2                  @ x[23]
        LDRSH     r4, [r1], #2                  @ x[24]

        SMLABB    r10, r6, r3, r10              @ x[22] * h[22]
        SMLABB    r11, r9, r3, r11              @ x[23] * h[22]

        SMLABT    r10, r9, r3, r10              @ x[23] * h[23]
        SMLABT    r11, r4, r3, r11              @ x[24] * h[23]

        LDR       r3, [r2], #4                  @ h[24], h[25]
        LDRSH     r6, [r1], #2                  @ x[25]
        LDRSH     r9, [r1], #2                  @ x[26]

        SMLABB    r10, r4, r3, r10              @ x[24] * h[24]
        SMLABB    r11, r6, r3, r11              @ x[25] * h[24]

        SMLABT    r10, r6, r3, r10              @ x[25] * h[25]
        SMLABT    r11, r9, r3, r11              @ x[26] * h[25]

        LDR       r3, [r2], #4                  @ h[26], h[27]
        LDRSH     r4, [r1], #2                  @ x[27]
        LDRSH     r6, [r1], #2                  @ x[28]

        SMLABB    r10, r9, r3, r10              @ x[26] * h[26]
        SMLABB    r11, r4, r3, r11              @ x[27] * h[26]

        SMLABT    r10, r4, r3, r10              @ x[27] * h[27]
        SMLABT    r11, r6, r3, r11              @ x[28] * h[27]

        LDR       r3, [r2], #4                  @ h[28], h[29]
        LDRSH     r9, [r1], #2                  @ x[29]
        LDRSH     r4, [r1], #2                  @ x[30]

        SMLABB    r10, r6, r3, r10              @ x[28] * h[28]
        SMLABB    r11, r9, r3, r11              @ x[29] * h[28]

        SMLABT    r10, r9, r3, r10              @ x[29] * h[29]
        SMLABT    r11, r4, r3, r11              @ x[30] * h[29]

        LDR       r3, [r2], #4                  @ h[30], h[31]
        LDRSH     r6, [r1], #2                  @ x[31]
        LDRSH     r9, [r1], #2                  @ x[32]

        SMLABB    r10, r4, r3, r10              @ x[30] * h[30]
        SMLABB    r11, r6, r3, r11              @ x[31] * h[30]

        SMLABT    r10, r6, r3, r10              @ x[31] * h[31]
        SMLABT    r11, r9, r3, r11              @ x[32] * h[31]

        @ Same saturating scale-and-round as in the main loop:
        @ sat(4 * sum), then + 0x8000 (saturating), then >> 16.
        QADD      r10, r10, r10
        QADD      r11, r11, r11

        QADD      r10, r10, r10
        QADD      r11, r11, r11

        QADD      r10, r10, r5
        QADD      r11, r11, r5

        MOV       r10, r10, ASR #16
        MOV       r11, r11, ASR #16

        STRH      r10, [r0], #2
        STRH      r11, [r0], #2


pred_lt4_end:
        LDMFD     r13!, {r4 - r12, r15}         @ restore and return (saved lr -> pc)

@ Link-time offset from this word to inter4_2; read via ADRL at function
@ entry.  Placed after the return, so it is never executed.
Table:
        .word       inter4_2-Table
        .END
459
460
461
462
463