pred_lt4_1_opt.s revision e17bb5cafdbc6089716d8e8c5afbb00f207a59bb
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16@
17@void Pred_lt4(
18@		  Word16 exc[],                         /* in/out: excitation buffer */
19@		  Word16 T0,                            /* input : integer pitch lag */
20@		  Word16 frac,                          /* input : fraction of lag   */
21@		  Word16 L_subfr                        /* input : subframe size     */
22@	      )
23
24@******************************
25@       ARM Register
26@******************************
27@ r0  ---  exc[]
28@ r1  ---  T0
29@ r2  ---  frac
30@ r3  ---  L_subfr (never read; the routine always writes 21*3 + 2 = 65 samples)
31
32         .section  .text
33	 .global   pred_lt4_asm
34	 .extern   inter4_2
35	 .hidden   inter4_2
36
37pred_lt4_asm:
38@ Setup: x = exc - T0 - 15 (one sample lower when the negated frac is < 0), ptr2 = &inter4_2[k][0]; r3 is never read.
39         STMFD     r13!, {r4 - r12, r14}              @push r4-r12 and lr (10 words)
40         RSB       r4, r1, #0                         @r4 = -T0
41         RSB       r2, r2, #0                         @frac = -frac
42         ADD       r5, r0, r4, LSL #1                 @x = exc - T0 (LSL #1: 2 bytes per sample)
43         CMP       r2, #0                             @negated frac < 0 ?
44         ADDLT     r2, r2, #4                         @  yes: frac += UP_SAMP (4)
45         SUBLT     r5, r5, #2                         @  yes: x-- (one sample = 2 bytes)
46         SUB       r5, r5, #30                        @x -= 15 samples (30 bytes)
47         RSB       r4, r2, #3                         @k = 3 - frac
48         ADRL      r8, Table                          @r8 = run-time address of Table
49         LDR       r6, [r8]                           @r6 = (inter4_2 - Table), the word stored at Table
50         ADD       r6, r8                             @r6 = Table + offset = &inter4_2
51	 MOV       r8, r4, LSL #6                     @k * 64: byte offset of row k
52         @MOV       r7, #0                             @j = 0
53         ADD       r8, r6, r8                         @ptr2 = &(inter4_2[k][0])
54
55	 MOV       r1, r5                             @r1 = x, the running input pointer
56	 MOV       r5, #0x8000                        @r5 reused: rounding constant added before >>16
57	 MOV       r14, #21                           @21 passes of THREE_LOOP, 3 outputs each (63 samples)
58@ registers live across the loops
59         @r0 --- exc[] (output pointer)  r1 --- x  r14 --- pass counter  r8 --- ptr2  r5 --- 0x8000  (r7 is unused)
60THREE_LOOP:
61@ Each pass forms three outputs at once: r10, r11, r12 accumulate the 32-tap sums starting at x[0], x[1], x[2].
62         @MOV       r1, r5                             @ptr1 = x
63	 MOV       r2, r8                             @ptr = ptr2 (rewind to h[0] every pass)
64         LDR       r3, [r2], #4                       @r3 = h[0] (B half), h[1] (T half)
65	 LDRSH     r4, [r1], #2                       @x[0]
66	 LDRSH     r6, [r1], #2                       @x[1]
67	 LDRSH     r9, [r1], #2                       @x[2]
68@ r4, r6, r9 rotate as a three-sample window; the first tap initialises the accumulators (SMULBB), later taps add (SMLABx).
69	 SMULBB    r10, r4, r3                        @x[0] * h[0]
70	 SMULBB    r11, r6, r3                        @x[1] * h[0]
71	 SMULBB    r12, r9, r3                        @x[2] * h[0]
72
73         LDRSH     r4, [r1], #2                       @x[3]
74	 SMLABT    r10, r6, r3, r10                   @x[1] * h[1]
75         SMLABT    r11, r9, r3, r11                   @x[2] * h[1]
76	 SMLABT    r12, r4, r3, r12                   @x[3] * h[1]
77
78	 LDR       r3, [r2], #4                       @h[2], h[3]
79	 LDRSH     r6, [r1], #2                       @x[4]
80	 SMLABB    r10, r9, r3, r10                   @x[2] * h[2]
81         SMLABB    r11, r4, r3, r11                   @x[3] * h[2]
82         SMLABB    r12, r6, r3, r12                   @x[4] * h[2]
83
84         LDRSH     r9, [r1], #2                       @x[5]
85         SMLABT    r10, r4, r3, r10                   @x[3] * h[3]
86         SMLABT    r11, r6, r3, r11                   @x[4] * h[3]
87         SMLABT    r12, r9, r3, r12                   @x[5] * h[3]
88
89         LDR       r3, [r2], #4                       @h[4], h[5]
90         LDRSH     r4, [r1], #2                       @x[6]
91         SMLABB    r10, r6, r3, r10                   @x[4] * h[4]
92         SMLABB    r11, r9, r3, r11                   @x[5] * h[4]
93         SMLABB    r12, r4, r3, r12                   @x[6] * h[4]
94
95	 LDRSH     r6, [r1], #2                       @x[7]
96	 SMLABT    r10, r9, r3, r10                   @x[5] * h[5]
97	 SMLABT    r11, r4, r3, r11                   @x[6] * h[5]
98	 SMLABT    r12, r6, r3, r12                   @x[7] * h[5]
99
100         LDR       r3, [r2], #4                       @h[6], h[7]
101	 LDRSH     r9, [r1], #2                       @x[8]
102	 SMLABB    r10, r4, r3, r10                   @x[6] * h[6]
103	 SMLABB    r11, r6, r3, r11                   @x[7] * h[6]
104	 SMLABB    r12, r9, r3, r12                   @x[8] * h[6]
105
106	 LDRSH     r4, [r1], #2                       @x[9]
107	 SMLABT    r10, r6, r3, r10                   @x[7] * h[7]
108	 SMLABT    r11, r9, r3, r11                   @x[8] * h[7]
109	 SMLABT    r12, r4, r3, r12                   @x[9] * h[7]
110
111	 LDR       r3, [r2], #4                       @h[8], h[9]
112	 LDRSH     r6, [r1], #2                       @x[10]
113	 SMLABB    r10, r9, r3, r10                   @x[8] * h[8]
114	 SMLABB    r11, r4, r3, r11                   @x[9] * h[8]
115	 SMLABB    r12, r6, r3, r12                   @x[10] * h[8]
116
117	 LDRSH     r9, [r1], #2                       @x[11]
118	 SMLABT    r10, r4, r3, r10                   @x[9] * h[9]
119	 SMLABT    r11, r6, r3, r11                   @x[10] * h[9]
120	 SMLABT    r12, r9, r3, r12                   @x[11] * h[9]
121
122         LDR       r3, [r2], #4                       @h[10], h[11]
123	 LDRSH     r4, [r1], #2                       @x[12]
124         SMLABB    r10, r6, r3, r10                   @x[10] * h[10]
125	 SMLABB    r11, r9, r3, r11                   @x[11] * h[10]
126	 SMLABB    r12, r4, r3, r12                   @x[12] * h[10]
127
128	 LDRSH     r6, [r1], #2                       @x[13]
129	 SMLABT    r10, r9, r3, r10                   @x[11] * h[11]
130	 SMLABT    r11, r4, r3, r11                   @x[12] * h[11]
131	 SMLABT    r12, r6, r3, r12                   @x[13] * h[11]
132
133	 LDR       r3, [r2], #4                       @h[12], h[13]
134	 LDRSH     r9, [r1], #2                       @x[14]
135	 SMLABB    r10, r4, r3, r10                   @x[12] * h[12]
136	 SMLABB    r11, r6, r3, r11                   @x[13] * h[12]
137	 SMLABB    r12, r9, r3, r12                   @x[14] * h[12]
138
139	 LDRSH     r4, [r1], #2                       @x[15]
140	 SMLABT    r10, r6, r3, r10                   @x[13] * h[13]
141	 SMLABT    r11, r9, r3, r11                   @x[14] * h[13]
142	 SMLABT    r12, r4, r3, r12                   @x[15] * h[13]
143
144	 LDR       r3, [r2], #4                       @h[14], h[15]
145	 LDRSH     r6, [r1], #2                       @x[16]
146	 SMLABB    r10, r9, r3, r10                   @x[14] * h[14]
147	 SMLABB    r11, r4, r3, r11                   @x[15] * h[14]
148	 SMLABB    r12, r6, r3, r12                   @x[16] * h[14]
149
150	 LDRSH     r9, [r1], #2                       @x[17]
151         SMLABT    r10, r4, r3, r10                   @x[15] * h[15]
152	 SMLABT    r11, r6, r3, r11                   @x[16] * h[15]
153	 SMLABT    r12, r9, r3, r12                   @x[17] * h[15]
154
155	 LDR       r3, [r2], #4                       @h[16], h[17]
156	 LDRSH     r4, [r1], #2                       @x[18]
157	 SMLABB    r10, r6, r3, r10                   @x[16] * h[16]
158	 SMLABB    r11, r9, r3, r11                   @x[17] * h[16]
159	 SMLABB    r12, r4, r3, r12                   @x[18] * h[16]
160
161         LDRSH     r6, [r1], #2                       @x[19]
162	 SMLABT    r10, r9, r3, r10                   @x[17] * h[17]
163	 SMLABT    r11, r4, r3, r11                   @x[18] * h[17]
164	 SMLABT    r12, r6, r3, r12                   @x[19] * h[17]
165
166	 LDR       r3, [r2], #4                       @h[18], h[19]
167         LDRSH     r9, [r1], #2                       @x[20]
168	 SMLABB    r10, r4, r3, r10                   @x[18] * h[18]
169	 SMLABB    r11, r6, r3, r11                   @x[19] * h[18]
170	 SMLABB    r12, r9, r3, r12                   @x[20] * h[18]
171
172	 LDRSH     r4, [r1], #2                       @x[21]
173	 SMLABT    r10, r6, r3, r10                   @x[19] * h[19]
174	 SMLABT    r11, r9, r3, r11                   @x[20] * h[19]
175	 SMLABT    r12, r4, r3, r12                   @x[21] * h[19]
176
177	 LDR       r3, [r2], #4                       @h[20], h[21]
178	 LDRSH     r6, [r1], #2                       @x[22]
179	 SMLABB    r10, r9, r3, r10                   @x[20] * h[20]
180	 SMLABB    r11, r4, r3, r11                   @x[21] * h[20]
181	 SMLABB    r12, r6, r3, r12                   @x[22] * h[20]
182
183	 LDRSH     r9, [r1], #2                       @x[23]
184	 SMLABT    r10, r4, r3, r10                   @x[21] * h[21]
185	 SMLABT    r11, r6, r3, r11                   @x[22] * h[21]
186	 SMLABT    r12, r9, r3, r12                   @x[23] * h[21]
187
188	 LDR       r3, [r2], #4                       @h[22], h[23]
189	 LDRSH     r4, [r1], #2                       @x[24]
190	 SMLABB    r10, r6, r3, r10                   @x[22] * h[22]
191	 SMLABB    r11, r9, r3, r11                   @x[23] * h[22]
192	 SMLABB    r12, r4, r3, r12                   @x[24] * h[22]
193
194         LDRSH     r6, [r1], #2                       @x[25]
195	 SMLABT    r10, r9, r3, r10                   @x[23] * h[23]
196	 SMLABT    r11, r4, r3, r11                   @x[24] * h[23]
197	 SMLABT    r12, r6, r3, r12                   @x[25] * h[23]
198
199	 LDR       r3, [r2], #4                       @h[24], h[25]
200         LDRSH     r9, [r1], #2                       @x[26]
201	 SMLABB    r10, r4, r3, r10                   @x[24] * h[24]
202	 SMLABB    r11, r6, r3, r11                   @x[25] * h[24]
203	 SMLABB    r12, r9, r3, r12                   @x[26] * h[24]
204
205	 LDRSH     r4, [r1], #2                       @x[27]
206	 SMLABT    r10, r6, r3, r10                   @x[25] * h[25]
207	 SMLABT    r11, r9, r3, r11                   @x[26] * h[25]
208	 SMLABT    r12, r4, r3, r12                   @x[27] * h[25]
209
210	 LDR       r3, [r2], #4                       @h[26], h[27]
211	 LDRSH     r6, [r1], #2                       @x[28]
212	 SMLABB    r10, r9, r3, r10                   @x[26] * h[26]
213	 SMLABB    r11, r4, r3, r11                   @x[27] * h[26]
214	 SMLABB    r12, r6, r3, r12                   @x[28] * h[26]
215
216	 LDRSH     r9, [r1], #2                       @x[29]
217	 SMLABT    r10, r4, r3, r10                   @x[27] * h[27]
218	 SMLABT    r11, r6, r3, r11                   @x[28] * h[27]
219	 SMLABT    r12, r9, r3, r12                   @x[29] * h[27]
220
221	 LDR       r3, [r2], #4                       @h[28], h[29]
222	 LDRSH     r4, [r1], #2                       @x[30]
223	 SMLABB    r10, r6, r3, r10                   @x[28] * h[28]
224	 SMLABB    r11, r9, r3, r11                   @x[29] * h[28]
225	 SMLABB    r12, r4, r3, r12                   @x[30] * h[28]
226
227         LDRSH     r6, [r1], #2                       @x[31]
228	 SMLABT    r10, r9, r3, r10                   @x[29] * h[29]
229	 SMLABT    r11, r4, r3, r11                   @x[30] * h[29]
230	 SMLABT    r12, r6, r3, r12                   @x[31] * h[29]
231
232	 LDR       r3, [r2], #4                       @h[30], h[31]
233         LDRSH     r9, [r1], #2                       @x[32]
234	 SMLABB    r10, r4, r3, r10                   @x[30] * h[30]
235	 SMLABB    r11, r6, r3, r11                   @x[31] * h[30]
236	 SMLABB    r12, r9, r3, r12                   @x[32] * h[30]
237
238	 LDRSH     r4, [r1], #-60                     @x[33]; 33*2 - 60 = +6 bytes net, so r1 = x + 3 for the next pass
239	 SMLABT    r10, r6, r3, r10                   @x[31] * h[31]
240	 SMLABT    r11, r9, r3, r11                   @x[32] * h[31]
241	 SMLABT    r12, r4, r3, r12                   @x[33] * h[31]
242@ Scale each sum by 4 in two steps: a plain shift by 1, then a saturating doubling (QADD with itself).
243	 @SSAT      r10, #32, r10, LSL #2
244	 @SSAT      r11, #32, r11, LSL #2
245	 @SSAT      r12, #32, r12, LSL #2
246
247	 MOV       r10, r10, LSL #1                   @sum << 1 (plain shift, no saturation)
248	 MOV       r11, r11, LSL #1
249	 MOV       r12, r12, LSL #1
250@ NOTE(review): the LSL #1 above wraps on overflow; only the QADD doubling below clamps - confirm sums stay within +/-2^30.
251	 QADD      r10, r10, r10                      @saturating x2
252	 QADD      r11, r11, r11
253	 QADD      r12, r12, r12
254@ Round: saturating add of r5 (0x8000) before the high halfword is extracted.
255	 QADD      r10, r10, r5                       @saturating + 0x8000
256	 QADD      r11, r11, r5
257	 QADD      r12, r12, r5
258
259	 SUBS      r14, r14, #1                       @--pass counter; flags survive to the BNE (MOV/STRH below do not set them)
260
261	 MOV       r10, r10, ASR #16                  @keep the high halfword
262	 MOV       r11, r11, ASR #16
263	 MOV       r12, r12, ASR #16
264@ Store the three results; r0 advances 6 bytes per pass.
265	 STRH      r10, [r0], #2                      @*exc++ = output 0
266	 STRH      r11, [r0], #2                      @*exc++ = output 1
267	 STRH      r12, [r0], #2                      @*exc++ = output 2
268	 BNE       THREE_LOOP                         @repeat until r14 reaches 0 (21 passes)
269
270	 MOV       r2, r8                             @ptr = ptr2 (rewind taps for the tail)
271
272Last2LOOP:
273@ Tail: forms the final two outputs (r10, r11) with the same 32 taps; straight-line code, nothing branches back here.
274         LDR       r3, [r2], #4                       @r3 = h[0] (B half), h[1] (T half)
275	 LDRSH     r4, [r1], #2                       @x[0]
276	 LDRSH     r6, [r1], #2                       @x[1]
277	 LDRSH     r9, [r1], #2                       @x[2]
278@ r4, r6, r9 rotate as a three-sample window; two samples are loaded per tap pair.
279	 SMULBB    r10, r4, r3                        @x[0] * h[0]
280	 SMULBB    r11, r6, r3                        @x[1] * h[0]
281
282	 SMLABT    r10, r6, r3, r10                   @x[1] * h[1]
283	 SMLABT    r11, r9, r3, r11                   @x[2] * h[1]
284
285	 LDR       r3, [r2], #4                       @h[2], h[3]
286	 LDRSH     r4, [r1], #2                       @x[3]
287         LDRSH     r6, [r1], #2                       @x[4]
288
289	 SMLABB    r10, r9, r3, r10                   @x[2] * h[2]
290         SMLABB    r11, r4, r3, r11                   @x[3] * h[2]
291
292	 SMLABT    r10, r4, r3, r10                   @x[3] * h[3]
293	 SMLABT    r11, r6, r3, r11                   @x[4] * h[3]
294
295	 LDR       r3, [r2], #4                       @h[4], h[5]
296	 LDRSH     r9, [r1], #2                       @x[5]
297	 LDRSH     r4, [r1], #2                       @x[6]
298
299	 SMLABB    r10, r6, r3, r10                   @x[4] * h[4]
300	 SMLABB    r11, r9, r3, r11                   @x[5] * h[4]
301
302	 SMLABT    r10, r9, r3, r10                   @x[5] * h[5]
303	 SMLABT    r11, r4, r3, r11                   @x[6] * h[5]
304
305	 LDR       r3, [r2], #4                       @h[6], h[7]
306	 LDRSH     r6, [r1], #2                       @x[7]
307	 LDRSH     r9, [r1], #2                       @x[8]
308
309	 SMLABB    r10, r4, r3, r10                   @x[6] * h[6]
310	 SMLABB    r11, r6, r3, r11                   @x[7] * h[6]
311
312	 SMLABT    r10, r6, r3, r10                   @x[7] * h[7]
313	 SMLABT    r11, r9, r3, r11                   @x[8] * h[7]
314
315	 LDR       r3, [r2], #4                       @h[8], h[9]
316	 LDRSH     r4, [r1], #2                       @x[9]
317	 LDRSH     r6, [r1], #2                       @x[10]
318
319	 SMLABB    r10, r9, r3, r10                   @x[8] * h[8]
320	 SMLABB    r11, r4, r3, r11                   @x[9] * h[8]
321
322	 SMLABT    r10, r4, r3, r10                   @x[9] * h[9]
323	 SMLABT    r11, r6, r3, r11                   @x[10] * h[9]
324
325	 LDR       r3, [r2], #4                       @h[10], h[11]
326	 LDRSH     r9, [r1], #2                       @x[11]
327	 LDRSH     r4, [r1], #2                       @x[12]
328
329	 SMLABB    r10, r6, r3, r10                   @x[10] * h[10]
330	 SMLABB    r11, r9, r3, r11                   @x[11] * h[10]
331
332	 SMLABT    r10, r9, r3, r10                   @x[11] * h[11]
333	 SMLABT    r11, r4, r3, r11                   @x[12] * h[11]
334
335	 LDR       r3, [r2], #4                       @h[12], h[13]
336	 LDRSH     r6, [r1], #2                       @x[13]
337	 LDRSH     r9, [r1], #2                       @x[14]
338
339	 SMLABB    r10, r4, r3, r10                   @x[12] * h[12]
340	 SMLABB    r11, r6, r3, r11                   @x[13] * h[12]
341
342	 SMLABT    r10, r6, r3, r10                   @x[13] * h[13]
343	 SMLABT    r11, r9, r3, r11                   @x[14] * h[13]
344
345	 LDR       r3, [r2], #4                       @h[14], h[15]
346	 LDRSH     r4, [r1], #2                       @x[15]
347	 LDRSH     r6, [r1], #2                       @x[16]
348
349	 SMLABB    r10, r9, r3, r10                   @x[14] * h[14]
350	 SMLABB    r11, r4, r3, r11                   @x[15] * h[14]
351
352	 SMLABT    r10, r4, r3, r10                   @x[15] * h[15]
353	 SMLABT    r11, r6, r3, r11                   @x[16] * h[15]
354
355	 LDR       r3, [r2], #4                       @h[16], h[17]
356	 LDRSH     r9, [r1], #2                       @x[17]
357	 LDRSH     r4, [r1], #2                       @x[18]
358
359	 SMLABB    r10, r6, r3, r10                   @x[16] * h[16]
360	 SMLABB    r11, r9, r3, r11                   @x[17] * h[16]
361
362	 SMLABT    r10, r9, r3, r10                   @x[17] * h[17]
363	 SMLABT    r11, r4, r3, r11                   @x[18] * h[17]
364
365	 LDR       r3, [r2], #4                       @h[18], h[19]
366	 LDRSH     r6, [r1], #2                       @x[19]
367	 LDRSH     r9, [r1], #2                       @x[20]
368
369	 SMLABB    r10, r4, r3, r10                   @x[18] * h[18]
370	 SMLABB    r11, r6, r3, r11                   @x[19] * h[18]
371
372	 SMLABT    r10, r6, r3, r10                   @x[19] * h[19]
373	 SMLABT    r11, r9, r3, r11                   @x[20] * h[19]
374
375	 LDR       r3, [r2], #4                       @h[20], h[21]
376	 LDRSH     r4, [r1], #2                       @x[21]
377	 LDRSH     r6, [r1], #2                       @x[22]
378
379	 SMLABB    r10, r9, r3, r10                   @x[20] * h[20]
380	 SMLABB    r11, r4, r3, r11                   @x[21] * h[20]
381
382	 SMLABT    r10, r4, r3, r10                   @x[21] * h[21]
383	 SMLABT    r11, r6, r3, r11                   @x[22] * h[21]
384
385	 LDR       r3, [r2], #4                       @h[22], h[23]
386	 LDRSH     r9, [r1], #2                       @x[23]
387	 LDRSH     r4, [r1], #2                       @x[24]
388
389	 SMLABB    r10, r6, r3, r10                   @x[22] * h[22]
390	 SMLABB    r11, r9, r3, r11                   @x[23] * h[22]
391
392	 SMLABT    r10, r9, r3, r10                   @x[23] * h[23]
393	 SMLABT    r11, r4, r3, r11                   @x[24] * h[23]
394
395	 LDR       r3, [r2], #4                       @h[24], h[25]
396	 LDRSH     r6, [r1], #2                       @x[25]
397	 LDRSH     r9, [r1], #2                       @x[26]
398
399	 SMLABB    r10, r4, r3, r10                   @x[24] * h[24]
400	 SMLABB    r11, r6, r3, r11                   @x[25] * h[24]
401
402	 SMLABT    r10, r6, r3, r10                   @x[25] * h[25]
403	 SMLABT    r11, r9, r3, r11                   @x[26] * h[25]
404
405	 LDR       r3, [r2], #4                       @h[26], h[27]
406	 LDRSH     r4, [r1], #2                       @x[27]
407	 LDRSH     r6, [r1], #2                       @x[28]
408
409	 SMLABB    r10, r9, r3, r10                   @x[26] * h[26]
410	 SMLABB    r11, r4, r3, r11                   @x[27] * h[26]
411
412	 SMLABT    r10, r4, r3, r10                   @x[27] * h[27]
413	 SMLABT    r11, r6, r3, r11                   @x[28] * h[27]
414
415	 LDR       r3, [r2], #4                       @h[28], h[29]
416	 LDRSH     r9, [r1], #2                       @x[29]
417	 LDRSH     r4, [r1], #2                       @x[30]
418
419	 SMLABB    r10, r6, r3, r10                   @x[28] * h[28]
420	 SMLABB    r11, r9, r3, r11                   @x[29] * h[28]
421
422	 SMLABT    r10, r9, r3, r10                   @x[29] * h[29]
423	 SMLABT    r11, r4, r3, r11                   @x[30] * h[29]
424
425	 LDR       r3, [r2], #4                       @h[30], h[31]
426	 LDRSH     r6, [r1], #2                       @x[31]
427	 LDRSH     r9, [r1], #2                       @x[32]
428
429	 SMLABB    r10, r4, r3, r10                   @x[30] * h[30]
430	 SMLABB    r11, r6, r3, r11                   @x[31] * h[30]
431
432	 SMLABT    r10, r6, r3, r10                   @x[31] * h[31]
433	 SMLABT    r11, r9, r3, r11                   @x[32] * h[31]
434@ Scale each sum by 4 in two steps: a plain shift by 1, then a saturating doubling (QADD with itself).
435	 @SSAT      r10, #32, r10, LSL #2
436	 @SSAT      r11, #32, r11, LSL #2
437	 MOV       r10, r10, LSL #1                   @sum << 1 (plain shift, no saturation)
438	 MOV       r11, r11, LSL #1
439@ NOTE(review): the LSL #1 above wraps on overflow; only the QADD doubling below clamps - confirm sums stay within +/-2^30.
440	 QADD      r10, r10, r10                      @saturating x2
441	 QADD      r11, r11, r11
442@ Round: saturating add of r5 (0x8000) before the high halfword is extracted.
443	 QADD      r10, r10, r5                       @saturating + 0x8000
444	 QADD      r11, r11, r5
445
446	 MOV       r10, r10, ASR #16                  @keep the high halfword
447	 MOV       r11, r11, ASR #16
448
449	 STRH      r10, [r0], #2                      @*exc++ = second-to-last output
450	 STRH      r11, [r0], #2                      @*exc++ = last output
451
452
453pred_lt4_end:
454         LDMFD     r13!, {r4 - r12, r15}              @restore r4-r12; the lr saved on entry is popped into pc (return)
455@ Literal read by the ADRL/LDR/ADD sequence at entry: distance from Table to inter4_2.
456Table:
457         .word       inter4_2-Table                   @added to Table's run-time address to obtain &inter4_2
458	 @ENDFUNC
459	 .END                                         @end of the assembly source
460
461
462
463
464