Filt_6k_7k_opt.s revision e17bb5cafdbc6089716d8e8c5afbb00f207a59bb
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16
17@**********************************************************************/
18@void Filt_6k_7k(
19@     Word16 signal[],                      /* input:  signal                  */
20@     Word16 lg,                            /* input:  length of input         */
21@     Word16 mem[]                          /* in/out: memory (size=30)        */
22@)
23@******************************************************************
24@ r0    ---  signal[]
25@ r1    ---  lg
26@ r2    ---  mem[]
27
@-----------------------------------------------------------------------
@ void Filt_6k_7k_asm(Word16 signal[], Word16 lg, Word16 mem[])
@
@ Syntax: GNU as, ARM state (uses the ARMv5TE SMULxy/SMLAxy multiplies).
@
@ In:     r0 = signal[]  filtered in place
@         r1 = lg        NOT read: the frame length is fixed at 80 samples
@         r2 = mem[]     30 halfwords of filter history, read then rewritten
@ Out:    signal[0..79] overwritten with the filter output,
@         mem[0..29] receives x[80..109] (the tail of the work buffer)
@ Calls:  voAWB_Copy with r0/r1/r2 = (source, destination, 30)
@         NOTE(review): argument meaning taken from how it is used here;
@         confirm against the definition of voAWB_Copy.
@ Stack:  40 bytes of saved registers (r4-r12, lr) + 240 bytes of locals.
@         The work buffer x[] starts at sp; 110 halfwords of it are used
@         (30 of history followed by 80 scaled input samples).
@ Saved:  r4-r12 are restored on exit; r0-r3 and r12 are scratch.
@
@ Each output is
@     signal[i] = (sum_{k=0..30} x[i+k] * c[k] + 0x4000) >> 15
@ where c[] is read from fir_6k_7k.  Only c[0..15] are loaded: x[i+k] and
@ x[i+30-k] are added first and share coefficient c[k], so the table is
@ treated as symmetric.  Coefficients are fetched two per 32-bit LDR, so
@ fir_6k_7k is assumed to be word aligned - TODO confirm at its definition.
@-----------------------------------------------------------------------
          .section  .text
          .global   Filt_6k_7k_asm
          .extern   voAWB_Copy
          .extern   fir_6k_7k
          .hidden   fir_6k_7k

          .equ      X_BYTES,     240                @ bytes reserved for x[]
          .equ      HIST_LEN,    30                 @ L_FIR - 1 history samples
          .equ      HIST_BYTES,  60                 @ HIST_LEN halfwords
          .equ      FRAME_LEN,   80                 @ samples per call
          .equ      FRAME_BYTES, 160                @ FRAME_LEN halfwords

Filt_6k_7k_asm:

          STMFD         r13!, {r4 - r12, r14}
          SUB           r13, r13, #X_BYTES          @ x[] lives at sp
          MOV           r8, r0                      @ r8 = signal
          MOV           r5, r2                      @ r5 = mem; stays live until the
                                                    @ final copy (callee-saved, so it
                                                    @ survives the calls below)

          MOV           r1, r13                     @ destination: x
          MOV           r0, r2                      @ source: mem
          MOV           r2, #HIST_LEN
          BL            voAWB_Copy                  @ x[0..29] = mem[0..29]

          @ Position-independent address of the coefficient table:
          @ the literal holds (fir_6k_7k - Lable1).
          ADR           r3, Lable1
          LDR           r10, [r3]
          ADD           r10, r10, r3                @ r10 = &fir_6k_7k

          MOV           r14, #0                     @ samples converted so far
          MOV           r3, r8                      @ r3 = output pointer; set after
                                                    @ the call, which may clobber r3
          ADD           r6, r13, #HIST_BYTES        @ r6 = &x[30]
          MOV           r7, r8                      @ r7 = input pointer

@ x[30 + i] = signal[i] >> 2 for i = 0..79, eight samples per pass.
LOOP1:
          LDRSH         r8,  [r7], #2
          LDRSH         r9,  [r7], #2
          MOV           r8, r8, ASR #2
          MOV           r9, r9, ASR #2
          LDRSH         r11, [r7], #2
          LDRSH         r12, [r7], #2
          MOV           r11, r11, ASR #2
          MOV           r12, r12, ASR #2
          STRH          r8, [r6], #2
          STRH          r9, [r6], #2
          STRH          r11, [r6], #2
          STRH          r12, [r6], #2
          LDRSH         r8,  [r7], #2
          LDRSH         r9,  [r7], #2
          MOV           r8, r8, ASR #2
          MOV           r9, r9, ASR #2
          LDRSH         r11, [r7], #2
          LDRSH         r12, [r7], #2
          MOV           r11, r11, ASR #2
          MOV           r12, r12, ASR #2
          STRH          r8, [r6], #2
          STRH          r9, [r6], #2
          STRH          r11, [r6], #2
          STRH          r12, [r6], #2
          ADD           r14, r14, #8
          CMP           r14, #FRAME_LEN
          BLT           LOOP1

          @ Filter loop register roles:
          @   r3  = &signal[i] (output)   r4  = &x[i]
          @   r10 = &fir_6k_7k            r12 = i
          @   r14 = accumulator           r0  = current coefficient pair
          @   r1, r2, r6-r9 = sample temporaries
          @ The mem pointer stays in r5.  It is deliberately not spilled below
          @ sp: that area is not protected, and a signal or interrupt handler
          @ running on this stack could overwrite it.
          MOV           r4, r13
          MOV           r12, #0                     @ i = 0
LOOP2:
          LDR           r0, [r10]                   @ fir_6k_7k[0], [1]

          LDRSH         r1, [r4]                    @ x[i]
          LDRSH         r2, [r4, #60]               @ x[i+30]
          LDRSH         r6, [r4, #2]                @ x[i+1]
          LDRSH         r7, [r4, #58]               @ x[i+29]
          ADD           r1, r1, r2                  @ x[i] + x[i+30]
          ADD           r6, r6, r7                  @ x[i+1] + x[i+29]
          LDRSH         r8, [r4, #4]                @ x[i+2]
          LDRSH         r9, [r4, #56]               @ x[i+28]

          SMULBB        r14, r1, r0                 @ (x[i] + x[i+30]) * fir_6k_7k[0]
          ADD           r8, r8, r9                  @ x[i+2] + x[i+28]
          SMLABT        r14, r6, r0, r14            @ (x[i+1] + x[i+29]) * fir_6k_7k[1]

          LDR           r0, [r10, #4]               @ fir_6k_7k[2], [3]
          LDRSH         r1, [r4, #6]                @ x[i+3]
          LDRSH         r2, [r4, #54]               @ x[i+27]
          LDRSH         r6, [r4, #8]                @ x[i+4]
          LDRSH         r7, [r4, #52]               @ x[i+26]
          ADD           r1, r1, r2                  @ x[i+3] + x[i+27]
          ADD           r6, r6, r7                  @ x[i+4] + x[i+26]
          SMLABB        r14, r8, r0, r14            @ (x[i+2] + x[i+28]) * fir_6k_7k[2]
          LDRSH         r8, [r4, #10]               @ x[i+5]
          LDRSH         r9, [r4, #50]               @ x[i+25]
          SMLABT        r14, r1, r0, r14            @ (x[i+3] + x[i+27]) * fir_6k_7k[3]
          ADD           r8, r8, r9                  @ x[i+5] + x[i+25]

          LDR           r0, [r10, #8]               @ fir_6k_7k[4], [5]
          LDRSH         r1, [r4, #12]               @ x[i+6]
          LDRSH         r2, [r4, #48]               @ x[i+24]
          SMLABB        r14, r6, r0, r14            @ (x[i+4] + x[i+26]) * fir_6k_7k[4]
          LDRSH         r6, [r4, #14]               @ x[i+7]
          LDRSH         r7, [r4, #46]               @ x[i+23]
          SMLABT        r14, r8, r0, r14            @ (x[i+5] + x[i+25]) * fir_6k_7k[5]
          LDR           r0, [r10, #12]              @ fir_6k_7k[6], [7]
          ADD           r1, r1, r2                  @ x[i+6] + x[i+24]
          ADD           r6, r6, r7                  @ x[i+7] + x[i+23]
          SMLABB        r14, r1, r0, r14            @ (x[i+6] + x[i+24]) * fir_6k_7k[6]
          LDRSH         r8, [r4, #16]               @ x[i+8]
          LDRSH         r9, [r4, #44]               @ x[i+22]
          SMLABT        r14, r6, r0, r14            @ (x[i+7] + x[i+23]) * fir_6k_7k[7]
          LDR           r0, [r10, #16]              @ fir_6k_7k[8], [9]
          LDRSH         r1, [r4, #18]               @ x[i+9]
          LDRSH         r2, [r4, #42]               @ x[i+21]
          LDRSH         r6, [r4, #20]               @ x[i+10]
          LDRSH         r7, [r4, #40]               @ x[i+20]
          ADD           r8, r8, r9                  @ x[i+8] + x[i+22]
          ADD           r1, r1, r2                  @ x[i+9] + x[i+21]
          ADD           r6, r6, r7                  @ x[i+10] + x[i+20]
          SMLABB        r14, r8, r0, r14            @ (x[i+8] + x[i+22]) * fir_6k_7k[8]
          LDRSH         r8, [r4, #22]               @ x[i+11]
          LDRSH         r9, [r4, #38]               @ x[i+19]
          SMLABT        r14, r1, r0, r14            @ (x[i+9] + x[i+21]) * fir_6k_7k[9]
          LDR           r0, [r10, #20]              @ fir_6k_7k[10], [11]
          LDRSH         r1, [r4, #24]               @ x[i+12]
          LDRSH         r2, [r4, #36]               @ x[i+18]
          SMLABB        r14, r6, r0, r14            @ (x[i+10] + x[i+20]) * fir_6k_7k[10]
          LDRSH         r6, [r4, #26]               @ x[i+13]
          ADD           r8, r8, r9                  @ x[i+11] + x[i+19]
          LDRSH         r7, [r4, #34]               @ x[i+17]
          SMLABT        r14, r8, r0, r14            @ (x[i+11] + x[i+19]) * fir_6k_7k[11]
          LDR           r0, [r10, #24]              @ fir_6k_7k[12], [13]
          ADD           r1, r1, r2                  @ x[i+12] + x[i+18]
          LDRSH         r8, [r4, #28]               @ x[i+14]
          SMLABB        r14, r1, r0, r14            @ (x[i+12] + x[i+18]) * fir_6k_7k[12]
          ADD           r6, r6, r7                  @ x[i+13] + x[i+17]
          LDRSH         r9, [r4, #32]               @ x[i+16]
          SMLABT        r14, r6, r0, r14            @ (x[i+13] + x[i+17]) * fir_6k_7k[13]
          LDR           r0, [r10, #28]              @ fir_6k_7k[14], [15]
          ADD           r8, r8, r9                  @ x[i+14] + x[i+16]
          LDRSH         r1, [r4, #30]               @ x[i+15], the unpaired centre tap
          SMLABB        r14, r8, r0, r14            @ (x[i+14] + x[i+16]) * fir_6k_7k[14]
          SMLABT        r14, r1, r0, r14            @ x[i+15] * fir_6k_7k[15]

          ADD           r12, r12, #1                @ i++
          ADD           r14, r14, #0x4000           @ rounding offset for the >> 15
          ADD           r4, r4, #2                  @ advance to &x[i]
          MOV           r1, r14, ASR #15
          CMP           r12, #FRAME_LEN
          STRH          r1, [r3], #2                @ signal[i] = (L_tmp + 0x4000) >> 15
          BLT           LOOP2

          MOV           r1, r5                      @ destination: mem
          ADD           r0, r13, #FRAME_BYTES       @ source: &x[80]
          MOV           r2, #HIST_LEN
          BL            voAWB_Copy                  @ mem[0..29] = x[80..109]

Filt_6k_7k_end:
          ADD           r13, r13, #X_BYTES
          LDMFD         r13!, {r4 - r12, r15}       @ restore and return

@ Literal: offset from this word to the coefficient table, resolved at run
@ time by the ADR/LDR/ADD sequence near the top of the function.
Lable1:
          .word         fir_6k_7k-Lable1
          @ENDFUNC
          .END
187
188
189