@ Filt_6k_7k_opt.s revision f3664ae9369a861ffbc2354e8e93e48983802062
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@**********************************************************************/
@void Filt_6k_7k(
@     Word16 signal[],                      /* input:  signal                  */
@     Word16 lg,                            /* input:  length of input         */
@     Word16 mem[]                          /* in/out: memory (size=30)        */
@)
@******************************************************************
@ r0    ---  signal[]
@ r1    ---  lg
@ r2    ---  mem[]
@
@ Overview of what this routine does:
@   1. Copies 30 halfwords from mem[] into the start of a stack buffer x.
@   2. Appends 80 samples of signal[], each arithmetically shifted right
@      by 2, at x[30..109].
@   3. For i = 0..79 computes
@          L_tmp  = sum over k = 0..14 of (x[i+k] + x[i+30-k]) * fir_6k_7k[k]
@                 + x[i+15] * fir_6k_7k[15]
@          signal[i] = (L_tmp + 0x4000) >> 15
@   4. Copies 30 halfwords starting at x[80] back into mem[].
@
@ NOTE: lg (r1) is never read; the sample count is hard-wired to 80 in both
@ loops and in the final "x + lg" address (byte offset 160).
@
@ Stack frame (240 bytes below the saved registers):
@   [sp, #0   .. #219]   x[0..109]  (110 halfwords)
@   [sp, #220 .. #235]   unused
@   [sp, #236]           saved mem[] pointer (see LOOP2 prologue)

        .section  .text
        .global   Filt_6k_7k_asm
        .extern   voAWB_Copy
        .extern   fir_6k_7k

Filt_6k_7k_asm:

        STMFD           r13!, {r4 - r12, r14}
        SUB             r13, r13, #240              @ frame: x[L_SUBFR16k + (L_FIR - 1)] plus spill slot
        MOV             r8, r0                      @ keep signal[] address across the call
        MOV             r5, r2                      @ keep mem[] address across the call

        MOV             r1, r13                     @ destination: x
        MOV             r0, r2                      @ source: mem
        MOV             r2, #30                     @ L_FIR - 1
        BL              voAWB_Copy                  @ memcpy(x, mem, (L_FIR - 1)<<1)

        LDR             r10, Lable1                 @ r10 = address of fir_6k_7k

        MOV             r14, #0                     @ samples converted so far
        MOV             r3, r8                      @ r3 = signal[] (reloaded here because the call may clobber r3)
        ADD             r6, r13, #60                @ r6 = &x[L_FIR - 1]
        MOV             r7, r3                      @ r7 = read pointer into signal[]

        @ x[30 + n] = signal[n] >> 2 for n = 0..79, eight samples per iteration
LOOP1:
        LDRSH           r8, [r7], #2
        LDRSH           r9, [r7], #2
        MOV             r8, r8, ASR #2
        MOV             r9, r9, ASR #2
        LDRSH           r11, [r7], #2
        LDRSH           r12, [r7], #2
        MOV             r11, r11, ASR #2
        MOV             r12, r12, ASR #2
        STRH            r8, [r6], #2
        STRH            r9, [r6], #2
        STRH            r11, [r6], #2
        STRH            r12, [r6], #2
        LDRSH           r8, [r7], #2
        LDRSH           r9, [r7], #2
        MOV             r8, r8, ASR #2
        MOV             r9, r9, ASR #2
        LDRSH           r11, [r7], #2
        LDRSH           r12, [r7], #2
        MOV             r11, r11, ASR #2
        MOV             r12, r12, ASR #2
        STRH            r8, [r6], #2
        STRH            r9, [r6], #2
        STRH            r11, [r6], #2
        STRH            r12, [r6], #2
        ADD             r14, r14, #8
        CMP             r14, #80
        BLT             LOOP1

        @ r5 is reused as the loop counter below, so the mem[] pointer is
        @ spilled.  It goes into the unused tail of the frame (x ends at byte
        @ 219) rather than below sp: memory below sp is not owned by this
        @ function and may be overwritten asynchronously (e.g. by a signal or
        @ interrupt frame).
        STR             r5, [sp, #236]              @ save mem[] address inside the frame

        @ LOOP2 register use:
        @   r3  = write pointer into signal[]     r4  = &x[i]
        @   r5  = i                               r10 = &fir_6k_7k[0]
        @   r14 = L_tmp accumulator               r0  = current coefficient pair
        @   r1, r2, r6, r7, r8, r9 = sample temporaries
        @ Each LDR from r10 fetches two 16-bit coefficients; SMULBB/SMLABB
        @ multiply by the bottom halfword of r0 and SMLABT by the top halfword.
        MOV             r4, r13
        MOV             r5, #0                      @ i = 0
LOOP2:
        LDR             r0, [r10]

        LDRSH           r1, [r4]                    @ load x[i]
        LDRSH           r2, [r4, #60]               @ load x[i + 30]
        LDRSH           r6, [r4, #2]                @ load x[i + 1]
        LDRSH           r7, [r4, #58]               @ load x[i + 29]
        ADD             r1, r1, r2                  @ x[i] + x[i + 30]
        ADD             r6, r6, r7                  @ x[i + 1] + x[i + 29]
        LDRSH           r8, [r4, #4]                @ load x[i + 2]
        LDRSH           r9, [r4, #56]               @ load x[i + 28]

        SMULBB          r14, r1, r0                 @ (x[i] + x[i + 30]) * fir_7k[0]
        ADD             r8, r8, r9                  @ x[i + 2] + x[i + 28]
        SMLABT          r14, r6, r0, r14            @ (x[i + 1] + x[i + 29]) * fir_7k[1]

        LDR             r0, [r10, #4]
        LDRSH           r1, [r4, #6]                @ load x[i + 3]
        LDRSH           r2, [r4, #54]               @ load x[i + 27]
        LDRSH           r6, [r4, #8]                @ load x[i + 4]
        LDRSH           r7, [r4, #52]               @ load x[i + 26]
        ADD             r1, r1, r2                  @ x[i + 3] + x[i + 27]
        ADD             r6, r6, r7                  @ x[i + 4] + x[i + 26]
        SMLABB          r14, r8, r0, r14            @ (x[i + 2] + x[i + 28]) * fir_7k[2]
        LDRSH           r8, [r4, #10]               @ load x[i + 5]
        LDRSH           r9, [r4, #50]               @ load x[i + 25]
        SMLABT          r14, r1, r0, r14            @ (x[i + 3] + x[i + 27]) * fir_7k[3]
        ADD             r8, r8, r9                  @ x[i + 5] + x[i + 25]

        LDR             r0, [r10, #8]
        LDRSH           r1, [r4, #12]               @ x[i + 6]
        LDRSH           r2, [r4, #48]               @ x[i + 24]
        SMLABB          r14, r6, r0, r14            @ (x[i + 4] + x[i + 26]) * fir_7k[4]
        LDRSH           r6, [r4, #14]               @ x[i + 7]
        LDRSH           r7, [r4, #46]               @ x[i + 23]
        SMLABT          r14, r8, r0, r14            @ (x[i + 5] + x[i + 25]) * fir_7k[5]

        LDR             r0, [r10, #12]
        ADD             r1, r1, r2                  @ x[i + 6] + x[i + 24]
        ADD             r6, r6, r7                  @ x[i + 7] + x[i + 23]
        SMLABB          r14, r1, r0, r14            @ (x[i + 6] + x[i + 24]) * fir_7k[6]
        LDRSH           r8, [r4, #16]               @ x[i + 8]
        LDRSH           r9, [r4, #44]               @ x[i + 22]
        SMLABT          r14, r6, r0, r14            @ (x[i + 7] + x[i + 23]) * fir_7k[7]

        LDR             r0, [r10, #16]
        LDRSH           r1, [r4, #18]               @ x[i + 9]
        LDRSH           r2, [r4, #42]               @ x[i + 21]
        LDRSH           r6, [r4, #20]               @ x[i + 10]
        LDRSH           r7, [r4, #40]               @ x[i + 20]
        ADD             r8, r8, r9                  @ x[i + 8] + x[i + 22]
        ADD             r1, r1, r2                  @ x[i + 9] + x[i + 21]
        ADD             r6, r6, r7                  @ x[i + 10] + x[i + 20]
        SMLABB          r14, r8, r0, r14            @ (x[i + 8] + x[i + 22]) * fir_7k[8]
        LDRSH           r8, [r4, #22]               @ x[i + 11]
        LDRSH           r9, [r4, #38]               @ x[i + 19]
        SMLABT          r14, r1, r0, r14            @ (x[i + 9] + x[i + 21]) * fir_7k[9]

        LDR             r0, [r10, #20]
        LDRSH           r1, [r4, #24]               @ x[i + 12]
        LDRSH           r2, [r4, #36]               @ x[i + 18]
        SMLABB          r14, r6, r0, r14            @ (x[i + 10] + x[i + 20]) * fir_7k[10]
        LDRSH           r6, [r4, #26]               @ x[i + 13]
        ADD             r8, r8, r9                  @ x[i + 11] + x[i + 19]
        LDRSH           r7, [r4, #34]               @ x[i + 17]
        SMLABT          r14, r8, r0, r14            @ (x[i + 11] + x[i + 19]) * fir_7k[11]

        LDR             r0, [r10, #24]
        ADD             r1, r1, r2                  @ x[i + 12] + x[i + 18]
        LDRSH           r8, [r4, #28]               @ x[i + 14]
        SMLABB          r14, r1, r0, r14            @ (x[i + 12] + x[i + 18]) * fir_7k[12]
        ADD             r6, r6, r7                  @ x[i + 13] + x[i + 17]
        LDRSH           r9, [r4, #32]               @ x[i + 16]
        SMLABT          r14, r6, r0, r14            @ (x[i + 13] + x[i + 17]) * fir_7k[13]

        LDR             r0, [r10, #28]
        ADD             r8, r8, r9                  @ x[i + 14] + x[i + 16]
        LDRSH           r1, [r4, #30]               @ x[i + 15]
        SMLABB          r14, r8, r0, r14            @ (x[i + 14] + x[i + 16]) * fir_7k[14]
        SMLABT          r14, r1, r0, r14            @ x[i + 15] * fir_7k[15]

        ADD             r5, r5, #1                  @ i++
        ADD             r14, r14, #0x4000           @ rounding offset
        ADD             r4, r4, #2                  @ advance to &x[i]
        MOV             r1, r14, ASR #15
        CMP             r5, #80
        STRH            r1, [r3], #2                @ signal[i] = (L_tmp + 0x4000) >> 15
        BLT             LOOP2

        LDR             r1, [sp, #236]              @ destination: saved mem[] address
        ADD             r0, r13, #160               @ source: x + lg (lg fixed at 80 halfwords)
        MOV             r2, #30                     @ L_FIR - 1
        BL              voAWB_Copy

Filt_6k_7k_end:
        ADD             r13, r13, #240
        LDMFD           r13!, {r4 - r12, r15}       @ restore registers and return

Lable1:
        .word           fir_6k_7k
        @ENDFUNC
        .END
