@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@*void Convolve (
@*    Word16 x[],        /* (i)  : input vector                              */
@*    Word16 h[],        /* (i)  : impulse response                          */
@*    Word16 y[],        /* (o)  : output vector                             */
@*    Word16 L           /* (i)  : vector size                               */
@*)
@ Register convention on entry:
@ r0 --- x[]
@ r1 --- h[]
@ r2 --- y[]
@ r3 --- L
@
@ Computes the fixed-point convolution
@     y[n] = round( ( sum_{i=0..n} x[i] * h[n-i] ) << 1 )   for n = 0..63,
@ where round/extract_h is implemented as ((acc << 1) + 0x8000) >> 16,
@ i.e. the Q15 rounding used by the reference C Convolve().
@
@ NOTE(review): r3 (the L argument) is overwritten with 0 immediately and the
@ outer loop count is hard-coded to 64 below (CMP r3, #64) — the L parameter
@ is effectively ignored. Presumably all callers pass L == L_SUBFR == 64;
@ confirm against call sites before reusing this routine elsewhere.
@
@ Structure: the outer loop produces four output samples per pass. For each
@ phase, (n+1) products are needed; since the inner loops consume 4 products
@ per iteration, each phase first "peels" (n+1) mod 4 products so that the
@ remaining count is a multiple of 4:
@   phase 1 (n % 4 == 0): peel 1 product, then LOOP1
@   phase 2 (n % 4 == 1): peel 2 products, then LOOP2
@   phase 3 (n % 4 == 2): peel 3 products, then LOOP3
@   phase 4 (n % 4 == 3): peel 0 products, then LOOP4
@
@ Register use inside the loop:
@   r3  = n (output index)      r4  = tmpH (walks h[] downward)
@   r5  = i (product count) / rounded result
@   r6  = tmpX (walks x[] upward)
@   r8  = 32-bit accumulator    r11 = 0x8000 rounding constant
@   r9, r10, r12, r14 = sign-extended 16-bit operand pairs

          .section  .text
          .global   Convolve_asm

Convolve_asm:

          STMFD    r13!, {r4 - r12, r14}
          MOV      r3, #0                   @ n = 0 (clobbers the L argument — see note above)
          MOV      r11, #0x8000             @ Q15 rounding constant

@ ---- phase 1: n % 4 == 0, (n+1) products, peel 1 ----
LOOP:
          ADD      r4, r1, r3, LSL #1       @ tmpH = &h[n] (halfword elements)
          ADD      r5, r3, #1               @ i = n + 1
          MOV      r6, r0                   @ tmpX = x
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          SUB      r5, r5, #1               @ one product peeled; remainder divisible by 4
          MUL      r8, r9, r10              @ acc = x[0] * h[n]

LOOP1:                                      @ 4 products per iteration
          CMP      r5, #0
          BLE      L1
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          MLA      r8, r12, r14, r8
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          SUBS     r5, r5, #4               @ SUBS scheduled between MLAs to hide latency
          MLA      r8, r12, r14, r8

          B        LOOP1

L1:
          ADD      r5, r11, r8, LSL #1      @ (acc << 1) + 0x8000
          MOV      r5, r5, LSR #16          @ extract_h(s): take high halfword
          ADD      r3, r3, #1               @ n++
          STRH     r5, [r2], #2             @ store y[n]

@ ---- phase 2: n % 4 == 1, (n+1) products, peel 2 ----
          ADD      r4, r1, r3, LSL #1       @ tmpH = &h[n]
          ADD      r5, r3, #1               @ i = n + 1
          MOV      r6, r0                   @ tmpX = x
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--

          MUL      r8, r9, r10              @ acc = first product
          SUB      r5, r5, #2               @ two products peeled
          MLA      r8, r12, r14, r8         @ acc += second product

LOOP2:                                      @ 4 products per iteration
          CMP      r5, #0
          BLE      L2
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          MLA      r8, r12, r14, r8
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          SUBS     r5, r5, #4
          MLA      r8, r12, r14, r8
          B        LOOP2

L2:
          ADD      r8, r11, r8, LSL #1      @ (acc << 1) + 0x8000
          MOV      r8, r8, LSR #16          @ extract_h(s)
          ADD      r3, r3, #1               @ n++
          STRH     r8, [r2], #2             @ store y[n]

@ ---- phase 3: n % 4 == 2, (n+1) products, peel 3 ----
          ADD      r4, r1, r3, LSL #1       @ tmpH = &h[n]
          ADD      r5, r3, #1               @ i = n + 1
          MOV      r6, r0                   @ tmpX = x
          LDRSH    r9,  [r6], #2
          LDRSH    r10, [r4], #-2
          LDRSH    r12, [r6], #2
          LDRSH    r14, [r4], #-2
          MUL      r8, r9, r10              @ acc = first product
          LDRSH    r9,  [r6], #2
          LDRSH    r10, [r4], #-2
          MLA      r8, r12, r14, r8         @ acc += second product
          SUB      r5, r5, #3               @ three products peeled
          MLA      r8, r9, r10, r8          @ acc += third product

LOOP3:                                      @ 4 products per iteration
          CMP      r5, #0
          BLE      L3
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          MLA      r8, r12, r14, r8
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          SUBS     r5, r5, #4
          MLA      r8, r12, r14, r8
          B        LOOP3

L3:
          ADD      r8, r11, r8, LSL #1      @ (acc << 1) + 0x8000
          MOV      r8, r8, LSR #16          @ extract_h(s)
          ADD      r3, r3, #1               @ n++
          STRH     r8, [r2], #2             @ store y[n]

@ ---- phase 4: n % 4 == 3, (n+1) products already divisible by 4, peel 0 ----
          ADD      r5, r3, #1               @ i = n + 1
          ADD      r4, r1, r3, LSL #1       @ tmpH = &h[n]
          MOV      r6, r0                   @ tmpX = x
          MOV      r8, #0                   @ acc = 0 (nothing peeled)

LOOP4:                                      @ 4 products per iteration
          CMP      r5, #0
          BLE      L4
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          MLA      r8, r12, r14, r8
          LDRSH    r9,  [r6], #2            @ *tmpX++
          LDRSH    r10, [r4], #-2           @ *tmpH--
          LDRSH    r12, [r6], #2            @ *tmpX++
          LDRSH    r14, [r4], #-2           @ *tmpH--
          MLA      r8, r9, r10, r8
          SUBS     r5, r5, #4
          MLA      r8, r12, r14, r8
          B        LOOP4
L4:
          ADD      r5, r11, r8, LSL #1      @ (acc << 1) + 0x8000
          MOV      r5, r5, LSR #16          @ extract_h(s)
          ADD      r3, r3, #1               @ n++
          STRH     r5, [r2], #2             @ store y[n]

          CMP      r3, #64                  @ hard-coded vector size (L_SUBFR) — see note above
          BLT      LOOP

Convolve_asm_end:

          LDMFD    r13!, {r4 - r12, r15}    @ restore callee-saved regs; pop lr into pc = return

          @ENDFUNC
          .end