@ convolve_opt.s revision b676a05348e4c516fa8b57e33b10548e6142c3f8
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@-----------------------------------------------------------------------
@ Syntax: GNU as, ARM (A32) instruction set, '@' starts a comment.
@
@ C-equivalent prototype (the exported symbol is Convolve_asm):
@
@   void Convolve (
@       Word16 x[],        /* (i)  : input vector      */
@       Word16 h[],        /* (i)  : impulse response  */
@       Word16 y[],        /* (o)  : output vector     */
@       Word16 L           /* (i)  : vector size       */
@   )
@
@ For n = 0 .. CONV_OUTPUTS-1 this routine computes
@
@       s    = sum over i = 0 .. n of  x[i] * h[n - i]
@       y[n] = (CONV_ROUND + (s << 1)) >> 16        (stored as a halfword)
@
@ x[] and h[] are read as signed halfwords (LDRSH); the products are
@ accumulated with 32-bit MUL/MLA.
@
@ In:     r0 = x[], r1 = h[], r2 = y[], r3 = L
@ NOTE:   r3 is overwritten with 0 before it is ever read, so the value of
@         L passed by the caller has no effect; the number of outputs is
@         the constant CONV_OUTPUTS.
@ Out:    CONV_OUTPUTS halfwords written through r2; no return value is set.
@
@ Register roles inside the routine:
@   r3        n, index of the output sample being produced
@   r4        tmpH, starts at &h[n] and walks downwards
@   r5        i, number of product terms still to accumulate
@             (reused as scratch for the rounded result in phases 0 and 3)
@   r6        tmpX, starts at &x[0] and walks upwards
@   r8        s, the 32-bit accumulator
@   r9/r10    first  x/h operand pair
@   r12/r14   second x/h operand pair
@   r11       CONV_ROUND
@
@ Stack:  r4-r12 and r14 are pushed on entry and restored on exit, with
@         the saved r14 popped straight into r15 to return.  r7 is saved
@         but not otherwise used.
@
@ Structure: each pass of the outer loop produces four consecutive outputs.
@ n is a multiple of 4 at .Lconv_group, so the four phases peel 1, 2, 3 and
@ 0 leading terms respectively, which leaves a multiple of 4 terms for the
@ 4-way unrolled MAC loop of that phase.
@-----------------------------------------------------------------------

        .equ      CONV_OUTPUTS, 64                       @ number of y[] entries produced
        .equ      CONV_ROUND,   0x8000                   @ added before the high halfword is taken

        .section  .text
        .global   Convolve_asm
        .type     Convolve_asm, %function                @ mark the symbol as a function (STT_FUNC)

Convolve_asm:

        STMFD          r13!, {r4 - r12, r14}
        MOV            r3,  #0                           @ n = 0 (the incoming L is discarded)
        MOV            r11, #CONV_ROUND

.Lconv_group:
        @ ---- phase 0: n % 4 == 0, one term peeled ----
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0                            @ tmpX = x
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        SUB            r5, r5, #1                        @ i accounts for the peeled term
        MUL            r8,  r9, r10                      @ s = x[0] * h[n]

.Lconv_mac0:
        CMP            r5, #0
        BLE            .Lconv_store0                     @ no terms left
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        MLA            r8, r12, r14, r8
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        SUBS           r5, r5, #4                        @ i -= 4 (the CMP at loop top re-tests i)
        MLA            r8, r12, r14, r8

        B              .Lconv_mac0

.Lconv_store0:

        ADD            r5, r11, r8, LSL #1               @ (s << 1) + CONV_ROUND
        MOV            r5, r5, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1                        @ n++
        STRH           r5, [r2], #2                      @ y[n] (pointer post-incremented)

        @ ---- phase 1: n % 4 == 1, two terms peeled ----
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0                            @ tmpX = x
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--

        MUL            r8, r9, r10                       @ s  = x[0] * h[n]
        SUB            r5, r5, #2                        @ i accounts for the two peeled terms
        MLA            r8, r12, r14, r8                  @ s += x[1] * h[n - 1]

.Lconv_mac1:
        CMP            r5, #0
        BLE            .Lconv_store1                     @ no terms left
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        MLA            r8, r12, r14, r8
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        SUBS           r5, r5, #4                        @ i -= 4 (the CMP at loop top re-tests i)
        MLA            r8, r12, r14, r8
        B              .Lconv_mac1

.Lconv_store1:
        ADD            r8, r11, r8, LSL #1               @ (s << 1) + CONV_ROUND
        MOV            r8, r8, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1                        @ n++
        STRH           r8, [r2], #2                      @ y[n] (pointer post-incremented)

        @ ---- phase 2: n % 4 == 2, three terms peeled ----
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0                            @ tmpX = x
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MUL            r8, r9, r10                       @ s  = x[0] * h[n]
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        MLA            r8, r12, r14, r8                  @ s += x[1] * h[n - 1]
        SUB            r5, r5, #3                        @ i accounts for the three peeled terms
        MLA            r8, r9, r10, r8                   @ s += x[2] * h[n - 2]

.Lconv_mac2:
        CMP            r5, #0
        BLE            .Lconv_store2                     @ no terms left
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        MLA            r8, r12, r14, r8
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        SUBS           r5, r5, #4                        @ i -= 4 (the CMP at loop top re-tests i)
        MLA            r8, r12, r14, r8
        B              .Lconv_mac2

.Lconv_store2:
        ADD            r8, r11, r8, LSL #1               @ (s << 1) + CONV_ROUND
        MOV            r8, r8, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1                        @ n++
        STRH           r8, [r2], #2                      @ y[n] (pointer post-incremented)

        @ ---- phase 3: n % 4 == 3, nothing peeled (n + 1 is a multiple of 4) ----
        ADD            r5, r3, #1                        @ i = n + 1
        ADD            r4, r1, r3, LSL #1                @ tmpH = &h[n]
        MOV            r6, r0                            @ tmpX = x
        MOV            r8, #0                            @ s = 0

.Lconv_mac3:
        CMP            r5, #0
        BLE            .Lconv_store3                     @ no terms left
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        MLA            r8, r12, r14, r8
        LDRSH          r9,  [r6], #2                     @ *tmpX++
        LDRSH          r10, [r4], #-2                    @ *tmpH--
        LDRSH          r12, [r6], #2                     @ *tmpX++
        LDRSH          r14, [r4], #-2                    @ *tmpH--
        MLA            r8, r9, r10, r8
        SUBS           r5, r5, #4                        @ i -= 4 (the CMP at loop top re-tests i)
        MLA            r8, r12, r14, r8
        B              .Lconv_mac3
.Lconv_store3:
        ADD            r5, r11, r8, LSL #1               @ (s << 1) + CONV_ROUND
        MOV            r5, r5, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1                        @ n++
        STRH           r5, [r2], #2                      @ y[n] (pointer post-incremented)

        CMP            r3, #CONV_OUTPUTS
        BLT            .Lconv_group                      @ next group of four outputs

Convolve_asm_end:

        LDMFD          r13!, {r4 - r12, r15}             @ restore r4-r12, return via saved lr -> pc

        .size     Convolve_asm, . - Convolve_asm

        @ENDFUNC
        .END