@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@**********************************************************************/
@Word32 Dot_product12(                      /* (o) Q31: normalized result (1 < val <= -1) */
@       Word16 x[],                           /* (i) 12bits: x vector                       */
@       Word16 y[],                           /* (i) 12bits: y vector                       */
@       Word16 lg,                            /* (i)    : vector length                     */
@       Word16 * exp                          /* (o)    : exponent of result (0..+30)       */
@)
@************************************************************************
@
@ Syntax : GNU as, ARM (32-bit) state, NEON instructions.
@
@ In     : r0 = x[]   (Word16 *)
@          r1 = y[]   (Word16 *)
@          r2 = lg    (only compared against 64, see below)
@          r3 = exp   (Word16 *, receives 30 - sft)
@ Out    : r0 = ((2 * sum(x[i] * y[i])) + 1) << sft
@          [r3] = 30 - sft, stored as a halfword
@ Clobb. : r1, r2 are left as-is or advanced; Q0-Q3, Q8-Q13, Q15, flags.
@          r4-r12 and D8-D15 (Q4-Q7) are saved on entry and restored on exit.
@
@ The routine is fully unrolled, there is no loop:
@   * lg == 64       -> 64 products are accumulated
@   * any other lg   -> 80 products are accumulated
@ When x and y are the same pointer, a dedicated path squares x[] so that
@ the vector is only loaded once.
@
@ NOTE(review): callers are presumably expected to pass lg = 64 or lg = 80
@ only -- confirm against the C call sites.

          .section  .text
          .global   Dot_product12_asm

Dot_product12_asm:

          STMFD         r13!, {r4 - r12, r14}
          VPUSH         {D8 - D15}                    @ Q4-Q7 are used below; D8-D15 are callee-saved
          CMP           r0, r1
          BEQ           LOOP_EQ                       @ x == y: take the sum-of-squares path

          @ ---- x != y : first 64 products --------------------------------
          VLD1.S16      {Q0, Q1}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q2, Q3}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q4, Q5}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q6, Q7}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q8, Q9}, [r1]!               @ load 16 Word16 y[]
          VLD1.S16      {Q10, Q11}, [r1]!             @ load 16 Word16 y[]
          VLD1.S16      {Q12, Q13}, [r1]!             @ load 16 Word16 y[]

          VMULL.S16     Q15, D16, D0                  @ Q15 = four 32-bit partial sums
          VMLAL.S16     Q15, D17, D1
          VMLAL.S16     Q15, D18, D2
          VMLAL.S16     Q15, D19, D3
          VLD1.S16      {Q0, Q1}, [r1]!               @ load 16 Word16 y[] (D0-D3 are free again)
          VMLAL.S16     Q15, D20, D4
          VMLAL.S16     Q15, D21, D5
          VMLAL.S16     Q15, D22, D6
          VMLAL.S16     Q15, D23, D7
          VMLAL.S16     Q15, D24, D8
          VMLAL.S16     Q15, D25, D9
          VMLAL.S16     Q15, D26, D10
          VMLAL.S16     Q15, D27, D11
          VMLAL.S16     Q15, D0, D12
          VMLAL.S16     Q15, D1, D13
          VMLAL.S16     Q15, D2, D14
          VMLAL.S16     Q15, D3, D15

          CMP           r2, #64
          BEQ           Lable1                        @ lg == 64: done accumulating

          @ ---- x != y : products 64..79 ----------------------------------
          VLD1.S16      {Q0, Q1}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q2, Q3}, [r1]!               @ load 16 Word16 y[]
          VMLAL.S16     Q15, D4, D0
          VMLAL.S16     Q15, D5, D1
          VMLAL.S16     Q15, D6, D2
          VMLAL.S16     Q15, D7, D3
          B             Lable1                        @ plain jump over the x == y path (no link needed)

LOOP_EQ:
          @ ---- x == y : first 64 squares ---------------------------------
          VLD1.S16      {Q0, Q1}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q2, Q3}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q4, Q5}, [r0]!               @ load 16 Word16 x[]
          VLD1.S16      {Q6, Q7}, [r0]!               @ load 16 Word16 x[]
          VMULL.S16     Q15, D0, D0
          VMLAL.S16     Q15, D1, D1
          VMLAL.S16     Q15, D2, D2
          VMLAL.S16     Q15, D3, D3
          VMLAL.S16     Q15, D4, D4
          VMLAL.S16     Q15, D5, D5
          VMLAL.S16     Q15, D6, D6
          VMLAL.S16     Q15, D7, D7
          VMLAL.S16     Q15, D8, D8
          VMLAL.S16     Q15, D9, D9
          VMLAL.S16     Q15, D10, D10
          VMLAL.S16     Q15, D11, D11
          VMLAL.S16     Q15, D12, D12
          VMLAL.S16     Q15, D13, D13
          VMLAL.S16     Q15, D14, D14
          VMLAL.S16     Q15, D15, D15

          CMP           r2, #64
          BEQ           Lable1                        @ lg == 64: done accumulating

          @ ---- x == y : squares 64..79 -----------------------------------
          VLD1.S16      {Q0, Q1}, [r0]!               @ load 16 Word16 x[]
          VMLAL.S16     Q15, D0, D0
          VMLAL.S16     Q15, D1, D1
          VMLAL.S16     Q15, D2, D2
          VMLAL.S16     Q15, D3, D3

Lable1:
          @ ---- reduce the four lanes of Q15 to one scalar -----------------
          VQADD.S32     D30, D30, D31                 @ lanes {0,1} += lanes {2,3} (saturating)
          VPADD.S32     D30, D30, D30                 @ lane 0 = lane 0 + lane 1
          VMOV.S32      r12, D30[0]

          @ ---- normalize --------------------------------------------------
          ADD           r12, r12, r12
          ADD           r12, r12, #1                  @ L_sum = (L_sum << 1) + 1
          MOV           r4, r12
          CMP           r12, #0
          RSBLT         r4, r12, #0                   @ r4 = |L_sum|
          CLZ           r10, r4
          SUB           r10, r10, #1                  @ sft = norm_l(L_sum)
          MOV           r0, r12, LSL r10              @ L_sum = L_sum << sft  (return value)
          RSB           r11, r10, #30                 @ *exp = 30 - sft
          STRH          r11, [r3]

Dot_product12_end:

          VPOP          {D8 - D15}                    @ restore callee-saved NEON registers
          LDMFD         r13!, {r4 - r12, r15}         @ restore core registers and return

          .END