117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@/*
217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** Copyright 2003-2010, VisualOn, Inc.
317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **
417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** Licensed under the Apache License, Version 2.0 (the "License");
517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** you may not use this file except in compliance with the License.
617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** You may obtain a copy of the License at
717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **
817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **     http://www.apache.org/licenses/LICENSE-2.0
917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **
1017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** Unless required by applicable law or agreed to in writing, software
1117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** distributed under the License is distributed on an "AS IS" BASIS,
1217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** See the License for the specific language governing permissions and
1417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** limitations under the License.
1517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ */
1617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@
@**********************************************************************/
@Word32 Dot_product12(                      /* (o) Q31: normalized result (-1 <= val < 1) */
@       Word16 x[],                           /* (i) 12bits: x vector                       */
@       Word16 y[],                           /* (i) 12bits: y vector                       */
@       Word16 lg,                            /* (i)    : vector length                     */
@       Word16 * exp                          /* (o)    : exponent of result (0..+30)       */
@)
@************************************************************************
@  x[]   ---  r0
@  y[]   ---  r1
@  lg    ---  r2
@  *exp  ---  r3
2917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
3017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          .section   .text
3117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          .global    Dot_product12_asm
3217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
3317299ab50ceb70d904e610e3b2d7fb2361a11e03James DongDot_product12_asm:
3417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
3517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          STMFD   	    r13!, {r4 - r12, r14}
3617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  CMP               r0, r1
3717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  BEQ               LOOP_EQ
3817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
3917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VLD1.S16          {Q0, Q1}, [r0]!               @load 16 Word16 x[]
4017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VLD1.S16          {Q2, Q3}, [r0]!               @load 16 Word16 x[]
4117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VLD1.S16          {Q4, Q5}, [r0]!               @load 16 Word16 x[]
4217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VLD1.S16          {Q6, Q7}, [r0]!               @load 16 Word16 x[]
4317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q8, Q9}, [r1]!               @load 16 Word16 y[]
4417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q10, Q11}, [r1]!             @load 16 Word16 y[]
4517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q12, Q13}, [r1]!             @load 16 Word16 y[]
4617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
4717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMULL.S16         Q15, D16, D0
4817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D17, D1
4917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D18, D2
5017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D19, D3
5117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q0, Q1}, [r1]!               @load 16 Word16 y[]
5217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D20, D4
5317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D21, D5
5417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D22, D6
5517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D23, D7
5617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D24, D8
5717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D25, D9
5817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D26, D10
5917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D27, D11
6017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D0, D12
6117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D1, D13
6217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D2, D14
6317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D3, D15
6417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
6517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          CMP               r2, #64
6617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          BEQ               Lable1
6717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VLD1.S16          {Q0, Q1}, [r0]!               @load 16 Word16 x[]
6817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q2, Q3}, [r1]!
6917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D4, D0
7017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D5, D1
7117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D6, D2
7217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMLAL.S16         Q15, D7, D3
7317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  BL                Lable1
7417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
7517299ab50ceb70d904e610e3b2d7fb2361a11e03James DongLOOP_EQ:
7617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VLD1.S16          {Q0, Q1}, [r0]!
7717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q2, Q3}, [r0]!
7817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q4, Q5}, [r0]!
7917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q6, Q7}, [r0]!
8017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMULL.S16         Q15, D0, D0
8117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D1, D1
8217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D2, D2
8317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D3, D3
8417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D4, D4
8517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D5, D5
8617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D6, D6
8717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D7, D7
8817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D8, D8
8917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D9, D9
9017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D10, D10
9117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D11, D11
9217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D12, D12
9317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D13, D13
9417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D14, D14
9517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D15, D15
9617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
9717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  CMP               r2, #64
9817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  BEQ               Lable1
9917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VLD1.S16          {Q0, Q1}, [r0]!
10017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D0, D0
10117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D1, D1
10217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D2, D2
10317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  VMLAL.S16         Q15, D3, D3
10417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10517299ab50ceb70d904e610e3b2d7fb2361a11e03James DongLable1:
10617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VQADD.S32         D30, D30, D31
10817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VPADD.S32         D30, D30, D30
10917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          VMOV.S32          r12, D30[0]
11017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
11117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  ADD               r12, r12, r12
11217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          ADD               r12, r12, #1                         @ L_sum = (L_sum << 1)  + 1
11317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  MOV               r4, r12
11417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  CMP               r12, #0
11517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	  RSBLT             r4, r12, #0
11617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          CLZ               r10, r4
11717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          SUB               r10, r10, #1                         @ sft = norm_l(L_sum)
11817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          MOV               r0, r12, LSL r10                     @ L_sum = L_sum << sft
11917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          RSB               r11, r10, #30                        @ *exp = 30 - sft
12017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          STRH              r11, [r3]
12117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
12217299ab50ceb70d904e610e3b2d7fb2361a11e03James DongDot_product12_end:
12317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
12417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          LDMFD   	    r13!, {r4 - r12, r15}
12517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
12617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong          .END
12717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
128