@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
16e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard@
@**********************************************************************/
@Word32 Dot_product12(                      /* (o) Q31: normalized result (-1 <= val < 1) */
@       Word16 x[],                           /* (i) 12bits: x vector                       */
@       Word16 y[],                           /* (i) 12bits: y vector                       */
@       Word16 lg,                            /* (i)    : vector length                     */
@       Word16 * exp                          /* (o)    : exponent of result (0..+30)       */
@)
@************************************************************************
@  x[]   ---  r0
@  y[]   ---  r1
@  lg    ---  r2
@  *exp  ---  r3
29e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
@-----------------------------------------------------------------------
@ Dot_product12_asm -- ARM NEON (GNU as syntax) dot product of two
@ 16-bit vectors, returned in normalised form.
@
@ In:    r0 = x[]   (16-bit signed elements)
@        r1 = y[]   (16-bit signed elements)
@        r2 = lg    (element count; only the value 64 is tested -- any
@                    other value is processed as exactly 80 elements)
@        r3 = exp   (pointer; receives a halfword)
@ Out:   r0   = L_sum << sft, where L_sum = 2 * sum(x[i] * y[i]) + 1
@               and sft = CLZ(|L_sum|) - 1
@        *exp = 30 - sft
@ Saved: r4-r12 and D8-D15 are pushed on entry and restored on return.
@ Clobb: Q0-Q3, Q8-Q13, Q15, flags.
@
@ When x and y are the same pointer the routine reads a single stream
@ and accumulates squares instead of loading the data twice.
@-----------------------------------------------------------------------

          .section   .text
          .global    Dot_product12_asm

Dot_product12_asm:

          STMFD             r13!, {r4 - r12, r14}
          VPUSH             {D8-D15}                      @ Q4-Q7 are used below and are callee-saved (AAPCS)
          CMP               r0, r1
          BEQ               LOOP_EQ                       @ x == y: sum-of-squares path

          @ ---- x != y: first 64 elements --------------------------------
          VLD1.S16          {Q0, Q1}, [r0]!               @ x[0..15]
          VLD1.S16          {Q2, Q3}, [r0]!               @ x[16..31]
          VLD1.S16          {Q4, Q5}, [r0]!               @ x[32..47]
          VLD1.S16          {Q6, Q7}, [r0]!               @ x[48..63]
          VLD1.S16          {Q8, Q9}, [r1]!               @ y[0..15]
          VLD1.S16          {Q10, Q11}, [r1]!             @ y[16..31]
          VLD1.S16          {Q12, Q13}, [r1]!             @ y[32..47]

          VMULL.S16         Q15, D16, D0                  @ Q15 = four 32-bit partial sums
          VMLAL.S16         Q15, D17, D1
          VMLAL.S16         Q15, D18, D2
          VMLAL.S16         Q15, D19, D3
          VLD1.S16          {Q0, Q1}, [r1]!               @ x[0..15] consumed: reuse Q0/Q1 for y[48..63]
          VMLAL.S16         Q15, D20, D4
          VMLAL.S16         Q15, D21, D5
          VMLAL.S16         Q15, D22, D6
          VMLAL.S16         Q15, D23, D7
          VMLAL.S16         Q15, D24, D8
          VMLAL.S16         Q15, D25, D9
          VMLAL.S16         Q15, D26, D10
          VMLAL.S16         Q15, D27, D11
          VMLAL.S16         Q15, D0, D12                  @ y[48..63] (in Q0/Q1) * x[48..63]
          VMLAL.S16         Q15, D1, D13
          VMLAL.S16         Q15, D2, D14
          VMLAL.S16         Q15, D3, D15

          CMP               r2, #64
          BEQ               Lable1                        @ lg == 64: done accumulating

          @ ---- x != y: elements 64..79 ----------------------------------
          VLD1.S16          {Q0, Q1}, [r0]!               @ x[64..79]
          VLD1.S16          {Q2, Q3}, [r1]!               @ y[64..79]
          VMLAL.S16         Q15, D4, D0
          VMLAL.S16         Q15, D5, D1
          VMLAL.S16         Q15, D6, D2
          VMLAL.S16         Q15, D7, D3
          B                 Lable1                        @ plain jump (was BL, which overwrote LR for no reason)

LOOP_EQ:
          @ ---- x == y: first 64 elements --------------------------------
          VLD1.S16          {Q0, Q1}, [r0]!               @ x[0..15]
          VLD1.S16          {Q2, Q3}, [r0]!               @ x[16..31]
          VLD1.S16          {Q4, Q5}, [r0]!               @ x[32..47]
          VLD1.S16          {Q6, Q7}, [r0]!               @ x[48..63]
          VMULL.S16         Q15, D0, D0
          VMLAL.S16         Q15, D1, D1
          VMLAL.S16         Q15, D2, D2
          VMLAL.S16         Q15, D3, D3
          VMLAL.S16         Q15, D4, D4
          VMLAL.S16         Q15, D5, D5
          VMLAL.S16         Q15, D6, D6
          VMLAL.S16         Q15, D7, D7
          VMLAL.S16         Q15, D8, D8
          VMLAL.S16         Q15, D9, D9
          VMLAL.S16         Q15, D10, D10
          VMLAL.S16         Q15, D11, D11
          VMLAL.S16         Q15, D12, D12
          VMLAL.S16         Q15, D13, D13
          VMLAL.S16         Q15, D14, D14
          VMLAL.S16         Q15, D15, D15

          CMP               r2, #64
          BEQ               Lable1                        @ lg == 64: done accumulating

          @ ---- x == y: elements 64..79 ----------------------------------
          VLD1.S16          {Q0, Q1}, [r0]!               @ x[64..79]
          VMLAL.S16         Q15, D0, D0
          VMLAL.S16         Q15, D1, D1
          VMLAL.S16         Q15, D2, D2
          VMLAL.S16         Q15, D3, D3

Lable1:
          @ ---- reduce the four lanes of Q15 to one scalar ---------------
          VQADD.S32         D30, D30, D31                 @ saturating add of upper and lower halves
          VPADD.S32         D30, D30, D30                 @ pairwise add of the two remaining lanes
          VMOV.S32          r12, D30[0]

          @ ---- normalise ------------------------------------------------
          ADD               r12, r12, r12
          ADD               r12, r12, #1                  @ L_sum = (L_sum << 1) + 1
          MOV               r4, r12
          CMP               r12, #0
          RSBLT             r4, r12, #0                   @ r4 = |L_sum|
          CLZ               r10, r4
          SUB               r10, r10, #1                  @ sft = norm_l(L_sum)
          MOV               r0, r12, LSL r10              @ return value: L_sum << sft
          RSB               r11, r10, #30                 @ *exp = 30 - sft
          STRH              r11, [r3]

Dot_product12_end:

          VPOP              {D8-D15}                      @ restore callee-saved NEON registers
          LDMFD             r13!, {r4 - r12, r15}         @ restore core registers and return

          .END
127e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
128