Dot_p_opt.s revision 41050cdb033641ddf26831d9272c0930f7b40a2d
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16@
17@Word32 Dot_product12(                      /* (o) Q31: normalized result (-1 <= val < 1) */
18@       Word16 x[],                           /* (i) 12bits: x vector                       */
19@       Word16 y[],                           /* (i) 12bits: y vector                       */
20@       Word16 lg,                            /* (i)    : vector length                     */
21@       Word16 * exp                          /* (o)    : exponent of result (0..+30)       */
22@)
23@****************************************************************
24@  x[]   ---  r0
25@  y[]   ---  r1
26@  lg    ---  r2
27@  *exp  ---  r3
28
29          .section  .text
30 	  .global   Dot_product12_asm
31
@ Dot_product12_asm: accumulates sum(x[i] * y[i]) over 16-bit elements, forms
@ L_sum = (sum << 1) + 1, left-shifts it by sft so its magnitude is normalized,
@ returns the shifted value in r0 and stores (30 - sft) as a halfword at [r3].
@
@ Register use inside the routine:
@   r0 / r1  : running pointers into x[] / y[] (post-incremented by 4 bytes per load)
@   r2       : lg, loop bound
@   r3       : address where the exponent is stored
@   r4       : L_sum accumulator (later reused for |L_sum|)
@   r5       : element counter i, advanced by 8 per loop pass
@   r6 - r9  : 32-bit words loaded from x[] / y[], each holding two 16-bit elements
@   r10      : sft,  r11 : 30 - sft,  r12 : (L_sum << 1) + 1
@
@ The loop is tested at the bottom and consumes 8 element pairs per pass, so at
@ least 8 pairs are always read and the effective length is lg rounded up to a
@ multiple of 8.
@ NOTE(review): presumably callers always pass lg > 0 and a multiple of 8 - confirm.
@ NOTE(review): word-sized LDR on Word16 arrays - presumably x and y are 4-byte
@ aligned by the callers; confirm.
32Dot_product12_asm:
33
34          STMFD   	    r13!, {r4 - r12, r14}                  @ push r4-r12 and the return address (r14)
35          MOV               r4, #0                                 @ L_sum = 0
36          MOV               r5, #0                                 @ i = 0
37
@ Loads and multiply-accumulates are interleaved below (presumably to hide load
@ latency - TODO confirm); the instruction order is intentional, keep it as is.
38LOOP:
39          LDR           r6, [r0], #4                             @ r6 = next word of x (2 elements), x += 4 bytes
40          LDR           r7, [r1], #4                             @ r7 = next word of y (2 elements), y += 4 bytes
41          LDR           r8, [r0], #4                             @ r8 = following word of x
42          SMLABB        r4, r6, r7, r4                           @ L_sum += bottom16(r6) * bottom16(r7)
43          LDR           r9, [r1], #4                             @ r9 = following word of y
44	  SMLATT        r4, r6, r7, r4                           @ L_sum += top16(r6) * top16(r7)
45
46	  LDR           r6, [r0], #4                             @ reload r6 with the third word of x
47	  SMLABB        r4, r8, r9, r4                           @ L_sum += bottom16(r8) * bottom16(r9)
48
49	  LDR           r7, [r1], #4                             @ reload r7 with the third word of y
50	  SMLATT        r4, r8, r9, r4                           @ L_sum += top16(r8) * top16(r9)
51	  LDR           r8, [r0], #4                             @ reload r8 with the fourth word of x
52
53	  SMLABB        r4, r6, r7, r4                           @ L_sum += bottom16(r6) * bottom16(r7)
54	  LDR           r9, [r1], #4                             @ reload r9 with the fourth word of y
55	  SMLATT        r4, r6, r7, r4                           @ L_sum += top16(r6) * top16(r7)
56	  ADD           r5, r5, #8                               @ i += 8 (eight element pairs consumed this pass)
57	  SMLABB        r4, r8, r9, r4                           @ L_sum += bottom16(r8) * bottom16(r9)
58	  CMP           r5, r2                                   @ compare i with lg; SMLAxy below leaves N/Z/C/V intact
59	  SMLATT        r4, r8, r9, r4                           @ L_sum += top16(r8) * top16(r9)
60	  BLT           LOOP                                     @ repeat while i < lg (signed compare)
61
62          MOV           r12, r4, LSL #1                          @ r12 = L_sum << 1
63          ADD           r12, r12, #1                         @ L_sum = (L_sum << 1)  + 1
64	  MOV           r4, r12                                  @ r4 = copy of L_sum used to measure its magnitude
65
@ L_sum is odd after the +1 above, so it is never zero and CLZ below is at most 31.
@ NOTE(review): the magnitude is taken by negation (0 - L_sum); if the reference
@ norm_l uses a one's complement instead, results differ only for L_sum == -1 - verify.
66          CMP           r12, #0                                  @ is L_sum negative?
67	  RSBLT         r4, r12, #0                              @ if so, r4 = 0 - L_sum
68          CLZ           r10, r4                                  @ r10 = number of leading zero bits in |L_sum|
69          SUB           r10, r10, #1                         @ sft = norm_l(L_sum)
70          MOV           r0, r12, LSL r10                     @ L_sum = L_sum << sft
71          RSB           r11, r10, #30                        @ *exp = 30 - sft
72          STRH          r11, [r3]                                @ store the exponent as a 16-bit value at [r3]
73
@ Exit label; no branch in the visible code targets it, execution falls through.
74Dot_product12_end:
75
76          LDMFD   	    r13!, {r4 - r12, r15}                  @ restore r4-r12 and pop the saved r14 into pc (return)
77          @ENDFUNC
78          .END
79
80
81