syn_filt_neon.s revision 41050cdb033641ddf26831d9272c0930f7b40a2d
1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16@
17@void Syn_filt(
18@     Word16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients           */
19@     Word16 x[],                           /* (i)     : input signal                             */
20@     Word16 y[],                           /* (o)     : output signal                            */
21@     Word16 mem[],                         /* (i/o)   : memory associated with this filtering.   */
22@)
23@***********************************************************************
24@ a[]    ---   r0
25@ x[]    ---   r1
26@ y[]    ---   r2
27@ mem[]  ---   r3
28@ m ---  16  lg --- 80  update --- 1
29
@-----------------------------------------------------------------------
@ void Syn_filt_asm(Word16 a[], Word16 x[], Word16 y[], Word16 mem[])
@
@ Order-16 synthesis filter over 80 samples, with the filter memory
@ always written back (m = 16, lg = 80, update = 1 are hard-wired).
@
@ In:    r0 = a[]    17 coefficients a[0..16]; a[0] is halved before use
@        r1 = x[]    80 input samples
@        r2 = y[]    80 output samples (written)
@        r3 = mem[]  16 samples of filter state (read, then overwritten
@                    with the last 16 outputs)
@ Out:   none
@ Stack: 40 bytes core registers + 64 bytes D8-D15 + 700 bytes scratch.
@        Only the first 192 bytes of the scratch area are used, as
@        y_buf[16 + 80] (16 history samples followed by 80 outputs).
@ Regs:  r4-r12 and r14 are saved/restored; D8-D15 are saved/restored
@        because the AAPCS makes them callee-saved and the loop uses
@        Q5 (D10/D11), D12 and Q7 (D14/D15) as temporaries.
@        D0-D7 and D16-D21 are clobbered (caller-saved).
@
@ Register roles inside the loop:
@        r5      a0 = a[0] >> 1
@        r8      i, loop counter 0..79
@        r4      &y_buf[16]  (address of yy[0])
@        D0-D3   a[1..16], each 4-lane group reversed so that lane k of
@                D3:D2:D1:D0 lines up with the history window below
@        D4-D7   sliding window yy[i-16] .. yy[i-1]
@-----------------------------------------------------------------------

          .section  .text
          .global   Syn_filt_asm

Syn_filt_asm:

          STMFD         r13!, {r4 - r12, r14}
          VPUSH         {D8 - D15}                       @ callee-saved NEON/VFP registers (AAPCS)
          SUB           r13, r13, #700                   @ scratch area holding y_buf

          MOV           r4, r3                           @ r4 = mem[]
          MOV           r5, r13                          @ r5 = yy = y_buf

          @ for (i = 0; i < 16; i++)
          @     *yy++ = mem[i];
          VLD1.S16      {D0, D1, D2, D3}, [r4]!          @ load the 16 state samples
          VST1.S16      {D0, D1, D2, D3}, [r5]!          @ y_buf[0..15] = mem[0..15]

          LDRSH         r5, [r0], #2                     @ r5 = a[0], r0 -> a[1]
          MOV           r5, r5, ASR #1                   @ a0 = a[0] >> 1

          @ Load a[1..16] and reverse each group of four so the
          @ coefficients run in the same direction as the history window.
          VLD1.S16      {D0, D1, D2, D3}, [r0]!          @ D0..D3 = a[1] ~ a[16]
          VREV64.16     D0, D0                           @ D0 = a[4],  a[3],  a[2],  a[1]
          VREV64.16     D1, D1                           @ D1 = a[8],  a[7],  a[6],  a[5]
          VREV64.16     D2, D2                           @ D2 = a[12], a[11], a[10], a[9]
          VREV64.16     D3, D3                           @ D3 = a[16], a[15], a[14], a[13]

          MOV           r8, #0                           @ i = 0
          MOV           r10, r13                         @ r10 = y_buf
          ADD           r4, r13, #32                     @ r4 = &y_buf[16] = &yy[0]

          VLD1.S16      {D4, D5, D6, D7}, [r10]!         @ window = y_buf[0..15] (initial history)

SYN_LOOP:

          LDRSH         r6, [r1], #2                     @ r6 = x[i]
          MUL           r12, r6, r5                      @ L_tmp = x[i] * a0
          ADD           r10, r4, r8, LSL #1              @ r10 = &yy[i]

          VDUP.S32      Q10, r12                         @ broadcast L_tmp to all lanes

          @ Q5 = four partial sums of a[j] * yy[i - j], j = 1..16
          VMULL.S16     Q5, D3, D4
          VMLAL.S16     Q5, D2, D5
          VMLAL.S16     Q5, D1, D6
          VMLAL.S16     Q5, D0, D7

          @ Slide the history window down by one sample (2 bytes).
          @ D7 is completed below once the new output is known.
          VEXT.8        D4, D4, D5, #2
          VEXT.8        D5, D5, D6, #2
          VEXT.8        D6, D6, D7, #2

          @ Horizontal add of the four partial sums into D10[0].
          VPADD.S32     D12, D10, D11
          ADD           r8, r8, #1                       @ i++
          VPADD.S32     D10, D12, D12

          VDUP.S32      Q7, D10[0]                       @ broadcast the full sum

          VSUB.S32      Q9, Q10, Q7                      @ L_tmp -= sum
          VQRSHRN.S32   D20, Q9, #12                     @ saturating, rounding >> 12, narrowed to 16 bits
          VMOV.S16      r9, D20[0]                       @ r9 = new output sample
          VEXT.8        D7, D7, D20, #2                  @ append it as the newest history sample
          CMP           r8, #80
          STRH          r9, [r10]                        @ yy[i]
          STRH          r9, [r2], #2                     @ y[i]

          BLT           SYN_LOOP

          @ update mem[]: copy the last 16 outputs back to the caller
          ADD           r5, r13, #160                    @ &y_buf[80] = &yy[64]
          VLD1.S16      {D0, D1, D2, D3}, [r5]!
          VST1.S16      {D0, D1, D2, D3}, [r3]!          @ mem[0..15] = yy[64..79]

Syn_filt_asm_end:

          ADD           r13, r13, #700
          VPOP          {D8 - D15}                       @ restore callee-saved NEON/VFP registers
          LDMFD         r13!, {r4 - r12, r15}
          @ENDFUNC
          .END
105
106
107