@ Syn_filt_32_neon.s revision e2e838afcf03e603a41a0455846eaf9614537c16
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@**********************************************************************/
@void Syn_filt_32(
@     Word16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients */
@     Word16 m,                             /* (i)     : order of LP filter             */
@     Word16 exc[],                         /* (i) Qnew: excitation (exc[i] >> Qnew)    */
@     Word16 Qnew,                          /* (i)     : exc scaling = 0(min) to 8(max) */
@     Word16 sig_hi[],                      /* (o) /16 : synthesis high                 */
@     Word16 sig_lo[],                      /* (o) /16 : synthesis low                  */
@     Word16 lg                             /* (i)     : size of filtering              */
@)
@***********************************************************************
@ a[]      --- r0
@ m        --- r1 (never read: the code always uses a[1]..a[16])
@ exc[]    --- r2
@ Qnew     --- r3
@ sig_hi[] --- r4 (loaded from the caller's stack)
@ sig_lo[] --- r5 (loaded from the caller's stack)
@ lg       --- passed on the stack, never loaded: the loop always runs 64 times
@-----------------------------------------------------------------------
@ Syn_filt_32_asm -- NEON implementation of Syn_filt_32()
@
@ Syntax: GNU as, 32-bit ARM with NEON ('@' starts a comment).
@ ABI:    AAPCS (32-bit).
@ In:     r0      = a[]       prediction coefficients, a[0]..a[16] are read
@         r1      = m         not read; the filter order is fixed at 16
@         r2      = exc[]     excitation, 64 samples are read
@         r3      = Qnew      excitation scaling
@         [sp]    = sig_hi[]  output (high part)
@         [sp+4]  = sig_lo[]  output (low part)
@         [sp+8]  = lg        not read; exactly 64 samples are produced
@ Out:    sig_hi[0..63] and sig_lo[0..63] are written.
@ Needs:  sig_hi[-16..-1] and sig_lo[-16..-1] must be readable; they are
@         loaded as the initial filter history.
@ Stack:  40 bytes of core registers + 32 bytes for D8-D11.
@ Clobb:  r0-r3, r12, D0-D7, D20-D25, D28-D31, flags.  r4-r11 and
@         D8-D11 are used but saved/restored (D8-D15 are callee-saved
@         under AAPCS, so D8-D11 must not be left modified).
@
@ Register roles inside SYN_LOOP:
@         r2 = &exc[i]     r3 = a0 = a[0] >> (4 + Qnew)
@         r4 = &sig_hi[i]  r5 = &sig_lo[i]   r8 = i
@         D0-D3   = a[1..16], lanes reversed inside each D register
@         D4-D7   = sliding window sig_hi[i-16 .. i-1]
@         D8-D11  = sliding window sig_lo[i-16 .. i-1]
@         Q15     = 0
@-----------------------------------------------------------------------

          .section  .text
          .global   Syn_filt_32_asm

          .equ      GPR_SAVE_BYTES,  40               @ r4-r12 + r14 = 10 words
          .equ      NEON_SAVE_BYTES, 32               @ D8-D11 = 4 doublewords
          .equ      ARG_SIG_HI, GPR_SAVE_BYTES + NEON_SAVE_BYTES
          .equ      ARG_SIG_LO, ARG_SIG_HI + 4

Syn_filt_32_asm:

          STMFD         r13!, {r4-r12, r14}
          VPUSH         {D8-D11}                         @ callee-saved NEON regs used below
          LDR           r4,  [r13, #ARG_SIG_HI]          @ get sig_hi[] address
          LDR           r5,  [r13, #ARG_SIG_LO]          @ get sig_lo[] address

          LDRSH         r6,  [r0], #2                    @ load Aq[0], r0 -> a[1]
          ADD           r7,  r3, #4                      @ 4 + Q_new
          MOV           r3,  r6, ASR r7                  @ a0 = Aq[0] >> (4 + Q_new)

          SUB           r10, r4, #32                     @ sig_hi[-16] address
          SUB           r11, r5, #32                     @ sig_lo[-16] address

          VLD1.S16      {D0, D1, D2, D3}, [r0]!          @ a[1] ~ a[16]

          MOV           r8, #0                           @ i = 0

          VLD1.S16      {D4, D5, D6, D7}, [r10]!         @ sig_hi[-16] ~ sig_hi[-1]
          VREV64.16     D0, D0                           @ reverse lanes so that the oldest
          VREV64.16     D1, D1                           @ history sample lines up with the
          VLD1.S16      {D8, D9, D10, D11}, [r11]!       @ sig_lo[-16] ~ sig_lo[-1]
          VREV64.16     D2, D2                           @ highest-index coefficient
          VREV64.16     D3, D3
          VDUP.S32      Q15, r8                          @ Q15 = 0

SYN_LOOP:

          LDRSH         r6, [r2], #2                     @ exc[i]

          @ result1 = sum(sig_lo[i - j] * a[j]), j = 1..16
          VMULL.S16     Q10, D8, D3
          VEXT.8        D8, D8, D9, #2                   @ slide sig_lo window by one sample
          VMLAL.S16     Q10, D9, D2
          VMLAL.S16     Q10, D10, D1
          VMLAL.S16     Q10, D11, D0

          VEXT.8        D9, D9, D10, #2
          VEXT.8        D10, D10, D11, #2

          VPADD.S32     D28, D20, D21                    @ horizontal add of the 4 partial sums
          MUL           r12, r6, r3                      @ exc[i] * a0
          VPADD.S32     D29, D28, D28
          VDUP.S32      Q10, D29[0]                      @ result1

          @ result2 = sum(sig_hi[i - j] * a[j]), j = 1..16
          VMULL.S16     Q11, D4, D3
          VMLAL.S16     Q11, D5, D2
          VSUB.S32      Q10, Q15, Q10                    @ Q10 = -result1

          VMLAL.S16     Q11, D6, D1
          VEXT.8        D4, D4, D5, #2                   @ slide sig_hi window by one sample
          VMLAL.S16     Q11, D7, D0

          VEXT.8        D5, D5, D6, #2
          VEXT.8        D6, D6, D7, #2

          VPADD.S32     D28, D22, D23                    @ horizontal add of the 4 partial sums
          VPADD.S32     D29, D28, D28
          MOV           r14, r12, LSL #1                 @ (exc[i] * a0) << 1
          VDUP.S32      Q11, D29[0]                      @ result2

          VSHR.S32      Q10, Q10, #11                    @ (-result1) >> 11
          VSHL.S32      Q11, Q11, #1                     @ result2 << 1
          VDUP.S32      Q12, r14
          VADD.S32      Q12, Q12, Q10                    @ L_tmp = (exc[i] * a0 << 1) + ((-result1) >> 11)
          VSUB.S32      Q12, Q12, Q11                    @ L_tmp -= result2 << 1

          VSHL.S32      Q12, Q12, #3                     @ L_tmp <<= 3

          VSHRN.S32     D20, Q12, #16                    @ sig_hi[i] = L_tmp >> 16
          VMOV.S16      r10, D20[0]                      @ r10 = sig_hi[i], sign-extended
          VSHR.S32      Q12, Q12, #4                     @ L_tmp >>= 4
          VEXT.8        D7, D7, D20, #2                  @ append sig_hi[i] to the window
          STRH          r10, [r4], #2                    @ store sig_hi[i]
          VMOV.S32      r11, D24[0]                      @ r11 = L_tmp >> 4
          ADD           r8, r8, #1                       @ i++
          SUB           r12, r11, r10, LSL #12           @ sig_lo[i] = (L_tmp >> 4) - (sig_hi[i] << 12)
          VDUP.S16      D21, r12
          VEXT.8        D11, D11, D21, #2                @ append sig_lo[i] to the window
          STRH          r12, [r5], #2                    @ store sig_lo[i]

          CMP           r8, #64                          @ fixed length: 64 samples
          BLT           SYN_LOOP

Syn_filt_32_end:

          VPOP          {D8-D11}                         @ restore callee-saved NEON regs
          LDMFD         r13!, {r4-r12, r15}              @ restore core regs and return
          @ENDFUNC
          .end