1@/*
2@ ** Copyright 2003-2010, VisualOn, Inc.
3@ **
4@ ** Licensed under the Apache License, Version 2.0 (the "License");
5@ ** you may not use this file except in compliance with the License.
6@ ** You may obtain a copy of the License at
7@ **
8@ **     http://www.apache.org/licenses/LICENSE-2.0
9@ **
10@ ** Unless required by applicable law or agreed to in writing, software
11@ ** distributed under the License is distributed on an "AS IS" BASIS,
12@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ ** See the License for the specific language governing permissions and
14@ ** limitations under the License.
15@ */
16
17@static void Norm_Corr (Word16 exc[],                    /* (i)     : excitation buffer          */
18@                       Word16 xn[],                     /* (i)     : target vector              */
19@                       Word16 h[],                      /* (i) Q15 : impulse response of synth/wgt filters */
20@                       Word16 L_subfr,                  /* (i)     : sub-frame length */
21@                       Word16 t_min,                    /* (i)     : minimum value of pitch lag.   */
22@                       Word16 t_max,                    /* (i)     : maximum value of pitch lag.   */
23@                       Word16 corr_norm[])              /* (o) Q15 : normalized correlation    */
24@
25
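@ Argument locations on entry, as read by the code below:
@ r0    --- exc[]
@ r1    --- xn[]
@ r2    --- h[]
@ r3    --- L_subfr (not read in this file, still in r3 when Convolve_asm is called)
@ stack --- t_min, t_max, corr_norm[] (5th to 7th arguments, loaded from the
@           caller's stack through the T_MIN / T_MAX / CORR_NORM offsets)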
33
34
35	.section  .text
36        .global   Norm_corr_asm
37        .extern   Convolve_asm
38        .extern   Isqrt_n
39@******************************
40@ constant
41@******************************
@ Stack frame layout, as offsets from sp after the prologue.
@ The prologue pushes SAVED_REGS bytes and then reserves voSTACK bytes of
@ locals. voSTACK + SAVED_REGS is kept a multiple of 8 so that sp stays
@ 8-byte aligned at the calls to Convolve_asm and Isqrt_n (each call site
@ pushes a further multiple of 8 bytes).
.equ         EXC           ,   0                        @ not referenced in this file
.equ         XN            ,   4                        @ not referenced in this file
.equ         H             ,   8                        @ not referenced in this file
.equ         L_SUBFR       ,   12                       @ not referenced in this file
.equ         EXCF          ,   24                       @ excf[64] scratch buffer (128 bytes)
.equ         voSTACK       ,   176                      @ size of the local area
.equ         SAVED_REGS    ,   40                       @ r4-r12 and r14 = 10 registers
.equ         T_MIN         ,   voSTACK + SAVED_REGS     @ 5th argument, on the caller's stack
.equ         T_MAX         ,   T_MIN + 4                @ 6th argument
.equ         CORR_NORM     ,   T_MIN + 8                @ 7th argument

@-----------------------------------------------------------------------
@ Norm_corr_asm
@
@ Assembly counterpart of the Norm_Corr() prototype quoted at the top of
@ the file. For every lag t it stores a rounded, normalised correlation
@ between xn[] and the filtered excitation excf[] into corr_norm[t].
@
@ In:    r0 = exc[], r1 = xn[], r2 = h[], r3 = L_subfr
@        [sp + 0] = t_min, [sp + 4] = t_max, [sp + 8] = corr_norm[]
@        (sp as seen on entry)
@ Out:   corr_norm[t] written as halfwords
@ Calls: Convolve_asm(&exc[-t_min], h, excf, r3)
@        Isqrt_n(&L_tmp, &exp_norm)
@ Notes: - every loop is hard-coded for 64 samples, r3 is only passed on
@          to Convolve_asm
@        - xn[] and excf[] are read with word loads (two samples per load)
@        - the lag loop tests t against t_max only after the first store,
@          so corr_norm[t_min] is always written
@-----------------------------------------------------------------------
Norm_corr_asm:

        STMFD       r13!, {r4 - r12, r14}
        SUB         r13, r13, #voSTACK

        ADD         r8, r13, #EXCF               @ r8 = &excf[0]
        LDR         r4, [r13, #T_MIN]            @ r4 = t = t_min
        RSB         r11, r4, #0                  @ k = -t_min
        ADD         r5, r0, r11, LSL #1          @ r5 = &exc[k]

        @ Convolve_asm(&exc[k], h, excf), r3 is passed through unchanged
        STMFD       sp!, {r0 - r3}
        MOV         r0, r5
        MOV         r1, r2
        MOV         r2, r8                       @ r2 = excf[]
        BL          Convolve_asm
        LDMFD       sp!, {r0 - r3}

        @ Energy of xn[]: L_tmp = sum of xn[i] * xn[i] over 64 samples
        MOV         r14, r1                      @ running pointer into xn[]
        MOV         r5, #64                      @ samples left
        MOV         r6, #0                       @ L_tmp = 0
        MOV         r7, #1

.Lxn_energy_loop:
        LDR         r9,  [r14], #4               @ each word holds two samples
        LDR         r10, [r14], #4
        LDR         r11, [r14], #4
        LDR         r12, [r14], #4
        SMLABB      r6, r9, r9, r6               @ L_tmp += xn[i] * xn[i]
        SMLATT      r6, r9, r9, r6               @ L_tmp += xn[i+1] * xn[i+1]
        SMLABB      r6, r10, r10, r6
        SMLATT      r6, r10, r10, r6
        SMLABB      r6, r11, r11, r6
        SMLATT      r6, r11, r11, r6
        SMLABB      r6, r12, r12, r6
        SMLATT      r6, r12, r12, r6
        SUBS        r5, r5, #8                   @ eight samples per pass
        BNE         .Lxn_energy_loop

        ADD         r9, r7, r6, LSL #1           @ L_tmp = (L_tmp << 1) + 1
        CLZ         r7, r9
        SUB         r6, r7, #1                   @ exp = norm_l(L_tmp)
        RSB         r7, r6, #32                  @ exp = 32 - exp
        MOV         r6, r7, ASR #1
        RSB         r7, r6, #0                   @ scale = -(exp >> 1)

        @ Loop over every candidate lag: t = t_min, t_min + 1, ... t_max
        @ Live across iterations: r0 = exc[], r1 = xn[], r2 = h[],
        @                         r4 = t, r7 = scale

.Llag_loop:
        MOV         r5, #0                       @ L_tmp  = 0 (cross term)
        MOV         r6, #0                       @ L_tmp1 = 0 (energy of excf)
        MOV         r9, #64                      @ samples left
        MOV         r12, r1                      @ running pointer into xn[]
        ADD         r14, r13, #EXCF              @ running pointer into excf[]
        MOV         r8, #0x8000                  @ rounding constant for the final round

.Ldot_loop:
        LDR         r11, [r14], #4               @ excf[i], excf[i+1]
        LDR         r10, [r12], #4               @ xn[i], xn[i+1]
        SMLABB      r6, r11, r11, r6             @ L_tmp1 += excf[i] * excf[i]
        SMLATT      r6, r11, r11, r6             @ L_tmp1 += excf[i+1] * excf[i+1]
        SMLABB      r5, r10, r11, r5             @ L_tmp  += xn[i] * excf[i]
        SMLATT      r5, r10, r11, r5             @ L_tmp  += xn[i+1] * excf[i+1]
        LDR         r11, [r14], #4               @ excf[i+2], excf[i+3]
        LDR         r10, [r12], #4               @ xn[i+2], xn[i+3]
        SMLABB      r6, r11, r11, r6
        SMLATT      r6, r11, r11, r6
        SMLABB      r5, r10, r11, r5
        SMLATT      r5, r10, r11, r5
        SUBS        r9, r9, #4                   @ four samples per pass
        BNE         .Ldot_loop

        @ r5 = L_tmp, r6 = L_tmp1
        MOV         r10, #1
        ADD         r5, r10, r5, LSL #1          @ L_tmp  = (L_tmp  << 1) + 1
        ADD         r6, r10, r6, LSL #1          @ L_tmp1 = (L_tmp1 << 1) + 1

        @ L_tmp may be negative: count leading zeros of its magnitude instead
        CLZ         r10, r5
        CMP         r5, #0
        RSBLT       r11, r5, #0
        CLZLT       r10, r11
        SUB         r10, r10, #1                 @ exp = norm_l(L_tmp)

        MOV         r5, r5, LSL r10              @ L_tmp = L_tmp << exp
        RSB         r10, r10, #30                @ exp_corr = 30 - exp
        MOV         r11, r5, ASR #16             @ corr = extract_h(L_tmp)

        @ L_tmp1 is normalised without any sign handling
        CLZ         r5, r6
        SUB         r5, r5, #1
        MOV         r6, r6, LSL r5               @ L_tmp = L_tmp1 << exp
        RSB         r5, r5, #30                  @ exp_norm = 30 - exp

        @ r10 = exp_corr, r11 = corr, r6 = L_tmp, r5 = exp_norm

        @ Isqrt_n(&L_tmp, &exp_norm)
        @ The two in/out operands are placed in the stack slots that the STMFD
        @ below has just filled with r0 and r1, so those saved copies are
        @ overwritten. r0 and r1 are therefore parked in r14 and r12 first,
        @ which are themselves saved and restored by the STMFD / LDMFD pair.
        MOV         r14, r0
        MOV         r12, r1

        STMFD       sp!, {r0 - r4, r7 - r12, r14}
        ADD         r1, sp, #4                   @ r1 = &exp_norm
        ADD         r0, sp, #0                   @ r0 = &L_tmp
        STR         r6, [sp]
        STRH        r5, [sp, #4]
        BL          Isqrt_n
        LDR         r6, [sp]                     @ updated L_tmp
        LDRSH       r5, [sp, #4]                 @ updated exp_norm
        LDMFD       sp!, {r0 - r4, r7 - r12, r14}
        MOV         r0, r14                      @ recover exc[]
        MOV         r1, r12                      @ recover xn[]

        MOV         r6, r6, ASR #16              @ norm = extract_h(L_tmp)
        MUL         r12, r6, r11
        ADD         r12, r12, r12                @ L_tmp = vo_L_mult(corr, norm)

        ADD         r6, r10, r5
        ADD         r6, r6, r7                   @ shift = exp_corr + exp_norm + scale

        @ Shift by the signed amount. RSBLT does not touch the flags, so
        @ MOVLT and MOVGT still act on the sign of the original sum.
        CMP         r6, #0
        RSBLT       r6, r6, #0
        MOVLT       r12, r12, ASR r6             @ negative: arithmetic shift right
        MOVGT       r12, r12, LSL r6             @ positive: shift left, no saturation

        ADD         r12, r12, r8
        MOV         r12, r12, ASR #16            @ vo_round(L_tmp)

        LDR         r5, [r13, #CORR_NORM]        @ r5 = corr_norm[]
        LDR         r6, [r13, #T_MAX]            @ r6 = t_max
        ADD         r10, r5, r4, LSL #1          @ r10 = &corr_norm[t]
        STRH        r12, [r10]                   @ corr_norm[t] = vo_round(L_tmp)

        CMP         r4, r6
        BEQ         .Lnorm_corr_done             @ last lag done, skip the excf update

        ADD         r4, r4, #1                   @ t++

        @ Update excf[] for the new lag:
        @   tmp = exc[-t]
        @   excf[i] = ((tmp * h[i]) >> 15) + excf[i - 1]   for i = 63 down to 1
        @   excf[0] = (tmp * h[0]) >> 15
        RSB         r5, r4, #0                   @ k = -t

        MOV         r6, #63                      @ i = 63
        MOV         r8, r0                       @ exc[]
        MOV         r9, r2                       @ h[]
        ADD         r10, r13, #EXCF              @ excf[]

        ADD         r8, r8, r5, LSL #1           @ &exc[k]
        ADD         r9, r9, r6, LSL #1           @ &h[i]
        ADD         r10, r10, r6, LSL #1         @ &excf[i]
        LDRSH       r11, [r8]                    @ tmp = exc[k]

.Lexcf_update_loop:
        LDRSH       r8, [r9], #-2                @ h[i], then step down
        LDRSH       r12, [r10, #-2]              @ excf[i - 1]
        MUL         r14, r11, r8
        MOV         r8, r14, ASR #15
        ADD         r14, r8, r12
        STRH        r14, [r10], #-2              @ store excf[i], then step down
        SUBS        r6, r6, #1
        BGT         .Lexcf_update_loop

        LDRSH       r8, [r9]                     @ h[0]
        MUL         r14, r11, r8
        LDR         r6, [r13, #T_MAX]            @ r6 = t_max
        MOV         r8, r14, ASR #15
        STRH        r8, [r10]                    @ excf[0]

        CMP         r4, r6
        BLE         .Llag_loop

.Lnorm_corr_done:

        ADD         r13, r13, #voSTACK
        LDMFD       r13!, {r4 - r12, r15}
228
229        .end
230
231
232