dtx.c revision e2e838afcf03e603a41a0455846eaf9614537c16
1/*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 **     http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17/***********************************************************************
18*       File: dtx.c                                                    *
19*                                                                      *
20*	    Description:DTX functions                                  *
21*                                                                      *
22************************************************************************/
23
24#include <stdio.h>
25#include <stdlib.h>
26#include "typedef.h"
27#include "basic_op.h"
28#include "oper_32b.h"
29#include "math_op.h"
30#include "cnst.h"
31#include "acelp.h"                         /* prototype of functions    */
32#include "bits.h"
33#include "dtx.h"
34#include "log2.h"
35#include "mem_align.h"
36
37static void aver_isf_history(
38		Word16 isf_old[],
39		Word16 indices[],
40		Word32 isf_aver[]
41		);
42
43static void find_frame_indices(
44		Word16 isf_old_tx[],
45		Word16 indices[],
46		dtx_encState * st
47		);
48
49static Word16 dithering_control(
50		dtx_encState * st
51		);
52
53/* excitation energy adjustment depending on speech coder mode used, Q7 */
54static Word16 en_adjust[9] =
55{
56	230,                                   /* mode0 = 7k  :  -5.4dB  */
57	179,                                   /* mode1 = 9k  :  -4.2dB  */
58	141,                                   /* mode2 = 12k :  -3.3dB  */
59	128,                                   /* mode3 = 14k :  -3.0dB  */
60	122,                                   /* mode4 = 16k :  -2.85dB */
61	115,                                   /* mode5 = 18k :  -2.7dB  */
62	115,                                   /* mode6 = 20k :  -2.7dB  */
63	115,                                   /* mode7 = 23k :  -2.7dB  */
64	115                                    /* mode8 = 24k :  -2.7dB  */
65};
66
67/**************************************************************************
68*
69* Function    : dtx_enc_init
70*
71**************************************************************************/
72Word16 dtx_enc_init(dtx_encState ** st, Word16 isf_init[], VO_MEM_OPERATOR *pMemOP)
73{
74	dtx_encState *s;
75
76	if (st == (dtx_encState **) NULL)
77	{
78		fprintf(stderr, "dtx_enc_init: invalid parameter\n");
79		return -1;
80	}
81	*st = NULL;
82
83	/* allocate memory */
84	if ((s = (dtx_encState *)mem_malloc(pMemOP, sizeof(dtx_encState), 32, VO_INDEX_ENC_AMRWB)) == NULL)
85	{
86		fprintf(stderr, "dtx_enc_init: can not malloc state structure\n");
87		return -1;
88	}
89	dtx_enc_reset(s, isf_init);
90	*st = s;
91	return 0;
92}
93
94/**************************************************************************
95*
96* Function    : dtx_enc_reset
97*
98**************************************************************************/
99Word16 dtx_enc_reset(dtx_encState * st, Word16 isf_init[])
100{
101	Word32 i;
102
103	if (st == (dtx_encState *) NULL)
104	{
105		fprintf(stderr, "dtx_enc_reset: invalid parameter\n");
106		return -1;
107	}
108	st->hist_ptr = 0;
109	st->log_en_index = 0;
110
111	/* Init isf_hist[] */
112	for (i = 0; i < DTX_HIST_SIZE; i++)
113	{
114		Copy(isf_init, &st->isf_hist[i * M], M);
115	}
116	st->cng_seed = RANDOM_INITSEED;
117
118	/* Reset energy history */
119	Set_zero(st->log_en_hist, DTX_HIST_SIZE);
120
121	st->dtxHangoverCount = DTX_HANG_CONST;
122	st->decAnaElapsedCount = 32767;
123
124	for (i = 0; i < 28; i++)
125	{
126		st->D[i] = 0;
127	}
128
129	for (i = 0; i < DTX_HIST_SIZE - 1; i++)
130	{
131		st->sumD[i] = 0;
132	}
133
134	return 1;
135}
136
137/**************************************************************************
138*
139* Function    : dtx_enc_exit
140*
141**************************************************************************/
142void dtx_enc_exit(dtx_encState ** st, VO_MEM_OPERATOR *pMemOP)
143{
144	if (st == NULL || *st == NULL)
145		return;
146	/* deallocate memory */
147	mem_free(pMemOP, *st, VO_INDEX_ENC_AMRWB);
148	*st = NULL;
149	return;
150}
151
152
153/**************************************************************************
154*
155* Function    : dtx_enc
156*
157**************************************************************************/
158Word16 dtx_enc(
159		dtx_encState * st,                    /* i/o : State struct                                         */
160		Word16 isf[M],                        /* o   : CN ISF vector                                        */
161		Word16 * exc2,                        /* o   : CN excitation                                        */
162		Word16 ** prms
163	      )
164{
165	Word32 i, j;
166	Word16 indice[7];
167	Word16 log_en, gain, level, exp, exp0, tmp;
168	Word16 log_en_int_e, log_en_int_m;
169	Word32 L_isf[M], ener32, level32;
170	Word16 isf_order[3];
171	Word16 CN_dith;
172
173	/* VOX mode computation of SID parameters */
174	log_en = 0;
175	for (i = 0; i < M; i++)
176	{
177		L_isf[i] = 0;
178	}
179	/* average energy and isf */
180	for (i = 0; i < DTX_HIST_SIZE; i++)
181	{
182		/* Division by DTX_HIST_SIZE = 8 has been done in dtx_buffer. log_en is in Q10 */
183		log_en = add(log_en, st->log_en_hist[i]);
184
185	}
186	find_frame_indices(st->isf_hist, isf_order, st);
187	aver_isf_history(st->isf_hist, isf_order, L_isf);
188
189	for (j = 0; j < M; j++)
190	{
191		isf[j] = (Word16)(L_isf[j] >> 3);  /* divide by 8 */
192	}
193
194	/* quantize logarithmic energy to 6 bits (-6 : 66 dB) which corresponds to -2:22 in log2(E).  */
195	/* st->log_en_index = (short)( (log_en + 2.0) * 2.625 ); */
196
197	/* increase dynamics to 7 bits (Q8) */
198	log_en = (log_en >> 2);
199
200	/* Add 2 in Q8 = 512 to get log2(E) between 0:24 */
201	log_en = add(log_en, 512);
202
203	/* Multiply by 2.625 to get full 6 bit range. 2.625 = 21504 in Q13. The result is in Q6 */
204	log_en = mult(log_en, 21504);
205
206	/* Quantize Energy */
207	st->log_en_index = shr(log_en, 6);
208
209	if(st->log_en_index > 63)
210	{
211		st->log_en_index = 63;
212	}
213	if (st->log_en_index < 0)
214	{
215		st->log_en_index = 0;
216	}
217	/* Quantize ISFs */
218	Qisf_ns(isf, isf, indice);
219
220
221	Parm_serial(indice[0], 6, prms);
222	Parm_serial(indice[1], 6, prms);
223	Parm_serial(indice[2], 6, prms);
224	Parm_serial(indice[3], 5, prms);
225	Parm_serial(indice[4], 5, prms);
226
227	Parm_serial((st->log_en_index), 6, prms);
228
229	CN_dith = dithering_control(st);
230	Parm_serial(CN_dith, 1, prms);
231
232	/* level = (float)( pow( 2.0f, (float)st->log_en_index / 2.625 - 2.0 ) );    */
233	/* log2(E) in Q9 (log2(E) lies in between -2:22) */
234	log_en = shl(st->log_en_index, 15 - 6);
235
236	/* Divide by 2.625; log_en will be between 0:24  */
237	log_en = mult(log_en, 12483);
238	/* the result corresponds to log2(gain) in Q10 */
239
240	/* Find integer part  */
241	log_en_int_e = (log_en >> 10);
242
243	/* Find fractional part */
244	log_en_int_m = (Word16) (log_en & 0x3ff);
245	log_en_int_m = shl(log_en_int_m, 5);
246
247	/* Subtract 2 from log_en in Q9, i.e divide the gain by 2 (energy by 4) */
248	/* Add 16 in order to have the result of pow2 in Q16 */
249	log_en_int_e = add(log_en_int_e, 16 - 1);
250
251	level32 = Pow2(log_en_int_e, log_en_int_m); /* Q16 */
252	exp0 = norm_l(level32);
253	level32 = (level32 << exp0);        /* level in Q31 */
254	exp0 = (15 - exp0);
255	level = extract_h(level32);            /* level in Q15 */
256
257	/* generate white noise vector */
258	for (i = 0; i < L_FRAME; i++)
259	{
260		exc2[i] = (Random(&(st->cng_seed)) >> 4);
261	}
262
263	/* gain = level / sqrt(ener) * sqrt(L_FRAME) */
264
265	/* energy of generated excitation */
266	ener32 = Dot_product12(exc2, exc2, L_FRAME, &exp);
267
268	Isqrt_n(&ener32, &exp);
269
270	gain = extract_h(ener32);
271
272	gain = mult(level, gain);              /* gain in Q15 */
273
274	exp = add(exp0, exp);
275
276	/* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */
277	exp += 4;
278
279	for (i = 0; i < L_FRAME; i++)
280	{
281		tmp = mult(exc2[i], gain);         /* Q0 * Q15 */
282		exc2[i] = shl(tmp, exp);
283	}
284
285	return 0;
286}
287
288/**************************************************************************
289*
* Function    : dtx_buffer
* Purpose     : handles the DTX buffer
291*
292**************************************************************************/
293Word16 dtx_buffer(
294		dtx_encState * st,                    /* i/o : State struct                    */
295		Word16 isf_new[],                     /* i   : isf vector                      */
296		Word32 enr,                           /* i   : residual energy (in L_FRAME)    */
297		Word16 codec_mode
298		)
299{
300	Word16 log_en;
301
302	Word16 log_en_e;
303	Word16 log_en_m;
304	st->hist_ptr = add(st->hist_ptr, 1);
305	if(st->hist_ptr == DTX_HIST_SIZE)
306	{
307		st->hist_ptr = 0;
308	}
309	/* copy lsp vector into buffer */
310	Copy(isf_new, &st->isf_hist[st->hist_ptr * M], M);
311
312	/* log_en = (float)log10(enr*0.0059322)/(float)log10(2.0f);  */
313	Log2(enr, &log_en_e, &log_en_m);
314
315	/* convert exponent and mantissa to Word16 Q7. Q7 is used to simplify averaging in dtx_enc */
316	log_en = shl(log_en_e, 7);             /* Q7 */
317	log_en = add(log_en, shr(log_en_m, 15 - 7));
318
319	/* Find energy per sample by multiplying with 0.0059322, i.e subtract log2(1/0.0059322) = 7.39722 The
320	 * constant 0.0059322 takes into account windowings and analysis length from autocorrelation
321	 * computations; 7.39722 in Q7 = 947  */
322	/* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
323	/* log_en = sub( log_en, 947 + en_adjust[codec_mode] ); */
324
325	/* Find energy per sample (divide by L_FRAME=256), i.e subtract log2(256) = 8.0  (1024 in Q7) */
326	/* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
327
328	log_en = sub(log_en, add(1024, en_adjust[codec_mode]));
329
330	/* Insert into the buffer */
331	st->log_en_hist[st->hist_ptr] = log_en;
332	return 0;
333}
334
335/**************************************************************************
336*
* Function    : tx_dtx_handler
* Purpose     : adds extra speech hangover to analyze speech on
*               the decoding side.
340**************************************************************************/
341void tx_dtx_handler(dtx_encState * st,     /* i/o : State struct           */
342		Word16 vad_flag,                      /* i   : vad decision           */
343		Word16 * usedMode                     /* i/o : mode changed or not    */
344		)
345{
346
347	/* this state machine is in synch with the GSMEFR txDtx machine      */
348	st->decAnaElapsedCount = add(st->decAnaElapsedCount, 1);
349
350	if (vad_flag != 0)
351	{
352		st->dtxHangoverCount = DTX_HANG_CONST;
353	} else
354	{                                      /* non-speech */
355		if (st->dtxHangoverCount == 0)
356		{                                  /* out of decoder analysis hangover  */
357			st->decAnaElapsedCount = 0;
358			*usedMode = MRDTX;
359		} else
360		{                                  /* in possible analysis hangover */
361			st->dtxHangoverCount = sub(st->dtxHangoverCount, 1);
362
363			/* decAnaElapsedCount + dtxHangoverCount < DTX_ELAPSED_FRAMES_THRESH */
364			if (sub(add(st->decAnaElapsedCount, st->dtxHangoverCount),
365						DTX_ELAPSED_FRAMES_THRESH) < 0)
366			{
367				*usedMode = MRDTX;
368				/* if short time since decoder update, do not add extra HO */
369			}
370			/* else override VAD and stay in speech mode *usedMode and add extra hangover */
371		}
372	}
373
374	return;
375}
376
377
378
379static void aver_isf_history(
380		Word16 isf_old[],
381		Word16 indices[],
382		Word32 isf_aver[]
383		)
384{
385	Word32 i, j, k;
386	Word16 isf_tmp[2 * M];
387	Word32 L_tmp;
388
389	/* Memorize in isf_tmp[][] the ISF vectors to be replaced by */
390	/* the median ISF vector prior to the averaging               */
391	for (k = 0; k < 2; k++)
392	{
393		if ((indices[k] + 1) != 0)
394		{
395			for (i = 0; i < M; i++)
396			{
397				isf_tmp[k * M + i] = isf_old[indices[k] * M + i];
398				isf_old[indices[k] * M + i] = isf_old[indices[2] * M + i];
399			}
400		}
401	}
402
403	/* Perform the ISF averaging */
404	for (j = 0; j < M; j++)
405	{
406		L_tmp = 0;
407
408		for (i = 0; i < DTX_HIST_SIZE; i++)
409		{
410			L_tmp = L_add(L_tmp, L_deposit_l(isf_old[i * M + j]));
411		}
412		isf_aver[j] = L_tmp;
413	}
414
415	/* Retrieve from isf_tmp[][] the ISF vectors saved prior to averaging */
416	for (k = 0; k < 2; k++)
417	{
418		if ((indices[k] + 1) != 0)
419		{
420			for (i = 0; i < M; i++)
421			{
422				isf_old[indices[k] * M + i] = isf_tmp[k * M + i];
423			}
424		}
425	}
426
427	return;
428}
429
430static void find_frame_indices(
431		Word16 isf_old_tx[],
432		Word16 indices[],
433		dtx_encState * st
434		)
435{
436	Word32 L_tmp, summin, summax, summax2nd;
437	Word16 i, j, tmp;
438	Word16 ptr;
439
440	/* Remove the effect of the oldest frame from the column */
441	/* sum sumD[0..DTX_HIST_SIZE-1]. sumD[DTX_HIST_SIZE] is    */
442	/* not updated since it will be removed later.           */
443
444	tmp = DTX_HIST_SIZE_MIN_ONE;
445	j = -1;
446	for (i = 0; i < DTX_HIST_SIZE_MIN_ONE; i++)
447	{
448		j = add(j, tmp);
449		st->sumD[i] = L_sub(st->sumD[i], st->D[j]);
450		tmp = sub(tmp, 1);
451	}
452
453	/* Shift the column sum sumD. The element sumD[DTX_HIST_SIZE-1]    */
454	/* corresponding to the oldest frame is removed. The sum of     */
455	/* the distances between the latest isf and other isfs, */
456	/* i.e. the element sumD[0], will be computed during this call. */
457	/* Hence this element is initialized to zero.                   */
458
459	for (i = DTX_HIST_SIZE_MIN_ONE; i > 0; i--)
460	{
461		st->sumD[i] = st->sumD[i - 1];
462	}
463	st->sumD[0] = 0;
464
465	/* Remove the oldest frame from the distance matrix.           */
466	/* Note that the distance matrix is replaced by a one-         */
467	/* dimensional array to save static memory.                    */
468
469	tmp = 0;
470	for (i = 27; i >= 12; i = (Word16) (i - tmp))
471	{
472		tmp = add(tmp, 1);
473		for (j = tmp; j > 0; j--)
474		{
475			st->D[i - j + 1] = st->D[i - j - tmp];
476		}
477	}
478
479	/* Compute the first column of the distance matrix D            */
480	/* (squared Euclidean distances from isf1[] to isf_old_tx[][]). */
481
482	ptr = st->hist_ptr;
483	for (i = 1; i < DTX_HIST_SIZE; i++)
484	{
485		/* Compute the distance between the latest isf and the other isfs. */
486		ptr = sub(ptr, 1);
487		if (ptr < 0)
488		{
489			ptr = DTX_HIST_SIZE_MIN_ONE;
490		}
491		L_tmp = 0;
492		for (j = 0; j < M; j++)
493		{
494			tmp = sub(isf_old_tx[st->hist_ptr * M + j], isf_old_tx[ptr * M + j]);
495			L_tmp = L_mac(L_tmp, tmp, tmp);
496		}
497		st->D[i - 1] = L_tmp;
498
499		/* Update also the column sums. */
500		st->sumD[0] = L_add(st->sumD[0], st->D[i - 1]);
501		st->sumD[i] = L_add(st->sumD[i], st->D[i - 1]);
502	}
503
504	/* Find the minimum and maximum distances */
505	summax = st->sumD[0];
506	summin = st->sumD[0];
507	indices[0] = 0;
508	indices[2] = 0;
509	for (i = 1; i < DTX_HIST_SIZE; i++)
510	{
511		if (L_sub(st->sumD[i], summax) > 0)
512		{
513			indices[0] = i;
514			summax = st->sumD[i];
515		}
516		if (L_sub(st->sumD[i], summin) < 0)
517		{
518			indices[2] = i;
519			summin = st->sumD[i];
520		}
521	}
522
523	/* Find the second largest distance */
524	summax2nd = -2147483647L;
525	indices[1] = -1;
526	for (i = 0; i < DTX_HIST_SIZE; i++)
527	{
528		if ((L_sub(st->sumD[i], summax2nd) > 0) && (sub(i, indices[0]) != 0))
529		{
530			indices[1] = i;
531			summax2nd = st->sumD[i];
532		}
533	}
534
535	for (i = 0; i < 3; i++)
536	{
537		indices[i] = sub(st->hist_ptr, indices[i]);
538		if (indices[i] < 0)
539		{
540			indices[i] = add(indices[i], DTX_HIST_SIZE);
541		}
542	}
543
544	/* If maximum distance/MED_THRESH is smaller than minimum distance */
545	/* then the median ISF vector replacement is not performed         */
546	tmp = norm_l(summax);
547	summax = (summax << tmp);
548	summin = (summin << tmp);
549	L_tmp = L_mult(voround(summax), INV_MED_THRESH);
550	if(L_tmp <= summin)
551	{
552		indices[0] = -1;
553	}
554	/* If second largest distance/MED_THRESH is smaller than     */
555	/* minimum distance then the median ISF vector replacement is    */
556	/* not performed                                                 */
557	summax2nd = L_shl(summax2nd, tmp);
558	L_tmp = L_mult(voround(summax2nd), INV_MED_THRESH);
559	if(L_tmp <= summin)
560	{
561		indices[1] = -1;
562	}
563	return;
564}
565
566static Word16 dithering_control(
567		dtx_encState * st
568		)
569{
570	Word16 tmp, mean, CN_dith, gain_diff;
571	Word32 i, ISF_diff;
572
573	/* determine how stationary the spectrum of background noise is */
574	ISF_diff = 0;
575	for (i = 0; i < 8; i++)
576	{
577		ISF_diff = L_add(ISF_diff, st->sumD[i]);
578	}
579	if ((ISF_diff >> 26) > 0)
580	{
581		CN_dith = 1;
582	} else
583	{
584		CN_dith = 0;
585	}
586
587	/* determine how stationary the energy of background noise is */
588	mean = 0;
589	for (i = 0; i < DTX_HIST_SIZE; i++)
590	{
591		mean = add(mean, st->log_en_hist[i]);
592	}
593	mean = (mean >> 3);
594	gain_diff = 0;
595	for (i = 0; i < DTX_HIST_SIZE; i++)
596	{
597		tmp = abs_s(sub(st->log_en_hist[i], mean));
598		gain_diff = add(gain_diff, tmp);
599	}
600	if (gain_diff > GAIN_THR)
601	{
602		CN_dith = 1;
603	}
604	return CN_dith;
605}
606