/* wb_vad.c revision 5d5c3a132bb446ac78a37dfaac24a46cacf0dd73 */
/*
 ** Copyright 2003-2010, VisualOn, Inc.
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 **
 **     http://www.apache.org/licenses/LICENSE-2.0
 **
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 */
16
/***********************************************************************
*      File: wb_vad.c                                                  *
*                                                                      *
*      Description: Voice Activity Detection                           *
*                                                                      *
************************************************************************/
23
24#include <stdlib.h>
25#include <stdio.h>
26#include "cnst.h"
27#include "wb_vad.h"
28#include "typedef.h"
29#include "basic_op.h"
30#include "math_op.h"
31#include "wb_vad_c.h"
32#include "mem_align.h"
33
34/******************************************************************************
35*  Calculate Log2 and scale the signal:
36*
37*    ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
38*
39*  input   output
40*  32768   16384
41*  1       31744
42*
43* When input is in the range of [1,2^16], max error is 0.0380%.
44*********************************************************************************/
45
46static Word16 ilog2(                       /* return: output value of the log2 */
47        Word16 mant                        /* i: value to be converted */
48        )
49{
50    Word16 ex, ex2, res;
51    Word32 i, l_temp;
52
53    if (mant <= 0)
54    {
55        mant = 1;
56    }
57    ex = norm_s(mant);
58    mant = mant << ex;
59
60    for (i = 0; i < 3; i++)
61        mant = vo_mult(mant, mant);
62    l_temp = vo_L_mult(mant, mant);
63
64    ex2 = norm_l(l_temp);
65    mant = extract_h(l_temp << ex2);
66
67    res = (ex + 16) << 10;
68    res = add1(res, (ex2 << 6));
69    res = vo_sub(add1(res, 127), (mant >> 8));
70    return (res);
71}
72
73/******************************************************************************
74*
75*     Function     : filter5
76*     Purpose      : Fifth-order half-band lowpass/highpass filter pair with
77*                    decimation.
78*
79*******************************************************************************/
80
81static void filter5(
82        Word16 * in0,                         /* i/o : input values; output low-pass part  */
83        Word16 * in1,                         /* i/o : input values; output high-pass part */
84        Word16 data[]                         /* i/o : filter memory                       */
85        )
86{
87    Word16 temp0, temp1, temp2;
88
89    temp0 = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
90    temp1 = add1(data[0], vo_mult(COEFF5_1, temp0));
91    data[0] = temp0;
92
93    temp0 = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
94    temp2 = add1(data[1], vo_mult(COEFF5_2, temp0));
95    data[1] = temp0;
96
97    *in0 = extract_h((vo_L_add(temp1, temp2) << 15));
98    *in1 = extract_h((vo_L_sub(temp1, temp2) << 15));
99}
100
101/******************************************************************************
102*
103*     Function     : filter3
104*     Purpose      : Third-order half-band lowpass/highpass filter pair with
105*                    decimation.
106*
107*******************************************************************************/
108
109static void filter3(
110        Word16 * in0,                         /* i/o : input values; output low-pass part  */
111        Word16 * in1,                         /* i/o : input values; output high-pass part */
112        Word16 * data                         /* i/o : filter memory                       */
113        )
114{
115    Word16 temp1, temp2;
116
117    temp1 = vo_sub(*in1, vo_mult(COEFF3, *data));
118    temp2 = add1(*data, vo_mult(COEFF3, temp1));
119    *data = temp1;
120
121    *in1 = extract_h((vo_L_sub(*in0, temp2) << 15));
122    *in0 = extract_h((vo_L_add(*in0, temp2) << 15));
123}
124
125/******************************************************************************
126*
127*     Function   : level_calculation
128*     Purpose    : Calculate signal level in a sub-band. Level is calculated
129*                  by summing absolute values of the input data.
130*
131*                  Signal level calculated from of the end of the frame
132*                  (data[count1 - count2]) is stored to (*sub_level)
133*                  and added to the level of the next frame.
134*
135******************************************************************************/
136
137static Word16 level_calculation(                      /* return: signal level */
138        Word16 data[],                        /* i   : signal buffer                                    */
139        Word16 * sub_level,                   /* i   : level calculated at the end of the previous frame*/
140                                              /* o   : level of signal calculated from the last         */
141                                              /*       (count2 - count1) samples                        */
142        Word16 count1,                        /* i   : number of samples to be counted                  */
143        Word16 count2,                        /* i   : number of samples to be counted                  */
144        Word16 ind_m,                         /* i   : step size for the index of the data buffer       */
145        Word16 ind_a,                         /* i   : starting index of the data buffer                */
146        Word16 scale                          /* i   : scaling for the level calculation                */
147        )
148{
149    Word32 i, l_temp1, l_temp2;
150    Word16 level;
151
152    l_temp1 = 0L;
153    for (i = count1; i < count2; i++)
154    {
155        l_temp1 += (abs_s(data[ind_m * i + ind_a])<<1);
156    }
157
158    l_temp2 = vo_L_add(l_temp1, L_shl(*sub_level, 16 - scale));
159    *sub_level = extract_h(L_shl(l_temp1, scale));
160
161    for (i = 0; i < count1; i++)
162    {
163        l_temp2 += (abs_s(data[ind_m * i + ind_a])<<1);
164    }
165    level = extract_h(L_shl2(l_temp2, scale));
166
167    return level;
168}
169
170/******************************************************************************
171*
172*     Function     : filter_bank
173*     Purpose      : Divide input signal into bands and calculate level of
174*                    the signal in each band
175*
176*******************************************************************************/
177
178static void filter_bank(
179        VadVars * st,                         /* i/o : State struct               */
180        Word16 in[],                          /* i   : input frame                */
181        Word16 level[]                        /* o   : signal levels at each band */
182        )
183{
184    Word32 i;
185    Word16 tmp_buf[FRAME_LEN];
186
187    /* shift input 1 bit down for safe scaling */
188    for (i = 0; i < FRAME_LEN; i++)
189    {
190        tmp_buf[i] = in[i] >> 1;
191    }
192
193    /* run the filter bank */
194    for (i = 0; i < 128; i++)
195    {
196        filter5(&tmp_buf[2 * i], &tmp_buf[2 * i + 1], st->a_data5[0]);
197    }
198    for (i = 0; i < 64; i++)
199    {
200        filter5(&tmp_buf[4 * i], &tmp_buf[4 * i + 2], st->a_data5[1]);
201        filter5(&tmp_buf[4 * i + 1], &tmp_buf[4 * i + 3], st->a_data5[2]);
202    }
203    for (i = 0; i < 32; i++)
204    {
205        filter5(&tmp_buf[8 * i], &tmp_buf[8 * i + 4], st->a_data5[3]);
206        filter5(&tmp_buf[8 * i + 2], &tmp_buf[8 * i + 6], st->a_data5[4]);
207        filter3(&tmp_buf[8 * i + 3], &tmp_buf[8 * i + 7], &st->a_data3[0]);
208    }
209    for (i = 0; i < 16; i++)
210    {
211        filter3(&tmp_buf[16 * i + 0], &tmp_buf[16 * i + 8], &st->a_data3[1]);
212        filter3(&tmp_buf[16 * i + 4], &tmp_buf[16 * i + 12], &st->a_data3[2]);
213        filter3(&tmp_buf[16 * i + 6], &tmp_buf[16 * i + 14], &st->a_data3[3]);
214    }
215
216    for (i = 0; i < 8; i++)
217    {
218        filter3(&tmp_buf[32 * i + 0], &tmp_buf[32 * i + 16], &st->a_data3[4]);
219        filter3(&tmp_buf[32 * i + 8], &tmp_buf[32 * i + 24], &st->a_data3[5]);
220    }
221
222    /* calculate levels in each frequency band */
223
224    /* 4800 - 6400 Hz */
225    level[11] = level_calculation(tmp_buf, &st->sub_level[11], 16, 64, 4, 1, 14);
226    /* 4000 - 4800 Hz */
227    level[10] = level_calculation(tmp_buf, &st->sub_level[10], 8, 32, 8, 7, 15);
228    /* 3200 - 4000 Hz */
229    level[9] = level_calculation(tmp_buf, &st->sub_level[9],8, 32, 8, 3, 15);
230    /* 2400 - 3200 Hz */
231    level[8] = level_calculation(tmp_buf, &st->sub_level[8],8, 32, 8, 2, 15);
232    /* 2000 - 2400 Hz */
233    level[7] = level_calculation(tmp_buf, &st->sub_level[7],4, 16, 16, 14, 16);
234    /* 1600 - 2000 Hz */
235    level[6] = level_calculation(tmp_buf, &st->sub_level[6],4, 16, 16, 6, 16);
236    /* 1200 - 1600 Hz */
237    level[5] = level_calculation(tmp_buf, &st->sub_level[5],4, 16, 16, 4, 16);
238    /* 800 - 1200 Hz */
239    level[4] = level_calculation(tmp_buf, &st->sub_level[4],4, 16, 16, 12, 16);
240    /* 600 - 800 Hz */
241    level[3] = level_calculation(tmp_buf, &st->sub_level[3],2, 8, 32, 8, 17);
242    /* 400 - 600 Hz */
243    level[2] = level_calculation(tmp_buf, &st->sub_level[2],2, 8, 32, 24, 17);
244    /* 200 - 400 Hz */
245    level[1] = level_calculation(tmp_buf, &st->sub_level[1],2, 8, 32, 16, 17);
246    /* 0 - 200 Hz */
247    level[0] = level_calculation(tmp_buf, &st->sub_level[0],2, 8, 32, 0, 17);
248}
249
250/******************************************************************************
251*
252*     Function   : update_cntrl
253*     Purpose    : Control update of the background noise estimate.
254*
255*******************************************************************************/
256
257static void update_cntrl(
258        VadVars * st,                         /* i/o : State structure                    */
259        Word16 level[]                        /* i   : sub-band levels of the input frame */
260        )
261{
262    Word32 i;
263    Word16 num, temp, stat_rat, exp, denom;
264    Word16 alpha;
265
266    /* if a tone has been detected for a while, initialize stat_count */
267    if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0)
268    {
269        st->stat_count = STAT_COUNT;
270    } else
271    {
272        /* if 8 last vad-decisions have been "0", reinitialize stat_count */
273        if ((st->vadreg & 0x7f80) == 0)
274        {
275            st->stat_count = STAT_COUNT;
276        } else
277        {
278            stat_rat = 0;
279            for (i = 0; i < COMPLEN; i++)
280            {
281                if(level[i] > st->ave_level[i])
282                {
283                    num = level[i];
284                    denom = st->ave_level[i];
285                } else
286                {
287                    num = st->ave_level[i];
288                    denom = level[i];
289                }
290                /* Limit nimimum value of num and denom to STAT_THR_LEVEL */
291                if(num < STAT_THR_LEVEL)
292                {
293                    num = STAT_THR_LEVEL;
294                }
295                if(denom < STAT_THR_LEVEL)
296                {
297                    denom = STAT_THR_LEVEL;
298                }
299                exp = norm_s(denom);
300                denom = denom << exp;
301
302                /* stat_rat = num/denom * 64 */
303                temp = div_s(num >> 1, denom);
304                stat_rat = add1(stat_rat, shr(temp, (8 - exp)));
305            }
306
307            /* compare stat_rat with a threshold and update stat_count */
308            if(stat_rat > STAT_THR)
309            {
310                st->stat_count = STAT_COUNT;
311            } else
312            {
313                if ((st->vadreg & 0x4000) != 0)
314                {
315
316                    if (st->stat_count != 0)
317                    {
318                        st->stat_count = st->stat_count - 1;
319                    }
320                }
321            }
322        }
323    }
324
325    /* Update average amplitude estimate for stationarity estimation */
326    alpha = ALPHA4;
327    if(st->stat_count == STAT_COUNT)
328    {
329        alpha = 32767;
330    } else if ((st->vadreg & 0x4000) == 0)
331    {
332        alpha = ALPHA5;
333    }
334    for (i = 0; i < COMPLEN; i++)
335    {
336        st->ave_level[i] = add1(st->ave_level[i], vo_mult_r(alpha, vo_sub(level[i], st->ave_level[i])));
337    }
338}
339
340/******************************************************************************
341*
342*     Function     : hangover_addition
343*     Purpose      : Add hangover after speech bursts
344*
345*******************************************************************************/
346
347static Word16 hangover_addition(                      /* return: VAD_flag indicating final VAD decision */
348        VadVars * st,                         /* i/o : State structure                     */
349        Word16 low_power,                     /* i   : flag power of the input frame    */
350        Word16 hang_len,                      /* i   : hangover length */
351        Word16 burst_len                      /* i   : minimum burst length for hangover addition */
352        )
353{
354    /* if the input power (pow_sum) is lower than a threshold, clear counters and set VAD_flag to "0"         */
355    if (low_power != 0)
356    {
357        st->burst_count = 0;
358        st->hang_count = 0;
359        return 0;
360    }
361    /* update the counters (hang_count, burst_count) */
362    if ((st->vadreg & 0x4000) != 0)
363    {
364        st->burst_count = st->burst_count + 1;
365        if(st->burst_count >= burst_len)
366        {
367            st->hang_count = hang_len;
368        }
369        return 1;
370    } else
371    {
372        st->burst_count = 0;
373        if (st->hang_count > 0)
374        {
375            st->hang_count = st->hang_count - 1;
376            return 1;
377        }
378    }
379    return 0;
380}
381
382/******************************************************************************
383*
384*     Function   : noise_estimate_update
385*     Purpose    : Update of background noise estimate
386*
387*******************************************************************************/
388
389static void noise_estimate_update(
390        VadVars * st,                         /* i/o : State structure                       */
391        Word16 level[]                        /* i   : sub-band levels of the input frame */
392        )
393{
394    Word32 i;
395    Word16 alpha_up, alpha_down, bckr_add = 2;
396
397    /* Control update of bckr_est[] */
398    update_cntrl(st, level);
399
400    /* Choose update speed */
401    if ((0x7800 & st->vadreg) == 0)
402    {
403        alpha_up = ALPHA_UP1;
404        alpha_down = ALPHA_DOWN1;
405    } else
406    {
407        if (st->stat_count == 0)
408        {
409            alpha_up = ALPHA_UP2;
410            alpha_down = ALPHA_DOWN2;
411        } else
412        {
413            alpha_up = 0;
414            alpha_down = ALPHA3;
415            bckr_add = 0;
416        }
417    }
418
419    /* Update noise estimate (bckr_est) */
420    for (i = 0; i < COMPLEN; i++)
421    {
422        Word16 temp;
423        temp = (st->old_level[i] - st->bckr_est[i]);
424
425        if (temp < 0)
426        {                                  /* update downwards */
427            st->bckr_est[i] = add1(-2, add(st->bckr_est[i],vo_mult_r(alpha_down, temp)));
428            /* limit minimum value of the noise estimate to NOISE_MIN */
429            if(st->bckr_est[i] < NOISE_MIN)
430            {
431                st->bckr_est[i] = NOISE_MIN;
432            }
433        } else
434        {                                  /* update upwards */
435            st->bckr_est[i] = add1(bckr_add, add1(st->bckr_est[i],vo_mult_r(alpha_up, temp)));
436
437            /* limit maximum value of the noise estimate to NOISE_MAX */
438            if(st->bckr_est[i] > NOISE_MAX)
439            {
440                st->bckr_est[i] = NOISE_MAX;
441            }
442        }
443    }
444
445    /* Update signal levels of the previous frame (old_level) */
446    for (i = 0; i < COMPLEN; i++)
447    {
448        st->old_level[i] = level[i];
449    }
450}
451
452/******************************************************************************
453*
454*     Function     : vad_decision
455*     Purpose      : Calculates VAD_flag
456*
457*******************************************************************************/
458
459static Word16 vad_decision(                           /* return value : VAD_flag */
460        VadVars * st,                         /* i/o : State structure                       */
461        Word16 level[COMPLEN],                /* i   : sub-band levels of the input frame */
462        Word32 pow_sum                        /* i   : power of the input frame           */
463        )
464{
465    Word32 i;
466    Word32 L_snr_sum;
467    Word32 L_temp;
468    Word16 vad_thr, temp, noise_level;
469    Word16 low_power_flag;
470    Word16 hang_len, burst_len;
471    Word16 ilog2_speech_level, ilog2_noise_level;
472    Word16 temp2;
473
474    /* Calculate squared sum of the input levels (level) divided by the background noise components
475     * (bckr_est). */
476    L_snr_sum = 0;
477    for (i = 0; i < COMPLEN; i++)
478    {
479        Word16 exp;
480
481        exp = norm_s(st->bckr_est[i]);
482        temp = (st->bckr_est[i] << exp);
483        temp = div_s((level[i] >> 1), temp);
484        temp = shl(temp, (exp - (UNIRSHFT - 1)));
485        L_snr_sum = L_mac(L_snr_sum, temp, temp);
486    }
487
488    /* Calculate average level of estimated background noise */
489    L_temp = 0;
490    for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
491    {
492        L_temp = vo_L_add(L_temp, st->bckr_est[i]);
493    }
494
495    noise_level = extract_h((L_temp << 12));
496    /* if SNR is lower than a threshold (MIN_SPEECH_SNR), and increase speech_level */
497    temp = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;
498
499    if(st->speech_level < temp)
500    {
501        st->speech_level = temp;
502    }
503    ilog2_noise_level = ilog2(noise_level);
504
505    /* If SNR is very poor, speech_level is probably corrupted by noise level. This is correctred by
506     * subtracting MIN_SPEECH_SNR*noise_level from speech level */
507    ilog2_speech_level = ilog2(st->speech_level - temp);
508
509    temp = add1(vo_mult(NO_SLOPE, (ilog2_noise_level - NO_P1)), THR_HIGH);
510
511    temp2 = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (ilog2_speech_level - SP_P1)));
512    if (temp2 < SP_CH_MIN)
513    {
514        temp2 = SP_CH_MIN;
515    }
516    if (temp2 > SP_CH_MAX)
517    {
518        temp2 = SP_CH_MAX;
519    }
520    vad_thr = temp + temp2;
521
522    if(vad_thr < THR_MIN)
523    {
524        vad_thr = THR_MIN;
525    }
526    /* Shift VAD decision register */
527    st->vadreg = (st->vadreg >> 1);
528
529    /* Make intermediate VAD decision */
530    if(L_snr_sum > vo_L_mult(vad_thr, (512 * COMPLEN)))
531    {
532        st->vadreg = (Word16) (st->vadreg | 0x4000);
533    }
534    /* check if the input power (pow_sum) is lower than a threshold" */
535    if(pow_sum < VAD_POW_LOW)
536    {
537        low_power_flag = 1;
538    } else
539    {
540        low_power_flag = 0;
541    }
542    /* Update background noise estimates */
543    noise_estimate_update(st, level);
544
545    /* Calculate values for hang_len and burst_len based on vad_thr */
546    hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH);
547    if(hang_len < HANG_LOW)
548    {
549        hang_len = HANG_LOW;
550    }
551    burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH);
552
553    return (hangover_addition(st, low_power_flag, hang_len, burst_len));
554}
555
556/******************************************************************************
557*
558*     Function : Estimate_Speech()
559*     Purpose  : Estimate speech level
560*
561* Maximum signal level is searched and stored to the variable sp_max.
562* The speech frames must locate within SP_EST_COUNT number of frames.
563* Thus, noisy frames having occasional VAD = "1" decisions will not
564* affect to the estimated speech_level.
565*
566*******************************************************************************/
567
568static void Estimate_Speech(
569        VadVars * st,                         /* i/o : State structure    */
570        Word16 in_level                       /* level of the input frame */
571        )
572{
573    Word16 alpha;
574
575    /* if the required activity count cannot be achieved, reset counters */
576    if((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
577    {
578        st->sp_est_cnt = 0;
579        st->sp_max = 0;
580        st->sp_max_cnt = 0;
581    }
582    st->sp_est_cnt += 1;
583
584    if (((st->vadreg & 0x4000)||(in_level > st->speech_level)) && (in_level > MIN_SPEECH_LEVEL1))
585    {
586        /* update sp_max */
587        if(in_level > st->sp_max)
588        {
589            st->sp_max = in_level;
590        }
591        st->sp_max_cnt += 1;
592
593        if(st->sp_max_cnt >= SP_ACTIVITY_COUNT)
594        {
595            Word16 tmp;
596            /* update speech estimate */
597            tmp = (st->sp_max >> 1);      /* scale to get "average" speech level */
598
599            /* select update speed */
600            if(tmp > st->speech_level)
601            {
602                alpha = ALPHA_SP_UP;
603            } else
604            {
605                alpha = ALPHA_SP_DOWN;
606            }
607            if(tmp > MIN_SPEECH_LEVEL2)
608            {
609                st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(tmp, st->speech_level)));
610            }
611            /* clear all counters used for speech estimation */
612            st->sp_max = 0;
613            st->sp_max_cnt = 0;
614            st->sp_est_cnt = 0;
615        }
616    }
617}
618
619/******************************************************************************
620*
621*  Function:   wb_vad_init
622*  Purpose:    Allocates state memory and initializes state memory
623*
624*******************************************************************************/
625
626Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok. */
627        VadVars ** state,                     /* i/o : State structure    */
628        VO_MEM_OPERATOR *pMemOP
629        )
630{
631    VadVars *s;
632
633    if (state == (VadVars **) NULL)
634    {
635        fprintf(stderr, "vad_init: invalid parameter\n");
636        return -1;
637    }
638    *state = NULL;
639
640    /* allocate memory */
641    if ((s = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB)) == NULL)
642    {
643        fprintf(stderr, "vad_init: can not malloc state structure\n");
644        return -1;
645    }
646    wb_vad_reset(s);
647
648    *state = s;
649
650    return 0;
651}
652
653/******************************************************************************
654*
655*  Function:   wb_vad_reset
656*  Purpose:    Initializes state memory
657*
658*******************************************************************************/
659
660Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok. */
661        VadVars * state                       /* i/o : State structure    */
662        )
663{
664    Word32 i, j;
665
666    if (state == (VadVars *) NULL)
667    {
668        fprintf(stderr, "vad_reset: invalid parameter\n");
669        return -1;
670    }
671    state->tone_flag = 0;
672    state->vadreg = 0;
673    state->hang_count = 0;
674    state->burst_count = 0;
675    state->hang_count = 0;
676
677    /* initialize memory used by the filter bank */
678    for (i = 0; i < F_5TH_CNT; i++)
679    {
680        for (j = 0; j < 2; j++)
681        {
682            state->a_data5[i][j] = 0;
683        }
684    }
685
686    for (i = 0; i < F_3TH_CNT; i++)
687    {
688        state->a_data3[i] = 0;
689    }
690
691    /* initialize the rest of the memory */
692    for (i = 0; i < COMPLEN; i++)
693    {
694        state->bckr_est[i] = NOISE_INIT;
695        state->old_level[i] = NOISE_INIT;
696        state->ave_level[i] = NOISE_INIT;
697        state->sub_level[i] = 0;
698    }
699
700    state->sp_est_cnt = 0;
701    state->sp_max = 0;
702    state->sp_max_cnt = 0;
703    state->speech_level = SPEECH_LEVEL_INIT;
704    state->prev_pow_sum = 0;
705    return 0;
706}
707
708/******************************************************************************
709*
710*  Function:   wb_vad_exit
711*  Purpose:    The memory used for state memory is freed
712*
713*******************************************************************************/
714
715void wb_vad_exit(
716        VadVars ** state,                      /* i/o : State structure    */
717        VO_MEM_OPERATOR *pMemOP
718        )
719{
720    if (state == NULL || *state == NULL)
721        return;
722    /* deallocate memory */
723    mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
724    *state = NULL;
725    return;
726}
727
728/******************************************************************************
729*
730*     Function     : wb_vad_tone_detection
731*     Purpose      : Search maximum pitch gain from a frame. Set tone flag if
732*                    pitch gain is high. This is used to detect
733*                    signaling tones and other signals with high pitch gain.
734*
735*******************************************************************************/
736
737void wb_vad_tone_detection(
738        VadVars * st,                         /* i/o : State struct            */
739        Word16 p_gain                         /* pitch gain      */
740        )
741{
742    /* update tone flag */
743    st->tone_flag = (st->tone_flag >> 1);
744
745    /* if (pitch_gain > TONE_THR) set tone flag */
746    if (p_gain > TONE_THR)
747    {
748        st->tone_flag = (Word16) (st->tone_flag | 0x4000);
749    }
750}
751
752/******************************************************************************
753*
754*     Function     : wb_vad
755*     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
756*
757*******************************************************************************/
758
759Word16 wb_vad(                                /* Return value : VAD Decision, 1 = speech, 0 = noise */
760        VadVars * st,                         /* i/o : State structure                 */
761        Word16 in_buf[]                       /* i   : samples of the input frame   */
762         )
763{
764    Word16 level[COMPLEN];
765    Word32 i;
766    Word16 VAD_flag, temp;
767    Word32 L_temp, pow_sum;
768
769    /* Calculate power of the input frame. */
770    L_temp = 0L;
771    for (i = 0; i < FRAME_LEN; i++)
772    {
773        L_temp = L_mac(L_temp, in_buf[i], in_buf[i]);
774    }
775
776    /* pow_sum = power of current frame and previous frame */
777    pow_sum = L_add(L_temp, st->prev_pow_sum);
778
779    /* save power of current frame for next call */
780    st->prev_pow_sum = L_temp;
781
782    /* If input power is very low, clear tone flag */
783    if (pow_sum < POW_TONE_THR)
784    {
785        st->tone_flag = (Word16) (st->tone_flag & 0x1fff);
786    }
787    /* Run the filter bank and calculate signal levels at each band */
788    filter_bank(st, in_buf, level);
789
790    /* compute VAD decision */
791    VAD_flag = vad_decision(st, level, pow_sum);
792
793    /* Calculate input level */
794    L_temp = 0;
795    for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
796    {
797        L_temp = vo_L_add(L_temp, level[i]);
798    }
799
800    temp = extract_h(L_temp << 12);
801
802    Estimate_Speech(st, temp);             /* Estimate speech level */
803    return (VAD_flag);
804}
805
806
807
808
809