bgnscd.cpp revision 4f1efc098cb5791c3e9f483f2af84aef70d2d0a0
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/****************************************************************************************
19Portions of this file are derived from the following 3GPP standard:
20
21    3GPP TS 26.073
22    ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23    Available from http://www.3gpp.org
24
25(C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26Permission to distribute, modify and use this file under the standard license
27terms listed above has been obtained from the copyright holder.
28****************************************************************************************/
29/*
30------------------------------------------------------------------------------
31
32 Pathname: ./audio/gsm-amr/c/src/bgnscd.c
33 Functions:
34           Bgn_scd_reset
35           Bgn_scd
36
37------------------------------------------------------------------------------
38 MODULE DESCRIPTION
39
40 Background noise source characteristic detector (SCD)
41
42------------------------------------------------------------------------------
43*/
44
45
46/*----------------------------------------------------------------------------
47; INCLUDES
48----------------------------------------------------------------------------*/
49#include <string.h>
50
51#include    "bgnscd.h"
52#include    "typedef.h"
53#include    "basic_op.h"
54#include    "cnst.h"
55#include    "copy.h"
56#include    "gmed_n.h"
57#include    "sqrt_l.h"
58
59/*----------------------------------------------------------------------------
60; MACROS
61; Define module specific macros here
62----------------------------------------------------------------------------*/
63
64
65/*----------------------------------------------------------------------------
66; DEFINES
67; Include all pre-processor statements here. Include conditional
68; compile variables also.
69----------------------------------------------------------------------------*/
70#define TRUE  1
71#define FALSE 0
72
73/*----------------------------------------------------------------------------
74; LOCAL FUNCTION DEFINITIONS
75; Function Prototype declaration
76----------------------------------------------------------------------------*/
77
78/*----------------------------------------------------------------------------
79; LOCAL VARIABLE DEFINITIONS
80; Variable declaration - defined here and used outside this module
81----------------------------------------------------------------------------*/
82
83
84/*
85------------------------------------------------------------------------------
86 FUNCTION NAME: Bgn_scd_reset
87------------------------------------------------------------------------------
88 INPUT AND OUTPUT DEFINITIONS
89
90 Inputs:
91    state = points to memory of type Bgn_scdState.
92
93 Outputs:
94    The memory of type Bgn_scdState pointed to by state is set to all
95        zeros.
96
97 Returns:
98    Returns 0 if memory was successfully initialized,
99        otherwise returns -1.
100
101 Global Variables Used:
102    None.
103
104 Local Variables Needed:
105    None.
106
107------------------------------------------------------------------------------
108 FUNCTION DESCRIPTION
109
110 Resets state memory.
111
112------------------------------------------------------------------------------
113 REQUIREMENTS
114
115 None
116
117------------------------------------------------------------------------------
118 REFERENCES
119
120 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
121
122------------------------------------------------------------------------------
123 PSEUDO-CODE
124
125Word16 Bgn_scd_reset (Bgn_scdState *state)
126{
127   if (state == (Bgn_scdState *) NULL){
128      fprintf(stderr, "Bgn_scd_reset: invalid parameter\n");
129      return -1;
130   }
131
132   // Static vectors to zero
133   Set_zero (state->frameEnergyHist, L_ENERGYHIST);
134
135   // Initialize hangover handling
136   state->bgHangover = 0;
137
138   return 0;
139}
140
141------------------------------------------------------------------------------
142 RESOURCES USED [optional]
143
 When the code is written for a specific target processor
 the resources used should be documented below.
146
147 HEAP MEMORY USED: x bytes
148
149 STACK MEMORY USED: x bytes
150
151 CLOCK CYCLES: (cycle count equation for this function) + (variable
152                used to represent cycle count for each subroutine
153                called)
154     where: (cycle count variable) = cycle count for [subroutine
155                                     name]
156
157------------------------------------------------------------------------------
158 CAUTION [optional]
159 [State any special notes, constraints or cautions for users of this function]
160
161------------------------------------------------------------------------------
162*/
163
164Word16  Bgn_scd_reset(Bgn_scdState *state)
165{
166    if (state == (Bgn_scdState *) NULL)
167    {
168        /* fprintf(stderr, "Bgn_scd_reset: invalid parameter\n");  */
169        return(-1);
170    }
171
172    /* Static vectors to zero */
173    memset(state->frameEnergyHist, 0, L_ENERGYHIST*sizeof(Word16));
174
175    /* Initialize hangover handling */
176    state->bgHangover = 0;
177
178    return(0);
179}
180
181/****************************************************************************/
182
183/*
184------------------------------------------------------------------------------
185 FUNCTION NAME: Bgn_scd
186------------------------------------------------------------------------------
187 INPUT AND OUTPUT DEFINITIONS
188
189 Inputs:
190    st = pointer to state variables of type Bgn_scdState
191    ltpGainHist[] = LTP gain history (Word16)
192    speech[] = synthesis speech frame (Word16)
193    voicedHangover = pointer to # of frames after last voiced frame (Word16)
194    pOverflow      = pointer to overflow indicator (Flag)
195
196 Outputs:
197    st = function updates the state variables of type Bgn_scdState
198        pointed to by st.
199    voicedHangover = function updates the # of frames after last voiced
200        frame pointed to by voicedHangover.
    pOverflow = set to 1 if the basic math function L_add() saturates;
                  otherwise it holds whatever L_add() leaves there
                  (Bgn_scd itself never writes to it directly).
203
204 Returns:
205    inbgNoise = flag if background noise is present (Word16)
206
207 Global Variables Used:
208    None.
209
210 Local Variables Needed:
211    None.
212
213------------------------------------------------------------------------------
214 FUNCTION DESCRIPTION
215
216 Characterize synthesis speech and detect background noise.
217
218------------------------------------------------------------------------------
219 REQUIREMENTS
220
221 None
222
223------------------------------------------------------------------------------
224 REFERENCES
225
226 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
227
228------------------------------------------------------------------------------
229 PSEUDO-CODE
230
231Word16 Bgn_scd (Bgn_scdState *st,      // i : State variables for bgn SCD
232                Word16 ltpGainHist[],  // i : LTP gain history
233                Word16 speech[],       // o : synthesis speech frame
234                Word16 *voicedHangover // o : # of frames after last
235                                              voiced frame
236                )
237{
238   Word16 i;
239   Word16 prevVoiced, inbgNoise;
240   Word16 temp;
241   Word16 ltpLimit, frameEnergyMin;
242   Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
243   Word32 s;
244
245   // Update the inBackgroundNoise flag (valid for use in next frame if BFI)
   // it now works as an energy detector floating on top
247   // not as good as a VAD.
248
249   currEnergy = 0;
250   s = (Word32) 0;
251
252   for (i = 0; i < L_FRAME; i++)
253   {
254       s = L_mac (s, speech[i], speech[i]);
255   }
256
257   s = L_shl(s, 2);
258
259   currEnergy = extract_h (s);
260
261   frameEnergyMin = 32767;
262
263   for (i = 0; i < L_ENERGYHIST; i++)
264   {
265      if (sub(st->frameEnergyHist[i], frameEnergyMin) < 0)
266         frameEnergyMin = st->frameEnergyHist[i];
267   }
268
269   noiseFloor = shl (frameEnergyMin, 4); // Frame Energy Margin of 16
270
271   maxEnergy = st->frameEnergyHist[0];
272   for (i = 1; i < L_ENERGYHIST-4; i++)
273   {
274      if ( sub (maxEnergy, st->frameEnergyHist[i]) < 0)
275      {
276         maxEnergy = st->frameEnergyHist[i];
277      }
278   }
279
280   maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3];
281   for (i = 2*L_ENERGYHIST/3+1; i < L_ENERGYHIST; i++)
282   {
283      if ( sub (maxEnergyLastPart, st->frameEnergyHist[i] ) < 0)
284      {
285         maxEnergyLastPart = st->frameEnergyHist[i];
286      }
287   }
288
289   inbgNoise = 0;        // false
290
291   // Do not consider silence as noise
292   // Do not consider continuous high volume as noise
293   // Or if the current noise level is very low
294   // Mark as noise if under current noise limit
295   // OR if the maximum energy is below the upper limit
296
297   if ( (sub(maxEnergy, LOWERNOISELIMIT) > 0) &&
298        (sub(currEnergy, FRAMEENERGYLIMIT) < 0) &&
299        (sub(currEnergy, LOWERNOISELIMIT) > 0) &&
300        ( (sub(currEnergy, noiseFloor) < 0) ||
301          (sub(maxEnergyLastPart, UPPERNOISELIMIT) < 0)))
302   {
303      if (sub(add(st->bgHangover, 1), 30) > 0)
304      {
305         st->bgHangover = 30;
306      } else
307      {
308         st->bgHangover = add(st->bgHangover, 1);
309      }
310   }
311   else
312   {
313      st->bgHangover = 0;
314   }
315
   // make final decision about frame state, act somewhat cautiously
317   if (sub(st->bgHangover,1) > 0)
318      inbgNoise = 1;       // true
319
320   for (i = 0; i < L_ENERGYHIST-1; i++)
321   {
322      st->frameEnergyHist[i] = st->frameEnergyHist[i+1];
323   }
324   st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy;
325
326   // prepare for voicing decision; tighten the threshold after some
327      time in noise
328   ltpLimit = 13926;             // 0.85  Q14
329   if (sub(st->bgHangover, 8) > 0)
330   {
331      ltpLimit = 15565;          // 0.95  Q14
332   }
333   if (sub(st->bgHangover, 15) > 0)
334   {
335      ltpLimit = 16383;          // 1.00  Q14
336   }
337
338   // weak sort of voicing indication.
339   prevVoiced = 0;        // false
340
341   if (sub(gmed_n(&ltpGainHist[4], 5), ltpLimit) > 0)
342   {
343      prevVoiced = 1;     // true
344   }
345   if (sub(st->bgHangover, 20) > 0) {
346      if (sub(gmed_n(ltpGainHist, 9), ltpLimit) > 0)
347      {
348         prevVoiced = 1;  // true
349      }
350      else
351      {
352         prevVoiced = 0;  // false
353      }
354   }
355
356   if (prevVoiced)
357   {
358      *voicedHangover = 0;
359   }
360   else
361   {
362      temp = add(*voicedHangover, 1);
363      if (sub(temp, 10) > 0)
364      {
365         *voicedHangover = 10;
366      }
367      else
368      {
369         *voicedHangover = temp;
370      }
371   }
372
373   return inbgNoise;
374}
375
376------------------------------------------------------------------------------
377 RESOURCES USED [optional]
378
 When the code is written for a specific target processor
 the resources used should be documented below.
381
382 HEAP MEMORY USED: x bytes
383
384 STACK MEMORY USED: x bytes
385
386 CLOCK CYCLES: (cycle count equation for this function) + (variable
387                used to represent cycle count for each subroutine
388                called)
389     where: (cycle count variable) = cycle count for [subroutine
390                                     name]
391
392------------------------------------------------------------------------------
393 CAUTION [optional]
394 [State any special notes, constraints or cautions for users of this function]
395
396------------------------------------------------------------------------------
397*/
398
399Word16  Bgn_scd(Bgn_scdState *st,       /* i : State variables for bgn SCD  */
400                Word16 ltpGainHist[],  /* i : LTP gain history             */
401                Word16 speech[],       /* o : synthesis speech frame       */
402                Word16 *voicedHangover,/* o : # of frames after last
403                                               voiced frame                 */
404                Flag   *pOverflow
405               )
406{
407    Word16  i;
408    Word16  prevVoiced, inbgNoise;
409    Word16  temp;
410    Word16  ltpLimit, frameEnergyMin;
411    Word16  currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
412    Word32  s, L_temp;
413
414
415    /* Update the inBackgroundNoise flag (valid for use in next frame if BFI)   */
416    /* it now works as a energy detector floating on top                        */
417    /* not as good as a VAD.                                                    */
418
419    s = (Word32) 0;
420
421    for (i = L_FRAME - 1; i >= 0; i--)
422    {
423        L_temp = ((Word32) speech[i]) * speech[i];
424        if (L_temp != (Word32) 0x40000000L)
425        {
426            L_temp = L_temp << 1;
427        }
428        else
429        {
430            L_temp = MAX_32;
431        }
432        s = L_add(s, L_temp, pOverflow);
433    }
434
435    /* s is a sum of squares, so don't need to check for neg overflow */
436    if (s > (Word32)0x1fffffffL)
437    {
438        currEnergy = MAX_16;
439    }
440    else
441    {
442        currEnergy = (Word16)(s >> 14);
443    }
444
445    frameEnergyMin = 32767;
446    for (i = L_ENERGYHIST - 1; i >= 0; i--)
447    {
448        if (st->frameEnergyHist[i] < frameEnergyMin)
449        {
450            frameEnergyMin = st->frameEnergyHist[i];
451        }
452    }
453
454    /* Frame Energy Margin of 16 */
455    L_temp = (Word32)frameEnergyMin << 4;
456    if (L_temp != (Word32)((Word16) L_temp))
457    {
458        if (L_temp > 0)
459        {
460            noiseFloor = MAX_16;
461        }
462        else
463        {
464            noiseFloor = MIN_16;
465        }
466    }
467    else
468    {
469        noiseFloor = (Word16)(L_temp);
470    }
471
472    maxEnergy = st->frameEnergyHist[0];
473    for (i = L_ENERGYHIST - 5; i >= 1; i--)
474    {
475        if (maxEnergy < st->frameEnergyHist[i])
476        {
477            maxEnergy = st->frameEnergyHist[i];
478        }
479    }
480
481    maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3];
482    for (i = 2 * L_ENERGYHIST / 3 + 1; i < L_ENERGYHIST; i++)
483    {
484        if (maxEnergyLastPart < st->frameEnergyHist[i])
485        {
486            maxEnergyLastPart = st->frameEnergyHist[i];
487        }
488    }
489
490    /* Do not consider silence as noise */
491    /* Do not consider continuous high volume as noise */
492    /* Or if the current noise level is very low */
493    /* Mark as noise if under current noise limit */
494    /* OR if the maximum energy is below the upper limit */
495
496    if ((maxEnergy > LOWERNOISELIMIT) &&
497            (currEnergy < FRAMEENERGYLIMIT) &&
498            (currEnergy > LOWERNOISELIMIT) &&
499            ((currEnergy < noiseFloor) ||
500             (maxEnergyLastPart < UPPERNOISELIMIT)))
501    {
502        if ((st->bgHangover + 1) > 30)
503        {
504            st->bgHangover = 30;
505        }
506        else
507        {
508            st->bgHangover += 1;
509        }
510    }
511    else
512    {
513        st->bgHangover = 0;
514    }
515
516    /* make final decision about frame state , act somewhat cautiosly */
517
518    if (st->bgHangover > 1)
519    {
520        inbgNoise = TRUE;
521    }
522    else
523    {
524        inbgNoise = FALSE;
525    }
526
527    for (i = 0; i < L_ENERGYHIST - 1; i++)
528    {
529        st->frameEnergyHist[i] = st->frameEnergyHist[i+1];
530    }
531    st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy;
532
533    /* prepare for voicing decision; tighten the threshold after some
534       time in noise */
535
536    if (st->bgHangover > 15)
537    {
538        ltpLimit = 16383;       /* 1.00  Q14 */
539    }
540    else if (st->bgHangover > 8)
541    {
542        ltpLimit = 15565;       /* 0.95  Q14 */
543    }
544    else
545    {
546        ltpLimit = 13926;       /* 0.85  Q14 */
547    }
548
549    /* weak sort of voicing indication. */
550    prevVoiced = FALSE;
551
552    if (gmed_n(&ltpGainHist[4], 5) > ltpLimit)
553    {
554        prevVoiced = TRUE;
555    }
556
557    if (st->bgHangover > 20)
558    {
559        if (gmed_n(ltpGainHist, 9) > ltpLimit)
560        {
561            prevVoiced = TRUE;
562        }
563        else
564        {
565            prevVoiced = FALSE;
566        }
567    }
568
569
570    if (prevVoiced)
571    {
572        *voicedHangover = 0;
573    }
574    else
575    {
576        temp = *voicedHangover + 1;
577
578        if (temp > 10)
579        {
580            *voicedHangover = 10;
581        }
582        else
583        {
584            *voicedHangover = temp;
585        }
586    }
587
588    return(inbgNoise);
589}
590