/* Copyright (C) 2002 Jean-Marc Valin
   File: vbr.c

   VBR-related routines

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/
34
35#ifdef HAVE_CONFIG_H
36#include "config.h"
37#endif
38
39#include "vbr.h"
40#include <math.h>
41
42
/* Square of x.  The argument is expanded twice, so it must not have side
   effects. */
#define sqr(x) ((x)*(x))

/* Energy floor: added to the frame energy before its logarithm is taken,
   and the level a frame's energy must exceed before that frame is allowed
   to update the noise estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy by the noise-level tracker. */
#define NOISE_POW .3
47
48#ifndef DISABLE_VBR
49
/* Narrowband VBR thresholds: 9 rows of 11 values.  Each row is labelled
   with the nominal bit-rate of the mode it belongs to; note that the
   4 kbps row comes last, so rows are not ordered by bit-rate.
   NOTE(review): the 11 columns are presumably indexed by the integer VBR
   quality setting (0..10), and each entry is presumably the minimum
   vbr_analysis() result for which that mode is chosen -- confirm against
   the encoder code that reads this table. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
61
62
/* High-band VBR thresholds: 5 rows of 11 values, each row labelled with
   the nominal bit-rate of the mode it belongs to.  The first two rows are
   -1 throughout, which is the lowest value vbr_analysis() returns.
   NOTE(review): the 11 columns are presumably indexed by the integer VBR
   quality setting (0..10) -- confirm against the encoder code that reads
   this table. */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
};
70
/* Thresholds for the "uhb" band: 2 rows of 11 values, labelled silence
   and 2 kbps.
   NOTE(review): "uhb" presumably stands for ultra-high band and the 11
   columns are presumably indexed by the integer VBR quality setting
   (0..10) -- confirm against the encoder code that reads this table. */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
};
75
76void vbr_init(VBRState *vbr)
77{
78   int i;
79
80   vbr->average_energy=0;
81   vbr->last_energy=1;
82   vbr->accum_sum=0;
83   vbr->energy_alpha=.1;
84   vbr->soft_pitch=0;
85   vbr->last_pitch_coef=0;
86   vbr->last_quality=0;
87
88   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
89   vbr->noise_accum_count=.05;
90   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
91   vbr->consec_noise=0;
92
93
94   for (i=0;i<VBR_MEMORY_SIZE;i++)
95      vbr->last_log_energy[i] = log(MIN_ENERGY);
96}
97
98
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  -Attacks (positive energy derivative) should be coded with more bits

  -Stationary voiced segments should receive more bits

  -Segments with (very) low absolute energy should receive fewer bits (maybe
  only shaped noise?)

  -DTX for near-zero energy?

  -Stationary fricative segments should have fewer bits

  -Temporal masking: when energy slope is decreasing, decrease the bit-rate

  -Decrease bit-rate for males (low pitch)?

  -(wideband only) fewer bits in the high-band when signal is very
  non-stationary (harder to notice high-frequency noise)???

*/
123
124float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
125{
126   int i;
127   float ener=0, ener1=0, ener2=0;
128   float qual=7;
129   int va;
130   float log_energy;
131   float non_st=0;
132   float voicing;
133   float pow_ener;
134
135   for (i=0;i<len>>1;i++)
136      ener1 += ((float)sig[i])*sig[i];
137
138   for (i=len>>1;i<len;i++)
139      ener2 += ((float)sig[i])*sig[i];
140   ener=ener1+ener2;
141
142   log_energy = log(ener+MIN_ENERGY);
143   for (i=0;i<VBR_MEMORY_SIZE;i++)
144      non_st += sqr(log_energy-vbr->last_log_energy[i]);
145   non_st =  non_st/(30*VBR_MEMORY_SIZE);
146   if (non_st>1)
147      non_st=1;
148
149   voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
150   vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
151   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
152   pow_ener = pow(ener,NOISE_POW);
153   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
154      vbr->noise_accum = .05*pow_ener;
155
156   if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
157       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
158       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
159       || (voicing<0 && non_st < .05))
160   {
161      float tmp;
162      va = 0;
163      vbr->consec_noise++;
164      if (pow_ener > 3*vbr->noise_level)
165         tmp = 3*vbr->noise_level;
166      else
167         tmp = pow_ener;
168      if (vbr->consec_noise>=4)
169      {
170         vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
171         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
172      }
173   } else {
174      va = 1;
175      vbr->consec_noise=0;
176   }
177
178   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
179   {
180      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
181      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
182   }
183
184   /* Checking for very low absolute energy */
185   if (ener < 30000)
186   {
187      qual -= .7;
188      if (ener < 10000)
189         qual-=.7;
190      if (ener < 3000)
191         qual-=.7;
192   } else {
193      float short_diff, long_diff;
194      short_diff = log((ener+1)/(1+vbr->last_energy));
195      long_diff = log((ener+1)/(1+vbr->average_energy));
196      /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/
197
198      if (long_diff<-5)
199         long_diff=-5;
200      if (long_diff>2)
201         long_diff=2;
202
203      if (long_diff>0)
204         qual += .6*long_diff;
205      if (long_diff<0)
206         qual += .5*long_diff;
207      if (short_diff>0)
208      {
209         if (short_diff>5)
210            short_diff=5;
211         qual += .5*short_diff;
212      }
213      /* Checking for energy increases */
214      if (ener2 > 1.6*ener1)
215         qual += .5;
216   }
217   vbr->last_energy = ener;
218   vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
219   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));
220
221   if (qual < vbr->last_quality)
222      qual = .5*qual + .5*vbr->last_quality;
223   if (qual<4)
224      qual=4;
225   if (qual>10)
226      qual=10;
227
228   /*
229   if (vbr->consec_noise>=2)
230      qual-=1.3;
231   if (vbr->consec_noise>=5)
232      qual-=1.3;
233   if (vbr->consec_noise>=12)
234      qual-=1.3;
235   */
236   if (vbr->consec_noise>=3)
237      qual=4;
238
239   if (vbr->consec_noise)
240      qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
241   if (qual<0)
242      qual=0;
243
244   if (ener<60000)
245   {
246      if (vbr->consec_noise>2)
247         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
248      if (ener<10000&&vbr->consec_noise>2)
249         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
250      if (qual<0)
251         qual=0;
252      qual += .3*log(.0001+ener/60000.0);
253   }
254   if (qual<-1)
255      qual=-1;
256
257   /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
258
259   vbr->last_pitch_coef = pitch_coef;
260   vbr->last_quality = qual;
261
262   for (i=VBR_MEMORY_SIZE-1;i>0;i--)
263      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
264   vbr->last_log_energy[0] = log_energy;
265
266   /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/
267
268   return qual;
269}
270
271void vbr_destroy(VBRState *vbr)
272{
273}
274
275#endif /* #ifndef DISABLE_VBR */
276