1/*---------------------------------------------------------------------------*
2 *  spec_anl.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21
22#include <stdlib.h>
23#ifndef _RTT
24#include <stdio.h>
25#endif
26#include <string.h>
27#include <math.h>
28#include <limits.h>
29#include <assert.h>
30
31#include "hmm_desc.h"
32#include "front.h"
33#include "pendian.h"
34#include "portable.h"
35#include "LCHAR.h"
36
37#include "../clib/memmove.h"
38
39#define DEBUG           0
40
41#include "sh_down.h"
42
43static int sort_ints_unique(int *list, int *num);
44//static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj);
45
46void peakpick(front_freq *freqobj, fftdata *density, int num_freq);
47void magsq(fftdata *x, fftdata *y, fftdata *z, int ns);
48
49void preemph(fftdata *data, int window_len, samdata *wav_data,
50             int num_samples, coefdata pre_mel,
51             bigdata *last_sample);
52void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo);
53
54
55
56void filterbank_emulation(front_channel * channel, front_wave *waveobj,
57                          front_freq *freqobj, front_cep *cepobj, samdata *income,
58                          samdata *outgo, int num_samples)
59{
60  /*  Part II. Mel cepstrum coefficients
61  **
62  **  Maintain parameter queue */
63  MEMMOVE(channel->cep + (channel->mel_dim + 1), channel->cep,
64          (Q2 - 1) *(channel->mel_dim + 1), sizeof(cepdata));
65  channel->shift = 0;
66
67  /*  2.01 Pre-emphasize waveform
68  Only the new samples are preemphasized.  To carry on from the previous call,
69  the last sample value is stored in lastx.
70  */
71  preemph(channel->prebuff, freqobj->window_length, income, num_samples,
72          waveobj->pre_mel, &channel->lastx);
73
74#if DEBUG
75  log_report("preemphasized data\n");
76  write_scaled_frames(freqobj->window_length, 1, channel->prebuff, D_FIXED, (float) 1 / (0x01 << WAVE_SHIFT));
77#endif
78  /******************************************************************************
79  **  The "NEW" fft performs shifting operations in fixed point, to maximise
80  **  precision.
81  **
82  *******************************************************************************/
83  channel->shift += place_sample_data(&freqobj->fft, channel->prebuff,
84                                      freqobj->ham, freqobj->window_length);
85#if DEBUG
86  log_report("windowed data\n");
87  if (channel->shift >= 0)
88  {
89    write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift));
90    write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)(0x01 << channel->shift));
91  }
92  else
93  {
94    write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift));
95    write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)1 / (0x01 << -channel->shift));
96  }
97#endif
98  channel->shift *= 2;
99  channel->shift += fft_perform_and_magsq(&freqobj->fft);
100
101#if DEBUG
102  log_report("After magnitude squared (%d)\n", channel->frame_count);
103  if (channel->shift >= 0)
104    write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << (channel->shift)));
105  else
106    write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << (- channel->shift)));
107#endif
108
109#if DEBUG
110  log_report("After magnitude squared: ");
111  if (channel->shift >= 0)
112    write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 <<  channel->shift));
113  else
114    write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 <<  -channel->shift));
115#endif
116
117  if (freqobj->do_nonlinear_filter)
118    peakpick(freqobj, freqobj->fft.real, freqobj->fft.size + 1);
119
120#if DEBUG
121  log_report("After peakpick: ");
122  if (channel->shift >= 0)
123    write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift));
124  else
125    write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift));
126#endif
127
128  /*  2.23 Apply filterbank emulation */
129  channel->shift += RAMP_SHIFT;
130  filtbank(freqobj, freqobj->fft.real, channel->filterbank);
131#if DEBUG
132  log_report("After filterbanked: ");
133  if (channel->shift >= 0)
134    write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)(0x01 << channel->shift));
135  else
136    write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)1 / (0x01 << -channel->shift));
137#endif
138
139  return;
140}
141
142
143void preemph(fftdata *data, int window_len, samdata *wav_data,
144             int num_samples, coefdata pre_mel,
145             bigdata *last_sample)
146/*
147**  pre-emphasize on speech data, check for end of data */
148/*  SCALE: In this stage we're introducing a scale factor of 2 */
149{
150  int i;
151  bigdata temp;
152
153  ASSERT(data);
154  ASSERT(last_sample);
155  ASSERT(wav_data);
156  ASSERT(num_samples >= 0);
157  if (num_samples > window_len)
158    num_samples = window_len;
159
160  if (num_samples < window_len)
161    MEMMOVE(data, data + num_samples, (window_len - num_samples),
162            sizeof(fftdata));
163  data += window_len - num_samples;
164
165  /*  If no preemphasis to do
166  */
167  if (pre_mel == 0)
168  { /* dont't shift */
169    for (i = 0; i < num_samples; i++)
170      data[i] = (fftdata) wav_data[i];
171    return;
172  }
173
174  /*  Otherwise do the preemphasis
175  */
176  for (i = 0; i < num_samples; i++)
177  {
178    temp = SHIFT_UP((bigdata)wav_data[i], COEFDATA_SHIFT);
179    data[i] = (fftdata)(SHIFT_DOWN(temp - (*last_sample), COEFDATA_SHIFT));
180    *last_sample = (bigdata)pre_mel * wav_data[i];
181
182  }
183  return;
184}
185
186void magsq(fftdata *x, fftdata *y, fftdata *z, int ns)
187/*
188**  magnitude squared, tailored for TI FFT routines
189**  The dynamic range should fit 32 - RAMP_SHIFT */
190{
191  int i;
192
193  ASSERT((float)x[0] *(float)x[0] < LONG_MAX);
194  ASSERT((float)x[0] *(float)x[0] > LONG_MIN);
195  z[0] = (fftdata)((bigdata)x[0] * (bigdata)x[0]);
196  for (i = 1; i < ns; i++)
197  {
198    ASSERT(((fftdata)x[i] *(fftdata)x[i]) >= 0);
199    ASSERT(((fftdata)y[i] *(fftdata)y[i]) >= 0);
200    ASSERT((float)x[i] *(float)x[i] < LONG_MAX);
201    ASSERT((float)x[i] *(float)x[i] > LONG_MIN);
202    ASSERT((float)y[i] *(float)y[i] < LONG_MAX);
203    ASSERT((float)y[i] *(float)y[i] > LONG_MIN);
204    /*    z[i]= (fftdata) SHIFT_DOWN ((bigdata)x[i] * (bigdata)x[i] + (bigdata)y[i] * (bigdata)y[i], RAMP_SHIFT);
205    */
206    z[i] = (fftdata)(((bigdata)x[i] * (bigdata)x[i])
207                     + ((bigdata)y[i] * (bigdata)y[i]));
208    if (z[i] <= 0)
209      z[i] = (fftdata) 1;
210  }
211  return;
212}
213
214void peakpick(front_freq *freqobj, fftdata *density, int num_freq)
215{
216  int i;
217  fftdata peak;
218  fftdata bdecay;
219  fftdata fdecay;
220  int first;
221  int last;
222
223  ASSERT(freqobj);
224  /* Fixed pt requires scale up of COEFDATA_SHIFT on these pars (coefdata) */
225  bdecay = freqobj->peakpickdown;
226  fdecay = freqobj->peakpickup;
227
228  if ((bdecay <= (fftdata) 0.0) && (fdecay <= (fftdata) 0.0))
229    return;
230
231  first = freqobj->cut_off_below;
232  last  = freqobj->cut_off_above;
233  /* this filters from cut_off_below to       */
234  /* cut_off_above inclusive          */
235
236  if (last >= num_freq)
237    last = num_freq - 1;
238  /* as most routines seem to check both      */
239  /* limits                           */
240
241  if (bdecay > 0.0)
242  {
243    ASSERT(density[last] >= 0);
244    peak = density[last];
245    for (i = last - 1; i >= first; i--)
246    {
247      peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)bdecay);
248      ASSERT(peak >= 0);
249      if (density[i] > peak)
250        peak = density[i];
251      else
252        density[i] = peak;
253    }
254  }
255  if (fdecay > 0.0)
256  {
257    peak = density[first];
258    for (i = first + 1; i <= last; i++)
259    {
260      peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)fdecay);
261      if (density[i] > peak)
262        peak = density[i];
263      else
264        density[i] = peak;
265    }
266  }
267  return;
268}
269
270void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo)
271/*
272**  pwr spect -> filter bank output (linear) */
273{
274  int i, j, k;
275  bigdata t, sum, mom, nxt;
276
277  /*  Scale down before starting mel-filterbank operations
278  */
279  for (i = 0; i < freqobj->cut_off_above; i++)
280    density[i] = SHIFT_DOWN(density[i], RAMP_SHIFT);
281
282  j = MAX(freqobj->fcmid[0], freqobj->cut_off_below);
283  nxt = 0;
284  for (; j < freqobj->fcmid[1]; j++)
285  {
286    ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) < LONG_MAX);
287    ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) > -LONG_MAX);
288    nxt += (bigdata) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT);
289  }
290  for (i = 0, k = 2; i < freqobj->nf; i++, k++)
291  {
292    sum = mom = 0;
293    for (; j < freqobj->fcmid[k]; j++)
294    {
295      /* TODO: Tidy up this fixed pt shifting. BP */
296
297      ASSERT((float) freqobj->framp[j] *(float) density[j] < LONG_MAX);
298      ASSERT((float) freqobj->framp[j] *(float) density[j] > LONG_MIN);
299      ASSERT((float) sum + (float)density[j] < LONG_MAX);
300      ASSERT((float) sum + (float)density[j] > LONG_MIN);
301      sum += (bigdata) density[j];
302      ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] < LONG_MAX);
303      ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] > LONG_MIN);
304
305      mom += (bigdata)(long) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT);
306    }
307
308    ASSERT(((float)nxt + (float)sum - (float)mom) < LONG_MAX);
309    ASSERT(((float)nxt + (float)sum - (float)mom) > LONG_MIN);
310
311    /* TODO: refine this expression. Shift down fcscl in advance.  */
312    t = (bigdata)((SHIFT_UP(nxt + sum - mom, HALF_RAMP_SHIFT)
313                   + SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT + 1))
314                  / SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT));
315    /* TODO: cleanup and also check for division by zero */
316    nxt = mom;
317    fbo[i] = (cepdata) t;
318  }
319  return;
320}
321
322int create_spectrum_filter(front_freq *freqobj, int *freq, int *spread)
323{
324  int ii, jj, freq_step;
325  int lo, hi;
326  ASSERT(freqobj);
327  ASSERT(freqobj->spectrum_filter_num == 0);
328  ASSERT(freqobj->samplerate > 0);
329  /* Convert to FFT taps. Mark adjacent taps as well as taps within spread */
330  freq_step = (freqobj->samplerate << 12) / (2 * freqobj->fft.size);
331  freqobj->spectrum_filter = (int *) CALLOC_CLR(freqobj->fft.size + 1, sizeof(int), "cfront.spectrum_filter");
332  freqobj->spectrum_filter_num = 0;
333  for (ii = 0 ; ii < MAX_FILTER_NUM; ii++)
334  {
335    if (freq[ii] == 0)
336      continue;
337    lo = (((freq[ii] - spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate;
338    hi = (((freq[ii] + spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate;
339
340
341    for (jj = lo; jj <= hi;jj++)
342    {
343      if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size)
344        SERVICE_ERROR(MAX_FILTER_POINTS_EXCEEDED);
345      freqobj->spectrum_filter[freqobj->spectrum_filter_num++] = jj;
346    }
347    /* jj=0;
348     while (((jj+1)*freq_step)>>12 <= freq[ii]-spread[ii])
349         jj++;
350     while (((jj-1)*freq_step>>12) < freq[ii]+spread[ii]){
351         if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size)
352      SERVICE_ERROR (MAX_FILTER_POINTS_EXCEEDED);
353         freqobj->spectrum_filter[freqobj->spectrum_filter_num++]= jj;
354         jj++;
355     }
356    */
357  }
358  sort_ints_unique(freqobj->spectrum_filter, &freqobj->spectrum_filter_num);
359  return (freqobj->spectrum_filter_num);
360}
361
362void clear_spectrum_filter(front_freq *freqobj)
363{
364  ASSERT(freqobj->spectrum_filter);
365  if (freqobj->spectrum_filter)
366    FREE((char *) freqobj->spectrum_filter);
367  freqobj->spectrum_filter = NULL;
368  freqobj->spectrum_filter_num = 0;
369  return;
370}
371
372static int sort_ints_unique(int *list, int *num)
373{
374  /*  Sort a list of ints and make unique */
375  int ii, jj, temp;
376  for (ii = 1; ii < *num; ii++)
377  {
378    for (jj = 0; jj < ii; jj++)
379    {
380      temp = list[ii];
381      if (temp < list[jj])
382      {
383        MEMMOVE(&list[jj+1], &list[jj], (ii - jj), sizeof(int));
384        list[jj] = temp;
385        break;
386      }
387      if (temp == list[jj])
388      {
389        MEMMOVE(&list[ii], &list[ii+1], (*num - ii), sizeof(int));
390
391
392
393        (*num)--;
394      }
395    }
396  }
397  return *num;
398}
399
400//static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj)
401//{
402//  for (int i = 0; i < freqobj->spectrum_filter_num; ++i)
403//  {
404//    ASSERT(freqobj->spectrum_filter[i] < num);
405//    data[freqobj->spectrum_filter[i]] = 0;
406//  }
407//}
408
409/* --------------------------------------------------
410 freq_warp will do pure linear warping if the warp
411 scale > 1.0. Otherwise it will do piecewise warp
412 which means warping the second part, from xstart
413 to the bandwidth with another scale which is
414 determined by b and c in the formulation.
415 In general, 0.7 < wscale < 1.4, and xstart <= 1
416 08/15/01, Puming Zhan
417 --------------------------------------------------- */
418void freq_warp(front_freq *freqobj, fftdata *inbuf, int ns)
419{
420  int i;
421  int nsE;
422  float x1, y1, b, c, wscale;
423  fftdata *tmpbuf;
424
425  ASSERT(freqobj && inbuf);
426
427  ASSERT(freqobj->warp_scale != 0);
428
429  wscale = freqobj->warp_scale;
430  x1     = freqobj->piecewise_start;
431  tmpbuf = (fftdata *) CALLOC(ns, sizeof(fftdata), "cfront.tmpbuf");
432
433  if (wscale < MIN_WARP_SCALE || wscale > MAX_WARP_SCALE)
434  {
435    SERVICE_ERROR(WARP_SCALE);
436  }
437  if (x1 > 1.0 || x1 < 0.5)
438  {
439    SERVICE_ERROR(PIECEWISE_START);
440  }
441
442  y1 = x1 < wscale ? (float)x1 / wscale : (float)1.0;
443
444  b = y1 < 1.0 ? (float)((1.0 - x1) / (1.0 - y1)) : (float)0.0;
445
446  c = (float)((1.0 - b) * (ns - 1));
447
448  nsE = (int)(y1 * (ns - 1));
449
450  for (i = 0; i < ns; i++)
451  {
452    float x = i > nsE ? b * i + c : wscale * i;
453    int   u = (int)ceil((double)x);
454    int   l = (int)floor((double)x);
455    float w1 = x - l;
456    float w2 = 1 - w1;
457
458    if (u < ns)
459    {
460      tmpbuf[i] = (int)(w1 * inbuf[u] + w2 * inbuf[l]);
461    }
462    else
463    {
464      tmpbuf[i] = inbuf[ns-1];
465    }
466  }
467
468  /* need to copy the warped fft into inbuf    */
469  /* because the following function filtbank() */
470  /* will take inbuf as input                  */
471  /* considering that this function will be    */
472  /* for every frame, it may not be a good idea*/
473  /* to do malloc here                         */
474
475  for (i = 0; i < ns; i++)
476    inbuf[i] = tmpbuf[i];
477
478  FREE((char *) tmpbuf);
479}
480