1/*---------------------------------------------------------------------------*
2 *  get_fram.c  *
3 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21#include <stdlib.h>
22#ifndef _RTT
23#include "pstdio.h"
24#endif
25#include <limits.h>
26#include <math.h>
27#include <string.h>
28#include "passert.h"
29
30#include "c42mul.h"
31#include "portable.h"
32
33#include "../clib/fpi_tgt.inl"
34
/* Set to non-zero to compile in the log_report() diagnostics that this file
 * guards with "#if DEBUG". */
#define DEBUG   0
/* Multiplier applied to imelda_scale when init_preprocessed() derives
 * mul.multable_factor and mul.grand_mod_cov. */
#define FUDGE_FACTOR 1.2f

/* Numerically sqrt(pi / 2); appears in the log() terms of the grand_mod_cov
 * constants computed by init_preprocessed(). */
const float root_pi_over_2 = (float) 1.2533141;

/* Revision-control identification string for this file. */
static const char get_fram[] = "$Id: get_fram.c,v 1.7.6.13 2007/10/15 18:06:24 dahan Exp $";

/* Forward declarations of the file-local helpers defined later in this file. */
static void create_cepstrum_offsets(preprocessed *prep);
static void destroy_cepstrum_offsets(preprocessed *prep);
static void apply_channel_offset(preprocessed *prep);
static int compare_cached_frame(preprocessed *prep, utterance_info *utt);
46
47void init_utterance(utterance_info *utt, int utt_type, int dimen,
48                    int buffer_size, int keep_frames, int num_chan, int do_voicing)
49/*
50**  To setup the utterance structure
51*/
52{
53  /*  Construct frame buffer  and voice buffer here
54  */
55  ASSERT(utt);
56  ASSERT(dimen > 0);
57  if (buffer_size < keep_frames)
58    SERVICE_ERROR(BAD_ARGUMENT);
59  utt->utt_type = utt_type;
60  utt->gen_utt.dim = dimen;
61  utt->gen_utt.frame = createFrameBuffer(buffer_size,
62                                         dimen, keep_frames, do_voicing);
63  utt->gen_utt.num_chan = num_chan;
64
65  setup_ambient_estimation(utt->gen_utt.backchan,
66                           utt->gen_utt.num_chan, 100);
67  return;
68}
69
70void set_voicing_durations(utterance_info *utt, int voice_duration,
71                           int quiet_duration, int unsure_duration,
72                           int start_windback)
73{
74  utt->gen_utt.voice_duration = voice_duration;
75  utt->gen_utt.quiet_duration = quiet_duration;
76  utt->gen_utt.unsure_duration = unsure_duration;
77  utt->gen_utt.start_windback = start_windback;
78  return;
79}
80
81void free_utterance(utterance_info *utt)
82/*
83**  To close data file pointers etc.
84*/
85{
86  /*  Destroy frame buffer
87  */
88  ASSERT(utt);
89
90  clear_ambient_estimation(utt->gen_utt.backchan, utt->gen_utt.dim);
91  if (utt->gen_utt.frame)
92  {
93    destroyFrameBuffer(utt->gen_utt.frame);
94    utt->gen_utt.frame = NULL;
95  }
96  return;
97}
98
void init_preprocessed(preprocessed *prep, int dimen, float imelda_scale)
/*
**  To setup the preprocessed structure
**
**  Allocates the seq, seq_unnorm and last_frame arrays (dimen elements each),
**  derives the distance-calculation constants in prep->add and prep->mul from
**  imelda_scale, and allocates the channel offset table.
**
**  prep          structure to initialise; must not be NULL
**  dimen         feature dimension; must be > 0
**  imelda_scale  scale used in the constants below; it appears as a divisor,
**                so it must be non-zero (not checked here)
**
**  Reads prep->uni_score_scale, prep->uni_score_offset, prep->whole_dim and
**  prep->use_dim, none of which is assigned in this function - the caller
**  must have set them beforehand.
*/
{

  ASSERT(prep);
  ASSERT(dimen > 0);
  prep->dim = dimen;
  /* NOTE(review): allocation results are not checked here - confirm that
     CALLOC cannot return NULL or handles failure itself */
  prep->seq = (imeldata *) CALLOC(prep->dim, sizeof(imeldata),
                                        "srec.prep->seq");
  prep->seq_unnorm = (imeldata *) CALLOC(prep->dim, sizeof(imeldata),
                     "srec.prep->seq_unnorm");
  prep->last_frame = (featdata *) CALLOC(prep->dim, sizeof(featdata),
                     "srec.prep->last_frame");

  /*  Setup constants for distance calculation
  **
  **  Each value is written as (prdata)(x + 0.5) - (prdata)0.5; presumably
  **  this rounds x to nearest when prdata is an integer type and leaves x
  **  unchanged when prdata is floating point - confirm against the prdata
  **  definition.
  */
  /* TODO: check numbers for non-zero */
  /* add.scale = 2 * imelda_scale^2 / MUL_SCALE */
  prep->add.scale = (prdata)((2 * imelda_scale * imelda_scale) / MUL_SCALE
                             + 0.5) - (prdata)0.5;
  /* add.inv_scale = 2^12 * MUL_SCALE / (2 * imelda_scale^2) */
  prep->add.inv_scale = (prdata)(((float)(0x01 << 12) * MUL_SCALE) /
                                 (2 * imelda_scale * imelda_scale) + 0.5) -
                        (prdata)0.5;
  prep->mul.multable_factor_gaussian = 1;
  /* multable_factor = MUL_SCALE * 2^EUCLID_SHIFT * uni_score_scale
                       / (2 * (imelda_scale * FUDGE_FACTOR)^2) / 128 */
  prep->mul.multable_factor = (prdata)(((MUL_SCALE * (0x01 << EUCLID_SHIFT)
                                         * prep->uni_score_scale)
                                        / (2 * (imelda_scale * imelda_scale
                                                * FUDGE_FACTOR * FUDGE_FACTOR))) / 128 + 0.5)
                              - (prdata)0.5;
  /* grand_mod_cov = MUL_SCALE * uni_score_scale * whole_dim
                     * log(imelda_scale * FUDGE_FACTOR / (SIGMA_BIAS * root_pi_over_2)) / 128
                     - uni_score_offset */
  prep->mul.grand_mod_cov = (prdata)((MUL_SCALE * prep->uni_score_scale *
                                      prep->whole_dim *
                                      log((imelda_scale * FUDGE_FACTOR) /
                                          (SIGMA_BIAS * root_pi_over_2))) / 128 + 0.5)
                            - (prdata)0.5 - prep->uni_score_offset;
  /* grand_mod_cov_gaussian = 2 * imelda_scale^2 * use_dim
                              * log(imelda_scale / (SIGMA_BIAS * root_pi_over_2)) */
  prep->mul.grand_mod_cov_gaussian = (prdata)(2 * imelda_scale * imelda_scale *
                                     prep->use_dim *
                                     log(imelda_scale /
                                         (SIGMA_BIAS * root_pi_over_2)) + 0.5)
                                     - (prdata)0.5;
#if DEBUG
  log_report("grand_mod_cov %.1f, grand_mod_cov_gaussian %.1f\n",
             (float)prep->mul.grand_mod_cov,
             (float)prep->mul.grand_mod_cov_gaussian);
  log_report("multable_factor %f, multable_factor_gaussian %f\n",
             (float)prep->mul.multable_factor,
             (float)prep->mul.multable_factor_gaussian);
#endif


  /* allocates prep->chan_offset with prep->dim entries */
  create_cepstrum_offsets(prep);
  return;
}
152
153void clear_preprocessed(preprocessed *prep)
154/*
155**  To setup the preprocessed structure
156*/
157{
158  ASSERT(prep);
159  destroy_cepstrum_offsets(prep);
160  prep->dim = 0;
161  FREE((char *)prep->last_frame);
162  FREE((char *)prep->seq);
163  FREE((char *)prep->seq_unnorm);
164  return;
165}
166
167int get_data_frame(preprocessed *prep, utterance_info *utt)
168/*
169**  To get a frame amount of data and perform preprocessing functions
170*/
171{
172  int status_code;
173
174  ASSERT(prep);
175  ASSERT(utt);
176  if (utt->gen_utt.channorm && !utt->gen_utt.channorm->adj_valid)
177    convert_adjustment_to_imelda(utt->gen_utt.channorm, prep);
178  if (utt->gen_utt.dim != prep->dim)
179    SERVICE_ERROR(UTTERANCE_DIMEN_MISMATCH);
180
181  if (prep->post_proc & VFR)
182  {
183    if ((status_code = get_utterance_frame(prep, utt)) <= 0)
184      return (status_code);
185
186    log_report("get_data_frame vfr not supported\n");
187    SERVICE_ERROR(FEATURE_NOT_SUPPORTED);
188  }
189  else
190  {
191    status_code = get_utterance_frame(prep, utt);
192    if (status_code == 0) return(status_code);
193    else if (status_code == -1) return(1);
194  }
195
196  if (prep->chan_offset)
197    apply_channel_offset(prep);
198
199  /*  Apply linear transformation if necessary
200  */
201  if (prep->post_proc & LIN_TRAN)
202    linear_transform_frame(prep, prep->seq, True);
203
204  memcpy(prep->seq_unnorm, prep->seq, prep->dim * sizeof(imeldata));
205  if (utt->gen_utt.channorm)
206    apply_channel_normalization_in_imelda(utt->gen_utt.channorm,
207                                          prep->seq, prep->seq_unnorm,
208                                          utt->gen_utt.channorm->dim);
209  return (1);
210}
211
212int get_utterance_frame(preprocessed *prep, utterance_info *utt)
213/*
214**  To get a frame amount of data
215**  Maintains a single data buffer and passes the pointers to frame of data.
216**  Post-increments after copying
217*/
218{
219  featdata  *frame_ptr;
220  int ii;
221
222  ASSERT(prep);
223  ASSERT(utt);
224
225  /*  Get the next data frame in
226  */
227  if (getFrameGap(utt->gen_utt.frame) > 0)
228  {
229    /*  is it a cloned object */
230    if (prep->ref_count > 1 && compare_cached_frame(prep, utt))
231      return (-1);
232
233    frame_ptr = currentRECframePtr(utt->gen_utt.frame);
234    if (frame_ptr == NULL)
235      return (0);
236    if (prep->ref_count > 1)
237    {
238      ASSERT(prep->last_frame);
239      memcpy(prep->last_frame, frame_ptr,
240             prep->dim* sizeof(featdata));
241    }
242    for (ii = 0; ii < utt->gen_utt.dim; ii++)
243      prep->seq[ii] = (imeldata)frame_ptr[ii];
244    /*  Apply fast-voice corrections if necessary */
245    if (utt->gen_utt.frame->haveVoiced)
246    {
247      utterance_detection_fixup(utt->gen_utt.frame,
248                                &utt->gen_utt.last_push, utt->gen_utt.voice_duration,
249                                utt->gen_utt.quiet_duration, utt->gen_utt.unsure_duration);
250      /*     if (isFrameBufferActive (utt->gen_utt.frame)
251        && getFrameGap (utt->gen_utt.frame) <= utt->gen_utt.quiet_duration)
252            SERVICE_ERROR (INTERNAL_ERROR); */
253      prep->voicing_status =
254        rec_frame_voicing_status(utt->gen_utt.frame);
255    }
256    return (1);
257  }
258  return (0);
259}
260
261
262int advance_utterance_frame(utterance_info *utt)
263/*
264**  To get a frame amount of data
265*/
266{
267  ASSERT(utt);
268  /*  if more samples are needed then read from file if the type matched
269  */
270  /*  Get the next data frame in
271  */
272  if (getFrameGap(utt->gen_utt.frame) > 0)
273  {
274    if (incRECframePtr(utt->gen_utt.frame) != False)
275      return (0);
276    return (1);
277  }
278  return (0);
279}
280
281int retreat_utterance_frame(utterance_info *utt)
282/*
283**  To get a frame amount of data
284*/
285{
286  ASSERT(utt);
287
288  if (getBlockGap(utt->gen_utt.frame) > 0)
289  {
290    if (decRECframePtr(utt->gen_utt.frame) != False)
291      return (0);
292    return (1);
293  }
294  return (0);
295}
296
297void prepare_data_frame(preprocessed *prep)
298{
299  int ii;
300  prdata sum_sq;
301
302  sum_sq = 0;
303
304  for (ii = 0; ii < prep->whole_dim; ii++)
305    sum_sq += (prdata) SQR((prdata)prep->seq[ii]);
306  prep->seq_sq_sum_whole = -sum_sq;
307
308  ASSERT(prep->whole_dim <= prep->use_dim);
309  for (ii = 0; ii < prep->use_dim; ii++)
310    sum_sq += (prdata) SQR((prdata)prep->seq[ii]);
311  prep->seq_sq_sum = -sum_sq;
312
313  sum_sq = 0;
314
315  for (ii = 0; ii < prep->whole_dim; ii++)
316    sum_sq += (prdata) SQR((prdata)prep->seq_unnorm[ii]);
317  prep->seq_unnorm_sq_sum_whole = -sum_sq;
318
319  return;
320}
321
322int utterance_started(utterance_info *utt)
323{
324  ASSERT(utt);
325  if (utt->gen_utt.frame->haveVoiced
326      && utt->gen_utt.frame->voicingDetected)
327    return (True);
328  else
329    return (False);
330}
331
332int utterance_ended(utterance_info *utt)
333{
334  ASSERT(utt);
335  return (utt->gen_utt.frame->utt_ended);
336}
337
338int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing)
339{
340  featdata framdata[MAX_DIMEN];
341  int      ii;
342
343  ASSERT(utt);
344  ASSERT(pUttFrame);
345
346  for (ii = 0; ii < utt->gen_utt.frame->uttDim; ii++)
347    framdata[ii] = (featdata) pUttFrame[ii];
348
349  if (pushSingleFEPframe(utt->gen_utt.frame, framdata, voicing) != False)
350    return (0);
351
352  return (1);
353}
354
355int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt)
356{
357  int      voicedata;
358  featdata *framdata;
359
360  ASSERT(oututt);
361  ASSERT(inutt);
362
363  if ((framdata = currentRECframePtr(inutt->gen_utt.frame)) == NULL)
364    return (0);
365
366  voicedata = getVoicingCode(inutt->gen_utt.frame, framdata);
367
368  if (pushSingleFEPframe(oututt->gen_utt.frame, framdata, voicedata) != False)
369    return (0);
370
371  return (1);
372}
373
374int copy_pattern_frame(utterance_info *oututt, preprocessed *prep)
375{
376  int      ii;
377  featdata frame_ptr[MAX_DIMEN];
378
379  ASSERT(oututt);
380  ASSERT(prep);
381  ASSERT(oututt->gen_utt.dim < MAX_DIMEN);
382  for (ii = 0; ii < oututt->gen_utt.dim; ii++)
383    frame_ptr[ii] = (featdata) RANGE(prep->seq[ii], 0, 255);
384  if (pushSingleFEPframe(oututt->gen_utt.frame, frame_ptr,
385                         prep->voicing_status)
386      != False) return(0);
387  return (1);
388}
389
390static void create_cepstrum_offsets(preprocessed *prep)
391{
392  ASSERT(prep);
393  prep->chan_offset = (imeldata *) CALLOC_CLR(prep->dim,
394                      sizeof(imeldata), "srec.chan_offset");
395  return;
396}
397
398void set_cepstrum_offset(preprocessed *prep, int index, int value)
399{
400  ASSERT(prep);
401  ASSERT(prep->chan_offset);
402  ASSERT(index >= 0 && index < prep->dim);
403  prep->chan_offset[index] = (imeldata) value;
404  return;
405}
406
407static void destroy_cepstrum_offsets(preprocessed *prep)
408{
409  ASSERT(prep);
410  FREE((char *)prep->chan_offset);
411  prep->chan_offset = 0;
412  return;
413}
414
415static void apply_channel_offset(preprocessed *prep)
416{
417  int ii;
418
419  for (ii = 0; ii < prep->dim; ii++)
420    prep->seq[ii] += prep->chan_offset[ii];
421  return;
422}
423
424static int compare_cached_frame(preprocessed *prep, utterance_info *utt)
425{
426  int      ii;
427  featdata *frame_ptr;
428
429  frame_ptr = currentRECframePtr(utt->gen_utt.frame);
430  if (frame_ptr == NULL)
431    return (False);
432  for (ii = 0; ii < utt->gen_utt.dim; ii++)
433    if (prep->last_frame[ii] != frame_ptr[ii])
434      return (False);
435  return (True);
436}
437
438void convert_adjustment_to_imelda(norm_info *norm, preprocessed *prep)
439{
440  int      ii;
441  imeldata fram[MAX_DIMEN];
442
443  ASSERT(prep);
444  ASSERT(norm);
445  for (ii = 0; ii < 12; ii++)      /* TODO: fix dimension properly, and sort out rouding/type */
446    fram[ii] = (imeldata) norm->adjust[ii]; /* TODO: review types */
447  for (; ii < prep->dim; ii++)
448    fram[ii] = 0;
449
450  linear_transform_frame(prep, fram, False);
451
452  for (ii = 0; ii < prep->dim; ii++)
453    norm->imelda_adjust[ii] = fram[ii];
454#if DEBUG
455  log_report("NORM AUX: ");
456  for (ii = 0; ii < norm->dim; ii++)
457    log_report("%d ", (int)norm->imelda_adjust[ii]);
458  log_report("\n");
459#endif
460  norm->adj_valid = True;
461  return;
462}
463