1/*---------------------------------------------------------------------------*
2 *  pre_desc.h  *
3 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21
22#ifndef _h_pre_desc_
23#define _h_pre_desc_
24
/* RCS revision stamp for this header; only emitted when SET_RCSID is defined. */
#ifdef SET_RCSID
static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $";
#endif
28
29
30#include "all_defs.h"
31#include "hmm_type.h"
32#include "specnorm.h"
33#ifndef _RTT
34#include "duk_io.h"
35#endif
36
/*
 * Compile-time switch.  When non-zero, the members guarded by
 * "#if DO_SUBTRACTED_SEGMENTATION" in endpoint_info and preprocessed are
 * compiled in.
 */
#define DO_SUBTRACTED_SEGMENTATION  0

/*
 * Processing option flags.  The non-zero values are distinct powers of two,
 * so they can be OR-ed together into a single integer.
 * NOTE(review): presumably these are the values held in
 * preprocessed.post_proc -- confirm against the callers.
 */
#ifndef NONE
#define NONE        0 /* zero: no flag bit set */
#endif
#define SCALE       1 /* Scaling the channels */
#define LIN_TRAN    2 /* Linear Transformation */
#define VFR         4 /* Variable frame rate */
#define USE_MULTAB  8 /* Set up multable distance calculations */
46
/**
 * A single mul-table record.
 *
 * NOTE(review): this header does not say what the members mean.  Presumably
 * @c pdf points to @c num short values associated with @c sigma -- confirm
 * against the code that builds the table.
 */
typedef struct
{  /* mul-table data types */
  unsigned short sigma;
  int   num;
  short *pdf;
}
mul_table;
57
58/**
59 * @todo document
60 */
61typedef struct
62{
63  unsigned short num_dev8_index;
64  unsigned char  *dev8_index;
65  unsigned short *wt_index;
66  short    *gauss_dist_table;
67  short    **dist_ptr;
68  prdata    multable_factor; /* euclidean to multable */
69  prdata    multable_factor_gaussian; /* euclidean to multable */
70  prdata    grand_mod_cov; /* grand covariance modulus */
71  prdata    grand_mod_cov_gaussian; /* grand covariance modulus */
72}
73mul_table_info;
74
75/**
76 * @todo document
77 */
78typedef struct
79{
80  const prdata *table;
81  prdata add_log_limit;
82  prdata scale;   /* X - scale to log function */
83  prdata inv_scale;
84  float logscale;  /* Y - scale to log function */
85}
86logadd_table_info;
87
88/**
89 * @todo document
90 */
91typedef struct
92{
93  unsigned long num;
94  accdata **between;
95  accdata *bmean;
96  accdata **within;
97  accdata *wmean;
98}
99transform_info;
100
/**
 * Segmentation (endpointing) parameters.  Embedded in preprocessed as the
 * @c end member.  Two extra members are compiled in only when
 * DO_SUBTRACTED_SEGMENTATION is non-zero.
 *
 * NOTE(review): units of the undocumented members (rel_low .. param) are not
 * stated in this header; the "_frames" members are counted in frames.
 */
typedef struct
{   /* Segmentation parameters */
  int  rel_low;
  int  rel_high;
  int  gap_period;
  int  click_period;
  int  breath_period;
  int  extend_annotation;
  int  param;
  int  min_initial_quiet_frames;    /* num silence frames needed before input */
  int  min_annotation_frames;       /* minimum overall length */
  int  max_annotation_frames;       /* maximum overall length */
  int  delete_leading_segments;     /* num segments to delete. 0=no action */
  int  leading_segment_accept_if_not_found; /* Do not reject segmentation if not found */
  int  leading_segment_min_frames;  /* remove unless shorter */
  int  leading_segment_max_frames;  /* remove unless exceeded */
  int  leading_segment_min_silence_gap_frames; /* remove if good silence gap to next segment */
  int  beep_size;       /* X201 beep filter */
  int  beep_threshold;  /* X201 beep filter */
  int  min_segment_rel_c0; /* Any segment gets deleted whose peak c0 is < max - min_segment_rel_c0 */

#if DO_SUBTRACTED_SEGMENTATION
  int  snr_holdoff;        /* Ignore first n frames when estimating speech level for SNR measure */
  int  min_acceptable_snr; /* for an acceptable segmentation */
#endif
}
endpoint_info;
131
132
133/**
134 * @todo document
135 */
136typedef struct
137{  /* processed speech data/front end output */
138  int  ref_count; /* reference counts */
139  /* Pattern vector section */
140  int  dim;  /* dimension of frame vector */
141  int  use_dim; /* dimension used for recognition */
142  int  whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */
143  int  use_from; /* first channel used for recognition */
144  featdata *last_frame; /* last frame processed in frame buffer */
145  imeldata *seq;  /* current valid frame */
146  imeldata *seq_unnorm; /* current valid frame, for whole-word models */
147  prdata seq_sq_sum; /* sum of the squared of frames */
148  prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */
149  prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword */
150  int  voicing_status; /* voicing code */
151  int  post_proc; /* post processing functions */
152  imeldata *offset; /* offset vector with transformation */
153  imeldata **matrix; /* linear transformation matrix */
154  int  imel_shift; /* Imelda scale factor (in shifts) */
155  covdata **imelda; /* linear transformation matrix, PMC or RN */
156  imeldata **invmat; /* inverse transformation matrix */
157  int  inv_shift; /* inverse Imelda scale factor (in shifts) */
158  covdata **inverse; /* inverse linear transformation matrix, PMC or RN */
159#if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */
160  int  partial_distance_calc_dim;  /* number of params to calc distance over, before approximating if beyond threshold */
161  scodata partial_distance_threshold;
162  prdata partial_distance_calc_threshold;
163  prdata partial_distance_offset;
164  prdata global_distance_over_n_params;
165  int  global_model_means[MAX_DIMEN];
166  prdata partial_mean_sq_sum;
167  prdata partial_seq_sq_sum;
168  prdata partial_seq_unnorm_sq_sum;
169#endif
170  imeldata *chan_offset;
171  /* Channel Normalization etc */
172
173  /* Tables */
174  prdata exp_wt[MAX_WTS]; /* weights exp lookup table */
175  mul_table_info mul;  /* Mul-table */
176  logadd_table_info add; /* logadd-table */
177  /* ENC */
178  booldata is_setup_for_noise;
179  booldata do_whole_enc; /* to enable ENC */
180  booldata do_sub_enc; /* to enable ENC */
181  booldata enc_count;
182  booldata ambient_valid; /* ambient estimates valid */
183  imeldata **pmc_fixmat; /* ENC matrix */
184  imeldata **pmc_fixinv; /* inverse ENC matrix */
185  covdata **pmc_matrix; /* ENC matrix in float */
186  covdata **pmc_inverse; /* inverse ENC matrix in float */
187  int  pmc_matshift; /* scaling */
188  int  pmc_invshift; /* scaling */
189  imeldata    *ambient_mean; /* ambient mean vector */
190  imeldata    *ambient_prof; /* ambient estimates, pseudo space */
191  imeldata    *ambient_prof_unnorm; /* ambient estimates, unnormalised */
192  logadd_table_info fbadd; /* logadd-table for ENC */
193#if DO_SUBTRACTED_SEGMENTATION
194  int  mel_dim;
195  covdata **spec_inverse;
196  imeldata **spec_fixinv;
197  int  spec_invshift;
198  int  *cep_offset;
199#endif
200  /* Parameters */
201  prdata mix_score_scale; /* Mixture score scaling constant */
202  prdata uni_score_scale; /* Unimodal score scaling constant */
203  prdata uni_score_offset; /* Unimodal score offset constant */
204  prdata imelda_scale;  /* Imelda grand variance */
205  /* Endpoint data */
206  endpoint_info end;
207
208}
209preprocessed;
210
211/**
212 * @todo document
213 */
214typedef struct
215{
216  preprocessed    *prep; /* The preprocessed data structure */
217  /* The following stuff cannot be cloned */
218  booldata do_imelda; /* Alignment based accumulation */
219  transform_info  imelda_acc;
220}
221pattern_info;
222
223#endif /* _h_pre_desc_ */
224