1/*---------------------------------------------------------------------------* 2 * pre_desc.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22#ifndef _h_pre_desc_ 23#define _h_pre_desc_ 24 25#ifdef SET_RCSID 26static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $"; 27#endif 28 29 30#include "all_defs.h" 31#include "hmm_type.h" 32#include "specnorm.h" 33#ifndef _RTT 34#include "duk_io.h" 35#endif 36 37#define DO_SUBTRACTED_SEGMENTATION 0 38 39#ifndef NONE 40#define NONE 0 41#endif 42#define SCALE 1 /* Scaling the channels */ 43#define LIN_TRAN 2 /* Linear Transformation */ 44#define VFR 4 /* Variable frame rate */ 45#define USE_MULTAB 8 /* Set up multable distance calculations */ 46 47/** 48 * @todo document 49 */ 50typedef struct 51{ /* mul-table data types */ 52 unsigned short sigma; 53 int num; 54 short *pdf; 55} 56mul_table; 57 58/** 59 * @todo document 60 */ 61typedef struct 62{ 63 unsigned short num_dev8_index; 64 unsigned char *dev8_index; 65 unsigned short *wt_index; 66 short *gauss_dist_table; 67 short **dist_ptr; 68 prdata multable_factor; /* euclidean to multable */ 69 prdata multable_factor_gaussian; /* euclidean to multable */ 70 prdata grand_mod_cov; /* grand covariance modulus */ 71 prdata grand_mod_cov_gaussian; /* grand covariance modulus */ 72} 73mul_table_info; 74 75/** 76 * @todo document 77 */ 78typedef struct 79{ 80 const prdata *table; 81 prdata add_log_limit; 82 prdata scale; /* X - scale to log function */ 83 prdata inv_scale; 84 float logscale; /* Y - scale to log function */ 85} 86logadd_table_info; 87 88/** 89 * @todo document 90 */ 91typedef struct 92{ 93 unsigned long num; 94 accdata **between; 95 accdata *bmean; 96 accdata **within; 97 accdata *wmean; 98} 99transform_info; 100 101/** 102 * @todo document 103 */ 104typedef struct 105{ /* Segmentation parameters */ 106 int rel_low; 107 int rel_high; 108 int gap_period; 109 int click_period; 110 int breath_period; 111 int extend_annotation; 112 int param; 113 int min_initial_quiet_frames; /* num silence frames needed before input */ 114 int min_annotation_frames; /* minimum overall length */ 115 int max_annotation_frames; /* maximum overall length */ 116 int delete_leading_segments; /* num segments to delete. 0=no action */ 117 int leading_segment_accept_if_not_found; /* Do not reject segmentation if not found */ 118 int leading_segment_min_frames; /* remove unless shorter */ 119 int leading_segment_max_frames; /* remove unless exceeded */ 120 int leading_segment_min_silence_gap_frames;/* remove if good silence gap to next segment */ 121 int beep_size; /*X201 beep filter */ 122 int beep_threshold; /*X201 beep filter */ 123 int min_segment_rel_c0; /* Any segment gets deleted whose peak c0 is < max - min_segment_rel_c0 */ 124 125#if DO_SUBTRACTED_SEGMENTATION 126 int snr_holdoff; /* Ignore first n frames when estimating speech level for SNR measure */ 127 int min_acceptable_snr; /* for an acceptable segmentation */ 128#endif 129} 130endpoint_info; 131 132 133/** 134 * @todo document 135 */ 136typedef struct 137{ /* processed speech data/front end output */ 138 int ref_count; /* reference counts */ 139 /* Pattern vector section */ 140 int dim; /* dimension of frame vector */ 141 int use_dim; /* dimension used for recognition */ 142 int whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */ 143 int use_from; /* first channel used for recognition */ 144 featdata *last_frame; /* last frame processed in frame buffer */ 145 imeldata *seq; /* current valid frame */ 146 imeldata *seq_unnorm; /* current valid frame, for whole-word models */ 147 prdata seq_sq_sum; /* sum of the squared of frames */ 148 prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */ 149 prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword */ 150 int voicing_status; /* voicing code */ 151 int post_proc; /* post processing functions */ 152 imeldata *offset; /* offset vector with transformation */ 153 imeldata **matrix; /* linear transformation matrix */ 154 int imel_shift; /* Imelda scale factor (in shifts) */ 155 covdata **imelda; /* linear transformation matrix, PMC or RN */ 156 imeldata **invmat; /* inverse transformation matrix */ 157 int inv_shift; /* inverse Imelda scale factor (in shifts) */ 158 covdata **inverse; /* inverse linear transformation matrix, PMC or RN */ 159#if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */ 160 int partial_distance_calc_dim; /* number of params to calc distance over, before approximating if beyond threshold */ 161 scodata partial_distance_threshold; 162 prdata partial_distance_calc_threshold; 163 prdata partial_distance_offset; 164 prdata global_distance_over_n_params; 165 int global_model_means[MAX_DIMEN]; 166 prdata partial_mean_sq_sum; 167 prdata partial_seq_sq_sum; 168 prdata partial_seq_unnorm_sq_sum; 169#endif 170 imeldata *chan_offset; 171 /* Channel Normalization etc */ 172 173 /* Tables */ 174 prdata exp_wt[MAX_WTS]; /* weights exp lookup table */ 175 mul_table_info mul; /* Mul-table */ 176 logadd_table_info add; /* logadd-table */ 177 /* ENC */ 178 booldata is_setup_for_noise; 179 booldata do_whole_enc; /* to enable ENC */ 180 booldata do_sub_enc; /* to enable ENC */ 181 booldata enc_count; 182 booldata ambient_valid; /* ambient estimates valid */ 183 imeldata **pmc_fixmat; /* ENC matrix */ 184 imeldata **pmc_fixinv; /* inverse ENC matrix */ 185 covdata **pmc_matrix; /* ENC matrix in float */ 186 covdata **pmc_inverse; /* inverse ENC matrix in float */ 187 int pmc_matshift; /* scaling */ 188 int pmc_invshift; /* scaling */ 189 imeldata *ambient_mean; /* ambient mean vector */ 190 imeldata *ambient_prof; /* ambient estimates, pseudo space */ 191 imeldata *ambient_prof_unnorm; /* ambient estimates, unnormalised */ 192 logadd_table_info fbadd; /* logadd-table for ENC */ 193#if DO_SUBTRACTED_SEGMENTATION 194 int mel_dim; 195 covdata **spec_inverse; 196 imeldata **spec_fixinv; 197 int spec_invshift; 198 int *cep_offset; 199#endif 200 /* Parameters */ 201 prdata mix_score_scale; /* Mixture score scaling constant */ 202 prdata uni_score_scale; /* Unimodal score scaling constant */ 203 prdata uni_score_offset; /* Unimodal score offset constant */ 204 prdata imelda_scale; /* Imelda grand variance */ 205 /* Endpoint data */ 206 endpoint_info end; 207 208} 209preprocessed; 210 211/** 212 * @todo document 213 */ 214typedef struct 215{ 216 preprocessed *prep; /* The preprocessed data structure */ 217 /* The following stuff cannot be cloned */ 218 booldata do_imelda; /* Alignment based accumulation */ 219 transform_info imelda_acc; 220} 221pattern_info; 222 223#endif /* _h_pre_desc_ */ 224