/* RecognizerImpl.c revision 8fc5a7f51e62cb4ae44a27bdf4176d04adc80ede */
1/*---------------------------------------------------------------------------* 2 * RecognizerImpl.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20/*#define SREC_MEASURE_LATENCY 1*/ 21 22#ifdef SREC_MEASURE_LATENCY 23#include <sys/time.h> 24 25struct timeval latency_start; 26#endif 27 28 29#include "ESR_Session.h" 30#include "ESR_SessionTypeImpl.h" 31#include "IntArrayList.h" 32#include "LCHAR.h" 33#include "passert.h" 34#include "plog.h" 35#include "pstdio.h" 36#include "pmemory.h" 37#include "ptimestamp.h" 38#include "SR_AcousticModelsImpl.h" 39#include "SR_AcousticStateImpl.h" 40#include "SR_GrammarImpl.h" 41#include "SR_SemprocDefinitions.h" 42#include "SR_SemanticResult.h" 43#include "SR_SemanticResultImpl.h" 44#include "SR_Recognizer.h" 45#include "SR_RecognizerImpl.h" 46#include "SR_RecognizerResultImpl.h" 47#include "SR_SemanticResultImpl.h" 48#include "SR_EventLog.h" 49#include "srec.h" 50 51#define MTAG NULL 52#define FILTER_NBEST_BY_SEM_RESULT 1 53#define AUDIO_CIRC_BUFFER_SIZE 20000 54#define SEMPROC_ACTIVE 1 55#define SAMPLE_SIZE (16 / CHAR_BIT) /* 16-bits / sample */ 56 57/* milliseconds per FRAME = 1/FRAMERATE * 1000 */ 58/* We multiple by 2 because we skip even frames */ 59#define MSEC_PER_FRAME (2000/FRAMERATE) 60#define MAX_ENTRY_LENGTH 512 61#define 
PREFIX_WORD "-pau-" 62#define PREFIX_WORD_LEN 5 63#define SUFFIX_WORD "-pau2-" 64#define SUFFIX_WORD_LEN 6 65 66#ifdef MEASURE_SAMPLE_TIMES 67#include <sys/time.h> 68#include <stdio.h> 69 70#define MAX_SAMPLES_TO_MEASURE 500 71 72static long sample_buffers_received = 0; 73static long total_samples_received = 0; 74static long samples_in_buffer [MAX_SAMPLES_TO_MEASURE]; 75static long seconds_buffer_received [MAX_SAMPLES_TO_MEASURE]; 76static long micro_seconds_buffer_received [MAX_SAMPLES_TO_MEASURE]; 77static struct timeval buffer_received_time; 78 79static void SR_Recognizer_Log_Samples_Received ( void ); 80 81static void SR_Recognizer_Log_Samples_Received ( void ) 82{ 83 FILE *log_file; 84 char file_name [256]; 85 char log_buffer [256]; 86 long loop_counter; 87 88 if ( sample_buffers_received > 0 ) 89 { 90 gettimeofday ( &buffer_received_time, NULL ); 91 sprintf ( file_name, "reco_recvd_%ld_%ld.txt", buffer_received_time.tv_sec, buffer_received_time.tv_usec ); 92 log_file = fopen ( file_name, "w" ); 93 94 if ( log_file != NULL ) 95 { 96 for ( loop_counter = 0; loop_counter < sample_buffers_received; loop_counter++ ) 97 { 98 sprintf ( log_buffer, "%ld %ld %ld %ld\n", loop_counter + 1, samples_in_buffer [loop_counter], 99 seconds_buffer_received [loop_counter], micro_seconds_buffer_received [loop_counter] ); 100 fwrite ( log_buffer, 1, strlen ( log_buffer ), log_file ); 101 } 102 fclose ( log_file ); 103 } 104 sample_buffers_received = 0; 105 } 106 } 107#endif 108 109 110static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl ); 111 112/** 113 * Initializes recognizer properties to default values. 114 * 115 * Replaces setup_recognition_parameters() 116 */ 117ESR_ReturnCode SR_RecognizerToSessionImpl() 118{ 119 ESR_ReturnCode rc; 120 121 /* Old comment: remember to keep "ca_rip.h" up to date with these parameters... 
*/ 122 123 /* CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_acoustic_models", 2)); */ 124 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Recognizer.partial_results", ESR_FALSE)); 125 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.NBest", 1)); 126 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.eou_threshold", 100)); 127 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_altword_tokens", 400)); 128 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_frames", 1000)); 129 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_arcs", 3000)); 130 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_nodes", 3000)); 131 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsmnode_tokens", 1000)); 132 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_hmm_tokens", 1000)); 133 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_model_states", 1000)); 134 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_searches", 2)); 135 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_word_tokens", 1000)); 136 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.non_terminal_timeout", 50)); 137 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.num_wordends_per_frame", 10)); 138 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.often", 10)); 139 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.optional_terminal_timeout", 30)); 140 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.reject", 500)); 141 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.terminal_timeout", 10)); 142 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.viterbi_prune_thresh", 5000)); 143 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.wordpen", 0)); 144 145 CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("SREC.Recognizer.utterance_timeout", 400)); 146 147 return ESR_SUCCESS; 148CLEANUP: 149 return rc; 150} 151 152/** 153 * Initializes frontend properties to default values. 
154 * 155 * Replaces load_up_parameter_list() 156 */ 157ESR_ReturnCode SR_RecognizerFrontendToSessionImpl() 158{ 159 IntArrayList* intList = NULL; 160 ESR_ReturnCode rc; 161 ESR_BOOL exists; 162 size_t i; 163 164 /* Old comment: Remember to keep "ca_pip.h" up to date with these parameters... */ 165 166 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.mel_dim", 12)); 167 CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("CREC.Frontend.samplerate", 8000)); 168 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.premel", 0.98f)); 169 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lowcut", 260)); /* Hz */ 170 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.highcut", 4000)); /* Hz */ 171 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.window_factor", 2.0)); /* times the frame size */ 172 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_skip_even_frames", ESR_FALSE)); /* 10/20 ms rate */ 173 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.offset", 0)); /* additional */ 174 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.ddmel", ESR_FALSE)); /* delta-delta mel pars */ 175 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.forgetfactor", 40)); 176 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.sv6_margin", 10)); 177 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rasta", ESR_FALSE)); 178 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rastac0", ESR_FALSE)); 179 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.spectral_subtraction", ESR_FALSE)); 180 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.spec_sub_dur", 0)); 181 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.spec_sub_scale", 1.0)); 182 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_dump", ESR_FALSE)); /* Output is filterbank (30 floats) */ 183 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_input", ESR_FALSE)); /* Input is filterbank (30 floats) in place of audio samples */ 184 CHKLOG(rc, 
ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_smooth_c0", ESR_TRUE)); 185 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.plp", ESR_FALSE)); /* Do PLP instead of MEL */ 186 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lpcorder", 12)); /* order of lpc analysis in plp processing */ 187 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.warp_scale", 1.0)); 188 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.piecewise_start", 1.0)); 189 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecayup", -1.0)); /* If +ve, decay factor on peakpicker (low to high) */ 190 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecaydown", -1.0)); /* If +ve, decay factor on peakpicker (high to low) */ 191 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.cuberoot", ESR_FALSE)); /* Use cube root instead of log */ 192 193 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_offset", &exists)); 194 if (!exists) 195 { 196 CHKLOG(rc, IntArrayListCreate(&intList)); 197 for (i = 0; i < 32; ++i) 198 CHKLOG(rc, IntArrayListAdd(intList, 0)); 199 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_offset", intList, TYPES_INTARRAYLIST)); 200 intList = NULL; 201 } 202 203 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_loop", &exists)); 204 if (!exists) 205 { 206 CHKLOG(rc, IntArrayListCreate(&intList)); 207 for (i = 0; i < 32; ++i) 208 CHKLOG(rc, IntArrayListAdd(intList, 1)); 209 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_loop", intList, TYPES_INTARRAYLIST)); 210 intList = NULL; 211 } 212 213 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melA", &exists)); 214 if (!exists) 215 { 216 CHKLOG(rc, IntArrayListCreate(&intList)); 217 CHKLOG(rc, IntArrayListAdd(intList, (int) 13.2911)); 218 CHKLOG(rc, IntArrayListAdd(intList, (int) 47.2229)); 219 CHKLOG(rc, IntArrayListAdd(intList, (int) 79.2485)); 220 CHKLOG(rc, IntArrayListAdd(intList, (int) 92.1967)); 221 CHKLOG(rc, IntArrayListAdd(intList, (int) 136.3855)); 222 CHKLOG(rc, IntArrayListAdd(intList, (int) 
152.2896)); 223 CHKLOG(rc, IntArrayListAdd(intList, (int) 183.3601)); 224 CHKLOG(rc, IntArrayListAdd(intList, (int) 197.4200)); 225 CHKLOG(rc, IntArrayListAdd(intList, (int) 217.8278)); 226 CHKLOG(rc, IntArrayListAdd(intList, (int) 225.6556)); 227 CHKLOG(rc, IntArrayListAdd(intList, (int) 263.3073)); 228 CHKLOG(rc, IntArrayListAdd(intList, (int) 277.193)); 229 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melA", intList, TYPES_INTARRAYLIST)); 230 intList = NULL; 231 } 232 233 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melB", &exists)); 234 if (!exists) 235 { 236 CHKLOG(rc, IntArrayListCreate(&intList)); 237 CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0847)); 238 CHKLOG(rc, IntArrayListAdd(intList, (int) 91.3289)); 239 CHKLOG(rc, IntArrayListAdd(intList, (int) 113.9995)); 240 CHKLOG(rc, IntArrayListAdd(intList, (int) 123.0336)); 241 CHKLOG(rc, IntArrayListAdd(intList, (int) 131.2704)); 242 CHKLOG(rc, IntArrayListAdd(intList, (int) 128.9942)); 243 CHKLOG(rc, IntArrayListAdd(intList, (int) 120.5267)); 244 CHKLOG(rc, IntArrayListAdd(intList, (int) 132.0079)); 245 CHKLOG(rc, IntArrayListAdd(intList, (int) 129.8076)); 246 CHKLOG(rc, IntArrayListAdd(intList, (int) 126.5029)); 247 CHKLOG(rc, IntArrayListAdd(intList, (int) 121.8519)); 248 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melB", intList, TYPES_INTARRAYLIST)); 249 intList = NULL; 250 } 251 252 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelA", &exists)); 253 if (!exists) 254 { 255 CHKLOG(rc, IntArrayListCreate(&intList)); 256 CHKLOG(rc, IntArrayListAdd(intList, (int) 91.6305)); 257 CHKLOG(rc, IntArrayListAdd(intList, (int) 358.3790)); 258 CHKLOG(rc, IntArrayListAdd(intList, (int) 527.5946)); 259 CHKLOG(rc, IntArrayListAdd(intList, (int) 536.3163)); 260 CHKLOG(rc, IntArrayListAdd(intList, (int) 731.2385)); 261 CHKLOG(rc, IntArrayListAdd(intList, (int) 757.8382)); 262 CHKLOG(rc, IntArrayListAdd(intList, (int) 939.4460)); 263 CHKLOG(rc, IntArrayListAdd(intList, (int) 1028.4136)); 264 CHKLOG(rc, 
IntArrayListAdd(intList, (int) 1071.3193)); 265 CHKLOG(rc, IntArrayListAdd(intList, (int) 1183.7922)); 266 CHKLOG(rc, IntArrayListAdd(intList, (int) 1303.1014)); 267 CHKLOG(rc, IntArrayListAdd(intList, (int) 1447.7766)); 268 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelA", intList, TYPES_INTARRAYLIST)); 269 intList = NULL; 270 } 271 272 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelB", &exists)); 273 if (!exists) 274 { 275 CHKLOG(rc, IntArrayListCreate(&intList)); 276 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4785)); 277 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3878)); 278 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4029)); 279 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3182)); 280 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3706)); 281 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5394)); 282 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5150)); 283 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4270)); 284 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4871)); 285 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4088)); 286 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4361)); 287 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5449)); 288 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelB", intList, TYPES_INTARRAYLIST)); 289 intList = NULL; 290 } 291 292 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelA", &exists)); 293 if (!exists) 294 { 295 CHKLOG(rc, IntArrayListCreate(&intList)); 296 CHKLOG(rc, IntArrayListAdd(intList, (int) 10.7381)); 297 CHKLOG(rc, IntArrayListAdd(intList, (int) 32.6775)); 298 CHKLOG(rc, IntArrayListAdd(intList, (int) 46.2301)); 299 CHKLOG(rc, IntArrayListAdd(intList, (int) 51.5438)); 300 CHKLOG(rc, IntArrayListAdd(intList, (int) 57.6636)); 301 CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0581)); 302 CHKLOG(rc, IntArrayListAdd(intList, (int) 65.3696)); 303 CHKLOG(rc, IntArrayListAdd(intList, (int) 70.1910)); 304 CHKLOG(rc, IntArrayListAdd(intList, (int) 71.6751)); 305 CHKLOG(rc, IntArrayListAdd(intList, (int) 
78.2364)); 306 CHKLOG(rc, IntArrayListAdd(intList, (int) 83.2440)); 307 CHKLOG(rc, IntArrayListAdd(intList, (int) 89.6261)); 308 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelA", intList, TYPES_INTARRAYLIST)); 309 intList = NULL; 310 } 311 312 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelB", &exists)); 313 if (!exists) 314 { 315 CHKLOG(rc, IntArrayListCreate(&intList)); 316 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5274)); 317 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5098)); 318 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5333)); 319 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5963)); 320 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5132)); 321 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5282)); 322 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5530)); 323 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5682)); 324 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4662)); 325 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4342)); 326 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5235)); 327 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4061)); 328 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelB", intList, TYPES_INTARRAYLIST)); 329 intList = NULL; 330 } 331 332 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaA", &exists)); 333 if (!exists) 334 { 335 CHKLOG(rc, IntArrayListCreate(&intList)); 336 CHKLOG(rc, IntArrayListAdd(intList, (int) 7.80)); 337 CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0)); 338 CHKLOG(rc, IntArrayListAdd(intList, (int) 54.0)); 339 CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0)); 340 CHKLOG(rc, IntArrayListAdd(intList, (int) 84.0)); 341 CHKLOG(rc, IntArrayListAdd(intList, (int) 86.5)); 342 CHKLOG(rc, IntArrayListAdd(intList, (int) 98.1)); 343 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.0)); 344 CHKLOG(rc, IntArrayListAdd(intList, (int) 153.0)); 345 CHKLOG(rc, IntArrayListAdd(intList, (int) 160.0)); 346 CHKLOG(rc, IntArrayListAdd(intList, (int) 188.0)); 347 CHKLOG(rc, IntArrayListAdd(intList, (int) 199.0)); 
348 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaA", intList, TYPES_INTARRAYLIST)); 349 intList = NULL; 350 } 351 352 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaB", &exists)); 353 if (!exists) 354 { 355 CHKLOG(rc, IntArrayListCreate(&intList)); 356 CHKLOG(rc, IntArrayListAdd(intList, 117)); 357 CHKLOG(rc, IntArrayListAdd(intList, 121)); 358 CHKLOG(rc, IntArrayListAdd(intList, 114)); 359 CHKLOG(rc, IntArrayListAdd(intList, 111)); 360 CHKLOG(rc, IntArrayListAdd(intList, 113)); 361 CHKLOG(rc, IntArrayListAdd(intList, 126)); 362 CHKLOG(rc, IntArrayListAdd(intList, 134)); 363 CHKLOG(rc, IntArrayListAdd(intList, 130)); 364 CHKLOG(rc, IntArrayListAdd(intList, 135)); 365 CHKLOG(rc, IntArrayListAdd(intList, 129)); 366 CHKLOG(rc, IntArrayListAdd(intList, 139)); 367 CHKLOG(rc, IntArrayListAdd(intList, 138)); 368 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaB", intList, TYPES_INTARRAYLIST)); 369 intList = NULL; 370 } 371 372 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_detect", 18)); 373 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_above", 18)); 374 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.ambient_within", 12)); 375 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.start_windback", 50)); 376 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.utterance_allowance", 40)); 377 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_duration", 6)); 378 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.quiet_duration", 20)); 379 380 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_clip", 32767)); 381 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_clip", -32768)); 382 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_per10000_clip", 10)); 383 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_dc_offset", 1000)); 384 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_noise_level_bit", 11)); 385 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_speech_level_bit", 11)); 386 CHKLOG(rc, 
ESR_SessionSetIntIfEmpty("CREC.Frontend.min_samples", 10000)); 387 388 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_freq", &exists)); 389 if (!exists) 390 { 391 CHKLOG(rc, IntArrayListCreate(&intList)); 392 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_freq", intList, TYPES_INTARRAYLIST)); 393 intList = NULL; 394 } 395 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_spread", &exists)); 396 if (!exists) 397 { 398 CHKLOG(rc, IntArrayListCreate(&intList)); 399 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_spread", intList, TYPES_INTARRAYLIST)); 400 intList = NULL; 401 } 402 return ESR_SUCCESS; 403CLEANUP: 404 if (intList != NULL) 405 intList->destroy(intList); 406 return rc; 407} 408 409/** 410 * Generate legacy frontend parameter structure from ESR_Session. 411 * 412 * @param impl SR_RecognizerImpl handle 413 * @param params Resulting structure 414 */ 415ESR_ReturnCode SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams* params) 416{ 417 ESR_ReturnCode rc; 418 IntArrayList* intList; 419 size_t size, i, size_tValue; 420 int iValue; 421 422 passert(params != NULL); 423 params->is_loaded = ESR_FALSE; 424 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.mel_dim", ¶ms->mel_dim)); 425 CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &size_tValue)); 426 params->samplerate = (int) size_tValue; 427 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.premel", ¶ms->pre_mel)); 428 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lowcut", ¶ms->low_cut)); 429 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.highcut", ¶ms->high_cut)); 430 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.window_factor", ¶ms->window_factor)); 431 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_skip_even_frames", ¶ms->do_skip_even_frames)); 432 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.offset", ¶ms->offset)); 433 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.ddmel", ¶ms->do_dd_mel)); 434 CHKLOG(rc, 
ESR_SessionGetInt("CREC.Frontend.forgetfactor", ¶ms->forget_factor)); 435 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.sv6_margin", ¶ms->sv6_margin)); 436 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.rastac0", ¶ms->do_rastac0)); 437 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.spectral_subtraction", ¶ms->do_spectral_sub)); 438 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.spec_sub_dur", ¶ms->spectral_sub_frame_dur)); 439 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.spec_sub_scale", ¶ms->spec_sub_scale)); 440 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_dump", ¶ms->do_filterbank_input)); 441 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_input", ¶ms->do_filterbank_input)); 442 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_smooth_c0", ¶ms->do_smooth_c0)); 443 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lpcorder", ¶ms->lpc_order)); 444 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.warp_scale", ¶ms->warp_scale)); 445 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.piecewise_start", ¶ms->piecewise_start)); 446 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecayup", ¶ms->peakpickup)); 447 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecaydown", ¶ms->peakpickdown)); 448 449 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_offset", (void **)&intList, TYPES_INTARRAYLIST)); 450 if (intList == NULL) 451 { 452 PLogError(L("ESR_INVALID_STATE")); 453 return ESR_INVALID_STATE; 454 } 455 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 456 for (i = 0; i < size; ++i) 457 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->mel_offset[i])); 458 459 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_loop", (void **)&intList, TYPES_INTARRAYLIST)); 460 if (intList == NULL) 461 { 462 PLogError(L("ESR_INVALID_STATE")); 463 return ESR_INVALID_STATE; 464 } 465 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 466 for (i = 0; i < size; ++i) 467 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->mel_loop[i])); 468 469 CHKLOG(rc, 
ESR_SessionGetProperty("CREC.Frontend.melA", (void **)&intList, TYPES_INTARRAYLIST)); 470 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 471 for (i = 0; i < size; ++i) 472 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->melA_scale[i])); 473 474 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melB", (void **)&intList, TYPES_INTARRAYLIST)); 475 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 476 for (i = 0; i < size; ++i) 477 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->melB_scale[i])); 478 479 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelA", (void **)&intList, TYPES_INTARRAYLIST)); 480 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 481 for (i = 0; i < size; ++i) 482 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->dmelA_scale[i])); 483 484 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelB", (void **)&intList, TYPES_INTARRAYLIST)); 485 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 486 for (i = 0; i < size; ++i) 487 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->dmelB_scale[i])); 488 489 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelA", (void **)&intList, TYPES_INTARRAYLIST)); 490 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 491 for (i = 0; i < size; ++i) 492 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->ddmelA_scale[i])); 493 494 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelB", (void **)&intList, TYPES_INTARRAYLIST)); 495 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 496 for (i = 0; i < size; ++i) 497 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->ddmelB_scale[i])); 498 499 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaA", (void **)&intList, TYPES_INTARRAYLIST)); 500 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 501 for (i = 0; i < size; ++i) 502 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->rastaA_scale[i])); 503 504 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaB", (void **)&intList, TYPES_INTARRAYLIST)); 505 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 506 for (i = 0; i < size; ++i) 507 CHKLOG(rc, 
IntArrayListGet(intList, i, ¶ms->rastaB_scale[i])); 508 509 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_detect", ¶ms->voice_margin)); 510 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_above", ¶ms->fast_voice_margin)); 511 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.ambient_within", ¶ms->tracker_margin)); 512 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.start_windback", ¶ms->start_windback)); 513 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.utterance_allowance", ¶ms->unsure_duration)); 514 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_duration", ¶ms->voice_duration)); 515 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.quiet_duration", ¶ms->quiet_duration)); 516 517 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_clip", ¶ms->high_clip)); 518 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_clip", ¶ms->low_clip)); 519 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_per10000_clip", ¶ms->max_per10000_clip)); 520 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_dc_offset", ¶ms->max_dc_offset)); 521 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_noise_level_bit", ¶ms->high_noise_level_bit)); 522 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_speech_level_bit", ¶ms->low_speech_level_bit)); 523 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.min_samples", ¶ms->min_samples)); 524 525 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_freq", (void **)&intList, TYPES_INTARRAYLIST)); 526 if (intList == NULL) 527 { 528 PLogError(L("ESR_INVALID_STATE")); 529 return ESR_INVALID_STATE; 530 } 531 CHKLOG(rc, IntArrayListGetSize(intList, &size)); 532 for (i = 0; i < size; ++i) 533 { 534 CHKLOG(rc, IntArrayListGet(intList, i, &iValue)); 535 params->spectrum_filter_freq[i] = iValue; 536 } 537 538 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_spread", (void **)&intList, TYPES_INTARRAYLIST)); 539 if (intList == NULL) 540 { 541 PLogError(L("ESR_INVALID_STATE")); 542 return ESR_INVALID_STATE; 543 } 544 CHKLOG(rc, IntArrayListGetSize(intList, 
&size)); 545 for (i = 0; i < size; ++i) 546 { 547 CHKLOG(rc, IntArrayListGet(intList, i, &iValue)); 548 params->spectrum_filter_spread[i] = iValue; 549 } 550 params->is_loaded = ESR_TRUE; 551 return ESR_SUCCESS; 552CLEANUP: 553 return rc; 554} 555 556/** 557 * Creates frontend components of SR_Recognizer. 558 * 559 * @param impl SR_RecognizerImpl handle 560 */ 561ESR_ReturnCode SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl* impl) 562{ 563 ESR_ReturnCode rc; 564 CA_FrontendInputParams* frontendParams; 565 566 /* Create a frontend object */ 567 impl->frontend = CA_AllocateFrontend(1, 0, 1); 568 frontendParams = CA_AllocateFrontendParameters(); 569 CHKLOG(rc, SR_RecognizerGetFrontendLegacyParametersImpl(frontendParams)); 570 571 CA_ConfigureFrontend(impl->frontend, frontendParams); 572 573 /* Create a wave object */ 574 impl->wavein = CA_AllocateWave('N'); 575 if (impl->wavein == NULL) 576 { 577 rc = ESR_OUT_OF_MEMORY; 578 PLogError(ESR_rc2str(rc)); 579 goto CLEANUP; 580 } 581 CA_ConfigureWave(impl->wavein, impl->frontend); 582 CA_ConfigureVoicingAnalysis(impl->wavein, frontendParams); 583 584 CA_LoadCMSParameters(impl->wavein, NULL, frontendParams); 585 586 /* Create an utterance object */ 587 impl->utterance = CA_AllocateUtterance(); 588 if (impl->utterance == NULL) 589 { 590 rc = ESR_OUT_OF_MEMORY; 591 PLogError(ESR_rc2str(rc)); 592 goto CLEANUP; 593 } 594 CA_InitUtteranceForFrontend(impl->utterance, frontendParams); 595 CA_AttachCMStoUtterance(impl->wavein, impl->utterance); 596 CA_FreeFrontendParameters(frontendParams); 597 return ESR_SUCCESS; 598 599CLEANUP: 600 if (impl->frontend != NULL) 601 { 602 CA_UnconfigureFrontend(impl->frontend); 603 CA_FreeFrontend(impl->frontend); 604 impl->frontend = NULL; 605 } 606 if (impl->wavein != NULL) 607 { 608 CA_UnconfigureWave(impl->wavein); 609 CA_FreeWave(impl->wavein); 610 impl->wavein = NULL; 611 } 612 if (impl->utterance != NULL) 613 { 614 CA_ClearUtterance(impl->utterance); 615 CA_FreeUtterance(impl->utterance); 
616 impl->utterance = NULL; 617 } 618 if (frontendParams != NULL) 619 CA_FreeFrontendParameters(frontendParams); 620 return rc; 621} 622 623/** 624 * Populates legacy recognizer parameters from the session. 625 * 626 * Replaces setup_pattern_parameters() 627 */ 628ESR_ReturnCode SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams* params) 629{ 630 ESR_ReturnCode rc; 631 632 passert(params != NULL); 633 params->is_loaded = ESR_FALSE; 634 CHKLOG(rc, ESR_SessionGetBool("CREC.Recognizer.partial_results", ¶ms->do_partial)); 635 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.NBest", ¶ms->top_choices)); 636 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.eou_threshold", ¶ms->eou_threshold)); 637 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_altword_tokens", ¶ms->max_altword_tokens)); 638 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_frames", ¶ms->max_frames)); 639 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_arcs", ¶ms->max_fsm_arcs)); 640 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_nodes", ¶ms->max_fsm_nodes)); 641 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsmnode_tokens", ¶ms->max_fsmnode_tokens)); 642 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_hmm_tokens", ¶ms->max_hmm_tokens)); 643 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_model_states", ¶ms->max_model_states)); 644 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_searches", ¶ms->max_searches)); 645 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_word_tokens", ¶ms->max_word_tokens)); 646 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.non_terminal_timeout", ¶ms->non_terminal_timeout)); 647 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.num_wordends_per_frame", ¶ms->num_wordends_per_frame)); 648 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.often", ¶ms->traceback_freq)); 649 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.optional_terminal_timeout", ¶ms->optional_terminal_timeout)); 650 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.reject", 
¶ms->reject_score)); 651 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.terminal_timeout", ¶ms->terminal_timeout)); 652 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.viterbi_prune_thresh", ¶ms->viterbi_prune_thresh)); 653 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.wordpen", ¶ms->word_penalty)); 654 params->is_loaded = ESR_TRUE; 655 656 return ESR_SUCCESS; 657CLEANUP: 658 return rc; 659} 660 661ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self) 662{ 663 SR_RecognizerImpl* impl; 664 CA_RecInputParams* recogParams = NULL; 665 ESR_ReturnCode rc; 666 LCHAR recHandle[20] = { 0 }; 667 668 if (self == NULL) 669 { 670 PLogError(L("ESR_INVALID_ARGUMENT")); 671 return ESR_INVALID_ARGUMENT; 672 } 673 impl = NEW(SR_RecognizerImpl, MTAG); 674 if (impl == NULL) 675 { 676 PLogError(L("ESR_OUT_OF_MEMORY")); 677 return ESR_OUT_OF_MEMORY; 678 } 679 680 impl->Interface.start = &SR_RecognizerStartImpl; 681 impl->Interface.stop = &SR_RecognizerStopImpl; 682 impl->Interface.destroy = &SR_RecognizerDestroyImpl; 683 impl->Interface.setup = &SR_RecognizerSetupImpl; 684 impl->Interface.unsetup = &SR_RecognizerUnsetupImpl; 685 impl->Interface.isSetup = &SR_RecognizerIsSetupImpl; 686 impl->Interface.getParameter = &SR_RecognizerGetParameterImpl; 687 impl->Interface.getSize_tParameter = &SR_RecognizerGetSize_tParameterImpl; 688 impl->Interface.getBoolParameter = &SR_RecognizerGetBoolParameterImpl; 689 impl->Interface.setParameter = &SR_RecognizerSetParameterImpl; 690 impl->Interface.setSize_tParameter = &SR_RecognizerSetSize_tParameterImpl; 691 impl->Interface.setBoolParameter = &SR_RecognizerSetBoolParameterImpl; 692 impl->Interface.setLockFunction = &SR_RecognizerSetLockFunctionImpl; 693 impl->Interface.hasSetupRules = &SR_RecognizerHasSetupRulesImpl; 694 impl->Interface.activateRule = &SR_RecognizerActivateRuleImpl; 695 impl->Interface.deactivateRule = &SR_RecognizerDeactivateRuleImpl; 696 impl->Interface.deactivateAllRules = &SR_RecognizerDeactivateAllRulesImpl; 697 
impl->Interface.isActiveRule = &SR_RecognizerIsActiveRuleImpl; 698 impl->Interface.setWordAdditionCeiling = &SR_RecognizerSetWordAdditionCeilingImpl; 699 impl->Interface.checkGrammarConsistency = &SR_RecognizerCheckGrammarConsistencyImpl; 700 impl->Interface.getModels = &SR_RecognizerGetModelsImpl; 701 impl->Interface.putAudio = &SR_RecognizerPutAudioImpl; 702 impl->Interface.advance = &SR_RecognizerAdvanceImpl; 703 impl->Interface.loadUtterance = &SR_RecognizerLoadUtteranceImpl; 704 impl->Interface.loadWaveFile = &SR_RecognizerLoadWaveFileImpl; 705 impl->Interface.logEvent = &SR_RecognizerLogEventImpl; 706 impl->Interface.logToken = &SR_RecognizerLogTokenImpl; 707 impl->Interface.logTokenInt = &SR_RecognizerLogTokenIntImpl; 708 impl->Interface.logSessionStart = &SR_RecognizerLogSessionStartImpl; 709 impl->Interface.logSessionEnd = &SR_RecognizerLogSessionEndImpl; 710 impl->Interface.logWaveformData = &SR_RecognizerLogWaveformDataImpl; 711 impl->Interface.isSignalClipping = &SR_RecognizerIsSignalClippingImpl; 712 impl->Interface.isSignalDCOffset = &SR_RecognizerIsSignalDCOffsetImpl; 713 impl->Interface.isSignalNoisy = &SR_RecognizerIsSignalNoisyImpl; 714 impl->Interface.isSignalTooFewSamples = &SR_RecognizerIsSignalTooFewSamplesImpl; 715 impl->Interface.isSignalTooManySamples = &SR_RecognizerIsSignalTooManySamplesImpl; 716 impl->Interface.isSignalTooQuiet = &SR_RecognizerIsSignalTooQuietImpl; 717 718 impl->frontend = NULL; 719 impl->wavein = NULL; 720 impl->utterance = NULL; 721 impl->confidenceScorer = NULL; 722 impl->recognizer = NULL; 723 impl->models = NULL; 724 impl->grammars = NULL; 725 impl->result = NULL; 726 impl->parameters = NULL; 727 impl->acousticState = NULL; 728 impl->audioBuffer = NULL; 729 impl->buffer = NULL; 730 impl->frames = impl->processed; 731 impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN; 732 impl->isStarted = ESR_FALSE; 733 impl->isRecognizing = ESR_FALSE; 734 impl->gotLastFrame = ESR_FALSE; 735 impl->sampleRate = 0; 736 
impl->lockFunction = NULL; 737 impl->lockData = NULL; 738 impl->eventLog = NULL; 739 impl->osi_log_level = 0; 740 impl->waveformBuffer = NULL; 741 impl->isSignalQualityInitialized = ESR_FALSE; 742 impl->beginningOfSpeechOffset = 0; 743 impl->gatedMode = ESR_TRUE; 744 impl->bgsniff = 0; 745 impl->isSignalClipping = ESR_FALSE; 746 impl->isSignalDCOffset = ESR_FALSE; 747 impl->isSignalNoisy = ESR_FALSE; 748 impl->isSignalTooFewSamples = ESR_FALSE; 749 impl->isSignalTooManySamples = ESR_FALSE; 750 impl->isSignalTooQuiet = ESR_FALSE; 751 752 CHKLOG(rc, ESR_SessionTypeCreate(&impl->parameters)); 753 CHKLOG(rc, SR_RecognizerToSessionImpl()); 754 CHKLOG(rc, ESR_SessionGetSize_t(L("SREC.Recognizer.osi_log_level"), &impl->osi_log_level)); 755 756 /* create the event log */ 757 if (impl->osi_log_level) /* do some logging if non-zero val */ 758 CHKLOG(rc, ESR_SessionGetProperty(L("eventlog"), (void **)&impl->eventLog, TYPES_SR_EVENTLOG)); 759 760 /* Record the OSI log event */ 761 psprintf(recHandle, L("%p"), impl); 762 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle)); 763 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrst"))); 764 765 CHKLOG(rc, SR_RecognizerFrontendToSessionImpl()); 766 CHKLOG(rc, SR_RecognizerCreateFrontendImpl(impl)); 767 rc = ESR_SessionGetProperty("recognizer.confidenceScorer", (void **)&impl->confidenceScorer, TYPES_CONFIDENCESCORER); 768 if (rc == ESR_NO_MATCH_ERROR) 769 { 770 impl->confidenceScorer = CA_AllocateConfidenceScorer(); 771 772 if (!CA_LoadConfidenceScorer(impl->confidenceScorer)) { 773 rc = ESR_INVALID_STATE; 774 PLogError(ESR_rc2str(rc)); 775 goto CLEANUP; 776 } 777 CHKLOG(rc, ESR_SessionSetProperty("recognizer.confidenceScorer", impl->confidenceScorer, TYPES_CONFIDENCESCORER)); 778 } 779 else if (rc != ESR_SUCCESS) 780 { 781 PLogError(ESR_rc2str(rc)); 782 goto CLEANUP; 783 } 784 785 recogParams = CA_AllocateRecognitionParameters(); 786 if (recogParams == NULL) 787 
{ 788 rc = ESR_OUT_OF_MEMORY; 789 PLogError(ESR_rc2str(rc)); 790 goto CLEANUP; 791 } 792 CHKLOG(rc, SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams)); 793 impl->recognizer = CA_AllocateRecognition(); 794 if (impl->recognizer == NULL) 795 { 796 PLogError(ESR_rc2str(rc)); 797 goto CLEANUP; 798 } 799 CA_ConfigureRecognition(impl->recognizer, recogParams); 800 CA_FreeRecognitionParameters(recogParams); 801 CHKLOG(rc, HashMapCreate(&impl->grammars)); 802 CHKLOG(rc, CircularBufferCreate(sizeof(asr_int16_t) * AUDIO_CIRC_BUFFER_SIZE, MTAG, &impl->buffer)); 803 CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &impl->sampleRate)); 804 805 impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE; 806 807 if ((impl->audioBuffer = MALLOC(impl->FRAME_SIZE, MTAG)) == NULL) 808 { 809 rc = ESR_OUT_OF_MEMORY; 810 goto CLEANUP; 811 } 812 813 /* create the waveform buffer */ 814 CHKLOG(rc, WaveformBuffer_Create(&impl->waveformBuffer, impl->FRAME_SIZE)); 815 816 CHKLOG(rc, ESR_SessionGetSize_t("SREC.Recognizer.utterance_timeout", &impl->utterance_timeout)); 817 818 /* OSI logging (SUCCESS) */ 819 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle)); 820 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS"))); 821 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd"))); 822 823 CHKLOG(rc, SR_AcousticStateCreateImpl(&impl->Interface)); 824 825 CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.bgsniff"), &impl->bgsniff)); 826 /* gated mode == beginning of speech detection */ 827 CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &impl->gatedMode)); 828 829 *self = (SR_Recognizer*) impl; 830 return ESR_SUCCESS; 831CLEANUP: 832 /* OSI logging (FAILURE) */ 833 if (impl->eventLog != NULL) 834 { 835 SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle); 836 SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, 
L("FAILURE"), ESR_rc2str(rc)); 837 SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd")); 838 } 839 840 if (recogParams != NULL) 841 CA_FreeRecognitionParameters(recogParams); 842 impl->Interface.destroy(&impl->Interface); 843 return rc; 844} 845 846ESR_ReturnCode SR_RecognizerDestroyImpl(SR_Recognizer* self) 847{ 848 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 849 ESR_BOOL exists; // isSetup; 850 ESR_ReturnCode rc; 851 LCHAR recHandle[20] = { 0 }; 852 853 if (impl->result != NULL) 854 { 855 SR_RecognizerResult_Destroy(impl->result); 856 impl->result = NULL; 857 } 858 859 if (impl->eventLog != NULL) 860 { 861 /* Record the OSI log event */ 862 psprintf(recHandle, L("%p"), impl); 863 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle)); 864 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesst"))); 865 } 866 867 /* Clean session */ 868 CHKLOG(rc, ESR_SessionContains("recognizer.confidenceScorer", &exists)); 869 if (exists) 870 CHKLOG(rc, ESR_SessionRemoveProperty("recognizer.confidenceScorer")); 871 872 if (impl->confidenceScorer != NULL) 873 { 874 CA_FreeConfidenceScorer(impl->confidenceScorer); 875 impl->confidenceScorer = NULL; 876 } 877 878 /* Clear CMS, CRS_RecognizerClose() */ 879 if (impl->wavein != NULL) 880 { 881 ESR_BOOL isAttached, isConfigured; 882 883 CHKLOG(rc, CA_IsCMSAttachedtoUtterance(impl->wavein, &isAttached)); 884 if (isAttached) 885 CA_DetachCMSfromUtterance(impl->wavein, impl->utterance); 886 887 CHKLOG(rc, CA_IsConfiguredForAgc(impl->wavein, &isConfigured)); 888 if (isConfigured) 889 CA_ClearCMSParameters(impl->wavein); 890 } 891 892 /* Free Utterance */ 893 if (impl->utterance != NULL) 894 { 895 CA_ClearUtterance(impl->utterance); 896 CA_FreeUtterance(impl->utterance); 897 impl->utterance = NULL; 898 } 899 900 /* Free WaveformBuffer */ 901 if (impl->waveformBuffer != NULL) 902 { 903 WaveformBuffer_Destroy(impl->waveformBuffer); 904 
impl->waveformBuffer = NULL; 905 } 906 907 /* Free recognizer */ 908/* CHKLOG(rc, self->isSetup(self, &isSetup)); 909 if (isSetup) 910 CHKLOG(rc, self->unsetup(self));*/ 911 if (impl->grammars != NULL) 912 CHKLOG(rc, self->deactivateAllRules(self)); 913 if (impl->recognizer != NULL) 914 { 915 CA_UnloadRecognitionModels(impl->recognizer); 916 CA_UnconfigureRecognition(impl->recognizer); 917 CA_FreeRecognition(impl->recognizer); 918 impl->recognizer = NULL; 919 } 920 921 if (impl->grammars != NULL) 922 { 923 CHKLOG(rc, HashMapDestroy(impl->grammars)); 924 impl->grammars = NULL; 925 } 926 927 if (impl->buffer != NULL) 928 { 929 FREE(impl->buffer); 930 impl->buffer = NULL; 931 } 932 933 if (impl->audioBuffer != NULL) 934 { 935 FREE(impl->audioBuffer); 936 impl->audioBuffer = NULL; 937 } 938 939 /* Free frontend */ 940 if (impl->frontend) 941 { 942 CA_UnconfigureFrontend(impl->frontend); 943 CA_FreeFrontend(impl->frontend); 944 impl->frontend = NULL; 945 } 946 947 /* Free wave */ 948 if (impl->wavein) 949 { 950 CA_UnconfigureWave(impl->wavein); 951 CA_FreeWave(impl->wavein); 952 impl->wavein = NULL; 953 } 954 955 if (impl->parameters != NULL) 956 CHKLOG(rc, impl->parameters->destroy(impl->parameters)); 957 958 if (impl->eventLog != NULL) 959 { 960 /* OSI logging (SUCCESS) */ 961 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle)); 962 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS"))); 963 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesnd"))); 964 impl->eventLog = NULL; 965 } 966 967 if (impl->acousticState != NULL) 968 { 969 impl->acousticState->destroy(self); 970 impl->acousticState = NULL; 971 } 972 FREE(impl); 973 return ESR_SUCCESS; 974CLEANUP: 975 return rc; 976} 977 978ESR_ReturnCode beginRecognizing(SR_RecognizerImpl* impl) 979{ 980 CA_RecInputParams* recogParams; 981 LCHAR tok[80]; 982 LCHAR* val; 983 PTimeStamp BORT; 984 size_t i, 
grammarSize; 985 ESR_ReturnCode rc; 986 987 /* Setup recognizer for new utterance */ 988 recogParams = CA_AllocateRecognitionParameters(); 989 if (recogParams == NULL) 990 { 991 rc = ESR_OUT_OF_MEMORY; 992 PLogError(ESR_rc2str(rc)); 993 goto CLEANUP; 994 } 995 SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams); 996 CA_BeginRecognition(impl->recognizer, NULL, 1, recogParams); 997 CA_FreeRecognitionParameters(recogParams); 998 impl->isRecognizing = ESR_TRUE; 999 1000 /* OSI log the grammars */ 1001 CHKLOG(rc, HashMapGetSize(impl->grammars, &grammarSize)); 1002 for (i = 0; i < grammarSize; ++i) 1003 { 1004 psprintf(tok, L("GURI%d"), i); 1005 /* use the key as the grammar URI */ 1006 CHKLOG(rc, HashMapGetKeyAtIndex(impl->grammars, i, &val)); 1007 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, tok, val)); 1008 } 1009 /* OSI ACST acoustic state reset */ 1010 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("ACST"), 0)); 1011 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("LANG"), L("en-us"))); 1012 1013 /* OSI log the start of recognition */ 1014 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcst"))); 1015 1016 /* save the BORT timing (begin of recog) */ 1017 PTimeStampSet(&BORT); 1018 impl->recogLogTimings.BORT = PTimeStampDiff(&BORT, &impl->timestamp); 1019 1020 return ESR_SUCCESS; 1021CLEANUP: 1022 if (recogParams != NULL) 1023 CA_FreeRecognitionParameters(recogParams); 1024 return rc; 1025} 1026 1027ESR_ReturnCode SR_RecognizerStartImpl(SR_Recognizer* self) 1028{ 1029 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1030 size_t silence_duration_in_frames; 1031 size_t end_of_utterance_hold_off_in_frames; 1032 size_t grammarCount; 1033 ESR_ReturnCode rc; 1034 ESR_BOOL enableGetWaveform = ESR_FALSE; 1035 1036 CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarCount)); 1037 if (impl->models == NULL) 1038 { 1039 PLogError("ESR_INVALID_STATE: 
No rule has been set up"); 1040 return ESR_INVALID_STATE; 1041 } 1042 if (grammarCount < 1) 1043 { 1044 PLogError("ESR_INVALID_STATE: No rule has been activated"); 1045 return ESR_INVALID_STATE; 1046 } 1047 1048 if (!CA_OpenWaveFromDevice(impl->wavein, DEVICE_RAW_PCM, impl->frontend->samplerate, 0, WAVE_DEVICE_RAW)) 1049 { 1050 rc = ESR_INVALID_STATE; 1051 PLogError(ESR_rc2str(rc)); 1052 goto CLEANUP; 1053 } 1054 1055 /* Setup utterance */ 1056 CA_UnlockUtteranceForInput(impl->utterance); 1057 1058 /* Setup utterance */ 1059 CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.silence_duration_in_frames"), &silence_duration_in_frames)); 1060 CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.end_of_utterance_hold_off_in_frames"), &end_of_utterance_hold_off_in_frames)); 1061 CA_SetEndOfUtteranceByLevelTimeout(impl->utterance, silence_duration_in_frames, end_of_utterance_hold_off_in_frames); 1062 1063 CA_ResetVoicing(impl->utterance); 1064 1065 /* 1066 * NOTE: We don't actually begin the recognizer here, the beginning of speech 1067 * detector will do that. 1068 */ 1069 1070 impl->gotLastFrame = ESR_FALSE; 1071 impl->isStarted = ESR_TRUE; 1072 impl->isRecognizing = ESR_FALSE; 1073 impl->isSignalQualityInitialized = ESR_FALSE; 1074 impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN; 1075 PTimeStampSet(&impl->timestamp); 1076 1077 /* reset waveform buffer at start of every recognition */ 1078 CHKLOG(rc, WaveformBuffer_Reset(impl->waveformBuffer)); 1079 1080 /* is waveform buffering active? 
*/ 1081 rc = impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform); 1082 if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) 1083 { 1084 PLogError(L("%s: could determine whether VoiceEnrollment active or not"), ESR_rc2str(rc)); 1085 goto CLEANUP; 1086 } 1087 if (enableGetWaveform) 1088 CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_CIRCULAR)); 1089 else 1090 CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_OFF)); 1091 1092 /* I am going to try to open the audio waveform file here */ 1093 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO) 1094 { 1095 /* open a new audio waveform file */ 1096 rc = SR_EventLogAudioOpen(impl->eventLog, L("audio/L16"), impl->sampleRate, SAMPLE_SIZE); 1097 if (rc != ESR_SUCCESS) 1098 { 1099 PLogError(L("%s: could not open the RIFF audio file"), ESR_rc2str(rc)); 1100 goto CLEANUP; 1101 } 1102 } 1103 impl->frames = impl->processed = 0; 1104 return ESR_SUCCESS; 1105CLEANUP: 1106/* self->stop(self);*/ 1107 return rc; 1108} 1109 1110ESR_ReturnCode SR_RecognizerStopImpl(SR_Recognizer* self) 1111{ 1112 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1113 SR_AcousticModelsImpl* modelsImpl; 1114 ESR_ReturnCode rc; 1115 1116#ifdef MEASURE_SAMPLE_TIMES 1117 SR_Recognizer_Log_Samples_Received ( ); 1118#endif 1119 1120 PLOG_DBG_API_ENTER(); 1121 if (!impl->isStarted) 1122 { 1123 /* In case the user calls stop() twice */ 1124 return ESR_SUCCESS; 1125 } 1126 modelsImpl = (SR_AcousticModelsImpl*) impl->models; 1127 1128 /* Clean-up recognizer and utterance */ 1129 switch (impl->internalState) 1130 { 1131 case SR_RECOGNIZER_INTERNAL_BEGIN: 1132 /* Recognizer was never started */ 1133 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BEGIN"))); 1134 CA_LockUtteranceFromInput(impl->utterance); 1135 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1136 if (impl->eventLog != NULL) 1137 { 1138 CHKLOG(rc, 
SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BEGIN -> SR_RECOGNIZER_INTERNAL_END"))); 1139 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1140 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1141 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1142 } 1143 break; 1144 1145 case SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT: 1146 /* Recognizer was never started */ 1147 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_TIMEOUT"))); 1148 CA_LockUtteranceFromInput(impl->utterance); 1149 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1150 if (impl->eventLog != NULL) 1151 { 1152 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT -> SR_RECOGNIZER_INTERNAL_END"))); 1153 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1154 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1155 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1156 } 1157 break; 1158 1159 case SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH: 1160 /* Recognizer was never started */ 1161 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_NO_MATCH"))); 1162 CA_LockUtteranceFromInput(impl->utterance); 1163 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1164 if (impl->eventLog != NULL) 1165 { 1166 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH -> SR_RECOGNIZER_INTERNAL_END"))); 1167 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1168 CHKLOG(rc, 
SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1169 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1170 } 1171 break; 1172 1173 case SR_RECOGNIZER_INTERNAL_BOS_DETECTION: 1174 /* Recognizer was never started */ 1175 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_DETECTION"))); 1176 CA_LockUtteranceFromInput(impl->utterance); 1177 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1178 if (impl->eventLog != NULL) 1179 { 1180 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END"))); 1181 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1182 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1183 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1184 } 1185 break; 1186 1187 case SR_RECOGNIZER_INTERNAL_EOS_DETECTION: 1188 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS_DETECTION"))); 1189 CA_LockUtteranceFromInput(impl->utterance); 1190 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance)) 1191 { 1192 rc = ESR_INVALID_STATE; 1193 PLogError(ESR_rc2str(rc)); 1194 goto CLEANUP; 1195 } 1196 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1197 if (impl->eventLog != NULL) 1198 { 1199 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END"))); 1200 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1201 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1202 CHKLOG(rc, 
SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1203 } 1204 break; 1205 1206 case SR_RECOGNIZER_INTERNAL_EOI: 1207 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOI"))); 1208 CA_LockUtteranceFromInput(impl->utterance); 1209 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance)) 1210 { 1211 rc = ESR_INVALID_STATE; 1212 PLogError(ESR_rc2str(rc)); 1213 goto CLEANUP; 1214 } 1215 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1216 if (impl->eventLog != NULL) 1217 { 1218 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOI -> SR_RECOGNIZER_INTERNAL_END"))); 1219 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1220 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1221 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1222 } 1223 break; 1224 1225 case SR_RECOGNIZER_INTERNAL_EOS: 1226 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS"))); 1227 CA_LockUtteranceFromInput(impl->utterance); 1228 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance)) 1229 { 1230 rc = ESR_INVALID_STATE; 1231 PLogError(ESR_rc2str(rc)); 1232 goto CLEANUP; 1233 } 1234 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1235 if (impl->eventLog != NULL) 1236 { 1237 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS -> SR_RECOGNIZER_INTERNAL_END"))); 1238 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1239 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1240 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, 
impl->osi_log_level, L("SR_Recognizer"))); 1241 } 1242 break; 1243 1244 case SR_RECOGNIZER_INTERNAL_END: 1245 /* Recognizer already shut down */ 1246 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("END"))); 1247 break; 1248 1249 default: 1250 /* Shut down recognizer */ 1251 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), impl->internalState)); 1252 if (impl->eventLog != NULL) 1253 { 1254 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("unknown state -> SR_RECOGNIZER_INTERNAL_END"))); 1255 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 1256 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 1257 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 1258 } 1259 CA_LockUtteranceFromInput(impl->utterance); 1260 if (impl->isRecognizing) 1261 { 1262 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance)) 1263 { 1264 rc = ESR_INVALID_STATE; 1265 PLogError(ESR_rc2str(rc)); 1266 goto CLEANUP; 1267 } 1268 } 1269 rc = ESR_INVALID_STATE; 1270 PLogError(L("%s: %d"), ESR_rc2str(rc), impl->internalState); 1271 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 1272 goto CLEANUP; 1273 } 1274 if (impl->eventLog != NULL) 1275 { 1276 int n; 1277 LCHAR result[MAX_ENTRY_LENGTH]; 1278 result[0] = L('\0'); 1279 1280 n = CA_GetUnprocessedFramesInUtterance(impl->utterance); 1281 CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CA_GetUnprocessedFramesInUtterance() (x10ms)"), n)); 1282 CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1); 1283 CHKLOG(rc, SR_EventLogToken(impl->eventLog, L("CA_FullResultLabel() (x20ms)"), result)); 1284 n = CircularBufferGetSize(impl->buffer); 1285 CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CircularBufferGetSize() (samples)"), n / 
SAMPLE_SIZE)); 1286 } 1287 if (impl->lockFunction) 1288 impl->lockFunction(ESR_LOCK, impl->lockData); 1289 CircularBufferReset(impl->buffer); 1290 if (impl->lockFunction) 1291 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1292 if (CA_RecognitionHasResults(impl->recognizer)) 1293 CA_ClearResults(impl->recognizer); 1294 CA_FlushUtteranceFrames(impl->utterance); 1295 CA_CalculateCMSParameters(impl->wavein); 1296 CA_CloseDevice(impl->wavein); 1297 1298 /* record the OSI event */ 1299 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIstop"))); 1300 1301 if (impl->result != NULL) 1302 { 1303 CHKLOG(rc, SR_RecognizerResult_Destroy(impl->result)); 1304 impl->result = NULL; 1305 } 1306 1307 if (impl->lockFunction) 1308 impl->lockFunction(ESR_LOCK, impl->lockData); 1309 impl->gotLastFrame = ESR_TRUE; 1310 PLOG_DBG_TRACE((L("SR_Recognizer shutdown occured"))); 1311 impl->isStarted = ESR_FALSE; 1312 impl->isRecognizing = ESR_FALSE; 1313 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO) 1314 SR_EventLogAudioClose(impl->eventLog); 1315 1316 impl->recogLogTimings.BORT = 0; 1317 impl->recogLogTimings.DURS = 0; 1318 impl->recogLogTimings.EORT = 0; 1319 impl->recogLogTimings.EOSD = 0; 1320 impl->recogLogTimings.EOSS = 0; 1321 impl->recogLogTimings.BOSS = 0; 1322 impl->recogLogTimings.EOST = 0; 1323 impl->eos_reason = L("undefined"); 1324 1325 if (impl->lockFunction) 1326 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1327 PLOG_DBG_API_EXIT(rc); 1328 return rc; 1329CLEANUP: 1330 PLOG_DBG_API_EXIT(rc); 1331 return rc; 1332} 1333 1334ESR_ReturnCode SR_RecognizerSetupImpl(SR_Recognizer* self) 1335{ 1336 ESR_ReturnCode rc; 1337 CA_AcoustInputParams* acousticParams = NULL; 1338 SR_AcousticModelsImpl* modelsImpl; 1339 SR_AcousticModels* models; 1340 SR_RecognizerImpl* recogImpl = NULL; 1341 CA_Acoustic* acoustic; 1342 size_t size, i; 1343 LCHAR filenames[P_PATH_MAX]; 1344 size_t len; 1345 1346 len = P_PATH_MAX; 1347 CHKLOG(rc, ESR_SessionGetLCHAR ( 
L("cmdline.modelfiles"), filenames, &len )); 1348 1349 CHKLOG(rc, SR_AcousticModelsLoad ( filenames, &models )); 1350 1351 if (models == NULL) 1352 { 1353 PLogError(L("ESR_INVALID_STATE while finding cmdline.modelfiles")); 1354 return ESR_INVALID_STATE; 1355 } 1356 modelsImpl = (SR_AcousticModelsImpl*) models; 1357 recogImpl = (SR_RecognizerImpl*) self; 1358 acousticParams = NULL; 1359 1360 CHKLOG(rc, SR_AcousticModelsGetCount(models, &size)); 1361 acousticParams = CA_AllocateAcousticParameters(); 1362 if (acousticParams == NULL) 1363 { 1364 rc = ESR_OUT_OF_MEMORY; 1365 PLogError(ESR_rc2str(rc)); 1366 goto CLEANUP; 1367 } 1368 CHKLOG(rc, modelsImpl->getLegacyParameters(acousticParams)); 1369 CHKLOG(rc, ArrayListGetSize(modelsImpl->acoustic, &size)); 1370 for (i = 0; i < size; ++i) 1371 { 1372 CHKLOG(rc, ArrayListGet(modelsImpl->acoustic, i, (void **)&acoustic)); 1373 CA_LoadModelsInAcoustic(recogImpl->recognizer, acoustic, acousticParams); 1374 } 1375 CA_FreeAcousticParameters(acousticParams); 1376 1377 recogImpl->models = models; 1378 CHKLOG(rc, modelsImpl->setupPattern(recogImpl->models, self)); 1379 return ESR_SUCCESS; 1380 CLEANUP: 1381 if (acousticParams != NULL) 1382 CA_FreeAcousticParameters(acousticParams); 1383 if (recogImpl != NULL) 1384 CA_UnloadRecognitionModels(recogImpl->recognizer); 1385 return rc; 1386} 1387 1388ESR_ReturnCode SR_RecognizerUnsetupImpl(SR_Recognizer* self) 1389{ 1390 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1391 SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models; 1392 ESR_ReturnCode rc; 1393 1394 CHKLOG(rc, modelsImpl->unsetupPattern(impl->models)); 1395 CA_UnloadRecognitionModels(impl->recognizer); 1396 CHKLOG(rc, SR_AcousticModelsDestroy ( impl->models )); 1397 impl->models = NULL; 1398 return ESR_SUCCESS; 1399 CLEANUP: 1400 return rc; 1401} 1402 1403ESR_ReturnCode SR_RecognizerIsSetupImpl(SR_Recognizer* self, ESR_BOOL* isSetup) 1404{ 1405 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1406 
1407 if (isSetup == NULL) 1408 { 1409 PLogError(L("ESR_INVALID_ARGUMENT")); 1410 return ESR_INVALID_ARGUMENT; 1411 } 1412 *isSetup = impl->models != NULL; 1413 return ESR_SUCCESS; 1414} 1415 1416ESR_ReturnCode SR_RecognizerGetParameterImpl(SR_Recognizer* self, const LCHAR* key, 1417 LCHAR* value, size_t* len) 1418{ 1419 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1420 ESR_ReturnCode rc; 1421 1422 rc = impl->parameters->getLCHAR(impl->parameters, key, value, len); 1423 if (rc == ESR_NO_MATCH_ERROR) 1424 { 1425 CHKLOG(rc, ESR_SessionGetLCHAR(key, value, len)); 1426 return ESR_SUCCESS; 1427 } 1428 else if (rc != ESR_SUCCESS) 1429 { 1430 PLogError(ESR_rc2str(rc)); 1431 goto CLEANUP; 1432 } 1433 return ESR_SUCCESS; 1434CLEANUP: 1435 return rc; 1436} 1437 1438/* 1439 * The get / set code is a mess. Since we only use size_t parameters, that's all 1440 * that I am going to make work. The impl->parameters don't work so you always 1441 * have to get them from the session. The impl always logs an error. 
SteveR 1442 */ 1443 1444ESR_ReturnCode SR_RecognizerGetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key, 1445 size_t* value) 1446{ 1447 ESR_ReturnCode rc; 1448 1449 CHKLOG(rc, ESR_SessionGetSize_t(key, value)); 1450 return ESR_SUCCESS; 1451CLEANUP: 1452 return rc; 1453} 1454 1455ESR_ReturnCode SR_RecognizerGetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value) 1456{ 1457 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1458 ESR_ReturnCode rc; 1459 1460 rc = impl->parameters->getBool(impl->parameters, key, value); 1461 if (rc == ESR_NO_MATCH_ERROR) 1462 { 1463 CHKLOG(rc, ESR_SessionGetBool(key, value)); 1464 return ESR_SUCCESS; 1465 } 1466 else if (rc != ESR_SUCCESS) 1467 { 1468 PLogError(ESR_rc2str(rc)); 1469 goto CLEANUP; 1470 } 1471 return ESR_SUCCESS; 1472CLEANUP: 1473 return rc; 1474} 1475 1476ESR_ReturnCode SR_RecognizerSetParameterImpl(SR_Recognizer* self, const LCHAR* key, 1477 LCHAR* value) 1478{ 1479 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1480 LCHAR temp[256]; 1481 ESR_ReturnCode rc; 1482 size_t len = 256; 1483 1484 rc = impl->parameters->getLCHAR(impl->parameters, key, temp, &len); 1485 if (rc == ESR_SUCCESS) 1486 { 1487 if (LSTRCMP(temp, value) == 0) 1488 return ESR_SUCCESS; 1489 CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key)); 1490 } 1491 else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE) 1492 { 1493 PLogError(ESR_rc2str(rc)); 1494 goto CLEANUP; 1495 } 1496 1497 CHKLOG(rc, impl->parameters->setLCHAR(impl->parameters, key, value)); 1498 return ESR_SUCCESS; 1499CLEANUP: 1500 return rc; 1501} 1502/* 1503 * The only set param function that is working is for the size_t parameters; and not 1504 * all of them are working, only the ones specified in the function itself. 
There are 1505 * two reasons for this: first most of the set functions just put the value in an unused 1506 * table that has no effect; second many of the changes need to be propogated to a specific 1507 * part of the code. This needs to be evaluated on a per parameter basis. SteveR 1508 */ 1509 1510/* 1511 * This function will be used to set parameters in the session. We need to go through 1512 * the recognizer so as to propogate the values into the recognizer. We will rely on 1513 * the session to do the right thing. SteveR 1514 */ 1515 1516ESR_ReturnCode SR_RecognizerSetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key, 1517 size_t value) 1518{ 1519 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1520 ESR_ReturnCode rc; 1521 1522 rc = ESR_SessionSetSize_t ( key, value ); 1523 1524 if (rc == ESR_SUCCESS) 1525 { 1526 if ( LSTRCMP ( L("SREC.Recognizer.utterance_timeout"), key ) == 0 ) 1527 { 1528 impl->utterance_timeout = value; 1529 } 1530 else if ( LSTRCMP ( L("CREC.Recognizer.terminal_timeout"), key ) == 0 ) 1531 { 1532 impl->recognizer->eosd_parms->endnode_timeout = value; 1533 } 1534 else if ( LSTRCMP ( L("CREC.Recognizer.optional_terminal_timeout"), key ) == 0 ) 1535 { 1536 impl->recognizer->eosd_parms->optendnode_timeout = value; 1537 } 1538 else if ( LSTRCMP ( L("CREC.Recognizer.non_terminal_timeout"), key ) == 0 ) 1539 { 1540 impl->recognizer->eosd_parms->internalnode_timeout = value; 1541 } 1542 else if ( LSTRCMP ( L("CREC.Recognizer.eou_threshold"), key ) == 0 ) 1543 { 1544 impl->recognizer->eosd_parms->eos_costdelta = (frameID)value; 1545 impl->recognizer->eosd_parms->opt_eos_costdelta = (frameID)value; 1546 } 1547 else 1548 { 1549 PLogError(L("ESR_INVALID_ARGUMENT")); 1550 rc = ESR_INVALID_ARGUMENT; 1551 } 1552 } 1553 return rc; 1554} 1555 1556 1557ESR_ReturnCode SR_RecognizerSetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value) 1558{ 1559 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1560 ESR_BOOL temp; 1561 
ESR_ReturnCode rc; 1562 1563 rc = impl->parameters->getBool(impl->parameters, key, &temp); 1564 if (rc == ESR_SUCCESS) 1565 { 1566 if (temp == value) 1567 return ESR_SUCCESS; 1568 CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key)); 1569 } 1570 else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE) 1571 return rc; 1572 1573 CHKLOG(rc, impl->parameters->setBool(impl->parameters, key, value)); 1574 return ESR_SUCCESS; 1575CLEANUP: 1576 return rc; 1577} 1578 1579ESR_ReturnCode SR_RecognizerHasSetupRulesImpl(SR_Recognizer* self, ESR_BOOL* hasSetupRules) 1580{ 1581 SR_RecognizerImpl* recogImpl = (SR_RecognizerImpl*) self; 1582 size_t size; 1583 ESR_ReturnCode rc; 1584 1585 if (hasSetupRules == NULL) 1586 { 1587 PLogError(L("ESR_INVALID_ARGUMENT")); 1588 return ESR_INVALID_ARGUMENT; 1589 } 1590 CHKLOG(rc, HashMapGetSize(recogImpl->grammars, &size)); 1591 *hasSetupRules = size > 0; 1592 return ESR_SUCCESS; 1593CLEANUP: 1594 return rc; 1595} 1596 1597ESR_ReturnCode SR_RecognizerActivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar, 1598 const LCHAR* ruleName, unsigned int weight) 1599{ 1600 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1601 SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar; 1602 SR_AcousticModelsImpl* modelsImpl; 1603 LCHAR grammarID[80]; 1604 ESR_ReturnCode rc; 1605 char *failure_reason = NULL; 1606 1607 if (grammar == NULL) 1608 { 1609 if (impl->eventLog) 1610 failure_reason = "badinput"; 1611 rc = ESR_INVALID_ARGUMENT; 1612 PLogError(L("ESR_INVALID_ARGUMENT")); 1613 goto CLEANUP; 1614 } 1615 1616 if (impl->models == NULL) 1617 { 1618 failure_reason = "nomodels"; 1619 rc = ESR_INVALID_STATE; 1620 PLogError(L("acoustic models must be configured")); 1621 goto CLEANUP; 1622 } 1623 1624 modelsImpl = (SR_AcousticModelsImpl*) impl->models; 1625 1626 if (ruleName == NULL) 1627 psprintf(grammarID, L("%p"), grammar); 1628 else 1629 { 1630 if (LSTRLEN(ruleName) > 80) 1631 { 1632 rc = ESR_BUFFER_OVERFLOW; 1633 
PLogError(ESR_rc2str(rc)); 1634 goto CLEANUP; 1635 } 1636 LSTRCPY(grammarID, ruleName); 1637 } 1638 1639 CHKLOG(rc, HashMapPut(impl->grammars, grammarID, grammar)); 1640 if (CA_SetupSyntaxForRecognizer(grammarImpl->syntax, impl->recognizer)) 1641 { 1642 failure_reason = "cafailed"; 1643 rc = ESR_INVALID_STATE; 1644 PLogError(L("ESR_INVALID_STATE")); 1645 goto CLEANUP; 1646 } 1647 1648 CHKLOG(rc, SR_Grammar_SetupRecognizer(grammar, self)); 1649 grammarImpl->isActivated = ESR_TRUE; 1650 1651 /* 1652 * If we want to log dynamically added words, then we must give the grammar a reference 1653 * to our event log. The grammar logs word additions if and only if its reference to 1654 * eventLog is non-null. 1655 */ 1656 if (impl->osi_log_level & OSI_LOG_LEVEL_ADDWD) 1657 grammarImpl->eventLog = impl->eventLog; 1658 else 1659 grammarImpl->eventLog = NULL; 1660 1661 rc = ESR_SUCCESS; 1662 1663CLEANUP: 1664 if (impl->eventLog) 1665 { 1666 if (failure_reason) 1667 { 1668 SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar); 1669 SR_EventLogToken(impl->eventLog, L("rule"), ruleName); 1670 SR_EventLogToken(impl->eventLog, L("rslt"), "fail"); 1671 SR_EventLogToken(impl->eventLog, L("reason"), failure_reason); 1672 SR_EventLogEvent(impl->eventLog, L("ESRacGrm")); 1673 } 1674 else 1675 { 1676 SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar); 1677 SR_EventLogToken(impl->eventLog, L("rule"), ruleName); 1678 SR_EventLogToken(impl->eventLog, L("rslt"), "ok"); 1679 SR_EventLogEvent(impl->eventLog, L("ESRacGrm")); 1680 } 1681 } 1682 return rc; 1683} 1684 1685ESR_ReturnCode SR_RecognizerDeactivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar, 1686 const LCHAR* ruleName) 1687{ 1688 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1689 SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar; 1690 LCHAR grammarID[MAX_INT_DIGITS+1]; 1691 ESR_ReturnCode rc; 1692 1693 if (ruleName == NULL) 1694 { 1695 psprintf(grammarID, L("%p"), grammar); 1696 CHKLOG(rc, 
HashMapRemove(impl->grammars, grammarID)); 1697 } 1698 else 1699 CHKLOG(rc, HashMapRemove(impl->grammars, ruleName)); 1700 grammarImpl->isActivated = ESR_FALSE; 1701 return ESR_SUCCESS; 1702CLEANUP: 1703 return rc; 1704} 1705 1706ESR_ReturnCode SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer* self) 1707{ 1708 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1709 ESR_ReturnCode rc; 1710 1711 CHKLOG(rc, HashMapRemoveAll(impl->grammars)); 1712 CA_ClearSyntaxForRecognizer(0, impl->recognizer); 1713 return ESR_SUCCESS; 1714CLEANUP: 1715 return rc; 1716} 1717 1718ESR_ReturnCode SR_RecognizerIsActiveRuleImpl(SR_Recognizer* self, SR_Grammar* grammar, 1719 const LCHAR* ruleName, ESR_BOOL* isActiveRule) 1720{ 1721 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1722 LCHAR grammarID[MAX_INT_DIGITS+1]; 1723 ESR_ReturnCode rc; 1724 1725 psprintf(grammarID, L("%p"), grammar); 1726 CHKLOG(rc, HashMapContainsKey(impl->grammars, (LCHAR*) &grammarID, isActiveRule)); 1727 return ESR_SUCCESS; 1728CLEANUP: 1729 return rc; 1730} 1731 1732ESR_ReturnCode SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer* self, SR_Grammar* grammar) 1733{ 1734 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1735 SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*)grammar; 1736 int iRc; 1737 1738 if(!impl || !grammarImpl) 1739 return ESR_INVALID_ARGUMENT; 1740 iRc = CA_CeilingSyntaxForRecognizer( grammarImpl->syntax, impl->recognizer); 1741 if(iRc) return ESR_INVALID_STATE; 1742 1743 return ESR_SUCCESS; 1744} 1745 1746ESR_ReturnCode SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer* self, SR_Grammar* grammar, 1747 ESR_BOOL* isConsistent) 1748{ 1749 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1750 SR_GrammarImpl* grammarImpl; 1751 SR_RecognizerImpl* impl2; 1752 1753 1754 grammarImpl = (SR_GrammarImpl*) grammar; 1755 impl2 = (SR_RecognizerImpl*)grammarImpl->recognizer; 1756 // *isConsistent = grammarImpl->models == impl->models; 1757 *isConsistent = (impl2->models == impl->models); 
1758 return ESR_SUCCESS; 1759} 1760 1761ESR_ReturnCode SR_RecognizerGetModelsImpl(SR_Recognizer* self, SR_AcousticModels** pmodels) 1762{ 1763 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1764 *pmodels = impl->models; 1765 return ESR_SUCCESS; 1766} 1767 1768ESR_ReturnCode SR_RecognizerPutAudioImpl(SR_Recognizer* self, asr_int16_t* buffer, size_t* bufferSize, 1769 ESR_BOOL isLast) 1770{ 1771 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 1772 ESR_ReturnCode rc; 1773 int rcBufWrite; 1774 size_t nbWritten; 1775 1776#ifdef MEASURE_SAMPLE_TIMES 1777 if ( sample_buffers_received < MAX_SAMPLES_TO_MEASURE ) 1778 { 1779 gettimeofday ( &buffer_received_time, NULL ); 1780 seconds_buffer_received [sample_buffers_received] = buffer_received_time.tv_sec; 1781 micro_seconds_buffer_received [sample_buffers_received] = buffer_received_time.tv_usec; 1782 samples_in_buffer [sample_buffers_received] = *bufferSize; 1783 total_samples_received += *bufferSize; 1784 sample_buffers_received++; 1785 } 1786#endif 1787 1788 if (isLast == ESR_FALSE && (buffer == NULL || bufferSize == NULL)) 1789 { 1790 PLogError(L("ESR_INVALID_ARGUMENT")); 1791 return ESR_INVALID_ARGUMENT; 1792 } 1793 1794 if (impl->lockFunction) 1795 impl->lockFunction(ESR_LOCK, impl->lockData); 1796 if (!impl->isStarted) 1797 { 1798 if (impl->lockFunction) 1799 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1800 PLogMessage(L("ESR_INVALID_STATE: Tried pushing audio while recognizer was offline")); 1801 return ESR_INVALID_STATE; 1802 } 1803 if (impl->gotLastFrame) 1804 { 1805 if (impl->lockFunction) 1806 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1807 PLogMessage(L("ESR_INVALID_STATE: isLast=TRUE")); 1808 return ESR_INVALID_STATE; 1809 } 1810 if (buffer == NULL && isLast == ESR_FALSE) 1811 { 1812 if (impl->lockFunction) 1813 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1814 PLogError(L("ESR_INVALID_ARGUMENT: got NULL buffer on non-terminal frame")); 1815 return ESR_INVALID_ARGUMENT; 1816 } 1817 1818 
rcBufWrite = CircularBufferWrite(impl->buffer, buffer, *bufferSize * SAMPLE_SIZE); 1819 if (rcBufWrite < 0) 1820 { 1821 rc = ESR_INVALID_STATE; 1822 PLogError(L("%s: error writing to buffer (buffer=%d, available=%u)"), ESR_rc2str(rc), (int) impl->buffer, CircularBufferGetAvailable(impl->buffer)); 1823 goto CLEANUP; 1824 } 1825 1826 nbWritten = (size_t)rcBufWrite; 1827 if (nbWritten % SAMPLE_SIZE != 0) 1828 { 1829 size_t amountUnwritten; 1830 1831 /* The buffer is byte-based while we're sample based. Make sure we write entire samples or not at all */ 1832 amountUnwritten = CircularBufferUnwrite(impl->buffer, nbWritten % SAMPLE_SIZE); 1833 passert(amountUnwritten == nbWritten % SAMPLE_SIZE); 1834 nbWritten -= amountUnwritten; 1835 } 1836 passert(nbWritten % 2 == 0); /* make sure CircularBufferSize is divisible by 2 */ 1837 1838 if (nbWritten < *bufferSize * SAMPLE_SIZE) 1839 { 1840 rc = ESR_BUFFER_OVERFLOW; 1841#ifndef NDEBUG 1842 PLOG_DBG_TRACE((L("%s: writing to circular buffer"), ESR_rc2str(rc))); 1843#endif 1844 *bufferSize = nbWritten / SAMPLE_SIZE; 1845 if (impl->lockFunction) 1846 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1847 goto CLEANUP; 1848 } 1849 if (impl->lockFunction) 1850 impl->lockFunction(ESR_UNLOCK, impl->lockData); 1851 1852 if (isLast) 1853 impl->gotLastFrame = ESR_TRUE; 1854 return ESR_SUCCESS; 1855CLEANUP: 1856 return rc; 1857} 1858 1859/* utility function to sort the ArrayList of nbest list results by the score of the first 1860 semantic result */ 1861ESR_ReturnCode SemanticResults_SortByScore(ArrayList *results, size_t nbestSize) 1862{ 1863 ESR_ReturnCode rc; 1864 ArrayList* semanticResultList; 1865 ArrayList* semanticResultList_swap; 1866 SR_SemanticResult* semanticResult_i; 1867 SR_SemanticResult* semanticResult_j; 1868 size_t i, j; 1869 LCHAR scoreStr[MAX_ENTRY_LENGTH] ; 1870 size_t scoreStrLen = MAX_ENTRY_LENGTH ; 1871 int score_i, score_j; 1872 1873 /* bubble sort */ 1874 for (i = 0; i < (size_t)nbestSize; ++i) 1875 { 1876 for (j = 
i + 1; j < (size_t)nbestSize; ++j) 1877 { 1878 /* get for i */ 1879 CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList)); /* nbest index */ 1880 CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_i)); /* semresult 0 */ 1881 1882 /* get for j */ 1883 CHKLOG(rc, ArrayListGet(results, j, (void **)&semanticResultList)); /* nbest index */ 1884 CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_j)); /* semresult 0 */ 1885 1886 scoreStrLen = MAX_ENTRY_LENGTH ; 1887 CHKLOG(rc, semanticResult_i->getValue(semanticResult_i, "raws", scoreStr, &scoreStrLen)); 1888 CHKLOG(rc, lstrtoi(scoreStr, &score_i, 10)); 1889 scoreStrLen = MAX_ENTRY_LENGTH ; 1890 CHKLOG(rc, semanticResult_j->getValue(semanticResult_j, "raws", scoreStr, &scoreStrLen)); 1891 CHKLOG(rc, lstrtoi(scoreStr, &score_j, 10)); 1892 1893 if (score_j < score_i) 1894 { 1895 /* need to swap */ 1896 CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList_swap)); /* put i in swap */ 1897 CHKLOG(rc, ArrayListSet(results, i, semanticResultList)); /* put j in i */ 1898 CHKLOG(rc, ArrayListSet(results, j, semanticResultList_swap)); /* put swap in j */ 1899 } 1900 } 1901 } 1902 return ESR_SUCCESS; 1903CLEANUP: 1904 return rc; 1905} 1906 1907ESR_ReturnCode filter_CA_FullResultLabel(const LCHAR* label, LCHAR *filtered_label, size_t* boss, size_t* eoss) 1908{ 1909 ESR_ReturnCode rc; 1910 enum 1911 { 1912 NO_COPY, 1913 FRAME, 1914 WORD, 1915 } filter_state = WORD; 1916 LCHAR *dst = filtered_label; 1917 LCHAR eosBuf[16]; /* max 9999 + '\0' */ 1918 LCHAR bosBuf[16]; /* max 9999 + '\0' */ 1919 LCHAR* pBuf = NULL; 1920 1921 /** 1922 * example: you want to filter this: 1923 * 1924 * "-pau-@23 clock@97 twenty_four@125 hour@145 " 1925 * ^boss = 23 ^ eoss = 145 1926 * and get this: 1927 * 1928 * "clock twenty_four hour" 1929 */ 1930 1931 passert(LSTRLEN(label) > 0); 1932 while (*label) 1933 { 1934 switch (filter_state) 1935 { 1936 case NO_COPY: 1937 if (*label == L(' ')) 1938 
filter_state = WORD; 1939 else if (*label == L('@')) 1940 { 1941 filter_state = FRAME; 1942 if (pBuf == NULL) 1943 pBuf = bosBuf; 1944 else 1945 { 1946 *pBuf = 0; 1947 pBuf = eosBuf; 1948 } 1949 } 1950 break; 1951 case WORD: 1952 if (*label == L('@')) 1953 { 1954 *dst = L(' '); /* insert space */ 1955 dst++; 1956 filter_state = FRAME; 1957 if (pBuf == NULL) 1958 pBuf = bosBuf; 1959 else 1960 { 1961 *pBuf = 0; 1962 pBuf = eosBuf; 1963 } 1964 } 1965 else 1966 { 1967 *dst = *label; 1968 dst++; 1969 } 1970 break; 1971 case FRAME: 1972 if (*label == L(' ')) 1973 filter_state = WORD; 1974 else 1975 { 1976 *pBuf = *label; 1977 pBuf++; 1978 } 1979 break; 1980 } 1981 label++; 1982 } 1983 *dst = 0; /* term the string */ 1984 *pBuf = 0; /* term the string */ 1985 1986 /* trim the end spaces */ 1987 dst--; 1988 while (*dst == ' ') 1989 *dst-- = '\0'; 1990 1991 /* set the eos signal indicated by the end pointed data */ 1992 if (eosBuf[0] != 0) 1993 CHKLOG(rc, lstrtoui(eosBuf, eoss, 10)); 1994 else 1995 eoss = 0; 1996 1997 if (bosBuf[0] != 0) 1998 CHKLOG(rc, lstrtoui(bosBuf, boss, 10)); 1999 else 2000 boss = 0; 2001 2002 return ESR_SUCCESS; 2003CLEANUP: 2004 return rc; 2005} 2006 2007/** 2008 * Populates the recognizer result if it can, otherwise it returns NO MATCH cuz no results exist 2009 * 2010 * INPUT STATE: SR_RECOGNIZER_INTERNAL_EOS 2011 * 2012 * @param self SR_Recognizer handle 2013 * @todo break up into smaller functions 2014 */ 2015ESR_ReturnCode SR_RecognizerCreateResultImpl(SR_Recognizer* self, SR_RecognizerStatus* status, 2016 SR_RecognizerResultType* type) 2017{ 2018 LCHAR label[MAX_ENTRY_LENGTH * 2]; /* run out of buffer */ 2019#define WORDID_COUNT 48 /* can be quite high for voice enrollment! 
*/ 2020 wordID wordIDs[WORDID_COUNT]; 2021 LCHAR tok[80]; 2022 LCHAR waveformFilename[P_PATH_MAX]; 2023 LCHAR* pkey; 2024 SR_GrammarImpl* pgrammar; 2025 asr_int32_t raws; /* raw score */ 2026 size_t iBest, nbestSize, jBest, k, grammarSize, semanticResultsSize, grammarIndex_for_iBest; 2027 LCHAR* lValue; 2028 LCHAR* lValue2; 2029 int confValue; 2030 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 2031 SR_RecognizerResultImpl* resultImpl = (SR_RecognizerResultImpl*) impl->result; 2032 ESR_BOOL containsKey; 2033 int valid, score, recogID; 2034 LCHAR result[MAX_ENTRY_LENGTH]; 2035 size_t len, size; 2036 size_t locale; 2037 int current_choice; 2038 2039 /** 2040 * Semantic result stuff 2041 */ 2042 /* a temp buffer to hold semantic results of a parse (there may be several results) */ 2043 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS]; 2044 ArrayList* semanticList; 2045 ArrayList* semanticList2; 2046 SR_SemanticResultImpl* semanticImpl; 2047 SR_SemanticResultImpl* semanticImpl2; 2048 SR_SemanticResult* semanticResult; 2049 SR_SemanticResult* semanticResult2; 2050 waveform_buffering_state_t buffering_state; 2051 2052 SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models; 2053 ESR_ReturnCode rc; 2054 PTimeStamp EORT; 2055 2056 CA_LockUtteranceFromInput(impl->utterance); 2057 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance)) 2058 { 2059 PLogError(L("ESR_INVALID_STATE")); 2060 return ESR_INVALID_STATE; 2061 } 2062 2063 /* check if the forward search was successful */ 2064 valid = CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1); 2065 CA_GetRecogID(impl->recognizer, &recogID); 2066 CA_FullResultScore(impl->recognizer, &score, 1); 2067#ifdef SREC_ENGINE_VERBOSE_LOGGING 2068 PLogMessage(L("R: %s type %d score %d from recognizer%d"), result, type, score, valid, recogID); 2069 PLogMessage(L("R: %s score %d from recognizer%d"), result, score, valid, recogID); 2070#endif 2071#ifdef _WIN32 2072 
//pfprintf(PSTDOUT, ("R: %s type %d score %d from recognizer%d\n"), result, type, score, valid, recogID); 2073#endif 2074 2075 2076 switch (valid) 2077 { 2078 case FULL_RESULT: 2079 CHKLOG(rc, filter_CA_FullResultLabel(result, label, &impl->recogLogTimings.BOSS, &impl->recogLogTimings.EOSS)); 2080#ifdef SREC_ENGINE_VERBOSE_LOGGING 2081 PLogMessage("R: %s", result); 2082#endif 2083 CA_FullResultScore(impl->recognizer, (int*) &raws, 0); 2084#ifdef SREC_ENGINE_VERBOSE_LOGGING 2085 PLogMessage("S: %d", raws); 2086#endif 2087 2088 /* now that we have an endpointed result, we can parse the result transcription 2089 to see where speech started and ended. Then we can trim off excess parts of the 2090 recorded audio waveform (if exists) so that nametags are just the right amount of 2091 audio 2092 */ 2093 CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state)); 2094 if (buffering_state != WAVEFORM_BUFFERING_OFF) 2095 { 2096 CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &size)); 2097 if (size > 0) 2098 { 2099 rc = WaveformBuffer_ParseEndPointedResultAndTrim(impl->waveformBuffer, result, impl->FRAME_SIZE); 2100 if (rc == ESR_BUFFER_OVERFLOW) 2101 { 2102 /* Nametag EOS occured beyond end of buffer */ 2103 } 2104 else if (rc != ESR_SUCCESS) 2105 { 2106 PLogError(ESR_rc2str(rc)); 2107 goto CLEANUP; 2108 } 2109 } 2110 } 2111 break; 2112 2113 case REJECT_RESULT: 2114#ifdef SREC_ENGINE_VERBOSE_LOGGING 2115 PLogMessage(L("R: <REJECTED>")); 2116#endif 2117 break; 2118 default: 2119#ifdef SREC_ENGINE_VERBOSE_LOGGING 2120 PLogMessage(L("E: No results available")); 2121 PLogMessage(L("R: <FAILED>")); 2122#endif 2123 break; 2124 } 2125 2126 2127 if (valid == FULL_RESULT) 2128 { 2129 /* Populate SR_RecognizerResult */ 2130 resultImpl->nbestList = CA_PrepareNBestList(impl->recognizer, 10, &raws); 2131 if (resultImpl->nbestList == NULL) 2132 { 2133 /* 2134 * This is not a failure. 
It simply means that I have not advanced far 2135 * enough in recognition in order to obtain results (no paths in 2136 * graph). This occurs, for instance, when a eof is reached (no more data) 2137 * and I have not even created any paths in my graph. 2138 */ 2139 2140 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2141 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2142 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2143 if (impl->eventLog != NULL) 2144 { 2145 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END"))); 2146 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2147 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2148 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2149 } 2150 passert(0); 2151 return ESR_SUCCESS; 2152 } 2153 2154 nbestSize = CA_NBestListCount(resultImpl->nbestList); 2155 } 2156 else 2157 nbestSize = 0; 2158 2159 if (resultImpl->results != NULL) 2160 ArrayListRemoveAll(resultImpl->results); 2161 else 2162 CHKLOG(rc, ArrayListCreate(&resultImpl->results)); 2163 if (nbestSize == 0) 2164 { 2165 /* 2166 * Got empty n-best list even though the recognition was successful. 2167 * We handle this in the same way that recog_startpt does... we consider it a no match. 2168 * We could adjust the CREC.Recognizer.viterbi_prune_thresh to a higher level, but that 2169 * may not fix the problem completely. We need to fix the bug in the astar search!!! 
2170 */ 2171 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2172 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2173 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2174 if (impl->eventLog != NULL) 2175 { 2176 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END"))); 2177 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2178 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2179 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2180 } 2181#ifdef SREC_ENGINE_VERBOSE_LOGGING 2182 PLogMessage(L("ESR_INVALID_STATE: got empty n-best list even though the recognition was successful")); 2183#endif 2184 return ESR_SUCCESS; /* we do not want to halt the app in this case */ 2185 } 2186 else 2187 { 2188 *status = SR_RECOGNIZER_EVENT_RECOGNITION_RESULT; 2189 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2190 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2191 if (impl->eventLog != NULL) 2192 { 2193 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END"))); 2194 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2195 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2196 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2197 } 2198 } 2199 2200 /** 2201 * All grammars associated with the recognizer are considered to be active 2202 * and therefore, I do a semantic parse on each. On the first grammar that 2203 * gives one or more semantic results, I stop parsing the other grammars. 
2204 */ 2205 CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarSize)); 2206 ASSERT( grammarSize == 1); 2207 2208 for (iBest = 0; iBest < nbestSize; ++iBest) 2209 { 2210 len = WORDID_COUNT; 2211 if (CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) != ESR_SUCCESS) 2212 { 2213 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2214 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2215 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2216 if (impl->eventLog != NULL) 2217 { 2218 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END"))); 2219 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2220 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2221 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2222 } 2223 PLogError(L("ESR_INVALID_STATE: got bad n-best list entry %d"), iBest); 2224 return ESR_INVALID_STATE; 2225 } 2226 2227 CHKLOG(rc, ArrayListCreate(&semanticList)); 2228 CHKLOG(rc, resultImpl->results->add(resultImpl->results, semanticList)); 2229 2230 grammarIndex_for_iBest = 0; 2231 CHKLOG(rc, impl->grammars->getKeyAtIndex(impl->grammars, grammarIndex_for_iBest, &pkey)); 2232 CHKLOG(rc, impl->grammars->get(impl->grammars, pkey, (void **)&pgrammar)); 2233 2234 CHKLOG(rc, SR_GrammarGetSize_tParameter((SR_Grammar*) pgrammar, L("locale"), &locale)); 2235 resultImpl->locale = locale; 2236 2237 /* I need to manage my semantic results external to the check parse function */ 2238 for (k = 0; k < MAX_SEM_RESULTS; ++k) 2239 SR_SemanticResultCreate(&semanticResults[k]); 2240 2241 /* 2242 The code here tries to make the voice-enrollment more effective. 
2243 The VE grammar decodes a sequence of best phonemes, but the nbest 2244 processing may find a better score for an alternative choice than 2245 the score of the viterbi best choice. The reason for this is that 2246 alternative choices don't honor cross-word context-dependency quite 2247 accurately. If we choose an alternative choice then the sequence of 2248 phoneme decoded does not correspond to the sequence of models decoded. 2249 To counter this, we FORCIBLY make sure the top choice here is the 2250 VITERBI top choice. 2251 */ 2252 2253 if (iBest == 0) 2254 { 2255 if (CA_IsEnrollmentSyntax( pgrammar->syntax)) { 2256 /* this was voice enrollment, so let's try to replace */ 2257 // char* word1 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[0]); 2258 // char* word2 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[1]); 2259 // if (!strncmp(word1,voice_enroll_word_prefix,VEWPLEN)&&!strncmp(word2,voice_enroll_word_prefix,VEWPLEN)) 2260 len = WORDID_COUNT; 2261 rc = CA_FullResultWordIDs(impl->recognizer, wordIDs, &len); 2262 if (rc != ESR_SUCCESS) 2263 { 2264 /* in case of problem with viterbi path choice, we revert back */ 2265 len = WORDID_COUNT; 2266 rc = CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) ; 2267 } 2268 } 2269 } 2270 2271 LSTRCPY(label, L("")); 2272 for (k = 0; wordIDs[k] != MAXwordID; ++k) 2273 { 2274 LCHAR* wordk = NULL; 2275 wordk = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[k]); 2276 LSTRCAT(label, wordk); 2277 LSTRCAT(label, L(" ")); 2278 } 2279 CHKLOG(rc, CA_ResultStripSlotMarkers(label)); 2280 passert(LSTRCMP(label, L("")) != 0); 2281 2282 /* strip the trailing blank */ 2283 k = LSTRLEN(label) - 1; 2284 if (k > 0 && label[k] == L(' ')) 2285 label[k] = 0; 2286 2287 semanticResultsSize = MAX_SEM_RESULTS; 2288 2289#if SEMPROC_ACTIVE 2290 2291 /* set the literal prior to processing so that semproc can read the value 2292 during processing */ 2293 CHKLOG(rc, 
pgrammar->semproc->flush(pgrammar->semproc)); 2294 CHKLOG(rc, pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), label)); 2295 2296 rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph, 2297 wordIDs, semanticResults, &semanticResultsSize); 2298 2299 /* rc = pgrammar->semproc->checkParse(pgrammar->semproc, pgrammar->semgraph, 2300 label, semanticResults, &semanticResultsSize); */ 2301 2302 if (rc != ESR_SUCCESS) 2303 { 2304 for (k = 0; k < MAX_SEM_RESULTS; ++k) 2305 { 2306 semanticResults[k]->destroy(semanticResults[k]); 2307 semanticResults[k] = NULL; 2308 } 2309 goto CLEANUP; 2310 } 2311#else 2312 semanticResultsSize = 0; 2313#endif 2314 /* cleanup the empty ones */ 2315 for (k = semanticResultsSize; k < MAX_SEM_RESULTS; ++k) 2316 { 2317 CHKLOG(rc, semanticResults[k]->destroy(semanticResults[k])); 2318 semanticResults[k] = NULL; 2319 } 2320 2321 /* save the good ones */ 2322 for (k = 0; k < semanticResultsSize; ++k) 2323 { 2324 /* 2325 * Save the pointer to the semantic result that was created. 
2326 * Remember that the semantic result array only holds pointers 2327 * and for each time that the function is called, new semantic results 2328 * are created, and the pointers overwrite old values in the array 2329 */ 2330 CHKLOG(rc, semanticList->add(semanticList, semanticResults[k])); 2331 } 2332 2333#if SEMPROC_ACTIVE 2334 if (semanticResultsSize > 0) 2335 { 2336 /* OSI log the grammar(s) that was used in recognizing */ 2337 psprintf(tok, L("GURI%d"), grammarIndex_for_iBest); 2338 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok)); 2339 } 2340#else 2341 /* OSI log the grammar(s) that was used in recognizing */ 2342 psprintf(tok, L("GURI%d"), grammarIndex_for_iBest); 2343 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok)); 2344#endif 2345 2346 /* Populate semantic results for each nbest list entry */ 2347 CHKLOG(rc, semanticList->getSize(semanticList, &semanticResultsSize)); 2348 if (semanticResultsSize == 0) 2349 { 2350 /* 2351 * If there was no semantic result... 
then I need to create one so that I can store 2352 * literal, conf, meaning which are default keys that must ALWAYS exist 2353 */ 2354 CHKLOG(rc, SR_SemanticResultCreate(&semanticResult)); 2355 CHKLOG(rc, semanticList->add(semanticList, semanticResult)); 2356 semanticResultsSize = 1; 2357 } 2358 2359 for (k = 0; k < semanticResultsSize;++k) 2360 { 2361 CHKLOG(rc, semanticList->get(semanticList, k, (void **)&semanticResult)); 2362 if (semanticResult == NULL) 2363 { 2364 PLogError(L("nbest entry contained NULL semanticResult"), ESR_INVALID_STATE); 2365 return ESR_INVALID_STATE; 2366 } 2367 2368 semanticImpl = (SR_SemanticResultImpl*) semanticResult; 2369 2370 /* put in the literal */ 2371 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG); 2372 if (lValue == NULL) 2373 { 2374 PLogError(L("ESR_OUT_OF_MEMORY")); 2375 return ESR_OUT_OF_MEMORY; 2376 } 2377 LSTRCPY(lValue, label); 2378 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("literal"), lValue)); 2379 2380 /* if the meaning is not set, then put in the meaning which will be the literal */ 2381 CHKLOG(rc, semanticImpl->results->containsKey(semanticImpl->results, L("meaning"), &containsKey)); 2382 if (!containsKey) 2383 { 2384 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG); 2385 if (lValue == NULL) 2386 { 2387 PLogError(L("ESR_OUT_OF_MEMORY")); 2388 return ESR_OUT_OF_MEMORY; 2389 } 2390 LSTRCPY(lValue, label); 2391 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("meaning"), lValue)); 2392 } 2393 2394 /* put in the raw score */ 2395 psprintf(label, L("%d"), raws); 2396 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG); 2397 if (lValue == NULL) 2398 { 2399 PLogError(L("ESR_OUT_OF_MEMORY")); 2400 return ESR_OUT_OF_MEMORY; 2401 } 2402 LSTRCPY(lValue, label); 2403 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("raws"), lValue)); 2404 } 2405 } 2406 2407 /* Now I have an nBest list where each entry has at least one semantic result */ 2408 /* What 
I need to do is filter out the nBest list entries which have matching 2409 semantic results for 'meaning' */ 2410 /* Once I have filtered out the nBest list based on this criteria, I can calculate the confidence 2411 score and populate the result of the first entry with the raw score */ 2412 2413#if FILTER_NBEST_BY_SEM_RESULT 2414 2415 for (iBest = nbestSize-1; iBest>0; iBest--) /* do not filter out nBest entry 0 */ 2416 { 2417 /** 2418 * This is the entry (indexed by i) targeted for removal 2419 * 2420 */ 2421 2422 /* get the nBest entry which you wish to remove (if duplicate found) */ 2423 CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void **)&semanticList)); 2424 2425 /* get the first sem_result for the entry */ 2426 CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult)); 2427 semanticImpl = (SR_SemanticResultImpl*) semanticResult; 2428 2429 /* get the meaning */ 2430 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue)); 2431 2432 /* get the other entries to check against (start with 0, end on the current i entry) */ 2433 for (jBest = 0; jBest < iBest; ++jBest) 2434 { 2435 /* 2436 * This is the entry (indexed by jBest) that we will compare with 2437 */ 2438 2439 /* get the nBest entry which you wish to compare with */ 2440 CHKLOG(rc, ArrayListGet(resultImpl->results, jBest, (void **)&semanticList2)); 2441 2442 CHKLOG(rc, ArrayListGet(semanticList2, 0, (void **)&semanticResult2)); 2443 semanticImpl2 = (SR_SemanticResultImpl*) semanticResult2; 2444 2445 CHKLOG(rc, semanticImpl2->results->get(semanticImpl2->results, L("meaning"), (void **)&lValue2)); 2446 if (LSTRCMP(lValue, lValue2) == 0) 2447 { 2448 /* pfprintf(PSTDOUT,"duplicate sem result found %d == %d\n", iBest, jBest); 2449 pfprintf(PSTDOUT,"removing %d\n", iBest); */ 2450 2451 /* removing from the list indexed by iBest */ 2452 CHKLOG(rc, semanticList->remove(semanticList, semanticResult)); 2453 CHKLOG(rc, semanticResult->destroy(semanticResult)); 
2454 2455 CHKLOG(rc, resultImpl->results->remove(resultImpl->results, semanticList)); 2456 CHKLOG(rc, semanticList->destroy(semanticList)); 2457 2458 if (!CA_NBestListRemoveResult(resultImpl->nbestList, iBest)) 2459 return ESR_ARGUMENT_OUT_OF_BOUNDS; 2460 break; 2461 } 2462 } 2463 } 2464 nbestSize = CA_NBestListCount(resultImpl->nbestList); 2465#endif 2466 2467 CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize)); 2468 2469 if (nbestSize) 2470 { 2471 if(CA_ComputeConfidenceValues(impl->confidenceScorer, impl->recognizer, resultImpl->nbestList)) 2472 return ESR_INVALID_STATE; 2473 2474 for(current_choice=nbestSize-1;current_choice>=0;current_choice--) 2475 { 2476 /* get the nBest entry you want to deal with */ 2477 CHKLOG(rc, ArrayListGet(resultImpl->results, current_choice, (void **)&semanticList)); 2478 /* get the first sem_result for that entry */ 2479 CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult)); 2480 semanticImpl = (SR_SemanticResultImpl*) semanticResult; 2481 2482 /* put in the conf value for that nBest entry */ 2483 if(!CA_NBestListGetResultConfidenceValue( resultImpl->nbestList, current_choice, &confValue)) 2484 return ESR_ARGUMENT_OUT_OF_BOUNDS; 2485 2486 psprintf(label, L("%d"), confValue); 2487 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG); 2488 if (lValue == NULL) 2489 { 2490 PLogError(L("ESR_OUT_OF_MEMORY")); 2491 return ESR_OUT_OF_MEMORY; 2492 } 2493 LSTRCPY(lValue, label); 2494 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("conf"),lValue)); 2495 } 2496 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("CMPT"), 0)); 2497 } 2498 2499 /* OSI log the end of recognition and all bufferred tokens */ 2500 2501 /* OSI log end of recognition time */ 2502 PTimeStampSet(&EORT); 2503 impl->recogLogTimings.EORT = PTimeStampDiff(&EORT, &impl->timestamp); 2504 impl->recogLogTimings.DURS = impl->processed * MSEC_PER_FRAME; 2505 2506 /*****************************************/ 2507 
/* OSI Logging stuff */ 2508 /*****************************************/ 2509if( impl->osi_log_level != 0) 2510 { 2511 /* get the nBest size (this size may have changed since previous set cuz of nbest list filtering) */ 2512 CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize)); 2513 /* OSI log the nBest list size */ 2514 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("NBST"), nbestSize)); 2515 2516 2517 for (iBest = 0; iBest < nbestSize; iBest++) /* loop */ 2518 { 2519 /* get the nBest entry */ 2520 CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void**)&semanticList)); 2521 2522 /* get the first sem_result for the entry (ther emay be many, but ignore others) */ 2523 CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult)); 2524 semanticImpl = (SR_SemanticResultImpl*) semanticResult; 2525 2526 /* get the meaning and OSI log it */ 2527 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue)); 2528 /* OSI log RSLT (meaning) for nbest item */ 2529 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSLT"), lValue)); 2530 2531 /* get the literal and OSI log it */ 2532 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("literal"), (void **)&lValue)); 2533 /* OSI log RAWT SPOK (literal) for nbest item */ 2534 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWT"), lValue)); 2535 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SPOK"), lValue)); 2536 2537 /* get the score and OSI log it */ 2538 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("raws"), (void **)&lValue)); 2539 /* OSI log RAWS (score) for nbest item */ 2540 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWS"), lValue)); 2541 /* get the confidence value and OSI log it */ 2542 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("conf"), (void **)&lValue)); 2543 /* OSI log CONF 
(values) for nbest item */ 2544 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("CONF"), lValue)); 2545 } 2546 2547 /* log the values */ 2548 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BORT"), impl->recogLogTimings.BORT)); 2549 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("DURS"), impl->recogLogTimings.DURS)); 2550 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EORT"), impl->recogLogTimings.EORT)); 2551 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSD"), impl->recogLogTimings.EOSD)); 2552 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSS"), impl->recogLogTimings.EOSS)); 2553 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOST"), impl->recogLogTimings.EOST)); 2554 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO) 2555 { 2556 len = P_PATH_MAX; 2557 CHKLOG(rc, SR_EventLogAudioGetFilename(impl->eventLog, waveformFilename, &len)); 2558 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("WVNM"), waveformFilename)); 2559 } 2560 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSTT"), L("ok"))); 2561 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RENR"), L("ok"))); 2562 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("ENDR"), impl->eos_reason)); 2563 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcnd"))); 2564 2565 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSS"), impl->recogLogTimings.BOSS)); /* extra not in OSI spec */ 2566 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRboss"))); 2567 2568 /* 2569 * Record which recognizer was the successful one (male or female) 2570 * this index refers to the order in the swimdllist file. 
2571 */ 2572 CHKLOG(rc, CA_GetRecogID(impl->recognizer, &recogID)); 2573 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("RECOG"), recogID)); 2574 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRrcid"))); 2575 2576 /* Record semantic results returned by top nbestlist entry */ 2577 if (1) 2578 { 2579#define MAX_SEMANTIC_KEYS 50 2580 LCHAR* semanticKeys[MAX_SEMANTIC_KEYS]; 2581#define SEMANTIC_VALUE_SIZE 512 2582 LCHAR semanticValue[SEMANTIC_VALUE_SIZE]; 2583 size_t num_semanticKeys; 2584 2585 rc = resultImpl->results->getSize(resultImpl->results, &nbestSize); 2586 if (rc != ESR_SUCCESS) 2587 { 2588 PLogError(ESR_rc2str(rc)); 2589 goto DONE; 2590 } 2591 for (iBest = 0; iBest < nbestSize; ++iBest) /* loop2 */ 2592 { 2593 rc = resultImpl->results->get(resultImpl->results, iBest, (void **)&semanticList); 2594 if (rc != ESR_SUCCESS) 2595 { 2596 PLogError(ESR_rc2str(rc)); 2597 goto DONE; 2598 } 2599 2600 /* semanticResultsSize is the number of semantic meanings, although 2601 ambiguous parses are not entirely supported 2602 num_semanticKeys is associated to a particular parse */ 2603 2604 rc = semanticList->getSize(semanticList, &semanticResultsSize); 2605 if (rc != ESR_SUCCESS) 2606 { 2607 PLogError(ESR_rc2str(rc)); 2608 goto DONE; 2609 } 2610 for (k = 0; k < semanticResultsSize; ++k) 2611 { 2612 size_t iKey; 2613 rc = semanticList->get(semanticList, k, (void **)&semanticResult); 2614 if (rc != ESR_SUCCESS) 2615 { 2616 PLogError(ESR_rc2str(rc)); 2617 goto DONE; 2618 } 2619 num_semanticKeys = MAX_SEMANTIC_KEYS; 2620 rc = semanticResult->getKeyList(semanticResult, (LCHAR**) & semanticKeys, &num_semanticKeys); 2621 if (rc != ESR_SUCCESS) 2622 { 2623 PLogError(ESR_rc2str(rc)); 2624 goto DONE; 2625 } 2626 2627 for (iKey=0; iKey<num_semanticKeys; ++iKey) 2628 { 2629 len = SEMANTIC_VALUE_SIZE; 2630 rc = semanticResult->getValue(semanticResult, semanticKeys[iKey], (LCHAR*) &semanticValue, &len); 2631 if (rc != ESR_SUCCESS) 
2632 { 2633 PLogError(ESR_rc2str(rc)); 2634 goto DONE; 2635 } 2636 2637 rc = SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, semanticKeys[iKey], semanticValue); 2638 if (rc != ESR_SUCCESS) 2639 { 2640 PLogError(ESR_rc2str(rc)); 2641 goto DONE; 2642 } 2643 } 2644 } 2645 } 2646 rc = SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESR_SemanticResult[0]")); 2647 if (rc != ESR_SUCCESS) 2648 { 2649 PLogError(ESR_rc2str(rc)); 2650 goto DONE; 2651 } 2652 } 2653} 2654DONE: 2655 return ESR_SUCCESS; 2656CLEANUP: 2657 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2658 return rc; 2659} 2660 2661/** 2662 * Indicates if it is possible to push data from SREC into the internal recognizer. 2663 * If data can be pushed, ESR_CONTINUE_PROCESSING is returned. 2664 * 2665 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION 2666 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI 2667 */ 2668PINLINE ESR_ReturnCode canPushAudioIntoRecognizer(SR_RecognizerImpl* impl) 2669{ 2670 ESR_ReturnCode rc; 2671 2672 if (impl->lockFunction) 2673 impl->lockFunction(ESR_LOCK, impl->lockData); 2674 2675 /* do I have enough to make a frame ? 
*/ 2676 if (CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE) 2677 { 2678 /* Not enough data */ 2679 if (!impl->gotLastFrame) 2680 { 2681 /* not last frame, so ask for more audio */ 2682 if (impl->lockFunction) 2683 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2684 return ESR_SUCCESS; 2685 } 2686 else 2687 { 2688 /* last frame, make do with what you have */ 2689 if (impl->lockFunction) 2690 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2691#ifdef SREC_ENGINE_VERBOSE_LOGGING 2692 PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed); 2693#endif 2694 impl->isRecognizing = ESR_FALSE; 2695 impl->recogLogTimings.EOSD = impl->frames; 2696 impl->eos_reason = L("EOI"); 2697 impl->internalState = SR_RECOGNIZER_INTERNAL_EOI; 2698 if (impl->eventLog != NULL) 2699 { 2700 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_EOI"))); 2701 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2702 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2703 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2704 } 2705 return ESR_CONTINUE_PROCESSING; 2706 } 2707 } 2708 if (impl->lockFunction) 2709 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2710 return ESR_CONTINUE_PROCESSING; 2711CLEANUP: 2712 return rc; 2713} 2714 2715/** 2716 * Pushes data from SREC into the internal recognizer. 
2717 * 2718 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION 2719 * OUTPUT STATES: same 2720 */ 2721PINLINE ESR_ReturnCode pushAudioIntoRecognizer(SR_RecognizerImpl* impl, SR_RecognizerStatus* status, 2722 SR_RecognizerResultType* type, 2723 SR_RecognizerResult* result) 2724{ 2725 size_t count; 2726 ESR_ReturnCode rc; 2727 2728 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff) 2729 { 2730 /* Don't push frames unless they're needed */ 2731 2732 /* Check for leaked state */ 2733 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 2734 return ESR_CONTINUE_PROCESSING; 2735 } 2736 if (impl->lockFunction) 2737 impl->lockFunction(ESR_LOCK, impl->lockData); 2738 count = CircularBufferRead(impl->buffer, impl->audioBuffer, impl->FRAME_SIZE); 2739 if (impl->lockFunction) 2740 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2741 2742 WaveformBuffer_Write(impl->waveformBuffer, impl->audioBuffer, count); 2743 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO) 2744 { 2745 rc = SR_EventLogAudioWrite(impl->eventLog, impl->audioBuffer, count); 2746 if (rc == ESR_BUFFER_OVERFLOW) 2747 rc = ESR_INVALID_STATE; 2748 if (rc != ESR_SUCCESS) 2749 { 2750 PLogError(ESR_rc2str(rc)); 2751 if (impl->lockFunction) 2752 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2753 goto CLEANUP; 2754 } 2755 } 2756 if (count < impl->FRAME_SIZE) 2757 { 2758 rc = ESR_INVALID_STATE; 2759 PLogError(L("%s: error reading buffer data (count=%d, frameSize=%d)"), ESR_rc2str(rc), count, impl->FRAME_SIZE); 2760 goto CLEANUP; 2761 } 2762 if (!CA_LoadSamples(impl->wavein, impl->audioBuffer, impl->sampleRate / FRAMERATE)) 2763 { 2764 PLogError(L("ESR_INVALID_STATE")); 2765 rc = ESR_INVALID_STATE; 2766 goto CLEANUP; 2767 } 2768 2769 CA_ConditionSamples(impl->wavein); 2770 /* Check for leaked state */ 2771 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 2772 return 
ESR_CONTINUE_PROCESSING; 2773CLEANUP: 2774 return rc; 2775} 2776 2777/** 2778 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION 2779 * OUTPUT STATES: same 2780 */ 2781PINLINE ESR_ReturnCode generateFrameFromAudio(SR_RecognizerImpl* impl, SR_RecognizerStatus* status, 2782 SR_RecognizerResultType* type, 2783 SR_RecognizerResult* result) 2784{ 2785 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff) 2786 { 2787 /* Don't create frames unless they're needed */ 2788 2789 /* Check for leaked state */ 2790 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 2791 return ESR_CONTINUE_PROCESSING; 2792 } 2793 2794 /* Try processing one frame */ 2795 if (!CA_MakeFrame(impl->frontend, impl->utterance, impl->wavein)) 2796 { 2797 /* 2798 * One of three cases occured: 2799 * 2800 * - We don't have enough samples to process one frame. This should be impossible because 2801 * pushAudioIntoRecognizer() is always called before us and will not continue if we don't 2802 * have enough samples. 2803 * 2804 * - The internal recognizer needs a minimum amount of audio before it'll begin generating 2805 * frames. This is normal and we return with a success value. 2806 * 2807 * - The recognizer skips every even frame number (for performance reasons). This is normal 2808 * and we return with a success value. 
2809 */ 2810 *status = SR_RECOGNIZER_EVENT_INCOMPLETE; 2811 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 2812 return ESR_SUCCESS; 2813 } 2814 ++impl->frames; 2815 /* Check for leaked state */ 2816 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 2817 return ESR_CONTINUE_PROCESSING; 2818} 2819 2820/** 2821 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION 2822 * OUTPUT STATES: same 2823 */ 2824PINLINE ESR_ReturnCode generateFrameStats(SR_RecognizerImpl* impl, SR_RecognizerStatus* status, 2825 SR_RecognizerResultType* type, 2826 SR_RecognizerResult* result) 2827{ 2828 if (impl->frames < impl->bgsniff) 2829 { 2830 /* Wait until we have enough frames to estimate background stats */ 2831 *status = SR_RECOGNIZER_EVENT_INCOMPLETE; 2832 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 2833 return ESR_SUCCESS; 2834 } 2835 else if (impl->frames == impl->bgsniff) 2836 CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->bgsniff); 2837 2838 /* Check for leaked state */ 2839 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 2840 return ESR_CONTINUE_PROCESSING; 2841} 2842 2843/** 2844 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION 2845 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS 2846 */ 2847PINLINE ESR_ReturnCode generatePatternFromFrame(SR_RecognizerImpl* impl, SR_RecognizerStatus* status, 2848 SR_RecognizerResultType* type, 2849 SR_RecognizerResult* result) 2850{ 2851 SR_AcousticModelsImpl* modelsImpl; 2852 ESR_ReturnCode rc; 2853 2854 /* Run the search */ 2855 modelsImpl = (SR_AcousticModelsImpl*) impl->models; 2856 if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance)) 2857 { 2858 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2859 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2860 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2861 if (impl->eventLog != NULL) 2862 { 2863 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), 
L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_END"))); 2864 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2865 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2866 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2867 } 2868 PLogError(L("ESR_INVALID_STATE")); 2869 return ESR_INVALID_STATE; 2870 } 2871 if (!CA_AdvanceUtteranceFrame(impl->utterance)) 2872 { 2873 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2874 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2875 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2876 if (impl->eventLog != NULL) 2877 { 2878 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_END"))); 2879 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2880 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2881 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2882 } 2883 PLogError(L("ESR_INVALID_STATE")); 2884 return ESR_INVALID_STATE; 2885 } 2886 CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance); 2887 ++impl->processed; 2888 2889 if (impl->lockFunction) 2890 impl->lockFunction(ESR_LOCK, impl->lockData); 2891 if (impl->gotLastFrame && CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE) 2892 { 2893 /* 2894 * SREC have run out of data but the underlying recognizer might have some frames 2895 * queued for processing. 
2896 */ 2897 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0) 2898 { 2899 /* EOI means end of input */ 2900#ifdef SREC_ENGINE_VERBOSE_LOGGING 2901 PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed); 2902#endif 2903 impl->isRecognizing = ESR_FALSE; 2904 impl->recogLogTimings.EOSD = impl->frames; 2905 impl->eos_reason = L("EOI"); 2906 impl->internalState = SR_RECOGNIZER_INTERNAL_EOI; 2907 if (impl->eventLog != NULL) 2908 { 2909 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOI"))); 2910 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2911 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2912 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2913 } 2914 } 2915 else 2916 { 2917#ifdef SREC_ENGINE_VERBOSE_LOGGING 2918 PLogMessage("L: Voicing END (EOF) at %d frames (%d processed)", impl->frames, impl->processed); 2919#endif 2920 2921 impl->isRecognizing = ESR_FALSE; 2922 impl->recogLogTimings.EOSD = impl->frames; 2923 impl->eos_reason = L("EOF"); 2924 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS; 2925 if (impl->eventLog != NULL) 2926 { 2927 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOS"))); 2928 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2929 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2930 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 2931 } 2932 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING; 2933 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 2934 passert(impl->processed == 
impl->frames); 2935 if (impl->lockFunction) 2936 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2937 return ESR_SUCCESS; 2938 } 2939 } 2940 if (impl->lockFunction) 2941 impl->lockFunction(ESR_UNLOCK, impl->lockData); 2942 2943 /* Check for leaked state */ 2944 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 2945 return ESR_CONTINUE_PROCESSING; 2946CLEANUP: 2947 return rc; 2948} 2949 2950/** 2951 * Same as generatePatternFromFrame() only the buffer is known to be empty. 2952 * 2953 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI 2954 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS 2955 */ 2956PINLINE ESR_ReturnCode generatePatternFromFrameEOI(SR_RecognizerImpl* impl, SR_RecognizerStatus* status, 2957 SR_RecognizerResultType* type, 2958 SR_RecognizerResult* result) 2959{ 2960 SR_AcousticModelsImpl* modelsImpl; 2961 ESR_ReturnCode rc; 2962 2963 /* Run the search */ 2964 modelsImpl = (SR_AcousticModelsImpl*) impl->models; 2965 2966 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0) 2967 { 2968 passert(impl->processed == impl->frames); 2969 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING; 2970 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 2971 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS; 2972 return ESR_SUCCESS; 2973 } 2974 2975 if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance)) 2976 { 2977 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2978 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2979 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2980 if (impl->eventLog != NULL) 2981 { 2982 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END"))); 2983 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2984 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 2985 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, 
impl->osi_log_level, L("SR_Recognizer"))); 2986 } 2987 PLogError(L("ESR_INVALID_STATE")); 2988 return ESR_INVALID_STATE; 2989 } 2990 if (!CA_AdvanceUtteranceFrame(impl->utterance)) 2991 { 2992 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 2993 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 2994 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 2995 if (impl->eventLog != NULL) 2996 { 2997 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END"))); 2998 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 2999 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 3000 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 3001 } 3002 PLogError(L("ESR_INVALID_STATE")); 3003 return ESR_INVALID_STATE; 3004 } 3005 CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance); 3006 ++impl->processed; 3007 3008 if (impl->lockFunction) 3009 impl->lockFunction(ESR_LOCK, impl->lockData); 3010 3011 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0) 3012 { 3013 passert(impl->processed == impl->frames); 3014 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING; 3015 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3016 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS; 3017 if (impl->eventLog != NULL) 3018 { 3019 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_EOS"))); 3020 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 3021 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 3022 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 3023 } 3024 if (impl->lockFunction) 3025 
impl->lockFunction(ESR_UNLOCK, impl->lockData); 3026 return ESR_SUCCESS; 3027 } 3028 if (impl->lockFunction) 3029 impl->lockFunction(ESR_UNLOCK, impl->lockData); 3030 3031 /* Check for leaked state */ 3032 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 3033 return ESR_CONTINUE_PROCESSING; 3034CLEANUP: 3035 if (impl->lockFunction) 3036 impl->lockFunction(ESR_UNLOCK, impl->lockData); 3037 return rc; 3038} 3039 3040 3041/** 3042 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS_DETECTION 3043 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS 3044 */ 3045ESR_ReturnCode detectEndOfSpeech(SR_RecognizerImpl* impl, SR_RecognizerStatus* status, 3046 SR_RecognizerResultType* type, 3047 SR_RecognizerResult* result) 3048{ 3049 EOSrc eos; /* eos means end of speech */ 3050 int eos_by_level; /* eos means end of speech */ 3051 PTimeStamp timestamp; 3052 ESR_ReturnCode rc; 3053 ESR_BOOL enableGetWaveform = ESR_FALSE; 3054 3055 eos_by_level = CA_UtteranceHasEnded(impl->utterance); 3056 if (eos_by_level) 3057 { 3058 eos = SPEECH_ENDED_BY_LEVEL_TIMEOUT; 3059 } 3060 else 3061 { 3062 eos = CA_IsEndOfUtteranceByResults(impl->recognizer); 3063 } 3064 3065 impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform); 3066 3067 if (eos == VALID_SPEECH_CONTINUING && enableGetWaveform && impl->waveformBuffer->overflow_count > 0) 3068 { 3069 size_t bufferSize; 3070 CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &bufferSize)); 3071 PLogMessage("Forcing EOS due to wfbuf overflow (fr=%d,sz=%d,of=%d)", impl->frames, bufferSize, impl->waveformBuffer->overflow_count); 3072 eos = SPEECH_TOO_LONG; 3073 } 3074 3075 if (eos != VALID_SPEECH_CONTINUING) 3076 { 3077 switch (eos) 3078 { 3079 case SPEECH_ENDED: 3080 /* normal */ 3081 impl->eos_reason = L("itimeout"); 3082 break; 3083 3084 case SPEECH_ENDED_WITH_ERROR: 3085 /* error */ 3086 impl->eos_reason = L("err"); 3087 break; 3088 3089 case SPEECH_TOO_LONG: 
3090 /* timeout*/ 3091 impl->eos_reason = L("ctimeout"); 3092 break; 3093 3094 case SPEECH_MAYBE_ENDED: 3095 /* normal */ 3096 impl->eos_reason = L("itimeout"); 3097 break; 3098 case SPEECH_ENDED_BY_LEVEL_TIMEOUT: 3099 /* normal */ 3100 impl->eos_reason = L("levelTimeout"); 3101 break; 3102 3103 default: 3104 /* error */ 3105 impl->eos_reason = L("err"); 3106 } 3107 3108#ifdef SREC_ENGINE_VERBOSE_LOGGING 3109 PLogMessage("L: Voicing END (EOS) at %d frames, %d processed (reason: %s)\n", impl->frames, impl->processed, impl->eos_reason); 3110#endif 3111 3112 impl->recogLogTimings.EOSD = impl->frames; /* how many frames have been sent prior to detect EOS */ 3113 PTimeStampSet(×tamp); /* time it took to detect EOS (in millisec) */ 3114 impl->recogLogTimings.EOST = PTimeStampDiff(×tamp, &impl->timestamp); 3115 3116 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING; 3117 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3118 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS; 3119 if (impl->eventLog != NULL) 3120 { 3121 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("detectEndOfSpeech() -> SR_RECOGNIZER_INTERNAL_EOS"))); 3122 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("reason"), impl->eos_reason)); 3123 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames)); 3124 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed)); 3125 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer"))); 3126 } 3127 impl->isRecognizing = ESR_FALSE; 3128 return ESR_SUCCESS; 3129 } 3130 3131 /* Check for leaked state */ 3132 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID); 3133 return ESR_CONTINUE_PROCESSING; 3134CLEANUP: 3135 return rc; 3136} 3137 3138/** 3139 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION 3140 * OUTPUT STATES: same or 
SR_RECOGNIZER_INTERNAL_EOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOI 3141 */ 3142ESR_ReturnCode detectBeginningOfSpeech(SR_RecognizerImpl* impl, 3143 SR_RecognizerStatus* status, 3144 SR_RecognizerResultType* type, 3145 SR_RecognizerResult* result) 3146{ 3147 ESR_ReturnCode rc; 3148 ESR_BOOL gatedMode; 3149 size_t num_windback_bytes, num_windback_frames; 3150 waveform_buffering_state_t buffering_state; 3151 3152 CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &gatedMode)); 3153 3154 if (gatedMode || (!gatedMode && impl->frames < impl->bgsniff)) 3155 { 3156 ESR_BOOL pushable = ESR_FALSE; 3157 3158 rc = canPushAudioIntoRecognizer(impl); 3159 if (rc == ESR_SUCCESS) 3160 { 3161 /* Not enough samples to process one frame */ 3162 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0) 3163 { 3164 *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO; 3165 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3166 return ESR_SUCCESS; 3167 } 3168 } 3169 else if (rc != ESR_CONTINUE_PROCESSING) 3170 return rc; 3171 else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI) 3172 { 3173 /* Got end of input before beginning of speech */ 3174 *status = SR_RECOGNIZER_EVENT_NO_MATCH; 3175 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 3176 impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH; 3177 CHKLOG(rc, impl->Interface.stop(&impl->Interface)); 3178 return ESR_SUCCESS; 3179 } 3180 else 3181 pushable = ESR_TRUE; 3182 if (pushable) 3183 { 3184 rc = pushAudioIntoRecognizer(impl, status, type, result); 3185 /* OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI */ 3186 if (rc != ESR_CONTINUE_PROCESSING) 3187 { 3188 /* Not enough samples to process one frame */ 3189 return rc; 3190 } 3191 rc = generateFrameFromAudio(impl, status, type, result); 3192 /* OUTPUT STATES: same */ 3193 if (rc != ESR_CONTINUE_PROCESSING) 3194 { 3195 /* 3196 * The internal recognizer needs a minimum amount of audio before 3197 * it begins generating frames. 
3198 */ 3199 return rc; 3200 } 3201 } 3202 if (!CA_AdvanceUtteranceFrame(impl->utterance)) 3203 { 3204 PLogError(L("ESR_INVALID_STATE: Failed Advancing Utt Frame %d"), impl->frames); 3205 return ESR_INVALID_STATE; 3206 } 3207 if (CA_UtteranceHasVoicing(impl->utterance)) 3208 { 3209 /* Utterance stats for Lombard if enough frames */ 3210 if (impl->frames > impl->bgsniff) 3211 { 3212#ifdef SREC_ENGINE_VERBOSE_LOGGING 3213 PLogMessage("L: Voicing START at %d frames", impl->frames); 3214#endif 3215 /* OSI log the endpointed data */ 3216 3217 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BTIM"), impl->frames * MSEC_PER_FRAME)); 3218 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BRGN"), 0)); /* Barge-in not supported */ 3219 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIendp"))); 3220 3221 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSD"), impl->frames)); 3222 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRbosd"))); 3223 3224 if (gatedMode) 3225 CA_CalculateUtteranceStatistics(impl->utterance, (int)(impl->frames * -1), 0); 3226 else 3227 CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->frames); 3228 } 3229 3230 /* OK, we've got voicing or the end of input has occured 3231 ** (or both, I suppose). If we had voicing then progress 3232 ** the recognizer, otherwise skip to the end. 3233 ** Of course, we could be running outside 'Gated Mode' 3234 ** so we won't have any frames processed at all yet - 3235 ** in this case start the recognizer anyway. 
3236 */ 3237 3238 /************************************* 3239 ** Run recognition until endOfInput ** 3240 *************************************/ 3241 3242 /* 3243 * Initialize both recognizers first 3244 * and disable reporting of results 3245 */ 3246 if (gatedMode) 3247 { 3248 /* 3249 * We're in Gated Mode - 3250 * Because we'll have had voicing we wind-back 3251 * until the start of voicing (unsure region) 3252 */ 3253 num_windback_frames = CA_SeekStartOfUtterance(impl->utterance); 3254 impl->beginningOfSpeechOffset = impl->frames - num_windback_frames; 3255 num_windback_bytes = num_windback_frames * impl->FRAME_SIZE * 2 /* due to skip even frames */; 3256 3257 /* pfprintf(PSTDOUT,L("audio buffer windback %d frames == %d bytes\n"), num_windback_frames, num_windback_bytes); */ 3258 CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state)); 3259 if (buffering_state != WAVEFORM_BUFFERING_OFF) 3260 CHKLOG(rc, WaveformBuffer_WindBack(impl->waveformBuffer, num_windback_bytes)); 3261 3262 /* 3263 * Only transition to linear if it was previously circular (in other words if 3264 * buffering was active in the first place) 3265 */ 3266 if (buffering_state == WAVEFORM_BUFFERING_ON_CIRCULAR) 3267 CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_LINEAR)); 3268 impl->frames = CA_GetUnprocessedFramesInUtterance(impl->utterance); 3269 } 3270 else 3271 impl->frames = 0; 3272 /* reset the frames */ 3273 impl->processed = 0; 3274 CHKLOG(rc, beginRecognizing(impl)); 3275 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION; 3276 *status = SR_RECOGNIZER_EVENT_START_OF_VOICING; 3277 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3278 return ESR_SUCCESS; 3279 } 3280 else 3281 { 3282 if (impl->frames > impl->utterance_timeout) 3283 { 3284 /* beginning of speech timeout */ 3285 impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT; 3286 *status = SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT; 3287 *type = 
SR_RECOGNIZER_RESULT_TYPE_COMPLETE; 3288 CHKLOG(rc, impl->Interface.stop(&impl->Interface)); 3289 return ESR_SUCCESS; 3290 } 3291 } 3292 } 3293 else if (!gatedMode && impl->frames >= impl->bgsniff) 3294 { 3295 /* 3296 * If not gated mode and I have processed enough frames, then start the recognizer 3297 * right away. 3298 */ 3299 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION; 3300 *status = SR_RECOGNIZER_EVENT_INCOMPLETE; 3301 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3302 3303 /* reset the frames */ 3304 impl->frames = impl->processed = 0; 3305 CHKLOG(rc, beginRecognizing(impl)); 3306 return ESR_SUCCESS; 3307 } 3308 *status = SR_RECOGNIZER_EVENT_INCOMPLETE; 3309 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3310 return ESR_SUCCESS; 3311 3312CLEANUP: 3313 return rc; 3314} 3315 3316ESR_ReturnCode SR_RecognizerAdvanceImpl(SR_Recognizer* self, SR_RecognizerStatus* status, 3317 SR_RecognizerResultType* type, 3318 SR_RecognizerResult** result) 3319{ 3320 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3321 ESR_BOOL pushable; 3322 ESR_ReturnCode rc; 3323 3324 if (status == NULL || type == NULL || result == NULL) 3325 { 3326 PLogError(L("ESR_INVALID_ARGUMENT")); 3327 return ESR_INVALID_ARGUMENT; 3328 } 3329 3330 /* create the result holder and save the pointer */ 3331 /* creation only happens once (due to the if condition) */ 3332 if (impl->result == NULL) 3333 CHKLOG(rc, SR_RecognizerResult_Create(&impl->result, impl)); 3334 *result = impl->result; 3335 3336 /* 3337 * The following two lines are used to detect bugs whereby we forget to set 3338 * status or type before returning 3339 */ 3340 *status = SR_RECOGNIZER_EVENT_INVALID; 3341 *type = SR_RECOGNIZER_INVALID; 3342 3343MOVE_TO_NEXT_STATE: 3344 switch (impl->internalState) 3345 { 3346 case SR_RECOGNIZER_INTERNAL_BEGIN: 3347 impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_DETECTION; 3348 *status = SR_RECOGNIZER_EVENT_STARTED; 3349 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3350 return ESR_SUCCESS; 3351 3352 
case SR_RECOGNIZER_INTERNAL_BOS_DETECTION: 3353 rc = detectBeginningOfSpeech(impl, status, type, impl->result); 3354 if (rc != ESR_CONTINUE_PROCESSING) 3355 { 3356 /* 3357 * SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION, or 3358 * SR_RECOGNIZER_INTERNAL_EOI 3359 */ 3360 return rc; 3361 } 3362 /* Leaked state */ 3363 passert(0); 3364 break; 3365 3366 case SR_RECOGNIZER_INTERNAL_EOS_DETECTION: 3367 pushable = ESR_FALSE; 3368 rc = canPushAudioIntoRecognizer(impl); 3369 if (rc == ESR_SUCCESS) 3370 { 3371 /* Not enough samples to process one frame */ 3372 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0) 3373 { 3374 *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO; 3375 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3376 return ESR_SUCCESS; 3377 } 3378 } 3379 else if (rc != ESR_CONTINUE_PROCESSING) 3380 return rc; 3381 else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI) 3382 goto MOVE_TO_NEXT_STATE; 3383 else 3384 pushable = ESR_TRUE; 3385 if (pushable) 3386 { 3387 rc = pushAudioIntoRecognizer(impl, status, type, impl->result); 3388 if (rc != ESR_CONTINUE_PROCESSING) 3389 { 3390 /* Not enough samples to process one frame */ 3391 return rc; 3392 } 3393 if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI) 3394 goto MOVE_TO_NEXT_STATE; 3395 rc = generateFrameFromAudio(impl, status, type, impl->result); 3396 if (rc != ESR_CONTINUE_PROCESSING) 3397 { 3398 /* 3399 * The internal recognizer needs a minimum amount of audio before 3400 * it begins generating frames. 
3401 */ 3402 return rc; 3403 } 3404 } 3405 rc = generateFrameStats(impl, status, type, impl->result); 3406 if (rc != ESR_CONTINUE_PROCESSING) 3407 { 3408 /* Not enough frames to calculate stats */ 3409 return rc; 3410 } 3411 rc = generatePatternFromFrame(impl, status, type, impl->result); 3412 if (rc != ESR_CONTINUE_PROCESSING) 3413 { 3414 /* End of speech detected */ 3415 return rc; 3416 } 3417 if (impl->internalState == SR_RECOGNIZER_INTERNAL_END) 3418 goto MOVE_TO_NEXT_STATE; 3419 rc = detectEndOfSpeech(impl, status, type, impl->result); 3420 if (rc != ESR_CONTINUE_PROCESSING) 3421 { 3422#ifdef SREC_MEASURE_LATENCY 3423 gettimeofday ( &latency_start, NULL ); 3424 printf ( "Start Time : %ld Seconds %ld Microseconds\n", latency_start.tv_sec, latency_start.tv_usec ); 3425#endif 3426 3427 /* End of speech detected */ 3428 return rc; 3429 } 3430 *status = SR_RECOGNIZER_EVENT_INCOMPLETE; 3431 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3432 return ESR_SUCCESS; 3433 3434 case SR_RECOGNIZER_INTERNAL_EOI: 3435 /* 3436 * On EOI (end of input), we need to process the remaining frames that had not 3437 * been processed when PutAudio set the gotLastFrame flag 3438 */ 3439 rc = generatePatternFromFrameEOI(impl, status, type, impl->result); 3440 if (rc != ESR_CONTINUE_PROCESSING) 3441 { 3442 /* End of speech detected */ 3443 return rc; 3444 } 3445 rc = detectEndOfSpeech(impl, status, type, impl->result); 3446 if (rc != ESR_CONTINUE_PROCESSING) 3447 { 3448#ifdef SREC_MEASURE_LATENCY 3449 gettimeofday ( &latency_start, NULL ); 3450 printf ( "Start Time : %ld Seconds %ld Microseconds\n", latency_start.tv_sec, latency_start.tv_usec ); 3451#endif 3452 /* End of speech detected */ 3453 return rc; 3454 } 3455 *status = SR_RECOGNIZER_EVENT_INCOMPLETE; 3456 *type = SR_RECOGNIZER_RESULT_TYPE_NONE; 3457 return ESR_SUCCESS; 3458 3459 case SR_RECOGNIZER_INTERNAL_EOS: 3460 /* On EOS (end of speech detected - not due to end of input), create the result */ 3461 if (impl->lockFunction) 3462 
impl->lockFunction(ESR_LOCK, impl->lockData); 3463 CircularBufferReset(impl->buffer); 3464 if (impl->lockFunction) 3465 impl->lockFunction(ESR_UNLOCK, impl->lockData); 3466 CHKLOG(rc, SR_RecognizerCreateResultImpl((SR_Recognizer*) impl, status, type)); 3467 impl->internalState = SR_RECOGNIZER_INTERNAL_END; 3468 return ESR_SUCCESS; 3469 3470 case SR_RECOGNIZER_INTERNAL_END: 3471 return ESR_SUCCESS; 3472 default: 3473 PLogError(L("ESR_INVALID_STATE")); 3474 return ESR_INVALID_STATE; 3475 } 3476CLEANUP: 3477 return rc; 3478} 3479 3480 3481 3482ESR_ReturnCode SR_RecognizerLoadUtteranceImpl(SR_Recognizer* self, const LCHAR* filename) 3483{ 3484 /* TODO: complete */ 3485 return ESR_SUCCESS; 3486} 3487 3488ESR_ReturnCode SR_RecognizerLoadWaveFileImpl(SR_Recognizer* self, const LCHAR* filename) 3489{ 3490 /* TODO: complete */ 3491 return ESR_SUCCESS; 3492} 3493 3494ESR_ReturnCode SR_RecognizerLogEventImpl(SR_Recognizer* self, const LCHAR* event) 3495{ 3496 ESR_ReturnCode rc; 3497 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self; 3498 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, event)); 3499 return ESR_SUCCESS; 3500CLEANUP: 3501 return rc; 3502} 3503 3504ESR_ReturnCode SR_RecognizerLogTokenImpl(SR_Recognizer* self, const LCHAR* token, const LCHAR* value) 3505{ 3506 ESR_ReturnCode rc; 3507 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self; 3508 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, token, value)); 3509 return ESR_SUCCESS; 3510CLEANUP: 3511 return rc; 3512} 3513 3514ESR_ReturnCode SR_RecognizerLogTokenIntImpl(SR_Recognizer* self, const LCHAR* token, int value) 3515{ 3516 ESR_ReturnCode rc; 3517 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self; 3518 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, token, value)); 3519 return ESR_SUCCESS; 3520CLEANUP: 3521 return rc; 3522} 3523 3524ESR_ReturnCode SR_RecognizerLogSessionStartImpl(SR_Recognizer* self, const LCHAR* sessionName) 3525{ 3526 
ESR_ReturnCode rc; 3527 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self; 3528 /** 3529 * OSI Platform logging. 3530 * In OSR, these events are logged by the platform. We have no platform in ESR, so we 3531 * log them here. 3532 */ 3533 3534 /* call (session) start, tokens optional */ 3535 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclst"))); 3536 3537 /* service start, in this case SRecTest service */ 3538 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SVNM"), sessionName)); 3539 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIsvst"))); 3540 if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC) 3541 CHKLOG(rc, SR_EventLogEventSession(impl->eventLog)); 3542 3543 return ESR_SUCCESS; 3544CLEANUP: 3545 return rc; 3546} 3547 3548ESR_ReturnCode SR_RecognizerLogSessionEndImpl(SR_Recognizer* self) 3549{ 3550 ESR_ReturnCode rc; 3551 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self; 3552 3553 /* OSI log end of call (session) */ 3554 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclnd"))); 3555 if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC) 3556 CHKLOG(rc, SR_EventLogEventSession(impl->eventLog)); 3557 return ESR_SUCCESS; 3558CLEANUP: 3559 return rc; 3560} 3561 3562 3563ESR_ReturnCode SR_RecognizerLogWaveformDataImpl(SR_Recognizer* self, const LCHAR* waveformFilename, 3564 const LCHAR* transcription, const double bos, 3565 const double eos, ESR_BOOL isInvocab) 3566{ 3567 ESR_ReturnCode rc; 3568 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self; 3569 LCHAR num[P_PATH_MAX]; 3570 int frame; 3571 3572 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("FILE"), waveformFilename)); 3573 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("TRANS"), transcription)); 3574 sprintf(num, L("%.2f"), bos); 3575 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_SEC"), num)); 3576 sprintf(num, 
L("%.2f"), eos); 3577 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_SEC"), num)); 3578 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("FRAMESIZE"), impl->FRAME_SIZE)); 3579 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("SAMPLERATE"), impl->sampleRate)); 3580 frame = (int)(bos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE; 3581 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_FR"), frame)); 3582 frame = (int)(eos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE; 3583 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_FR"), frame)); 3584 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("INVOCAB"), isInvocab)); 3585 CHKLOG(rc, SR_EventLogEvent_AUDIO(impl->eventLog, impl->osi_log_level, L("ESRwfrd"))); 3586 return ESR_SUCCESS; 3587CLEANUP: 3588 return rc; 3589} 3590 3591ESR_ReturnCode SR_RecognizerSetLockFunctionImpl(SR_Recognizer* self, SR_RecognizerLockFunction function, void* data) 3592{ 3593 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3594 3595 impl->lockFunction = function; 3596 impl->lockData = data; 3597 return ESR_SUCCESS; 3598} 3599 3600static ESR_ReturnCode doSignalQualityInit(SR_RecognizerImpl* impl) 3601{ 3602 CA_DoSignalCheck(impl->wavein, &impl->isSignalClipping, &impl->isSignalDCOffset, 3603 &impl->isSignalNoisy, &impl->isSignalTooQuiet, &impl->isSignalTooFewSamples, 3604 &impl->isSignalTooManySamples); 3605 impl->isSignalQualityInitialized = ESR_TRUE; 3606 return ESR_SUCCESS; 3607} 3608 3609ESR_ReturnCode SR_RecognizerIsSignalClippingImpl(SR_Recognizer* self, ESR_BOOL* isClipping) 3610{ 3611 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3612 ESR_ReturnCode rc; 3613 3614 if (isClipping == NULL) 3615 { 3616 PLogError("SR_RecognizerIsSignalClippingImpl", ESR_INVALID_ARGUMENT); 3617 return ESR_INVALID_ARGUMENT; 3618 } 3619 
if (!impl->isSignalQualityInitialized) 3620 CHKLOG(rc, doSignalQualityInit(impl)); 3621 *isClipping = impl->isSignalClipping; 3622 return ESR_SUCCESS; 3623CLEANUP: 3624 return rc; 3625} 3626 3627ESR_ReturnCode SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer* self, ESR_BOOL* isDCOffset) 3628{ 3629 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3630 ESR_ReturnCode rc; 3631 3632 if (isDCOffset == NULL) 3633 { 3634 PLogError("SR_RecognizerIsSignalDCOffsetImpl", ESR_INVALID_ARGUMENT); 3635 return ESR_INVALID_ARGUMENT; 3636 } 3637 if (!impl->isSignalQualityInitialized) 3638 CHKLOG(rc, doSignalQualityInit(impl)); 3639 *isDCOffset = impl->isSignalDCOffset; 3640 return ESR_SUCCESS; 3641CLEANUP: 3642 return rc; 3643} 3644 3645ESR_ReturnCode SR_RecognizerIsSignalNoisyImpl(SR_Recognizer* self, ESR_BOOL* isNoisy) 3646{ 3647 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3648 ESR_ReturnCode rc; 3649 3650 if (isNoisy == NULL) 3651 { 3652 PLogError("SR_RecognizerIsSignalNoisyImpl", ESR_INVALID_ARGUMENT); 3653 return ESR_INVALID_ARGUMENT; 3654 } 3655 if (!impl->isSignalQualityInitialized) 3656 CHKLOG(rc, doSignalQualityInit(impl)); 3657 *isNoisy = impl->isSignalNoisy; 3658 return ESR_SUCCESS; 3659CLEANUP: 3660 return rc; 3661} 3662 3663ESR_ReturnCode SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer* self, ESR_BOOL* isTooQuiet) 3664{ 3665 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3666 ESR_ReturnCode rc; 3667 3668 if (isTooQuiet == NULL) 3669 { 3670 PLogError("SR_RecognizerIsSignalTooQuietImpl", ESR_INVALID_ARGUMENT); 3671 return ESR_INVALID_ARGUMENT; 3672 } 3673 if (!impl->isSignalQualityInitialized) 3674 CHKLOG(rc, doSignalQualityInit(impl)); 3675 *isTooQuiet = impl->isSignalTooQuiet; 3676 return ESR_SUCCESS; 3677CLEANUP: 3678 return rc; 3679} 3680 3681ESR_ReturnCode SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooFewSamples) 3682{ 3683 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3684 ESR_ReturnCode rc; 3685 3686 if 
(isTooFewSamples == NULL) 3687 { 3688 PLogError("SR_RecognizerIsSignalTooFewSamplesImpl", ESR_INVALID_ARGUMENT); 3689 return ESR_INVALID_ARGUMENT; 3690 } 3691 if (!impl->isSignalQualityInitialized) 3692 CHKLOG(rc, doSignalQualityInit(impl)); 3693 *isTooFewSamples = impl->isSignalTooFewSamples; 3694 return ESR_SUCCESS; 3695CLEANUP: 3696 return rc; 3697} 3698 3699ESR_ReturnCode SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooManySamples) 3700{ 3701 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self; 3702 ESR_ReturnCode rc; 3703 3704 if (isTooManySamples == NULL) 3705 { 3706 PLogError("SR_RecognizerIsSignalTooManySamplesImpl", ESR_INVALID_ARGUMENT); 3707 return ESR_INVALID_ARGUMENT; 3708 } 3709 if (!impl->isSignalQualityInitialized) 3710 CHKLOG(rc, doSignalQualityInit(impl)); 3711 *isTooManySamples = impl->isSignalTooManySamples; 3712 return ESR_SUCCESS; 3713CLEANUP: 3714 return rc; 3715} 3716 3717 3718 3719/**************************************/ 3720/* Waveform Buffer stuff */ 3721/**************************************/ 3722ESR_ReturnCode WaveformBuffer_Create(WaveformBuffer** waveformBuffer, size_t frame_size) 3723{ 3724 ESR_ReturnCode rc; 3725 WaveformBuffer *buf; 3726 size_t val_size_t; 3727 int val_int; 3728 ESR_BOOL exists; 3729 3730 buf = NEW(WaveformBuffer, L("SR_RecognizerImpl.wvfmbuf")); 3731 if (buf == NULL) 3732 { 3733 rc = ESR_OUT_OF_MEMORY; 3734 PLogError(L("%s: could not create WaveformBuffer"), ESR_rc2str(rc)); 3735 goto CLEANUP; 3736 } 3737 3738 ESR_SessionContains(L("SREC.voice_enroll.bufsz_kB"), &exists); 3739 if (exists) 3740 ESR_SessionGetSize_t(L("SREC.voice_enroll.bufsz_kB"), &val_size_t); 3741 else 3742 val_size_t = DEFAULT_WAVEFORM_BUFFER_MAX_SIZE; 3743 val_size_t *= 1024; /* convert to kB*/ 3744 CHKLOG(rc, CircularBufferCreate(val_size_t, L("SR_RecognizerImpl.wvfmbuf.cbuffer"), &buf->cbuffer)); 3745 3746 ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists); 3747 if (exists) 3748 
ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &val_int); 3749 else 3750 val_int = DEFAULT_WAVEFORM_WINDBACK_FRAMES; 3751 val_int *= frame_size; /* convert frames to bytes */ 3752 buf->windback_buffer_sz = (size_t) val_int; 3753 buf->windback_buffer = MALLOC(buf->windback_buffer_sz, L("SR_RecognizerImpl.wvfmbuf.windback")); 3754 if (buf->windback_buffer == NULL) 3755 { 3756 rc = ESR_OUT_OF_MEMORY; 3757 PLogError(L("%s: could not create Waveform windback buffer"), ESR_rc2str(rc)); 3758 goto CLEANUP; 3759 } 3760 3761 3762 ESR_SessionContains(L("SREC.voice_enroll.eos_comfort_frames"), &exists); 3763 if (exists) 3764 ESR_SessionGetSize_t(L("SREC.voice_enroll.eos_comfort_frames"), &val_size_t); 3765 else 3766 val_size_t = DEFAULT_EOS_COMFORT_FRAMES; 3767 buf->eos_comfort_frames = val_size_t; 3768 3769 ESR_SessionContains(L("SREC.voice_enroll.bos_comfort_frames"), &exists); 3770 if (exists) 3771 ESR_SessionGetSize_t(L("SREC.voice_enroll.bos_comfort_frames"), &val_size_t); 3772 else 3773 val_size_t = DEFAULT_BOS_COMFORT_FRAMES; 3774 buf->bos_comfort_frames = val_size_t; 3775 3776 /* initially off */ 3777 buf->state = WAVEFORM_BUFFERING_OFF; 3778 3779 *waveformBuffer = buf; 3780 return ESR_SUCCESS; 3781CLEANUP: 3782 WaveformBuffer_Destroy(buf); 3783 return rc; 3784} 3785 3786ESR_ReturnCode WaveformBuffer_Write(WaveformBuffer* waveformBuffer, void *data, size_t num_bytes) 3787{ 3788 size_t available_bytes; 3789 size_t done_bytes; 3790 3791 /* do nothing if not active */ 3792 switch (waveformBuffer->state) 3793 { 3794 case WAVEFORM_BUFFERING_OFF: 3795 return ESR_SUCCESS; 3796 3797 case WAVEFORM_BUFFERING_ON_CIRCULAR: 3798 available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer); 3799 if (available_bytes < num_bytes) 3800 { 3801 done_bytes = CircularBufferSkip(waveformBuffer->cbuffer, num_bytes - available_bytes); 3802 if (done_bytes != num_bytes - available_bytes) 3803 { 3804 PLogError("WaveformBuffer_Write: error when skipping bytes"); 3805 return 
ESR_INVALID_STATE; 3806 } 3807 } 3808 done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes); 3809 if (done_bytes != num_bytes) 3810 { 3811 PLogError("WaveformBuffer_Write: error when writing bytes"); 3812 return ESR_INVALID_STATE; 3813 } 3814 return ESR_SUCCESS; 3815 3816 case WAVEFORM_BUFFERING_ON_LINEAR: 3817 available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer); 3818 if (available_bytes < num_bytes) 3819 { 3820 waveformBuffer->overflow_count += num_bytes; 3821 return ESR_BUFFER_OVERFLOW; 3822 } 3823 done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes); 3824 if (done_bytes != num_bytes) 3825 { 3826 PLogError("WaveformBuffer_Write: error when writing bytes"); 3827 return ESR_INVALID_STATE; 3828 } 3829 return ESR_SUCCESS; 3830 3831 default: 3832 PLogError("WaveformBuffer_Write: bad control path"); 3833 return ESR_INVALID_STATE; 3834 } 3835} 3836 3837ESR_ReturnCode WaveformBuffer_Read(WaveformBuffer* waveformBuffer, void *data, size_t* num_bytes) 3838{ 3839 size_t bytes_to_read; 3840 ESR_ReturnCode rc; 3841 3842 if (num_bytes == NULL) 3843 { 3844 rc = ESR_INVALID_ARGUMENT; 3845 PLogError(ESR_rc2str(rc)); 3846 goto CLEANUP; 3847 } 3848 if (waveformBuffer->overflow_count > 0) 3849 { 3850 memset(data, 0, *num_bytes); 3851 *num_bytes = 0; 3852 PLogError(L("WaveformBuffer_Read: previous overflow causes read to return NULL")); 3853 return ESR_SUCCESS; 3854 } 3855 3856 if (waveformBuffer->read_size != 0 && *num_bytes > waveformBuffer->read_size) 3857 { 3858 PLogError(L("ESR_OUT_OF_MEMORY: waveform buffer too small for read, increase from %d to %d"), *num_bytes, waveformBuffer->read_size); 3859 return ESR_OUT_OF_MEMORY; 3860 } 3861 3862 if (waveformBuffer->read_size == 0) 3863 bytes_to_read = *num_bytes; 3864 else 3865 bytes_to_read = MIN(waveformBuffer->read_size, *num_bytes); 3866 waveformBuffer->read_size -= bytes_to_read; 3867 *num_bytes = CircularBufferRead(waveformBuffer->cbuffer, data, bytes_to_read); 3868 if 
(*num_bytes != bytes_to_read) 3869 { 3870 PLogError("WaveformBuffer_Read: error reading buffer"); 3871 return ESR_INVALID_STATE; 3872 } 3873 return ESR_SUCCESS; 3874CLEANUP: 3875 return rc; 3876} 3877 3878/* WindBack will save the last num_bytes recorded, reset the buffer, and then load the 3879 saved bytes at the beginning of the buffer */ 3880ESR_ReturnCode WaveformBuffer_WindBack(WaveformBuffer* waveformBuffer, const size_t num_bytes) 3881{ 3882 ESR_ReturnCode rc; 3883 size_t bufferSize; 3884 3885 if (num_bytes <= 0) 3886 { 3887 CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer)); 3888 return ESR_SUCCESS; 3889 } 3890 3891 /* make sure windback buffer is big enough */ 3892 if (num_bytes > waveformBuffer->windback_buffer_sz) 3893 { 3894 rc = ESR_OUT_OF_MEMORY; 3895 PLogError(L("%s: windback buffer is too small (needed=%d, had=%d)"), ESR_rc2str(rc), num_bytes, waveformBuffer->windback_buffer_sz); 3896 goto CLEANUP; 3897 } 3898 3899 CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize)); 3900 /* skip the first few bytes written */ 3901 if (bufferSize < num_bytes) 3902 { 3903 PLogError("bufferSize %d num_bytes %d (ESR_INVALID_STATE)\n", bufferSize, num_bytes); 3904 bufferSize = 0; 3905 } 3906 else 3907 { 3908 bufferSize -= num_bytes; 3909 } 3910 CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, bufferSize)); 3911 /* read the last few bytes written */ 3912 bufferSize = num_bytes; 3913 CHKLOG(rc, WaveformBuffer_Read(waveformBuffer, waveformBuffer->windback_buffer, &bufferSize)); 3914 3915 /* reset buffer */ 3916 CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer)); 3917 3918 /* rewrite the saved bytes at the beginning */ 3919 CHKLOG(rc, WaveformBuffer_Write(waveformBuffer, waveformBuffer->windback_buffer, bufferSize)); 3920 return ESR_SUCCESS; 3921CLEANUP: 3922 return rc; 3923} 3924 3925ESR_ReturnCode WaveformBuffer_Destroy(WaveformBuffer* waveformBuffer) 3926{ 3927 if (waveformBuffer->cbuffer) 3928 FREE(waveformBuffer->cbuffer); 3929 if 
(waveformBuffer->windback_buffer) 3930 FREE(waveformBuffer->windback_buffer); 3931 if (waveformBuffer) 3932 FREE(waveformBuffer); 3933 return ESR_SUCCESS; 3934} 3935 3936ESR_ReturnCode WaveformBuffer_SetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t state) 3937{ 3938 waveformBuffer->state = state; 3939 return ESR_SUCCESS; 3940} 3941 3942ESR_ReturnCode WaveformBuffer_GetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t* state) 3943{ 3944 *state = waveformBuffer->state; 3945 return ESR_SUCCESS; 3946} 3947 3948/** 3949 * @return ESR_BUFFER_OVERFLOW if nametag EOS occured beyond end of buffer 3950 */ 3951ESR_ReturnCode WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer* waveformBuffer, const LCHAR* end_pointed_result, const size_t bytes_per_frame) 3952{ 3953 const LCHAR *p; 3954 size_t bos_frame, eos_frame, bufferSize, read_start_offset; 3955 ESR_ReturnCode rc; 3956 3957 /* potential end pointed results 3958 3959 -pau-@19 tape@36 scan@64 down@88 -pau2-@104 3960 -pau-@19 tape@34 off@55 -pau2-@78 3961 -pau-@19 tape@47 help@66 -pau2-@80 3962 -pau-@16 tape@36 reverse@71 -pau2-@91 3963 -pau-@21 tape@42 scan@59 down@80 -pau2-@91 3964 3965 what I need to extract is the integer between "-pau-@" and ' ' 3966 and the integer between '@' and " -pau2-" 3967 */ 3968 3969 3970 p = LSTRSTR( end_pointed_result, PREFIX_WORD); 3971 if(p) p+=PREFIX_WORD_LEN; while(p && *p == '@') p++; 3972 rc = p ? lstrtoui(p, &bos_frame, 10) : ESR_INVALID_ARGUMENT; 3973 if (rc == ESR_INVALID_ARGUMENT) 3974 { 3975 PLogError(L("%s: extracting bos from text=%s"), ESR_rc2str(rc), end_pointed_result); 3976 goto CLEANUP; 3977 } 3978 else if (rc != ESR_SUCCESS) 3979 goto CLEANUP; 3980 3981 p = LSTRSTR( end_pointed_result, SUFFIX_WORD); 3982 while(p && p>end_pointed_result && p[-1]!='@') --p; 3983 rc = p ? 
lstrtoui(p, &eos_frame, 10) : ESR_INVALID_ARGUMENT; 3984 if (rc == ESR_INVALID_ARGUMENT) 3985 { 3986 PLogError(L("%s: extracting eos from text=%s"), ESR_rc2str(rc), end_pointed_result); 3987 goto CLEANUP; 3988 } 3989 else if (rc != ESR_SUCCESS) 3990 goto CLEANUP; 3991 3992 bos_frame -= (bos_frame > waveformBuffer->bos_comfort_frames ? waveformBuffer->bos_comfort_frames : 0); 3993 eos_frame += waveformBuffer->eos_comfort_frames; 3994 3995 /* 3996 * I know where speech started, so I want to skip frames 0 to bos_frame. 3997 * I also know where speech ended so I want to set the amount of frames(bytes) to read for 3998 * the nametag audio buffer (i.e. the read_size) 3999 */ 4000 4001 read_start_offset = bos_frame * bytes_per_frame * 2 /* times 2 because of skip even frames */; 4002 waveformBuffer->read_size = (eos_frame - bos_frame) * bytes_per_frame * 2 /* times 2 because of skip even frames */; 4003 4004 CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize)); 4005 if (read_start_offset + waveformBuffer->read_size > bufferSize) 4006 { 4007 waveformBuffer->overflow_count += read_start_offset + waveformBuffer->read_size - bufferSize; 4008 passert(waveformBuffer->overflow_count > 0); 4009 PLogMessage(L("Warning: Voice Enrollment audio buffer overflow (spoke too much, over by %d bytes)"), 4010 waveformBuffer->overflow_count); 4011 return ESR_BUFFER_OVERFLOW; 4012 } 4013 CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, read_start_offset)); 4014#ifdef SREC_ENGINE_VERBOSE_LOGGING 4015 PLogMessage(L("Voice Enrollment: bos@%d, eos@%d, therefore sizeof(waveform) should be %d"), bos_frame, eos_frame, waveformBuffer->read_size); 4016#endif 4017 return ESR_SUCCESS; 4018CLEANUP: 4019 return rc; 4020} 4021 4022 4023ESR_ReturnCode WaveformBuffer_Reset(WaveformBuffer* waveformBuffer) 4024{ 4025 CircularBufferReset(waveformBuffer->cbuffer); 4026 waveformBuffer->overflow_count = 0; 4027 waveformBuffer->read_size = 0; 4028 return ESR_SUCCESS; 4029} 4030 4031ESR_ReturnCode 
WaveformBuffer_GetSize(WaveformBuffer* waveformBuffer, size_t* size) 4032{ 4033 *size = CircularBufferGetSize(waveformBuffer->cbuffer); 4034 return ESR_SUCCESS; 4035} 4036 4037ESR_ReturnCode WaveformBuffer_Skip(WaveformBuffer* waveformBuffer, const size_t bytes) 4038{ 4039 if (CircularBufferSkip(waveformBuffer->cbuffer, bytes) != (int) bytes) 4040 return ESR_INVALID_STATE; 4041 return ESR_SUCCESS; 4042} 4043 4044 4045 4046static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl ) 4047 { 4048 ESR_ReturnCode reset_status; 4049 4050 FREE ( impl->audioBuffer ); 4051 impl->audioBuffer = NULL; 4052 impl->audioBuffer = MALLOC ( impl->FRAME_SIZE, MTAG ); 4053 4054 if ( impl->audioBuffer != NULL ) 4055 { 4056 WaveformBuffer_Destroy ( impl->waveformBuffer ); 4057 impl->waveformBuffer = NULL; 4058 reset_status = WaveformBuffer_Create ( &impl->waveformBuffer, impl->FRAME_SIZE ); 4059 } 4060 else 4061 { 4062 reset_status = ESR_OUT_OF_MEMORY; 4063 } 4064 return ( reset_status ); 4065 } 4066 4067 4068 4069static ESR_ReturnCode SR_Recognizer_Validate_Sample_Rate ( size_t sample_rate ) 4070 { 4071 ESR_ReturnCode validate_status; 4072 4073 switch ( sample_rate ) 4074 { 4075 case 8000: 4076 case 11025: 4077 case 16000: 4078 case 22050: 4079 validate_status = ESR_SUCCESS; 4080 break; 4081 4082 default: 4083 validate_status = ESR_INVALID_ARGUMENT; 4084 break; 4085 } 4086 return ( validate_status ); 4087 } 4088 4089 4090 4091static ESR_ReturnCode SR_Recognizer_Sample_Rate_Needs_Change ( size_t new_sample_rate, ESR_BOOL *needs_changing ) 4092 { 4093 ESR_ReturnCode validate_status; 4094 size_t current_sample_rate; 4095 4096 validate_status = ESR_SessionGetSize_t ( "CREC.Frontend.samplerate", ¤t_sample_rate ); 4097 4098 if ( validate_status == ESR_SUCCESS ) 4099 { 4100 if ( new_sample_rate != current_sample_rate ) 4101 *needs_changing = ESR_TRUE; 4102 else 4103 *needs_changing = ESR_TRUE; 4104 } 4105 return ( validate_status ); 4106 } 4107 4108 4109 4110static 
ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( void ) 4111 { 4112 ESR_ReturnCode change_status; 4113 LCHAR model_filenames [P_PATH_MAX]; 4114 LCHAR lda_filename [P_PATH_MAX]; 4115 size_t filename_length; 4116 4117 filename_length = P_PATH_MAX; 4118 change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles8"), model_filenames, &filename_length ); 4119 4120 if ( change_status == ESR_SUCCESS ) 4121 { 4122 filename_length = P_PATH_MAX; 4123 change_status = ESR_SessionGetLCHAR ( L("cmdline.lda8"), lda_filename, &filename_length ); 4124 4125/* From this point on, if an error occurs, we're screwed and recovery is probably impossible */ 4126 if ( change_status == ESR_SUCCESS ) 4127 { 4128 change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", 8000 ); 4129 if ( change_status == ESR_SUCCESS ) 4130 { 4131 change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 4000 ); 4132 4133 if ( change_status == ESR_SUCCESS ) 4134 { 4135 change_status = ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames ); 4136 4137 if ( change_status == ESR_SUCCESS ) 4138 change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename ); 4139 } 4140 } 4141 } 4142 else 4143 { 4144 PLogError (L("\nMissing Parameter lda8\n")); 4145 } 4146 } 4147 else 4148 { 4149 PLogError (L("\nMissing Parameter models8\n")); 4150 } 4151 return ( change_status ); 4152 } 4153 4154 4155 4156static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( size_t sample_rate ) 4157 { 4158 ESR_ReturnCode change_status; 4159 LCHAR model_filenames [P_PATH_MAX]; 4160 LCHAR lda_filename [P_PATH_MAX]; 4161 size_t filename_length; 4162 4163 filename_length = P_PATH_MAX; 4164 change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles11"), model_filenames, &filename_length ); 4165 4166 if ( change_status == ESR_SUCCESS ) 4167 { 4168 filename_length = P_PATH_MAX; 4169 change_status = ESR_SessionGetLCHAR ( L("cmdline.lda11"), lda_filename, &filename_length ); 4170 
4171/* From this point on, if an error occurs, we're screwed and recovery is probably impossible */ 4172 if ( change_status == ESR_SUCCESS ) 4173 { 4174 change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", sample_rate ); 4175 4176 if ( change_status == ESR_SUCCESS ) 4177 { 4178 change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 5500 ); 4179 4180 if ( change_status == ESR_SUCCESS ) 4181 { 4182 change_status = ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames ); 4183 4184 if ( change_status == ESR_SUCCESS ) 4185 change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename ); 4186 } 4187 } 4188 } 4189 else 4190 { 4191 PLogError (L("\nMissing Parameter lda11\n")); 4192 } 4193 } 4194 else 4195 { 4196 PLogError (L("\nMissing Parameter models11\n")); 4197 } 4198 return ( change_status ); 4199 } 4200 4201 4202 4203static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params ( size_t new_sample_rate ) 4204 { 4205 ESR_ReturnCode change_status; 4206 4207 if ( new_sample_rate == 8000 ) 4208 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( ); 4209 else 4210 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( new_sample_rate ); 4211 4212 return ( change_status ); 4213 } 4214 4215 4216 4217ESR_ReturnCode SR_Recognizer_Change_Sample_RateImpl ( SR_Recognizer *recognizer, size_t new_sample_rate ) 4218 { 4219 ESR_ReturnCode change_status; 4220 ESR_BOOL rate_needs_changing; 4221 SR_RecognizerImpl *impl; 4222 CA_FrontendInputParams *frontendParams; 4223 4224 change_status = SR_Recognizer_Validate_Sample_Rate ( new_sample_rate ); 4225 4226 if ( change_status == ESR_SUCCESS ) 4227 { 4228 change_status = SR_Recognizer_Sample_Rate_Needs_Change ( new_sample_rate, &rate_needs_changing ); 4229 4230 if ( change_status == ESR_SUCCESS ) 4231 { 4232 if ( rate_needs_changing == ESR_TRUE ) 4233 { 4234 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params ( new_sample_rate ); 4235 4236 if ( 
change_status == ESR_SUCCESS ) 4237 { // SR_RecognizerCreateFrontendImpl 4238 impl = (SR_RecognizerImpl *)recognizer; 4239 change_status = SR_RecognizerUnsetupImpl( recognizer ); 4240 4241 if ( change_status == ESR_SUCCESS ) 4242 { 4243 CA_UnconfigureFrontend ( impl->frontend ); 4244 frontendParams = CA_AllocateFrontendParameters ( ); 4245 4246 if ( frontendParams != NULL ) 4247 { 4248 change_status = SR_RecognizerGetFrontendLegacyParametersImpl ( frontendParams ); 4249 4250 if ( change_status == ESR_SUCCESS ) 4251 { 4252 CA_ConfigureFrontend ( impl->frontend, frontendParams ); 4253 CA_UnconfigureWave ( impl->wavein ); 4254 CA_ConfigureWave ( impl->wavein, impl->frontend ); 4255 impl->sampleRate = new_sample_rate; 4256 impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE; 4257 change_status = SR_Recognizer_Reset_Buffers ( impl ); 4258 4259 if ( change_status == ESR_SUCCESS ) 4260 { 4261 change_status = SR_RecognizerSetupImpl( recognizer ); 4262 4263 if ( change_status == ESR_SUCCESS ) 4264 change_status = SR_AcousticStateReset ( recognizer ); 4265 } 4266 else 4267 { 4268 SR_RecognizerSetupImpl( recognizer ); /* Otherwise recognizer is in bad state */ 4269 } 4270 } 4271 CA_FreeFrontendParameters ( frontendParams ); 4272 } 4273 else 4274 { 4275 SR_RecognizerSetupImpl( recognizer ); /* Otherwise recognizer is in bad state */ 4276 change_status = ESR_OUT_OF_MEMORY; 4277 } 4278 } 4279 } 4280 } 4281 } 4282 } 4283 return ( change_status ); 4284 } 4285 4286 4287