1/*---------------------------------------------------------------------------*
2 *  RecognizerImpl.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21#include "ESR_Session.h"
22#include "ESR_SessionTypeImpl.h"
23#include "IntArrayList.h"
24#include "LCHAR.h"
25#include "passert.h"
26#include "plog.h"
27#include "pstdio.h"
28#include "pmemory.h"
29#include "ptimestamp.h"
30#include "SR_AcousticModelsImpl.h"
31#include "SR_AcousticStateImpl.h"
32#include "SR_GrammarImpl.h"
33#include "SR_SemprocDefinitions.h"
34#include "SR_SemanticResult.h"
35#include "SR_SemanticResultImpl.h"
36#include "SR_Recognizer.h"
37#include "SR_RecognizerImpl.h"
38#include "SR_RecognizerResultImpl.h"
39#include "SR_SemanticResultImpl.h"
40#include "SR_EventLog.h"
41#include "srec.h"
42
43#define MTAG NULL
44#define FILTER_NBEST_BY_SEM_RESULT 1
45#define AUDIO_CIRC_BUFFER_SIZE 20000
46#define SEMPROC_ACTIVE 1
47#define SAMPLE_SIZE (16 / CHAR_BIT) /* 16-bits / sample */
48
/* milliseconds per FRAME = 1/FRAMERATE * 1000 */
/* We multiply by 2 because we skip even frames */
51#define MSEC_PER_FRAME (2000/FRAMERATE)
52#define MAX_ENTRY_LENGTH 512
53#define PREFIX_WORD     "-pau-"
54#define PREFIX_WORD_LEN 5
55#define SUFFIX_WORD     "-pau2-"
56#define SUFFIX_WORD_LEN 6
57
58
59static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl );
60
61/**
62 * Initializes recognizer properties to default values.
63 *
64 * Replaces setup_recognition_parameters()
65 */
66ESR_ReturnCode SR_RecognizerToSessionImpl()
67{
68  ESR_ReturnCode rc;
69
70  /* Old comment: remember to keep "ca_rip.h" up to date with these parameters... */
71
72  /* CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_acoustic_models", 2)); */
73  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Recognizer.partial_results", ESR_FALSE));
74  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.NBest", 1));
75  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.eou_threshold", 100));
76  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_altword_tokens", 400));
77  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_frames", 1000));
78  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_arcs", 3000));
79  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_nodes", 3000));
80  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsmnode_tokens", 1000));
81  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_hmm_tokens", 1000));
82  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_model_states", 1000));
83  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_searches", 2));
84  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_word_tokens", 1000));
85  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.non_terminal_timeout", 50));
86  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.num_wordends_per_frame", 10));
87  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.often", 10));
88  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.optional_terminal_timeout", 30));
89  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.reject", 500));
90  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.terminal_timeout", 10));
91  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.viterbi_prune_thresh", 5000));
92  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.wordpen", 0));
93
94  CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("SREC.Recognizer.utterance_timeout", 400));
95
96  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("enableGetWaveform", ESR_FALSE));
97
98  return ESR_SUCCESS;
99CLEANUP:
100  return rc;
101}
102
103/**
104 * Initializes frontend properties to default values.
105 *
106 * Replaces load_up_parameter_list()
107 */
108ESR_ReturnCode SR_RecognizerFrontendToSessionImpl()
109{
110  IntArrayList* intList = NULL;
111  ESR_ReturnCode rc;
112  ESR_BOOL exists;
113  size_t i;
114
115  /* Old comment: Remember to keep "ca_pip.h" up to date with these parameters... */
116
117  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.mel_dim", 12));
118  CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("CREC.Frontend.samplerate", 8000));
119  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.premel", 0.98f));
120  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lowcut", 260));  /* Hz */
121  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.highcut", 4000)); /* Hz */
122  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.window_factor", 2.0)); /* times the frame size */
123  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_skip_even_frames", ESR_FALSE)); /* 10/20 ms rate */
124  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.offset", 0)); /* additional */
125  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.ddmel", ESR_FALSE)); /* delta-delta mel pars */
126  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.forgetfactor", 40));
127  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.sv6_margin", 10));
128  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rasta", ESR_FALSE));
129  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rastac0", ESR_FALSE));
130  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.spectral_subtraction", ESR_FALSE));
131  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.spec_sub_dur", 0));
132  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.spec_sub_scale", 1.0));
133  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_dump", ESR_FALSE)); /* Output is filterbank (30 floats) */
134  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_input", ESR_FALSE)); /* Input is filterbank (30 floats) in place of audio samples */
135  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_smooth_c0", ESR_TRUE));
136  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.plp", ESR_FALSE)); /* Do PLP instead of MEL */
137  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lpcorder", 12)); /* order of lpc analysis in plp processing */
138  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.warp_scale", 1.0));
139  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.piecewise_start", 1.0));
140  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecayup", -1.0)); /* If +ve, decay factor on peakpicker (low to high) */
141  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecaydown", -1.0)); /* If +ve, decay factor on peakpicker (high to low) */
142  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.cuberoot", ESR_FALSE)); /* Use cube root instead of log */
143
144  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_offset", &exists));
145  if (!exists)
146  {
147    CHKLOG(rc, IntArrayListCreate(&intList));
148    for (i = 0; i < 32; ++i)
149      CHKLOG(rc, IntArrayListAdd(intList, 0));
150    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_offset", intList, TYPES_INTARRAYLIST));
151    intList = NULL;
152  }
153
154  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_loop", &exists));
155  if (!exists)
156  {
157    CHKLOG(rc, IntArrayListCreate(&intList));
158    for (i = 0; i < 32; ++i)
159      CHKLOG(rc, IntArrayListAdd(intList, 1));
160    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_loop", intList, TYPES_INTARRAYLIST));
161    intList = NULL;
162  }
163
164  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melA", &exists));
165  if (!exists)
166  {
167    CHKLOG(rc, IntArrayListCreate(&intList));
168    CHKLOG(rc, IntArrayListAdd(intList, (int) 13.2911));
169    CHKLOG(rc, IntArrayListAdd(intList, (int) 47.2229));
170    CHKLOG(rc, IntArrayListAdd(intList, (int) 79.2485));
171    CHKLOG(rc, IntArrayListAdd(intList, (int) 92.1967));
172    CHKLOG(rc, IntArrayListAdd(intList, (int) 136.3855));
173    CHKLOG(rc, IntArrayListAdd(intList, (int) 152.2896));
174    CHKLOG(rc, IntArrayListAdd(intList, (int) 183.3601));
175    CHKLOG(rc, IntArrayListAdd(intList, (int) 197.4200));
176    CHKLOG(rc, IntArrayListAdd(intList, (int) 217.8278));
177    CHKLOG(rc, IntArrayListAdd(intList, (int) 225.6556));
178    CHKLOG(rc, IntArrayListAdd(intList, (int) 263.3073));
179    CHKLOG(rc, IntArrayListAdd(intList, (int) 277.193));
180    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melA", intList, TYPES_INTARRAYLIST));
181    intList = NULL;
182  }
183
184  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melB", &exists));
185  if (!exists)
186  {
187    CHKLOG(rc, IntArrayListCreate(&intList));
188    CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0847));
189    CHKLOG(rc, IntArrayListAdd(intList, (int) 91.3289));
190    CHKLOG(rc, IntArrayListAdd(intList, (int) 113.9995));
191    CHKLOG(rc, IntArrayListAdd(intList, (int) 123.0336));
192    CHKLOG(rc, IntArrayListAdd(intList, (int) 131.2704));
193    CHKLOG(rc, IntArrayListAdd(intList, (int) 128.9942));
194    CHKLOG(rc, IntArrayListAdd(intList, (int) 120.5267));
195    CHKLOG(rc, IntArrayListAdd(intList, (int) 132.0079));
196    CHKLOG(rc, IntArrayListAdd(intList, (int) 129.8076));
197    CHKLOG(rc, IntArrayListAdd(intList, (int) 126.5029));
198    CHKLOG(rc, IntArrayListAdd(intList, (int) 121.8519));
199    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melB", intList, TYPES_INTARRAYLIST));
200    intList = NULL;
201  }
202
203  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelA", &exists));
204  if (!exists)
205  {
206    CHKLOG(rc, IntArrayListCreate(&intList));
207    CHKLOG(rc, IntArrayListAdd(intList, (int) 91.6305));
208    CHKLOG(rc, IntArrayListAdd(intList, (int) 358.3790));
209    CHKLOG(rc, IntArrayListAdd(intList, (int) 527.5946));
210    CHKLOG(rc, IntArrayListAdd(intList, (int) 536.3163));
211    CHKLOG(rc, IntArrayListAdd(intList, (int) 731.2385));
212    CHKLOG(rc, IntArrayListAdd(intList, (int) 757.8382));
213    CHKLOG(rc, IntArrayListAdd(intList, (int) 939.4460));
214    CHKLOG(rc, IntArrayListAdd(intList, (int) 1028.4136));
215    CHKLOG(rc, IntArrayListAdd(intList, (int) 1071.3193));
216    CHKLOG(rc, IntArrayListAdd(intList, (int) 1183.7922));
217    CHKLOG(rc, IntArrayListAdd(intList, (int) 1303.1014));
218    CHKLOG(rc, IntArrayListAdd(intList, (int) 1447.7766));
219    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelA", intList, TYPES_INTARRAYLIST));
220    intList = NULL;
221  }
222
223  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelB", &exists));
224  if (!exists)
225  {
226    CHKLOG(rc, IntArrayListCreate(&intList));
227    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4785));
228    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3878));
229    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4029));
230    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3182));
231    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3706));
232    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5394));
233    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5150));
234    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4270));
235    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4871));
236    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4088));
237    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4361));
238    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5449));
239    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelB", intList, TYPES_INTARRAYLIST));
240    intList = NULL;
241  }
242
243  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelA", &exists));
244  if (!exists)
245  {
246    CHKLOG(rc, IntArrayListCreate(&intList));
247    CHKLOG(rc, IntArrayListAdd(intList, (int) 10.7381));
248    CHKLOG(rc, IntArrayListAdd(intList, (int) 32.6775));
249    CHKLOG(rc, IntArrayListAdd(intList, (int) 46.2301));
250    CHKLOG(rc, IntArrayListAdd(intList, (int) 51.5438));
251    CHKLOG(rc, IntArrayListAdd(intList, (int) 57.6636));
252    CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0581));
253    CHKLOG(rc, IntArrayListAdd(intList, (int) 65.3696));
254    CHKLOG(rc, IntArrayListAdd(intList, (int) 70.1910));
255    CHKLOG(rc, IntArrayListAdd(intList, (int) 71.6751));
256    CHKLOG(rc, IntArrayListAdd(intList, (int) 78.2364));
257    CHKLOG(rc, IntArrayListAdd(intList, (int) 83.2440));
258    CHKLOG(rc, IntArrayListAdd(intList, (int) 89.6261));
259    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelA", intList, TYPES_INTARRAYLIST));
260    intList = NULL;
261  }
262
263  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelB", &exists));
264  if (!exists)
265  {
266    CHKLOG(rc, IntArrayListCreate(&intList));
267    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5274));
268    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5098));
269    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5333));
270    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5963));
271    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5132));
272    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5282));
273    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5530));
274    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5682));
275    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4662));
276    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4342));
277    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5235));
278    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4061));
279    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelB", intList, TYPES_INTARRAYLIST));
280    intList = NULL;
281  }
282
283  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaA", &exists));
284  if (!exists)
285  {
286    CHKLOG(rc, IntArrayListCreate(&intList));
287    CHKLOG(rc, IntArrayListAdd(intList, (int) 7.80));
288    CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0));
289    CHKLOG(rc, IntArrayListAdd(intList, (int) 54.0));
290    CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0));
291    CHKLOG(rc, IntArrayListAdd(intList, (int) 84.0));
292    CHKLOG(rc, IntArrayListAdd(intList, (int) 86.5));
293    CHKLOG(rc, IntArrayListAdd(intList, (int) 98.1));
294    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.0));
295    CHKLOG(rc, IntArrayListAdd(intList, (int) 153.0));
296    CHKLOG(rc, IntArrayListAdd(intList, (int) 160.0));
297    CHKLOG(rc, IntArrayListAdd(intList, (int) 188.0));
298    CHKLOG(rc, IntArrayListAdd(intList, (int) 199.0));
299    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaA", intList, TYPES_INTARRAYLIST));
300    intList = NULL;
301  }
302
303  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaB", &exists));
304  if (!exists)
305  {
306    CHKLOG(rc, IntArrayListCreate(&intList));
307    CHKLOG(rc, IntArrayListAdd(intList, 117));
308    CHKLOG(rc, IntArrayListAdd(intList, 121));
309    CHKLOG(rc, IntArrayListAdd(intList, 114));
310    CHKLOG(rc, IntArrayListAdd(intList, 111));
311    CHKLOG(rc, IntArrayListAdd(intList, 113));
312    CHKLOG(rc, IntArrayListAdd(intList, 126));
313    CHKLOG(rc, IntArrayListAdd(intList, 134));
314    CHKLOG(rc, IntArrayListAdd(intList, 130));
315    CHKLOG(rc, IntArrayListAdd(intList, 135));
316    CHKLOG(rc, IntArrayListAdd(intList, 129));
317    CHKLOG(rc, IntArrayListAdd(intList, 139));
318    CHKLOG(rc, IntArrayListAdd(intList, 138));
319    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaB", intList, TYPES_INTARRAYLIST));
320    intList = NULL;
321  }
322
323  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_detect", 18));
324  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_above", 18));
325  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.ambient_within", 12));
326  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.start_windback", 50));
327  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.utterance_allowance", 40));
328  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_duration", 6));
329  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.quiet_duration", 20));
330
331  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_clip", 32767));
332  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_clip", -32768));
333  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_per10000_clip", 10));
334  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_dc_offset", 1000));
335  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_noise_level_bit", 11));
336  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_speech_level_bit", 11));
337  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.min_samples", 10000));
338
339  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_freq", &exists));
340  if (!exists)
341  {
342    CHKLOG(rc, IntArrayListCreate(&intList));
343    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_freq", intList, TYPES_INTARRAYLIST));
344    intList = NULL;
345  }
346  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_spread", &exists));
347  if (!exists)
348  {
349    CHKLOG(rc, IntArrayListCreate(&intList));
350    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_spread", intList, TYPES_INTARRAYLIST));
351    intList = NULL;
352  }
353  return ESR_SUCCESS;
354CLEANUP:
355  if (intList != NULL)
356    intList->destroy(intList);
357  return rc;
358}
359
360/**
361 * Generate legacy frontend parameter structure from ESR_Session.
362 *
363 * @param impl SR_RecognizerImpl handle
364 * @param params Resulting structure
365 */
366ESR_ReturnCode SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams* params)
367{
368  ESR_ReturnCode rc;
369  IntArrayList* intList;
370  size_t size, i, size_tValue;
371  int iValue;
372
373  passert(params != NULL);
374  params->is_loaded = ESR_FALSE;
375  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.mel_dim", &params->mel_dim));
376  CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &size_tValue));
377  params->samplerate = (int) size_tValue;
378  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.premel", &params->pre_mel));
379  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lowcut", &params->low_cut));
380  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.highcut", &params->high_cut));
381  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.window_factor", &params->window_factor));
382  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_skip_even_frames", &params->do_skip_even_frames));
383  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.offset", &params->offset));
384  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.ddmel", &params->do_dd_mel));
385  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.forgetfactor", &params->forget_factor));
386  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.sv6_margin", &params->sv6_margin));
387  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.rastac0", &params->do_rastac0));
388  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.spectral_subtraction", &params->do_spectral_sub));
389  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.spec_sub_dur", &params->spectral_sub_frame_dur));
390  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.spec_sub_scale", &params->spec_sub_scale));
391  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_dump", &params->do_filterbank_input));
392  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_input", &params->do_filterbank_input));
393  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_smooth_c0", &params->do_smooth_c0));
394  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lpcorder", &params->lpc_order));
395  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.warp_scale", &params->warp_scale));
396  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.piecewise_start", &params->piecewise_start));
397  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecayup", &params->peakpickup));
398  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecaydown", &params->peakpickdown));
399
400  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_offset", (void **)&intList, TYPES_INTARRAYLIST));
401  if (intList == NULL)
402  {
403    PLogError(L("ESR_INVALID_STATE"));
404    return ESR_INVALID_STATE;
405  }
406  CHKLOG(rc, IntArrayListGetSize(intList, &size));
407  for (i = 0; i < size; ++i)
408    CHKLOG(rc, IntArrayListGet(intList, i, &params->mel_offset[i]));
409
410  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_loop", (void **)&intList, TYPES_INTARRAYLIST));
411  if (intList == NULL)
412  {
413    PLogError(L("ESR_INVALID_STATE"));
414    return ESR_INVALID_STATE;
415  }
416  CHKLOG(rc, IntArrayListGetSize(intList, &size));
417  for (i = 0; i < size; ++i)
418    CHKLOG(rc, IntArrayListGet(intList, i, &params->mel_loop[i]));
419
420  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melA", (void **)&intList, TYPES_INTARRAYLIST));
421  CHKLOG(rc, IntArrayListGetSize(intList, &size));
422  for (i = 0; i < size; ++i)
423    CHKLOG(rc, IntArrayListGet(intList, i, &params->melA_scale[i]));
424
425  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melB", (void **)&intList, TYPES_INTARRAYLIST));
426  CHKLOG(rc, IntArrayListGetSize(intList, &size));
427  for (i = 0; i < size; ++i)
428    CHKLOG(rc, IntArrayListGet(intList, i, &params->melB_scale[i]));
429
430  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelA", (void **)&intList, TYPES_INTARRAYLIST));
431  CHKLOG(rc, IntArrayListGetSize(intList, &size));
432  for (i = 0; i < size; ++i)
433    CHKLOG(rc, IntArrayListGet(intList, i, &params->dmelA_scale[i]));
434
435  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelB", (void **)&intList, TYPES_INTARRAYLIST));
436  CHKLOG(rc, IntArrayListGetSize(intList, &size));
437  for (i = 0; i < size; ++i)
438    CHKLOG(rc, IntArrayListGet(intList, i, &params->dmelB_scale[i]));
439
440  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelA", (void **)&intList, TYPES_INTARRAYLIST));
441  CHKLOG(rc, IntArrayListGetSize(intList, &size));
442  for (i = 0; i < size; ++i)
443    CHKLOG(rc, IntArrayListGet(intList, i, &params->ddmelA_scale[i]));
444
445  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelB", (void **)&intList, TYPES_INTARRAYLIST));
446  CHKLOG(rc, IntArrayListGetSize(intList, &size));
447  for (i = 0; i < size; ++i)
448    CHKLOG(rc, IntArrayListGet(intList, i, &params->ddmelB_scale[i]));
449
450  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaA", (void **)&intList, TYPES_INTARRAYLIST));
451  CHKLOG(rc, IntArrayListGetSize(intList, &size));
452  for (i = 0; i < size; ++i)
453    CHKLOG(rc, IntArrayListGet(intList, i, &params->rastaA_scale[i]));
454
455  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaB", (void **)&intList, TYPES_INTARRAYLIST));
456  CHKLOG(rc, IntArrayListGetSize(intList, &size));
457  for (i = 0; i < size; ++i)
458    CHKLOG(rc, IntArrayListGet(intList, i, &params->rastaB_scale[i]));
459
460  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_detect", &params->voice_margin));
461  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_above", &params->fast_voice_margin));
462  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.ambient_within", &params->tracker_margin));
463  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.start_windback", &params->start_windback));
464  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.utterance_allowance", &params->unsure_duration));
465  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_duration", &params->voice_duration));
466  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.quiet_duration", &params->quiet_duration));
467
468  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_clip", &params->high_clip));
469  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_clip", &params->low_clip));
470  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_per10000_clip", &params->max_per10000_clip));
471  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_dc_offset", &params->max_dc_offset));
472  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_noise_level_bit", &params->high_noise_level_bit));
473  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_speech_level_bit", &params->low_speech_level_bit));
474  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.min_samples", &params->min_samples));
475
476  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_freq", (void **)&intList, TYPES_INTARRAYLIST));
477  if (intList == NULL)
478  {
479    PLogError(L("ESR_INVALID_STATE"));
480    return ESR_INVALID_STATE;
481  }
482  CHKLOG(rc, IntArrayListGetSize(intList, &size));
483  for (i = 0; i < size; ++i)
484  {
485    CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
486    params->spectrum_filter_freq[i] = iValue;
487  }
488
489  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_spread", (void **)&intList, TYPES_INTARRAYLIST));
490  if (intList == NULL)
491  {
492    PLogError(L("ESR_INVALID_STATE"));
493    return ESR_INVALID_STATE;
494  }
495  CHKLOG(rc, IntArrayListGetSize(intList, &size));
496  for (i = 0; i < size; ++i)
497  {
498    CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
499    params->spectrum_filter_spread[i] = iValue;
500  }
501  params->is_loaded = ESR_TRUE;
502  return ESR_SUCCESS;
503CLEANUP:
504  return rc;
505}
506
507/**
508 * Creates frontend components of SR_Recognizer.
509 *
510 * @param impl SR_RecognizerImpl handle
511 */
512ESR_ReturnCode SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl* impl)
513{
514  ESR_ReturnCode rc;
515  CA_FrontendInputParams* frontendParams;
516
517  /* Create a frontend object */
518  impl->frontend = CA_AllocateFrontend(1, 0, 1);
519  frontendParams = CA_AllocateFrontendParameters();
520  CHKLOG(rc, SR_RecognizerGetFrontendLegacyParametersImpl(frontendParams));
521
522  CA_ConfigureFrontend(impl->frontend, frontendParams);
523
524  /* Create a wave object */
525  impl->wavein = CA_AllocateWave('N');
526  if (impl->wavein == NULL)
527  {
528    rc = ESR_OUT_OF_MEMORY;
529    PLogError(ESR_rc2str(rc));
530    goto CLEANUP;
531  }
532  CA_ConfigureWave(impl->wavein, impl->frontend);
533  CA_ConfigureVoicingAnalysis(impl->wavein, frontendParams);
534
535  CA_LoadCMSParameters(impl->wavein, NULL, frontendParams);
536
537  /* Create an utterance object */
538  impl->utterance = CA_AllocateUtterance();
539  if (impl->utterance == NULL)
540  {
541    rc = ESR_OUT_OF_MEMORY;
542    PLogError(ESR_rc2str(rc));
543    goto CLEANUP;
544  }
545  CA_InitUtteranceForFrontend(impl->utterance, frontendParams);
546  CA_AttachCMStoUtterance(impl->wavein, impl->utterance);
547  CA_FreeFrontendParameters(frontendParams);
548  return ESR_SUCCESS;
549
550CLEANUP:
551  if (impl->frontend != NULL)
552  {
553    CA_UnconfigureFrontend(impl->frontend);
554    CA_FreeFrontend(impl->frontend);
555    impl->frontend = NULL;
556  }
557  if (impl->wavein != NULL)
558  {
559    CA_UnconfigureWave(impl->wavein);
560    CA_FreeWave(impl->wavein);
561    impl->wavein = NULL;
562  }
563  if (impl->utterance != NULL)
564  {
565    CA_ClearUtterance(impl->utterance);
566    CA_FreeUtterance(impl->utterance);
567    impl->utterance = NULL;
568  }
569  if (frontendParams != NULL)
570    CA_FreeFrontendParameters(frontendParams);
571  return rc;
572}
573
574/**
575 * Populates legacy recognizer parameters from the session.
576 *
577 * Replaces setup_pattern_parameters()
578 */
579ESR_ReturnCode SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams* params)
580{
581  ESR_ReturnCode rc;
582
583  passert(params != NULL);
584  params->is_loaded = ESR_FALSE;
585  CHKLOG(rc, ESR_SessionGetBool("CREC.Recognizer.partial_results", &params->do_partial));
586  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.NBest", &params->top_choices));
587  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.eou_threshold", &params->eou_threshold));
588  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_altword_tokens", &params->max_altword_tokens));
589  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_frames", &params->max_frames));
590  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_arcs", &params->max_fsm_arcs));
591  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_nodes", &params->max_fsm_nodes));
592  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsmnode_tokens", &params->max_fsmnode_tokens));
593  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_hmm_tokens", &params->max_hmm_tokens));
594  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_model_states", &params->max_model_states));
595  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_searches", &params->max_searches));
596  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_word_tokens", &params->max_word_tokens));
597  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.non_terminal_timeout", &params->non_terminal_timeout));
598  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.num_wordends_per_frame", &params->num_wordends_per_frame));
599  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.often", &params->traceback_freq));
600  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.optional_terminal_timeout", &params->optional_terminal_timeout));
601  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.reject", &params->reject_score));
602  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.terminal_timeout", &params->terminal_timeout));
603  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.viterbi_prune_thresh", &params->viterbi_prune_thresh));
604  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.wordpen", &params->word_penalty));
605  params->is_loaded = ESR_TRUE;
606
607  return ESR_SUCCESS;
608CLEANUP:
609  return rc;
610}
611
612ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self)
613{
614  SR_RecognizerImpl* impl;
615  CA_RecInputParams* recogParams = NULL;
616  ESR_ReturnCode rc;
617  LCHAR recHandle[12];
618
619  if (self == NULL)
620  {
621    PLogError(L("ESR_INVALID_ARGUMENT"));
622    return ESR_INVALID_ARGUMENT;
623  }
624  impl = NEW(SR_RecognizerImpl, MTAG);
625  if (impl == NULL)
626  {
627    PLogError(L("ESR_OUT_OF_MEMORY"));
628    return ESR_OUT_OF_MEMORY;
629  }
630
631  impl->Interface.start = &SR_RecognizerStartImpl;
632  impl->Interface.stop = &SR_RecognizerStopImpl;
633  impl->Interface.destroy = &SR_RecognizerDestroyImpl;
634  impl->Interface.setup = &SR_RecognizerSetupImpl;
635  impl->Interface.unsetup = &SR_RecognizerUnsetupImpl;
636  impl->Interface.isSetup = &SR_RecognizerIsSetupImpl;
637  impl->Interface.getParameter = &SR_RecognizerGetParameterImpl;
638  impl->Interface.getSize_tParameter = &SR_RecognizerGetSize_tParameterImpl;
639  impl->Interface.getBoolParameter = &SR_RecognizerGetBoolParameterImpl;
640  impl->Interface.setParameter = &SR_RecognizerSetParameterImpl;
641  impl->Interface.setSize_tParameter = &SR_RecognizerSetSize_tParameterImpl;
642  impl->Interface.setBoolParameter = &SR_RecognizerSetBoolParameterImpl;
643  impl->Interface.setLockFunction = &SR_RecognizerSetLockFunctionImpl;
644  impl->Interface.hasSetupRules = &SR_RecognizerHasSetupRulesImpl;
645  impl->Interface.activateRule = &SR_RecognizerActivateRuleImpl;
646  impl->Interface.deactivateRule = &SR_RecognizerDeactivateRuleImpl;
647  impl->Interface.deactivateAllRules = &SR_RecognizerDeactivateAllRulesImpl;
648  impl->Interface.isActiveRule = &SR_RecognizerIsActiveRuleImpl;
649  impl->Interface.setWordAdditionCeiling = &SR_RecognizerSetWordAdditionCeilingImpl;
650  impl->Interface.checkGrammarConsistency = &SR_RecognizerCheckGrammarConsistencyImpl;
651  impl->Interface.getModels = &SR_RecognizerGetModelsImpl;
652  impl->Interface.putAudio = &SR_RecognizerPutAudioImpl;
653  impl->Interface.advance = &SR_RecognizerAdvanceImpl;
654  impl->Interface.loadUtterance = &SR_RecognizerLoadUtteranceImpl;
655  impl->Interface.loadWaveFile = &SR_RecognizerLoadWaveFileImpl;
656  impl->Interface.logEvent = &SR_RecognizerLogEventImpl;
657  impl->Interface.logToken = &SR_RecognizerLogTokenImpl;
658  impl->Interface.logTokenInt = &SR_RecognizerLogTokenIntImpl;
659  impl->Interface.logSessionStart = &SR_RecognizerLogSessionStartImpl;
660  impl->Interface.logSessionEnd = &SR_RecognizerLogSessionEndImpl;
661  impl->Interface.logWaveformData = &SR_RecognizerLogWaveformDataImpl;
662  impl->Interface.isSignalClipping = &SR_RecognizerIsSignalClippingImpl;
663  impl->Interface.isSignalDCOffset = &SR_RecognizerIsSignalDCOffsetImpl;
664  impl->Interface.isSignalNoisy = &SR_RecognizerIsSignalNoisyImpl;
665  impl->Interface.isSignalTooFewSamples = &SR_RecognizerIsSignalTooFewSamplesImpl;
666  impl->Interface.isSignalTooManySamples = &SR_RecognizerIsSignalTooManySamplesImpl;
667  impl->Interface.isSignalTooQuiet = &SR_RecognizerIsSignalTooQuietImpl;
668
669  impl->frontend = NULL;
670  impl->wavein = NULL;
671  impl->utterance = NULL;
672  impl->confidenceScorer = NULL;
673  impl->recognizer = NULL;
674  impl->models = NULL;
675  impl->grammars = NULL;
676  impl->result = NULL;
677  impl->parameters = NULL;
678  impl->acousticState = NULL;
679  impl->audioBuffer = NULL;
680  impl->buffer = NULL;
681  impl->frames = impl->processed;
682  impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
683  impl->isStarted = ESR_FALSE;
684  impl->isRecognizing = ESR_FALSE;
685  impl->gotLastFrame = ESR_FALSE;
686  impl->sampleRate = 0;
687  impl->lockFunction = NULL;
688  impl->lockData = NULL;
689  impl->eventLog = NULL;
690  impl->osi_log_level = 0;
691  impl->waveformBuffer = NULL;
692  impl->isSignalQualityInitialized = ESR_FALSE;
693  impl->beginningOfSpeechOffset = 0;
694  impl->gatedMode = ESR_TRUE;
695  impl->bgsniff = 0;
696  impl->isSignalClipping       = ESR_FALSE;
697  impl->isSignalDCOffset       = ESR_FALSE;
698  impl->isSignalNoisy          = ESR_FALSE;
699  impl->isSignalTooFewSamples  = ESR_FALSE;
700  impl->isSignalTooManySamples = ESR_FALSE;
701  impl->isSignalTooQuiet       = ESR_FALSE;
702
703  CHKLOG(rc, ESR_SessionTypeCreate(&impl->parameters));
704  CHKLOG(rc, SR_RecognizerToSessionImpl());
705  CHKLOG(rc, ESR_SessionGetSize_t(L("SREC.Recognizer.osi_log_level"), &impl->osi_log_level));
706
707  /* create the event log */
708  if (impl->osi_log_level) /* do some logging if non-zero val */
709    CHKLOG(rc, ESR_SessionGetProperty(L("eventlog"), (void **)&impl->eventLog, TYPES_SR_EVENTLOG));
710
711  /* Record the OSI log event */
712  psprintf(recHandle, L("%p"), impl);
713  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
714  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrst")));
715
716  CHKLOG(rc, SR_RecognizerFrontendToSessionImpl());
717  CHKLOG(rc, SR_RecognizerCreateFrontendImpl(impl));
718  rc = ESR_SessionGetProperty("recognizer.confidenceScorer", (void **)&impl->confidenceScorer, TYPES_CONFIDENCESCORER);
719  if (rc == ESR_NO_MATCH_ERROR)
720  {
721    impl->confidenceScorer = CA_AllocateConfidenceScorer();
722
723    if (!CA_LoadConfidenceScorer(impl->confidenceScorer)) {
724      rc = ESR_INVALID_STATE;
725      PLogError(ESR_rc2str(rc));
726      goto CLEANUP;
727    }
728    CHKLOG(rc, ESR_SessionSetProperty("recognizer.confidenceScorer", impl->confidenceScorer, TYPES_CONFIDENCESCORER));
729  }
730  else if (rc != ESR_SUCCESS)
731  {
732    PLogError(ESR_rc2str(rc));
733    goto CLEANUP;
734  }
735
736  recogParams = CA_AllocateRecognitionParameters();
737  if (recogParams == NULL)
738  {
739    rc = ESR_OUT_OF_MEMORY;
740    PLogError(ESR_rc2str(rc));
741    goto CLEANUP;
742  }
743  CHKLOG(rc, SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams));
744  impl->recognizer = CA_AllocateRecognition();
745  if (impl->recognizer == NULL)
746  {
747    PLogError(ESR_rc2str(rc));
748    goto CLEANUP;
749  }
750  CA_ConfigureRecognition(impl->recognizer, recogParams);
751  CA_FreeRecognitionParameters(recogParams);
752  CHKLOG(rc, HashMapCreate(&impl->grammars));
753  CHKLOG(rc, CircularBufferCreate(sizeof(asr_int16_t) * AUDIO_CIRC_BUFFER_SIZE, MTAG, &impl->buffer));
754  CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &impl->sampleRate));
755
756  impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
757
758  if ((impl->audioBuffer = MALLOC(impl->FRAME_SIZE, MTAG)) == NULL)
759  {
760    rc = ESR_OUT_OF_MEMORY;
761    goto CLEANUP;
762  }
763
764  /* create the waveform buffer */
765  CHKLOG(rc, WaveformBuffer_Create(&impl->waveformBuffer, impl->FRAME_SIZE));
766
767  CHKLOG(rc, ESR_SessionGetSize_t("SREC.Recognizer.utterance_timeout", &impl->utterance_timeout));
768
769  /* OSI logging (SUCCESS) */
770  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
771  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
772  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd")));
773
774  CHKLOG(rc, SR_AcousticStateCreateImpl(&impl->Interface));
775
776  CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.bgsniff"), &impl->bgsniff));
777  /* gated mode == beginning of speech detection */
778  CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &impl->gatedMode));
779
780  *self = (SR_Recognizer*) impl;
781  return ESR_SUCCESS;
782CLEANUP:
783  /* OSI logging (FAILURE) */
784  if (impl->eventLog != NULL)
785  {
786    SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle);
787    SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("FAILURE"), ESR_rc2str(rc));
788    SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd"));
789  }
790
791  if (recogParams != NULL)
792    CA_FreeRecognitionParameters(recogParams);
793  impl->Interface.destroy(&impl->Interface);
794  return rc;
795}
796
797ESR_ReturnCode SR_RecognizerDestroyImpl(SR_Recognizer* self)
798{
799  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
800  ESR_BOOL exists; // isSetup;
801  ESR_ReturnCode rc;
802  LCHAR recHandle[12];
803
804  if (impl->result != NULL)
805  {
806    SR_RecognizerResult_Destroy(impl->result);
807    impl->result = NULL;
808  }
809
810  if (impl->eventLog != NULL)
811  {
812    /* Record the OSI log event */
813    psprintf(recHandle, L("%p"), impl);
814    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
815    CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesst")));
816  }
817
818  /* Clean session */
819  CHKLOG(rc, ESR_SessionContains("recognizer.confidenceScorer", &exists));
820  if (exists)
821    CHKLOG(rc, ESR_SessionRemoveProperty("recognizer.confidenceScorer"));
822
823  if (impl->confidenceScorer != NULL)
824  {
825    CA_FreeConfidenceScorer(impl->confidenceScorer);
826    impl->confidenceScorer = NULL;
827  }
828
829  /* Clear CMS, CRS_RecognizerClose() */
830  if (impl->wavein != NULL)
831  {
832    ESR_BOOL isAttached, isConfigured;
833
834    CHKLOG(rc, CA_IsCMSAttachedtoUtterance(impl->wavein, &isAttached));
835    if (isAttached)
836      CA_DetachCMSfromUtterance(impl->wavein, impl->utterance);
837
838    CHKLOG(rc, CA_IsConfiguredForAgc(impl->wavein, &isConfigured));
839    if (isConfigured)
840      CA_ClearCMSParameters(impl->wavein);
841  }
842
843  /* Free Utterance */
844  if (impl->utterance != NULL)
845  {
846    CA_ClearUtterance(impl->utterance);
847    CA_FreeUtterance(impl->utterance);
848    impl->utterance = NULL;
849  }
850
851  /* Free WaveformBuffer */
852  if (impl->waveformBuffer != NULL)
853  {
854    WaveformBuffer_Destroy(impl->waveformBuffer);
855    impl->waveformBuffer = NULL;
856  }
857
858  /* Free recognizer */
859/*  CHKLOG(rc, self->isSetup(self, &isSetup));
860  if (isSetup)
861    CHKLOG(rc, self->unsetup(self));*/
862  if (impl->grammars != NULL)
863    CHKLOG(rc, self->deactivateAllRules(self));
864  if (impl->recognizer != NULL)
865  {
866    CA_UnloadRecognitionModels(impl->recognizer);
867    CA_UnconfigureRecognition(impl->recognizer);
868    CA_FreeRecognition(impl->recognizer);
869    impl->recognizer = NULL;
870  }
871
872  if (impl->grammars != NULL)
873  {
874    CHKLOG(rc, HashMapDestroy(impl->grammars));
875    impl->grammars = NULL;
876  }
877
878  if (impl->buffer != NULL)
879  {
880    FREE(impl->buffer);
881    impl->buffer = NULL;
882  }
883
884  if (impl->audioBuffer != NULL)
885  {
886    FREE(impl->audioBuffer);
887    impl->audioBuffer = NULL;
888  }
889
890  /* Free frontend */
891  if (impl->frontend)
892  {
893    CA_UnconfigureFrontend(impl->frontend);
894    CA_FreeFrontend(impl->frontend);
895    impl->frontend = NULL;
896  }
897
898  /* Free wave */
899  if (impl->wavein)
900  {
901    CA_UnconfigureWave(impl->wavein);
902    CA_FreeWave(impl->wavein);
903    impl->wavein = NULL;
904  }
905
906  if (impl->parameters != NULL)
907    CHKLOG(rc, impl->parameters->destroy(impl->parameters));
908
909  if (impl->eventLog != NULL)
910  {
911    /* OSI logging (SUCCESS) */
912    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
913    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
914    CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesnd")));
915    impl->eventLog = NULL;
916  }
917
918  if (impl->acousticState != NULL)
919  {
920    impl->acousticState->destroy(self);
921    impl->acousticState = NULL;
922  }
923  FREE(impl);
924  return ESR_SUCCESS;
925CLEANUP:
926  return rc;
927}
928
929ESR_ReturnCode beginRecognizing(SR_RecognizerImpl* impl)
930{
931  CA_RecInputParams* recogParams;
932  LCHAR tok[80];
933  LCHAR* val;
934  PTimeStamp BORT;
935  size_t i, grammarSize;
936  ESR_ReturnCode rc;
937
938  /* Setup recognizer for new utterance */
939  recogParams = CA_AllocateRecognitionParameters();
940  if (recogParams == NULL)
941  {
942    rc = ESR_OUT_OF_MEMORY;
943    PLogError(ESR_rc2str(rc));
944    goto CLEANUP;
945  }
946  SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams);
947  CA_BeginRecognition(impl->recognizer, NULL, 1, recogParams);
948  CA_FreeRecognitionParameters(recogParams);
949  impl->isRecognizing = ESR_TRUE;
950
951  /* OSI log the  grammars */
952  CHKLOG(rc, HashMapGetSize(impl->grammars, &grammarSize));
953  for (i = 0; i < grammarSize; ++i)
954  {
955    psprintf(tok, L("GURI%d"), i);
956    /* use the key as the grammar URI */
957    CHKLOG(rc, HashMapGetKeyAtIndex(impl->grammars, i, &val));
958    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, tok, val));
959  }
960  /* OSI ACST acoustic state reset */
961  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("ACST"), 0));
962  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("LANG"), L("en-us")));
963
964  /* OSI log the start of recognition */
965  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcst")));
966
967  /* save the BORT timing (begin of recog) */
968  PTimeStampSet(&BORT);
969  impl->recogLogTimings.BORT = PTimeStampDiff(&BORT, &impl->timestamp);
970
971  return ESR_SUCCESS;
972CLEANUP:
973  if (recogParams != NULL)
974    CA_FreeRecognitionParameters(recogParams);
975  return rc;
976}
977
978ESR_ReturnCode SR_RecognizerStartImpl(SR_Recognizer* self)
979{
980  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
981  size_t silence_duration_in_frames;
982  size_t end_of_utterance_hold_off_in_frames;
983  size_t grammarCount;
984  ESR_ReturnCode rc;
985  ESR_BOOL enableGetWaveform = ESR_FALSE;
986
987  CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarCount));
988  if (impl->models == NULL)
989  {
990    PLogError("ESR_INVALID_STATE: No rule has been set up");
991    return ESR_INVALID_STATE;
992  }
993  if (grammarCount < 1)
994  {
995    PLogError("ESR_INVALID_STATE: No rule has been activated");
996    return ESR_INVALID_STATE;
997  }
998
999  if (!CA_OpenWaveFromDevice(impl->wavein, DEVICE_RAW_PCM, impl->frontend->samplerate, 0, WAVE_DEVICE_RAW))
1000  {
1001    rc = ESR_INVALID_STATE;
1002    PLogError(ESR_rc2str(rc));
1003    goto CLEANUP;
1004  }
1005
1006  /* Setup utterance */
1007  CA_UnlockUtteranceForInput(impl->utterance);
1008
1009  /* Setup utterance */
1010  CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.silence_duration_in_frames"), &silence_duration_in_frames));
1011  CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.end_of_utterance_hold_off_in_frames"), &end_of_utterance_hold_off_in_frames));
1012  CA_SetEndOfUtteranceByLevelTimeout(impl->utterance, silence_duration_in_frames, end_of_utterance_hold_off_in_frames);
1013
1014  CA_ResetVoicing(impl->utterance);
1015
1016  /*
1017   * NOTE: We don't actually begin the recognizer here, the beginning of speech
1018   * detector will do that.
1019   */
1020
1021  impl->gotLastFrame = ESR_FALSE;
1022  impl->isStarted = ESR_TRUE;
1023  impl->isRecognizing = ESR_FALSE;
1024  impl->isSignalQualityInitialized = ESR_FALSE;
1025  impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
1026  PTimeStampSet(&impl->timestamp);
1027
1028  /* reset waveform buffer at start of every recognition */
1029  CHKLOG(rc, WaveformBuffer_Reset(impl->waveformBuffer));
1030
1031  /* is waveform buffering active? */
1032  rc = ESR_SessionGetBool(L("enableGetWaveform"), &enableGetWaveform);
1033  // rc = impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
1034  if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR)
1035  {
1036    PLogError(L("%s: could determine whether VoiceEnrollment active or not"), ESR_rc2str(rc));
1037    goto CLEANUP;
1038  }
1039  if (enableGetWaveform)
1040    CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_CIRCULAR));
1041  else
1042    CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_OFF));
1043
1044  /* I am going to try to open the audio waveform file here */
1045  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1046  {
1047    /* open a new audio waveform file */
1048    rc = SR_EventLogAudioOpen(impl->eventLog, L("audio/L16"), impl->sampleRate, SAMPLE_SIZE);
1049    if (rc != ESR_SUCCESS)
1050    {
1051      PLogError(L("%s: could not open the RIFF audio file"), ESR_rc2str(rc));
1052      goto CLEANUP;
1053    }
1054  }
1055  impl->frames = impl->processed = 0;
1056  return ESR_SUCCESS;
1057CLEANUP:
1058/*  self->stop(self);*/
1059  return rc;
1060}
1061
1062ESR_ReturnCode SR_RecognizerStopImpl(SR_Recognizer* self)
1063{
1064  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1065  SR_AcousticModelsImpl* modelsImpl;
1066  ESR_ReturnCode rc;
1067
1068  PLOG_DBG_API_ENTER();
1069  if (!impl->isStarted)
1070  {
1071    /* In case the user calls stop() twice */
1072    return ESR_SUCCESS;
1073  }
1074  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1075
1076  /* Clean-up recognizer and utterance */
1077  switch (impl->internalState)
1078  {
1079    case SR_RECOGNIZER_INTERNAL_BEGIN:
1080      /* Recognizer was never started */
1081      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BEGIN")));
1082      CA_LockUtteranceFromInput(impl->utterance);
1083      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1084      if (impl->eventLog != NULL)
1085      {
1086        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BEGIN -> SR_RECOGNIZER_INTERNAL_END")));
1087        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1088        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1089        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1090      }
1091      break;
1092
1093    case SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT:
1094      /* Recognizer was never started */
1095      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_TIMEOUT")));
1096      CA_LockUtteranceFromInput(impl->utterance);
1097      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1098      if (impl->eventLog != NULL)
1099      {
1100        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT -> SR_RECOGNIZER_INTERNAL_END")));
1101        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1102        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1103        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1104      }
1105      break;
1106
1107    case SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH:
1108      /* Recognizer was never started */
1109      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_NO_MATCH")));
1110      CA_LockUtteranceFromInput(impl->utterance);
1111      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1112      if (impl->eventLog != NULL)
1113      {
1114        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH -> SR_RECOGNIZER_INTERNAL_END")));
1115        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1116        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1117        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1118      }
1119      break;
1120
1121    case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
1122      /* Recognizer was never started */
1123      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_DETECTION")));
1124      CA_LockUtteranceFromInput(impl->utterance);
1125      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1126      if (impl->eventLog != NULL)
1127      {
1128        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1129        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1130        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1131        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1132      }
1133      break;
1134
1135    case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
1136      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS_DETECTION")));
1137      CA_LockUtteranceFromInput(impl->utterance);
1138      if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1139      {
1140        rc = ESR_INVALID_STATE;
1141        PLogError(ESR_rc2str(rc));
1142        goto CLEANUP;
1143      }
1144      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1145      if (impl->eventLog != NULL)
1146      {
1147        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1148        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1149        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1150        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1151      }
1152      break;
1153
1154    case SR_RECOGNIZER_INTERNAL_EOI:
1155      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOI")));
1156      CA_LockUtteranceFromInput(impl->utterance);
1157      if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1158      {
1159        rc = ESR_INVALID_STATE;
1160        PLogError(ESR_rc2str(rc));
1161        goto CLEANUP;
1162      }
1163      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1164      if (impl->eventLog != NULL)
1165      {
1166        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOI -> SR_RECOGNIZER_INTERNAL_END")));
1167        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1168        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1169        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1170      }
1171      break;
1172
1173    case SR_RECOGNIZER_INTERNAL_EOS:
1174      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS")));
1175      CA_LockUtteranceFromInput(impl->utterance);
1176      if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1177      {
1178        rc = ESR_INVALID_STATE;
1179        PLogError(ESR_rc2str(rc));
1180        goto CLEANUP;
1181      }
1182      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1183      if (impl->eventLog != NULL)
1184      {
1185        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS -> SR_RECOGNIZER_INTERNAL_END")));
1186        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1187        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1188        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1189      }
1190      break;
1191
1192    case SR_RECOGNIZER_INTERNAL_END:
1193      /* Recognizer already shut down */
1194      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("END")));
1195      break;
1196
1197    default:
1198      /* Shut down recognizer */
1199      CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), impl->internalState));
1200      if (impl->eventLog != NULL)
1201      {
1202        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("unknown state -> SR_RECOGNIZER_INTERNAL_END")));
1203        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1204        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1205        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1206      }
1207      CA_LockUtteranceFromInput(impl->utterance);
1208      if (impl->isRecognizing)
1209      {
1210        if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1211        {
1212          rc = ESR_INVALID_STATE;
1213          PLogError(ESR_rc2str(rc));
1214          goto CLEANUP;
1215        }
1216      }
1217      rc = ESR_INVALID_STATE;
1218      PLogError(L("%s: %d"), ESR_rc2str(rc), impl->internalState);
1219      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1220      goto CLEANUP;
1221  }
1222  if (impl->eventLog != NULL)
1223  {
1224    int n;
1225    LCHAR result[MAX_ENTRY_LENGTH];
1226    result[0] = L('\0');
1227
1228    n = CA_GetUnprocessedFramesInUtterance(impl->utterance);
1229    CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CA_GetUnprocessedFramesInUtterance() (x10ms)"), n));
1230    CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
1231    CHKLOG(rc, SR_EventLogToken(impl->eventLog, L("CA_FullResultLabel() (x20ms)"), result));
1232    n = CircularBufferGetSize(impl->buffer);
1233    CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CircularBufferGetSize() (samples)"), n / SAMPLE_SIZE));
1234  }
1235  if (impl->lockFunction)
1236    impl->lockFunction(ESR_LOCK, impl->lockData);
1237  CircularBufferReset(impl->buffer);
1238  if (impl->lockFunction)
1239    impl->lockFunction(ESR_UNLOCK, impl->lockData);
1240  if (CA_RecognitionHasResults(impl->recognizer))
1241    CA_ClearResults(impl->recognizer);
1242  CA_FlushUtteranceFrames(impl->utterance);
1243  CA_CalculateCMSParameters(impl->wavein);
1244  CA_CloseDevice(impl->wavein);
1245
1246  /* record the OSI event */
1247  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIstop")));
1248
1249  if (impl->result != NULL)
1250  {
1251    CHKLOG(rc, SR_RecognizerResult_Destroy(impl->result));
1252    impl->result = NULL;
1253  }
1254
1255  if (impl->lockFunction)
1256    impl->lockFunction(ESR_LOCK, impl->lockData);
1257  impl->gotLastFrame = ESR_TRUE;
1258  PLOG_DBG_TRACE((L("SR_Recognizer shutdown occured")));
1259  impl->isStarted = ESR_FALSE;
1260  impl->isRecognizing = ESR_FALSE;
1261  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1262    SR_EventLogAudioClose(impl->eventLog);
1263
1264  impl->recogLogTimings.BORT = 0;
1265  impl->recogLogTimings.DURS = 0;
1266  impl->recogLogTimings.EORT = 0;
1267  impl->recogLogTimings.EOSD = 0;
1268  impl->recogLogTimings.EOSS = 0;
1269  impl->recogLogTimings.BOSS = 0;
1270  impl->recogLogTimings.EOST = 0;
1271  impl->eos_reason = L("undefined");
1272
1273  if (impl->lockFunction)
1274    impl->lockFunction(ESR_UNLOCK, impl->lockData);
1275  PLOG_DBG_API_EXIT(rc);
1276  return rc;
1277CLEANUP:
1278  PLOG_DBG_API_EXIT(rc);
1279  return rc;
1280}
1281
1282ESR_ReturnCode SR_RecognizerSetupImpl(SR_Recognizer* self)
1283{
1284  ESR_ReturnCode rc;
1285  CA_AcoustInputParams* acousticParams = NULL;
1286  SR_AcousticModelsImpl* modelsImpl;
1287  SR_AcousticModels* models;
1288  SR_RecognizerImpl* recogImpl = NULL;
1289  CA_Acoustic* acoustic;
1290  size_t size, i;
1291  LCHAR           filenames[P_PATH_MAX];
1292  size_t          len;
1293
1294  len = P_PATH_MAX;
1295  CHKLOG(rc, ESR_SessionGetLCHAR ( L("cmdline.modelfiles"), filenames, &len ));
1296
1297  CHKLOG(rc, SR_AcousticModelsLoad ( filenames, &models ));
1298
1299  if (models == NULL)
1300    {
1301      PLogError(L("ESR_INVALID_STATE while finding cmdline.modelfiles"));
1302      return ESR_INVALID_STATE;
1303    }
1304  modelsImpl = (SR_AcousticModelsImpl*) models;
1305  recogImpl = (SR_RecognizerImpl*) self;
1306  acousticParams = NULL;
1307
1308  CHKLOG(rc, SR_AcousticModelsGetCount(models, &size));
1309  acousticParams = CA_AllocateAcousticParameters();
1310  if (acousticParams == NULL)
1311      {
1312      rc = ESR_OUT_OF_MEMORY;
1313      PLogError(ESR_rc2str(rc));
1314      goto CLEANUP;
1315      }
1316    CHKLOG(rc, modelsImpl->getLegacyParameters(acousticParams));
1317    CHKLOG(rc, ArrayListGetSize(modelsImpl->acoustic, &size));
1318    for (i = 0; i < size; ++i)
1319      {
1320      CHKLOG(rc, ArrayListGet(modelsImpl->acoustic, i, (void **)&acoustic));
1321      CA_LoadModelsInAcoustic(recogImpl->recognizer, acoustic, acousticParams);
1322      }
1323  CA_FreeAcousticParameters(acousticParams);
1324
1325  recogImpl->models = models;
1326  CHKLOG(rc, modelsImpl->setupPattern(recogImpl->models, self));
1327  return ESR_SUCCESS;
1328 CLEANUP:
1329  if (acousticParams != NULL)
1330    CA_FreeAcousticParameters(acousticParams);
1331  if (recogImpl != NULL)
1332    CA_UnloadRecognitionModels(recogImpl->recognizer);
1333  return rc;
1334}
1335
1336ESR_ReturnCode SR_RecognizerUnsetupImpl(SR_Recognizer* self)
1337{
1338  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1339  SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1340  ESR_ReturnCode rc;
1341
1342  CHKLOG(rc, modelsImpl->unsetupPattern(impl->models));
1343  CA_UnloadRecognitionModels(impl->recognizer);
1344  CHKLOG(rc, SR_AcousticModelsDestroy ( impl->models ));
1345  impl->models = NULL;
1346  return ESR_SUCCESS;
1347 CLEANUP:
1348  return rc;
1349}
1350
1351ESR_ReturnCode SR_RecognizerIsSetupImpl(SR_Recognizer* self, ESR_BOOL* isSetup)
1352{
1353  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1354
1355  if (isSetup == NULL)
1356  {
1357    PLogError(L("ESR_INVALID_ARGUMENT"));
1358    return ESR_INVALID_ARGUMENT;
1359  }
1360  *isSetup = impl->models != NULL;
1361  return ESR_SUCCESS;
1362}
1363
1364ESR_ReturnCode SR_RecognizerGetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1365    LCHAR* value, size_t* len)
1366{
1367  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1368  ESR_ReturnCode rc;
1369
1370  rc = impl->parameters->getLCHAR(impl->parameters, key, value, len);
1371  if (rc == ESR_NO_MATCH_ERROR)
1372  {
1373    CHKLOG(rc, ESR_SessionGetLCHAR(key, value, len));
1374    return ESR_SUCCESS;
1375  }
1376  else if (rc != ESR_SUCCESS)
1377  {
1378    PLogError(ESR_rc2str(rc));
1379    goto CLEANUP;
1380  }
1381  return ESR_SUCCESS;
1382CLEANUP:
1383  return rc;
1384}
1385
1386/*
1387 * The get / set code is a mess. Since we only use size_t parameters, that's all
1388 * that I am going to make work. The impl->parameters don't work so you always
1389 * have to get them from the session. The impl always logs an error. SteveR
1390 */
1391
1392ESR_ReturnCode SR_RecognizerGetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1393    size_t* value)
1394{
1395  ESR_ReturnCode rc;
1396
1397  CHKLOG(rc, ESR_SessionGetSize_t(key, value));
1398  return ESR_SUCCESS;
1399CLEANUP:
1400  return rc;
1401}
1402
1403ESR_ReturnCode SR_RecognizerGetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value)
1404{
1405  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1406  ESR_ReturnCode rc;
1407
1408  rc = impl->parameters->getBool(impl->parameters, key, value);
1409  if (rc == ESR_NO_MATCH_ERROR)
1410  {
1411    CHKLOG(rc, ESR_SessionGetBool(key, value));
1412    return ESR_SUCCESS;
1413  }
1414  else if (rc != ESR_SUCCESS)
1415  {
1416    PLogError(ESR_rc2str(rc));
1417    goto CLEANUP;
1418  }
1419  return ESR_SUCCESS;
1420CLEANUP:
1421  return rc;
1422}
1423
1424ESR_ReturnCode SR_RecognizerSetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1425    LCHAR* value)
1426{
1427  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1428  LCHAR temp[256];
1429  ESR_ReturnCode rc;
1430  size_t len = 256;
1431
1432  rc = impl->parameters->getLCHAR(impl->parameters, key, temp, &len);
1433  if (rc == ESR_SUCCESS)
1434  {
1435    if (LSTRCMP(temp, value) == 0)
1436      return ESR_SUCCESS;
1437    CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1438  }
1439  else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1440  {
1441    PLogError(ESR_rc2str(rc));
1442    goto CLEANUP;
1443  }
1444
1445  CHKLOG(rc, impl->parameters->setLCHAR(impl->parameters, key, value));
1446  return ESR_SUCCESS;
1447CLEANUP:
1448  return rc;
1449}
1450/*
1451 * The only set param function that is working is for the size_t parameters; and not
1452 * all of them are working, only the ones specified in the function itself. There are
1453 * two reasons for this: first most of the set functions just put the value in an unused
 * table that has no effect; second many of the changes need to be propagated to a specific
1455 * part of the code. This needs to be evaluated on a per parameter basis. SteveR
1456 */
1457
1458/*
1459 * This function will be used to set parameters in the session. We need to go through
 * the recognizer so as to propagate the values into the recognizer. We will rely on
1461 * the session to do the right thing. SteveR
1462 */
1463
1464ESR_ReturnCode SR_RecognizerSetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1465    size_t value)
1466{
1467  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1468  ESR_ReturnCode rc;
1469
1470  rc = ESR_SessionSetSize_t ( key, value );
1471
1472  if (rc == ESR_SUCCESS)
1473  {
1474    if  ( LSTRCMP ( L("SREC.Recognizer.utterance_timeout"), key ) == 0 )
1475    {
1476      impl->utterance_timeout = value;
1477    }
1478    else if  ( LSTRCMP ( L("CREC.Recognizer.terminal_timeout"), key ) == 0 )
1479    {
1480      impl->recognizer->eosd_parms->endnode_timeout = value;
1481    }
1482    else if  ( LSTRCMP ( L("CREC.Recognizer.optional_terminal_timeout"), key ) == 0 )
1483    {
1484      impl->recognizer->eosd_parms->optendnode_timeout = value;
1485    }
1486    else if  ( LSTRCMP ( L("CREC.Recognizer.non_terminal_timeout"), key ) == 0 )
1487    {
1488      impl->recognizer->eosd_parms->internalnode_timeout = value;
1489    }
1490    else if  ( LSTRCMP ( L("CREC.Recognizer.eou_threshold"), key ) == 0 )
1491    {
1492      impl->recognizer->eosd_parms->eos_costdelta = (frameID)value;
1493      impl->recognizer->eosd_parms->opt_eos_costdelta = (frameID)value;
1494    }
1495    else
1496    {
1497      PLogError(L("ESR_INVALID_ARGUMENT"));
1498      rc = ESR_INVALID_ARGUMENT;
1499    }
1500  }
1501  return rc;
1502}
1503
1504
1505ESR_ReturnCode SR_RecognizerSetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value)
1506{
1507  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1508  ESR_BOOL temp;
1509  ESR_ReturnCode rc;
1510
1511  rc = impl->parameters->getBool(impl->parameters, key, &temp);
1512  if (rc == ESR_SUCCESS)
1513  {
1514    if (temp == value)
1515      return ESR_SUCCESS;
1516    CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1517  }
1518  else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1519    return rc;
1520
1521  CHKLOG(rc, impl->parameters->setBool(impl->parameters, key, value));
1522  return ESR_SUCCESS;
1523CLEANUP:
1524  return rc;
1525}
1526
1527ESR_ReturnCode SR_RecognizerHasSetupRulesImpl(SR_Recognizer* self, ESR_BOOL* hasSetupRules)
1528{
1529  SR_RecognizerImpl* recogImpl = (SR_RecognizerImpl*) self;
1530  size_t size;
1531  ESR_ReturnCode rc;
1532
1533  if (hasSetupRules == NULL)
1534  {
1535    PLogError(L("ESR_INVALID_ARGUMENT"));
1536    return ESR_INVALID_ARGUMENT;
1537  }
1538  CHKLOG(rc, HashMapGetSize(recogImpl->grammars, &size));
1539  *hasSetupRules = size > 0;
1540  return ESR_SUCCESS;
1541CLEANUP:
1542  return rc;
1543}
1544
1545ESR_ReturnCode SR_RecognizerActivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1546    const LCHAR* ruleName, unsigned int weight)
1547{
1548  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1549  SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1550  SR_AcousticModelsImpl* modelsImpl;
1551  LCHAR grammarID[80];
1552  ESR_ReturnCode rc;
1553  char *failure_reason = NULL;
1554
1555  if (grammar == NULL)
1556  {
1557    if (impl->eventLog)
1558      failure_reason = "badinput";
1559    rc = ESR_INVALID_ARGUMENT;
1560    PLogError(L("ESR_INVALID_ARGUMENT"));
1561    goto CLEANUP;
1562  }
1563
1564  if (impl->models == NULL)
1565  {
1566    failure_reason = "nomodels";
1567    rc = ESR_INVALID_STATE;
1568    PLogError(L("acoustic models must be configured"));
1569    goto CLEANUP;
1570  }
1571
1572  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1573
1574  if (ruleName == NULL)
1575    psprintf(grammarID, L("%p"), grammar);
1576  else
1577  {
1578    if (LSTRLEN(ruleName) > 80)
1579    {
1580      rc = ESR_BUFFER_OVERFLOW;
1581      PLogError(ESR_rc2str(rc));
1582      goto CLEANUP;
1583    }
1584    LSTRCPY(grammarID, ruleName);
1585  }
1586
1587  CHKLOG(rc, HashMapPut(impl->grammars, grammarID, grammar));
1588  if (CA_SetupSyntaxForRecognizer(grammarImpl->syntax, impl->recognizer))
1589  {
1590    failure_reason = "cafailed";
1591    rc = ESR_INVALID_STATE;
1592    PLogError(L("ESR_INVALID_STATE"));
1593    goto CLEANUP;
1594  }
1595
1596   CHKLOG(rc, SR_Grammar_SetupRecognizer(grammar, self));
1597  grammarImpl->isActivated = ESR_TRUE;
1598
1599  /*
1600   * If we want to log dynamically added words, then we must give the grammar a reference
1601   * to our event log. The grammar logs word additions if and only if its reference to
1602   * eventLog is non-null.
1603   */
1604  if (impl->osi_log_level & OSI_LOG_LEVEL_ADDWD)
1605    grammarImpl->eventLog = impl->eventLog;
1606  else
1607    grammarImpl->eventLog = NULL;
1608
1609  rc = ESR_SUCCESS;
1610
1611CLEANUP:
1612  if (impl->eventLog)
1613  {
1614    if (failure_reason)
1615    {
1616      SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar);
1617      SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1618      SR_EventLogToken(impl->eventLog, L("rslt"), "fail");
1619      SR_EventLogToken(impl->eventLog, L("reason"), failure_reason);
1620      SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1621    }
1622    else
1623    {
1624      SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar);
1625      SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1626      SR_EventLogToken(impl->eventLog, L("rslt"), "ok");
1627      SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1628    }
1629  }
1630  return rc;
1631}
1632
1633ESR_ReturnCode SR_RecognizerDeactivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1634    const LCHAR* ruleName)
1635{
1636  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1637  SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1638  LCHAR grammarID[MAX_INT_DIGITS+1];
1639  ESR_ReturnCode rc;
1640
1641  if (ruleName == NULL)
1642  {
1643    psprintf(grammarID, L("%p"), grammar);
1644    CHKLOG(rc, HashMapRemove(impl->grammars, grammarID));
1645  }
1646  else
1647    CHKLOG(rc, HashMapRemove(impl->grammars, ruleName));
1648  grammarImpl->isActivated = ESR_FALSE;
1649  return ESR_SUCCESS;
1650CLEANUP:
1651  return rc;
1652}
1653
1654ESR_ReturnCode SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer* self)
1655{
1656  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1657  ESR_ReturnCode rc;
1658
1659  CHKLOG(rc, HashMapRemoveAll(impl->grammars));
1660  CA_ClearSyntaxForRecognizer(0, impl->recognizer);
1661  return ESR_SUCCESS;
1662CLEANUP:
1663  return rc;
1664}
1665
1666ESR_ReturnCode SR_RecognizerIsActiveRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1667    const LCHAR* ruleName, ESR_BOOL* isActiveRule)
1668{
1669  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1670  LCHAR grammarID[MAX_INT_DIGITS+1];
1671  ESR_ReturnCode rc;
1672
1673  psprintf(grammarID, L("%p"), grammar);
1674  CHKLOG(rc, HashMapContainsKey(impl->grammars, (LCHAR*) &grammarID, isActiveRule));
1675  return ESR_SUCCESS;
1676CLEANUP:
1677  return rc;
1678}
1679
1680ESR_ReturnCode SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer* self, SR_Grammar* grammar)
1681{
1682  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1683  SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*)grammar;
1684  int iRc;
1685
1686  if(!impl || !grammarImpl)
1687    return ESR_INVALID_ARGUMENT;
1688  iRc = CA_CeilingSyntaxForRecognizer( grammarImpl->syntax, impl->recognizer);
1689  if(iRc) return ESR_INVALID_STATE;
1690
1691  return ESR_SUCCESS;
1692}
1693
1694ESR_ReturnCode SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer* self, SR_Grammar* grammar,
1695    ESR_BOOL* isConsistent)
1696{
1697  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1698  SR_GrammarImpl* grammarImpl;
1699  SR_RecognizerImpl* impl2;
1700
1701
1702  grammarImpl = (SR_GrammarImpl*) grammar;
1703  impl2 = (SR_RecognizerImpl*)grammarImpl->recognizer;
1704  // *isConsistent = grammarImpl->models == impl->models;
1705  *isConsistent = (impl2->models == impl->models);
1706  return ESR_SUCCESS;
1707}
1708
1709ESR_ReturnCode SR_RecognizerGetModelsImpl(SR_Recognizer* self, SR_AcousticModels** pmodels)
1710{
1711  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1712  *pmodels = impl->models;
1713  return ESR_SUCCESS;
1714}
1715
1716ESR_ReturnCode SR_RecognizerPutAudioImpl(SR_Recognizer* self, asr_int16_t* buffer, size_t* bufferSize,
1717    ESR_BOOL isLast)
1718{
1719  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1720  ESR_ReturnCode rc;
1721  int    rcBufWrite;
1722  size_t nbWritten;
1723
1724  if (isLast == ESR_FALSE && (buffer == NULL || bufferSize == NULL))
1725  {
1726    PLogError(L("ESR_INVALID_ARGUMENT"));
1727    return ESR_INVALID_ARGUMENT;
1728  }
1729
1730  if (impl->lockFunction)
1731    impl->lockFunction(ESR_LOCK, impl->lockData);
1732  if (!impl->isStarted)
1733  {
1734    if (impl->lockFunction)
1735      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1736    PLogMessage(L("ESR_INVALID_STATE: Tried pushing audio while recognizer was offline"));
1737    return ESR_INVALID_STATE;
1738  }
1739  if (impl->gotLastFrame)
1740  {
1741    if (impl->lockFunction)
1742      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1743    PLogMessage(L("ESR_INVALID_STATE: isLast=TRUE"));
1744    return ESR_INVALID_STATE;
1745  }
1746  if (buffer == NULL && isLast == ESR_FALSE)
1747  {
1748    if (impl->lockFunction)
1749      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1750    PLogError(L("ESR_INVALID_ARGUMENT: got NULL  buffer on non-terminal frame"));
1751    return ESR_INVALID_ARGUMENT;
1752  }
1753
1754  rcBufWrite = CircularBufferWrite(impl->buffer, buffer, *bufferSize * SAMPLE_SIZE);
1755  if (rcBufWrite < 0)
1756  {
1757    rc = ESR_INVALID_STATE;
1758    PLogError(L("%s: error writing to buffer (buffer=%d, available=%u)"), ESR_rc2str(rc), (int) impl->buffer, CircularBufferGetAvailable(impl->buffer));
1759    goto CLEANUP;
1760  }
1761
1762  nbWritten = (size_t)rcBufWrite;
1763  if (nbWritten % SAMPLE_SIZE != 0)
1764  {
1765    size_t amountUnwritten;
1766
1767    /* The buffer is byte-based while we're sample based. Make sure we write entire samples or not at all */
1768    amountUnwritten = CircularBufferUnwrite(impl->buffer, nbWritten % SAMPLE_SIZE);
1769    passert(amountUnwritten == nbWritten % SAMPLE_SIZE);
1770    nbWritten -= amountUnwritten;
1771  }
1772  passert(nbWritten % 2 == 0); /* make sure CircularBufferSize is divisible by 2 */
1773
1774  if (nbWritten < *bufferSize * SAMPLE_SIZE)
1775  {
1776    rc = ESR_BUFFER_OVERFLOW;
1777#ifndef NDEBUG
1778    PLOG_DBG_TRACE((L("%s: writing to circular buffer"), ESR_rc2str(rc)));
1779#endif
1780    *bufferSize = nbWritten / SAMPLE_SIZE;
1781    if (impl->lockFunction)
1782      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1783    goto CLEANUP;
1784  }
1785  if (impl->lockFunction)
1786    impl->lockFunction(ESR_UNLOCK, impl->lockData);
1787
1788  if (isLast)
1789    impl->gotLastFrame = ESR_TRUE;
1790  return ESR_SUCCESS;
1791CLEANUP:
1792  return rc;
1793}
1794
1795/* utility function to sort the ArrayList of nbest list results by the score of the first
1796   semantic result */
1797ESR_ReturnCode SemanticResults_SortByScore(ArrayList *results, size_t nbestSize)
1798{
1799  ESR_ReturnCode rc;
1800  ArrayList* semanticResultList;
1801  ArrayList* semanticResultList_swap;
1802  SR_SemanticResult* semanticResult_i;
1803  SR_SemanticResult* semanticResult_j;
1804  size_t i, j;
1805  LCHAR scoreStr[MAX_ENTRY_LENGTH] ;
1806  size_t scoreStrLen = MAX_ENTRY_LENGTH ;
1807  int score_i, score_j;
1808
1809  /* bubble sort */
1810  for (i = 0; i < (size_t)nbestSize; ++i)
1811  {
1812    for (j = i + 1; j < (size_t)nbestSize; ++j)
1813    {
1814      /* get for i */
1815      CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList)); /* nbest index */
1816      CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_i));      /* semresult 0 */
1817
1818      /* get for j */
1819      CHKLOG(rc, ArrayListGet(results, j, (void **)&semanticResultList)); /* nbest index */
1820      CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_j));      /* semresult 0 */
1821
1822      scoreStrLen = MAX_ENTRY_LENGTH ;
1823      CHKLOG(rc, semanticResult_i->getValue(semanticResult_i, "raws", scoreStr, &scoreStrLen));
1824      CHKLOG(rc, lstrtoi(scoreStr, &score_i, 10));
1825      scoreStrLen = MAX_ENTRY_LENGTH ;
1826      CHKLOG(rc, semanticResult_j->getValue(semanticResult_j, "raws", scoreStr, &scoreStrLen));
1827      CHKLOG(rc, lstrtoi(scoreStr, &score_j, 10));
1828
1829      if (score_j < score_i)
1830      {
1831        /* need to swap */
1832        CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList_swap)); /* put i in swap */
1833        CHKLOG(rc, ArrayListSet(results, i, semanticResultList));       /* put j in i    */
1834        CHKLOG(rc, ArrayListSet(results, j, semanticResultList_swap));  /* put swap in j */
1835      }
1836    }
1837  }
1838  return ESR_SUCCESS;
1839CLEANUP:
1840  return rc;
1841}
1842
1843ESR_ReturnCode filter_CA_FullResultLabel(const LCHAR* label, LCHAR *filtered_label, size_t* boss, size_t* eoss)
1844{
1845  ESR_ReturnCode rc;
1846  enum
1847  {
1848    NO_COPY,
1849    FRAME,
1850    WORD,
1851  } filter_state = WORD;
1852  LCHAR *dst = filtered_label;
1853  LCHAR eosBuf[16]; /* max 9999 + '\0' */
1854  LCHAR bosBuf[16]; /* max 9999 + '\0' */
1855  LCHAR* pBuf = NULL;
1856
1857  /**
1858   * example: you want to filter this:
1859   *
1860   * "-pau-@23 clock@97 twenty_four@125 hour@145  "
1861   *        ^boss = 23                       ^ eoss = 145
1862   * and get this:
1863   *
1864   * "clock twenty_four hour"
1865   */
1866
1867  passert(LSTRLEN(label) > 0);
1868  while (*label)
1869  {
1870    switch (filter_state)
1871    {
1872      case NO_COPY:
1873        if (*label == L(' '))
1874          filter_state = WORD;
1875        else if (*label == L('@'))
1876        {
1877          filter_state = FRAME;
1878          if (pBuf == NULL)
1879            pBuf = bosBuf;
1880          else
1881          {
1882            *pBuf = 0;
1883            pBuf = eosBuf;
1884          }
1885        }
1886        break;
1887      case WORD:
1888        if (*label == L('@'))
1889        {
1890          *dst = L(' '); /* insert space */
1891          dst++;
1892          filter_state = FRAME;
1893          if (pBuf == NULL)
1894            pBuf = bosBuf;
1895          else
1896          {
1897            *pBuf = 0;
1898            pBuf = eosBuf;
1899          }
1900        }
1901        else
1902        {
1903          *dst = *label;
1904          dst++;
1905        }
1906        break;
1907      case FRAME:
1908        if (*label == L(' '))
1909          filter_state = WORD;
1910        else
1911        {
1912          *pBuf = *label;
1913          pBuf++;
1914        }
1915        break;
1916    }
1917    label++;
1918  }
1919  *dst = 0; /* term the string */
1920  *pBuf = 0; /* term the string */
1921
1922  /* trim the end spaces */
1923  dst--;
1924  while (*dst == ' ')
1925    *dst-- = '\0';
1926
1927  /* set the eos signal indicated by the end pointed data */
1928  if (eosBuf[0] != 0)
1929    CHKLOG(rc, lstrtoui(eosBuf, eoss, 10));
1930  else
1931    eoss = 0;
1932
1933  if (bosBuf[0] != 0)
1934    CHKLOG(rc, lstrtoui(bosBuf, boss, 10));
1935  else
1936    boss = 0;
1937
1938  return ESR_SUCCESS;
1939CLEANUP:
1940  return rc;
1941}
1942
/**
 * Populates the recognizer result if it can; otherwise it reports NO MATCH because no results exist.
 *
 * INPUT STATE: SR_RECOGNIZER_INTERNAL_EOS
 *
 * @param self SR_Recognizer handle
 * @todo break up into smaller functions
 */
1951ESR_ReturnCode SR_RecognizerCreateResultImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
1952    SR_RecognizerResultType* type)
1953{
1954  LCHAR label[MAX_ENTRY_LENGTH * 2];  /* run out of buffer */
1955#define WORDID_COUNT 48 /* can be quite high for voice enrollment! */
1956  wordID wordIDs[WORDID_COUNT];
1957  LCHAR tok[80];
1958  LCHAR waveformFilename[P_PATH_MAX];
1959  LCHAR* pkey;
1960  SR_GrammarImpl* pgrammar;
1961  asr_int32_t raws; /* raw score */
1962  size_t iBest, nbestSize, jBest, k, grammarSize, semanticResultsSize, grammarIndex_for_iBest;
1963  LCHAR* lValue;
1964  LCHAR* lValue2;
1965  int confValue;
1966  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1967  SR_RecognizerResultImpl* resultImpl = (SR_RecognizerResultImpl*) impl->result;
1968  ESR_BOOL containsKey;
1969  int valid, score, recogID;
1970  LCHAR result[MAX_ENTRY_LENGTH];
1971  size_t len, size;
1972  size_t locale;
1973  int current_choice;
1974
1975  /**
1976   * Semantic result stuff
1977   */
1978  /* a temp buffer to hold semantic results of a parse (there may be several results) */
1979  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
1980  ArrayList* semanticList;
1981  ArrayList* semanticList2;
1982  SR_SemanticResultImpl* semanticImpl;
1983  SR_SemanticResultImpl* semanticImpl2;
1984  SR_SemanticResult* semanticResult;
1985  SR_SemanticResult* semanticResult2;
1986  waveform_buffering_state_t buffering_state;
1987
1988  SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1989  ESR_ReturnCode rc;
1990  PTimeStamp EORT;
1991
1992  CA_LockUtteranceFromInput(impl->utterance);
1993  if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1994  {
1995    PLogError(L("ESR_INVALID_STATE"));
1996    return ESR_INVALID_STATE;
1997  }
1998
1999  /* check if the forward search was successful */
2000  valid = CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
2001  CA_GetRecogID(impl->recognizer, &recogID);
2002  CA_FullResultScore(impl->recognizer, &score, 1);
2003#ifdef SREC_ENGINE_VERBOSE_LOGGING
2004  PLogMessage(L("R: %s type %d score %d from recognizer%d"), result, type, score, valid, recogID);
2005  PLogMessage(L("R: %s score %d from recognizer%d"), result, score, valid, recogID);
2006#endif
2007#ifdef _WIN32
2008  //pfprintf(PSTDOUT, ("R: %s type %d score %d from recognizer%d\n"), result, type, score, valid, recogID);
2009#endif
2010
2011
2012  switch (valid)
2013  {
2014    case FULL_RESULT:
2015      CHKLOG(rc, filter_CA_FullResultLabel(result, label, &impl->recogLogTimings.BOSS, &impl->recogLogTimings.EOSS));
2016#ifdef SREC_ENGINE_VERBOSE_LOGGING
2017      PLogMessage("R: %s", result);
2018#endif
2019      CA_FullResultScore(impl->recognizer, (int*) &raws, 0);
2020#ifdef SREC_ENGINE_VERBOSE_LOGGING
2021      PLogMessage("S: %d", raws);
2022#endif
2023
2024      /* now that we have an endpointed result, we can parse the result transcription
2025         to see where speech started and ended. Then we can trim off excess parts of the
2026         recorded audio waveform (if exists) so that nametags are just the right amount of
2027         audio
2028      */
2029      CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
2030      if (buffering_state != WAVEFORM_BUFFERING_OFF)
2031      {
2032        CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &size));
2033        if (size > 0)
2034        {
2035          rc = WaveformBuffer_ParseEndPointedResultAndTrim(impl->waveformBuffer, result, impl->FRAME_SIZE);
2036          if (rc == ESR_BUFFER_OVERFLOW)
2037          {
2038            /* Nametag EOS occured beyond end of buffer */
2039          }
2040          else if (rc != ESR_SUCCESS)
2041          {
2042            PLogError(ESR_rc2str(rc));
2043            goto CLEANUP;
2044          }
2045        }
2046      }
2047      break;
2048
2049    case REJECT_RESULT:
2050#ifdef SREC_ENGINE_VERBOSE_LOGGING
2051      PLogMessage(L("R: <REJECTED>"));
2052#endif
2053      break;
2054    default:
2055#ifdef SREC_ENGINE_VERBOSE_LOGGING
2056      PLogMessage(L("E: No results available"));
2057      PLogMessage(L("R: <FAILED>"));
2058#endif
2059      break;
2060  }
2061
2062
2063  if (valid == FULL_RESULT)
2064  {
2065    /* Populate SR_RecognizerResult */
2066    resultImpl->nbestList = CA_PrepareNBestList(impl->recognizer, 10, &raws);
2067    if (resultImpl->nbestList == NULL)
2068    {
2069      /*
2070       * This is not a failure. It simply means that I have not advanced far
2071       * enough in recognition in order to obtain results (no paths in
2072       * graph). This occurs, for instance, when a eof is reached (no more data)
2073       * and I have not even created any paths in my graph.
2074       */
2075
2076      *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2077      *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2078      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2079      if (impl->eventLog != NULL)
2080      {
2081        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2082        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2083        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2084        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2085      }
2086      passert(0);
2087      return ESR_SUCCESS;
2088    }
2089
2090    nbestSize = CA_NBestListCount(resultImpl->nbestList);
2091  }
2092  else
2093    nbestSize = 0;
2094
2095  if (resultImpl->results != NULL)
2096    ArrayListRemoveAll(resultImpl->results);
2097  else
2098    CHKLOG(rc, ArrayListCreate(&resultImpl->results));
2099  if (nbestSize == 0)
2100  {
2101    /*
2102     * Got empty n-best list even though the recognition was successful.
2103     * We handle this in the same way that recog_startpt does... we consider it a no match.
2104     * We could adjust the CREC.Recognizer.viterbi_prune_thresh to a higher level, but that
2105     * may not fix the problem completely. We need to fix the bug in the astar search!!!
2106     */
2107    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2108    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2109    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2110    if (impl->eventLog != NULL)
2111    {
2112      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2113      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2114      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2115      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2116    }
2117#ifdef SREC_ENGINE_VERBOSE_LOGGING
2118    PLogMessage(L("ESR_INVALID_STATE: got empty n-best list even though the recognition was successful"));
2119#endif
2120    return ESR_SUCCESS; /* we do not want to halt the app in this case */
2121  }
2122  else
2123  {
2124    *status = SR_RECOGNIZER_EVENT_RECOGNITION_RESULT;
2125    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2126    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2127    if (impl->eventLog != NULL)
2128    {
2129      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2130      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2131      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2132      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2133    }
2134  }
2135
2136  /**
2137   * All grammars associated with the recognizer are considered to be active
2138   * and therefore, I do a semantic parse on each. On the first grammar that
2139   * gives one or more semantic results, I stop parsing the other grammars.
2140   */
2141  CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarSize));
2142  ASSERT( grammarSize == 1);
2143
2144  for (iBest = 0; iBest < nbestSize; ++iBest)
2145  {
2146    len = WORDID_COUNT;
2147    if (CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) != ESR_SUCCESS)
2148    {
2149      *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2150      *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2151      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2152      if (impl->eventLog != NULL)
2153      {
2154        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2155        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2156        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2157        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2158      }
2159      PLogError(L("ESR_INVALID_STATE: got bad n-best list entry %d"), iBest);
2160      return ESR_INVALID_STATE;
2161    }
2162
2163    CHKLOG(rc, ArrayListCreate(&semanticList));
2164    CHKLOG(rc, resultImpl->results->add(resultImpl->results, semanticList));
2165
2166    grammarIndex_for_iBest = 0;
2167    CHKLOG(rc, impl->grammars->getKeyAtIndex(impl->grammars, grammarIndex_for_iBest, &pkey));
2168    CHKLOG(rc, impl->grammars->get(impl->grammars, pkey, (void **)&pgrammar));
2169
2170    CHKLOG(rc, SR_GrammarGetSize_tParameter((SR_Grammar*) pgrammar, L("locale"), &locale));
2171    resultImpl->locale = locale;
2172
2173    /* I need to manage my semantic results external to the check parse function */
2174    for (k = 0; k < MAX_SEM_RESULTS; ++k)
2175      SR_SemanticResultCreate(&semanticResults[k]);
2176
2177    /*
2178       The code here tries to make the voice-enrollment more effective.
2179       The VE grammar decodes a sequence of best phonemes, but the nbest
2180       processing may find a better score for an alternative choice than
2181       the score of the viterbi best choice.  The reason for this is that
2182       alternative choices don't honor cross-word context-dependency quite
2183       accurately.  If we choose an alternative choice then the sequence of
2184       phoneme decoded does not correspond to the sequence of models decoded.
2185       To counter this, we FORCIBLY make sure the top choice here is the
2186       VITERBI top choice.
2187    */
2188
2189    if (iBest == 0)
2190      {
2191        if (CA_IsEnrollmentSyntax( pgrammar->syntax)) {
2192          /* this was voice enrollment, so let's try to replace */
2193          // 	char* word1 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[0]);
2194          // char* word2 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[1]);
2195          // if (!strncmp(word1,voice_enroll_word_prefix,VEWPLEN)&&!strncmp(word2,voice_enroll_word_prefix,VEWPLEN))
2196          len = WORDID_COUNT;
2197          rc = CA_FullResultWordIDs(impl->recognizer, wordIDs, &len);
2198          if (rc != ESR_SUCCESS)
2199            {
2200              /* in case of problem with viterbi path choice, we revert back */
2201              len = WORDID_COUNT;
2202              rc = CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) ;
2203            }
2204        }
2205      }
2206
2207    LSTRCPY(label, L(""));
2208    for (k = 0; wordIDs[k] != MAXwordID; ++k)
2209      {
2210        LCHAR* wordk = NULL;
2211        wordk = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[k]);
2212        LSTRCAT(label, wordk);
2213        LSTRCAT(label, L(" "));
2214      }
2215    CHKLOG(rc, CA_ResultStripSlotMarkers(label));
2216    passert(LSTRCMP(label, L("")) != 0);
2217
2218    /* strip the trailing blank */
2219    k = LSTRLEN(label) - 1;
2220    if (k > 0 && label[k] == L(' '))
2221      label[k] = 0;
2222
2223    semanticResultsSize = MAX_SEM_RESULTS;
2224
2225#if SEMPROC_ACTIVE
2226
2227    /* set the literal prior to processing so that semproc can read the value
2228       during processing */
2229    CHKLOG(rc, pgrammar->semproc->flush(pgrammar->semproc));
2230    CHKLOG(rc, pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), label));
2231
2232    rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
2233                                               wordIDs, semanticResults, &semanticResultsSize);
2234
2235    /* rc = pgrammar->semproc->checkParse(pgrammar->semproc, pgrammar->semgraph,
2236       label, semanticResults, &semanticResultsSize); */
2237
2238    if (rc != ESR_SUCCESS)
2239      {
2240        for (k = 0; k < MAX_SEM_RESULTS; ++k)
2241          {
2242            semanticResults[k]->destroy(semanticResults[k]);
2243            semanticResults[k] = NULL;
2244          }
2245        goto CLEANUP;
2246      }
2247#else
2248    semanticResultsSize = 0;
2249#endif
2250    /* cleanup the empty ones */
2251    for (k = semanticResultsSize; k < MAX_SEM_RESULTS; ++k)
2252      {
2253        CHKLOG(rc, semanticResults[k]->destroy(semanticResults[k]));
2254        semanticResults[k] = NULL;
2255      }
2256
2257    /* save the good ones */
2258    for (k = 0; k < semanticResultsSize; ++k)
2259      {
2260        /*
2261         * Save the pointer to the semantic result that was created.
2262         * Remember that the semantic result array only holds pointers
2263         * and for each time that the function is called, new semantic results
2264         * are created, and the pointers overwrite old values in the array
2265         */
2266        CHKLOG(rc, semanticList->add(semanticList, semanticResults[k]));
2267      }
2268
2269#if SEMPROC_ACTIVE
2270    if (semanticResultsSize > 0)
2271      {
2272        /* OSI log the grammar(s) that was used in recognizing */
2273        psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2274        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2275      }
2276#else
2277    /* OSI log the grammar(s) that was used in recognizing */
2278    psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2279    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2280#endif
2281
2282    /* Populate semantic results for each nbest list entry */
2283    CHKLOG(rc, semanticList->getSize(semanticList, &semanticResultsSize));
2284    if (semanticResultsSize == 0)
2285    {
2286      /*
2287       * If there was no semantic result... then I need to create one so that I can store
2288       * literal, conf, meaning which are default keys that must ALWAYS exist
2289       */
2290      CHKLOG(rc, SR_SemanticResultCreate(&semanticResult));
2291      CHKLOG(rc, semanticList->add(semanticList, semanticResult));
2292      semanticResultsSize = 1;
2293    }
2294
2295    for (k = 0; k < semanticResultsSize;++k)
2296    {
2297      CHKLOG(rc, semanticList->get(semanticList, k, (void **)&semanticResult));
2298      if (semanticResult == NULL)
2299      {
2300        PLogError(L("nbest entry contained NULL semanticResult"), ESR_INVALID_STATE);
2301        return ESR_INVALID_STATE;
2302      }
2303
2304      semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2305
2306      /* put in the literal */
2307      lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2308      if (lValue == NULL)
2309      {
2310        PLogError(L("ESR_OUT_OF_MEMORY"));
2311        return ESR_OUT_OF_MEMORY;
2312      }
2313      LSTRCPY(lValue, label);
2314      CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("literal"), lValue));
2315
2316      /* if the meaning is not set, then put in the meaning which will be the literal */
2317      CHKLOG(rc, semanticImpl->results->containsKey(semanticImpl->results, L("meaning"), &containsKey));
2318      if (!containsKey)
2319      {
2320        lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2321        if (lValue == NULL)
2322        {
2323          PLogError(L("ESR_OUT_OF_MEMORY"));
2324          return ESR_OUT_OF_MEMORY;
2325        }
2326        LSTRCPY(lValue, label);
2327        CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("meaning"), lValue));
2328      }
2329
2330      /* put in the raw score */
2331      psprintf(label, L("%d"), raws);
2332      lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2333      if (lValue == NULL)
2334      {
2335        PLogError(L("ESR_OUT_OF_MEMORY"));
2336        return ESR_OUT_OF_MEMORY;
2337      }
2338      LSTRCPY(lValue, label);
2339      CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("raws"), lValue));
2340    }
2341  }
2342
2343  /* Now I have an nBest list where each entry has at least one semantic result */
2344  /* What I need to do is filter out the nBest list entries which have matching
2345     semantic results for 'meaning' */
2346  /* Once I have filtered out the nBest list based on this criteria, I can calculate the confidence
2347     score and populate the result of the first entry with the raw score */
2348
2349#if FILTER_NBEST_BY_SEM_RESULT
2350
2351  for (iBest = nbestSize-1; iBest>0; iBest--) /* do not filter out nBest entry 0 */
2352  {
2353    /**
2354     * This is the entry (indexed by i) targeted for removal
2355     *
2356     */
2357
2358    /* get the nBest entry which you wish to remove (if duplicate found) */
2359    CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void **)&semanticList));
2360
2361    /* get the first sem_result for the entry */
2362    CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2363    semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2364
2365    /* get the meaning */
2366    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2367
2368    /* get the other entries to check against (start with 0, end on the current i entry) */
2369    for (jBest = 0; jBest < iBest; ++jBest)
2370    {
2371      /*
2372       * This is the entry (indexed by jBest) that we will compare with
2373       */
2374
2375      /* get the nBest entry which you wish to compare with */
2376      CHKLOG(rc, ArrayListGet(resultImpl->results, jBest, (void **)&semanticList2));
2377
2378      CHKLOG(rc, ArrayListGet(semanticList2, 0, (void **)&semanticResult2));
2379      semanticImpl2 = (SR_SemanticResultImpl*) semanticResult2;
2380
2381      CHKLOG(rc, semanticImpl2->results->get(semanticImpl2->results, L("meaning"), (void **)&lValue2));
2382      if (LSTRCMP(lValue, lValue2) == 0)
2383      {
2384        /* pfprintf(PSTDOUT,"duplicate sem result found %d == %d\n", iBest, jBest);
2385        pfprintf(PSTDOUT,"removing %d\n", iBest); */
2386
2387        /* removing from the list indexed by iBest */
2388        CHKLOG(rc, semanticList->remove(semanticList, semanticResult));
2389        CHKLOG(rc, semanticResult->destroy(semanticResult));
2390
2391        CHKLOG(rc, resultImpl->results->remove(resultImpl->results, semanticList));
2392        CHKLOG(rc, semanticList->destroy(semanticList));
2393
2394        if (!CA_NBestListRemoveResult(resultImpl->nbestList, iBest))
2395          return ESR_ARGUMENT_OUT_OF_BOUNDS;
2396        break;
2397      }
2398    }
2399  }
2400  nbestSize = CA_NBestListCount(resultImpl->nbestList);
2401#endif
2402
2403  CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2404
2405  if (nbestSize)
2406  {
2407   if(CA_ComputeConfidenceValues(impl->confidenceScorer, impl->recognizer, resultImpl->nbestList))
2408        return ESR_INVALID_STATE;
2409
2410   for(current_choice=nbestSize-1;current_choice>=0;current_choice--)
2411   {
2412    /* get the nBest entry you want to deal with */
2413    CHKLOG(rc, ArrayListGet(resultImpl->results, current_choice, (void **)&semanticList));
2414    /* get the first sem_result for that entry */
2415    CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2416    semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2417
2418    /* put in the conf value for that nBest entry */
2419    if(!CA_NBestListGetResultConfidenceValue( resultImpl->nbestList, current_choice, &confValue))
2420      return ESR_ARGUMENT_OUT_OF_BOUNDS;
2421
2422    psprintf(label, L("%d"), confValue);
2423    lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2424      if (lValue == NULL)
2425      {
2426        PLogError(L("ESR_OUT_OF_MEMORY"));
2427        return ESR_OUT_OF_MEMORY;
2428      }
2429      LSTRCPY(lValue, label);
2430      CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("conf"),lValue));
2431    }
2432  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("CMPT"), 0));
2433  }
2434
2435  /* OSI log the end of recognition and all bufferred tokens */
2436
2437  /* OSI log end of recognition time */
2438  PTimeStampSet(&EORT);
2439  impl->recogLogTimings.EORT = PTimeStampDiff(&EORT, &impl->timestamp);
2440  impl->recogLogTimings.DURS = impl->processed * MSEC_PER_FRAME;
2441
2442  /*****************************************/
2443  /* OSI Logging stuff */
2444  /*****************************************/
2445if( impl->osi_log_level != 0)
2446 {
2447  /* get the nBest size (this size may have changed since previous set cuz of nbest list filtering) */
2448  CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2449  /* OSI log the nBest list size */
2450  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("NBST"), nbestSize));
2451
2452
2453  for (iBest = 0; iBest < nbestSize; iBest++) /* loop */
2454  {
2455    /* get the nBest entry */
2456    CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void**)&semanticList));
2457
2458    /* get the first sem_result for the entry (ther emay be many, but ignore others) */
2459    CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2460    semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2461
2462    /* get the meaning and OSI log it */
2463    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2464    /* OSI log RSLT (meaning) for nbest item */
2465    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSLT"), lValue));
2466
2467    /* get the literal and OSI log it */
2468    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("literal"), (void **)&lValue));
2469    /* OSI log RAWT SPOK (literal) for nbest item */
2470    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWT"), lValue));
2471    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SPOK"), lValue));
2472
2473    /* get the score and OSI log it */
2474    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("raws"), (void **)&lValue));
2475    /* OSI log RAWS (score) for nbest item */
2476    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWS"), lValue));
2477    /* get the confidence value and OSI log it */
2478    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("conf"), (void **)&lValue));
2479    /* OSI log CONF (values) for nbest item */
2480    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("CONF"), lValue));
2481  }
2482
2483  /* log the values */
2484  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BORT"), impl->recogLogTimings.BORT));
2485  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("DURS"), impl->recogLogTimings.DURS));
2486  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EORT"), impl->recogLogTimings.EORT));
2487  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSD"), impl->recogLogTimings.EOSD));
2488  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSS"), impl->recogLogTimings.EOSS));
2489  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOST"), impl->recogLogTimings.EOST));
2490  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2491  {
2492    len = P_PATH_MAX;
2493    CHKLOG(rc, SR_EventLogAudioGetFilename(impl->eventLog, waveformFilename, &len));
2494    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("WVNM"), waveformFilename));
2495  }
2496  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSTT"), L("ok")));
2497  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RENR"), L("ok")));
2498  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("ENDR"), impl->eos_reason));
2499  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcnd")));
2500
2501  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSS"), impl->recogLogTimings.BOSS)); /* extra not in OSI spec */
2502  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRboss")));
2503
2504  /*
2505   * Record which recognizer was the successful one (male or female)
2506   * this index refers to the order in the swimdllist file.
2507   */
2508  CHKLOG(rc, CA_GetRecogID(impl->recognizer, &recogID));
2509  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("RECOG"), recogID));
2510  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRrcid")));
2511
2512  /* Record semantic results returned by top nbestlist entry */
2513  if (1)
2514  {
2515#define MAX_SEMANTIC_KEYS 50
2516    LCHAR* semanticKeys[MAX_SEMANTIC_KEYS];
2517#define SEMANTIC_VALUE_SIZE 512
2518    LCHAR semanticValue[SEMANTIC_VALUE_SIZE];
2519    size_t num_semanticKeys;
2520
2521    rc = resultImpl->results->getSize(resultImpl->results, &nbestSize);
2522    if (rc != ESR_SUCCESS)
2523    {
2524      PLogError(ESR_rc2str(rc));
2525      goto DONE;
2526    }
2527    for (iBest = 0; iBest < nbestSize; ++iBest) /* loop2 */
2528    {
2529      rc = resultImpl->results->get(resultImpl->results, iBest, (void **)&semanticList);
2530      if (rc != ESR_SUCCESS)
2531      {
2532        PLogError(ESR_rc2str(rc));
2533        goto DONE;
2534      }
2535
2536	  /* semanticResultsSize is the number of semantic meanings, although
2537		 ambiguous parses are not entirely supported
2538		 num_semanticKeys    is associated to a particular parse         */
2539
2540      rc = semanticList->getSize(semanticList, &semanticResultsSize);
2541      if (rc != ESR_SUCCESS)
2542      {
2543        PLogError(ESR_rc2str(rc));
2544        goto DONE;
2545      }
2546      for (k = 0; k < semanticResultsSize; ++k)
2547      {
2548		size_t iKey;
2549        rc = semanticList->get(semanticList, k, (void **)&semanticResult);
2550        if (rc != ESR_SUCCESS)
2551        {
2552          PLogError(ESR_rc2str(rc));
2553          goto DONE;
2554        }
2555        num_semanticKeys = MAX_SEMANTIC_KEYS;
2556        rc = semanticResult->getKeyList(semanticResult, (LCHAR**) & semanticKeys, &num_semanticKeys);
2557        if (rc != ESR_SUCCESS)
2558        {
2559          PLogError(ESR_rc2str(rc));
2560          goto DONE;
2561        }
2562
2563        for (iKey=0; iKey<num_semanticKeys; ++iKey)
2564        {
2565          len = SEMANTIC_VALUE_SIZE;
2566          rc = semanticResult->getValue(semanticResult, semanticKeys[iKey], (LCHAR*) &semanticValue, &len);
2567          if (rc != ESR_SUCCESS)
2568          {
2569            PLogError(ESR_rc2str(rc));
2570            goto DONE;
2571          }
2572
2573          rc = SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, semanticKeys[iKey], semanticValue);
2574          if (rc != ESR_SUCCESS)
2575          {
2576            PLogError(ESR_rc2str(rc));
2577            goto DONE;
2578          }
2579        }
2580      }
2581    }
2582    rc = SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESR_SemanticResult[0]"));
2583    if (rc != ESR_SUCCESS)
2584    {
2585      PLogError(ESR_rc2str(rc));
2586      goto DONE;
2587    }
2588  }
2589}
2590DONE:
2591  return ESR_SUCCESS;
2592CLEANUP:
2593  impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2594  return rc;
2595}
2596
2597/**
2598 * Indicates if it is possible to push data from SREC into the internal recognizer.
2599 * If data can be pushed, ESR_CONTINUE_PROCESSING is returned.
2600 *
2601 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2602 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI
2603 */
2604PINLINE ESR_ReturnCode canPushAudioIntoRecognizer(SR_RecognizerImpl* impl)
2605{
2606  ESR_ReturnCode rc;
2607
2608  if (impl->lockFunction)
2609    impl->lockFunction(ESR_LOCK, impl->lockData);
2610
2611  /* do I have enough to make a frame ? */
2612  if (CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2613  {
2614    /* Not enough data */
2615    if (!impl->gotLastFrame)
2616    {
2617      /* not last frame, so ask for more audio */
2618      if (impl->lockFunction)
2619        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2620      return ESR_SUCCESS;
2621    }
2622    else
2623    {
2624      /* last frame, make do with what you have */
2625      if (impl->lockFunction)
2626        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2627#ifdef SREC_ENGINE_VERBOSE_LOGGING
2628      PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2629#endif
2630      impl->isRecognizing = ESR_FALSE;
2631      impl->recogLogTimings.EOSD = impl->frames;
2632      impl->eos_reason = L("EOI");
2633      impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2634      if (impl->eventLog != NULL)
2635      {
2636        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_EOI")));
2637        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2638        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2639        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2640      }
2641      return ESR_CONTINUE_PROCESSING;
2642    }
2643  }
2644  if (impl->lockFunction)
2645    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2646  return ESR_CONTINUE_PROCESSING;
2647CLEANUP:
2648  return rc;
2649}
2650
2651/**
2652 * Pushes data from SREC into the internal recognizer.
2653 *
2654 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2655 * OUTPUT STATES: same
2656 */
2657PINLINE ESR_ReturnCode pushAudioIntoRecognizer(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2658    SR_RecognizerResultType* type,
2659    SR_RecognizerResult* result)
2660{
2661  size_t count;
2662  ESR_ReturnCode rc;
2663
2664  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2665  {
2666    /* Don't push frames unless they're needed */
2667
2668    /* Check for leaked state */
2669    passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2670    return ESR_CONTINUE_PROCESSING;
2671  }
2672  if (impl->lockFunction)
2673    impl->lockFunction(ESR_LOCK, impl->lockData);
2674  count = CircularBufferRead(impl->buffer, impl->audioBuffer, impl->FRAME_SIZE);
2675  if (impl->lockFunction)
2676    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2677
2678  WaveformBuffer_Write(impl->waveformBuffer, impl->audioBuffer, count);
2679  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2680  {
2681    rc = SR_EventLogAudioWrite(impl->eventLog, impl->audioBuffer, count);
2682    if (rc == ESR_BUFFER_OVERFLOW)
2683      rc = ESR_INVALID_STATE;
2684    if (rc != ESR_SUCCESS)
2685    {
2686      PLogError(ESR_rc2str(rc));
2687      if (impl->lockFunction)
2688        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2689      goto CLEANUP;
2690    }
2691  }
2692  if (count < impl->FRAME_SIZE)
2693  {
2694    rc = ESR_INVALID_STATE;
2695    PLogError(L("%s: error reading buffer data (count=%d, frameSize=%d)"), ESR_rc2str(rc), count, impl->FRAME_SIZE);
2696    goto CLEANUP;
2697  }
2698  if (!CA_LoadSamples(impl->wavein, impl->audioBuffer, impl->sampleRate / FRAMERATE))
2699  {
2700    PLogError(L("ESR_INVALID_STATE"));
2701    rc = ESR_INVALID_STATE;
2702    goto CLEANUP;
2703  }
2704
2705  CA_ConditionSamples(impl->wavein);
2706  /* Check for leaked state */
2707  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2708  return ESR_CONTINUE_PROCESSING;
2709CLEANUP:
2710  return rc;
2711}
2712
2713/**
2714 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2715 * OUTPUT STATES: same
2716 */
2717PINLINE ESR_ReturnCode generateFrameFromAudio(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2718    SR_RecognizerResultType* type,
2719    SR_RecognizerResult* result)
2720{
2721  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2722  {
2723    /* Don't create frames unless they're needed */
2724
2725    /* Check for leaked state */
2726    passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2727    return ESR_CONTINUE_PROCESSING;
2728  }
2729
2730  /* Try processing one frame */
2731  if (!CA_MakeFrame(impl->frontend, impl->utterance, impl->wavein))
2732  {
2733    /*
2734    * One of three cases occured:
2735    *
2736    * - We don't have enough samples to process one frame. This should be impossible because
2737    * pushAudioIntoRecognizer() is always called before us and will not continue if we don't
2738    * have enough samples.
2739    *
2740    * - The internal recognizer needs a minimum amount of audio before it'll begin generating
2741    *   frames. This is normal and we return with a success value.
2742    *
2743    * - The recognizer skips every even frame number (for performance reasons). This is normal
2744    *   and we return with a success value.
2745    */
2746    *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2747    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2748    return ESR_SUCCESS;
2749  }
2750  ++impl->frames;
2751  /* Check for leaked state */
2752  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2753  return ESR_CONTINUE_PROCESSING;
2754}
2755
2756/**
2757 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2758 * OUTPUT STATES: same
2759 */
2760PINLINE ESR_ReturnCode generateFrameStats(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2761                           SR_RecognizerResultType* type,
2762                           SR_RecognizerResult* result)
2763{
2764  if (impl->frames < impl->bgsniff)
2765  {
2766    /* Wait until we have enough frames to estimate background stats */
2767    *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2768    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2769    return ESR_SUCCESS;
2770  }
2771  else if (impl->frames == impl->bgsniff)
2772    CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->bgsniff);
2773
2774  /* Check for leaked state */
2775  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2776  return ESR_CONTINUE_PROCESSING;
2777}
2778
2779/**
2780 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2781 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS
2782 */
2783PINLINE ESR_ReturnCode generatePatternFromFrame(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2784    SR_RecognizerResultType* type,
2785    SR_RecognizerResult* result)
2786{
2787  SR_AcousticModelsImpl* modelsImpl;
2788  ESR_ReturnCode rc;
2789
2790  /* Run the search */
2791  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2792  if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2793  {
2794    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2795    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2796    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2797    if (impl->eventLog != NULL)
2798    {
2799      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_END")));
2800      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2801      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2802      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2803    }
2804    PLogError(L("ESR_INVALID_STATE"));
2805    return ESR_INVALID_STATE;
2806  }
2807  if (!CA_AdvanceUtteranceFrame(impl->utterance))
2808  {
2809    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2810    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2811    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2812    if (impl->eventLog != NULL)
2813    {
2814      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_END")));
2815      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2816      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2817      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2818    }
2819    PLogError(L("ESR_INVALID_STATE"));
2820    return ESR_INVALID_STATE;
2821  }
2822  CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2823  ++impl->processed;
2824
2825  if (impl->lockFunction)
2826    impl->lockFunction(ESR_LOCK, impl->lockData);
2827  if (impl->gotLastFrame && CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2828  {
2829    /*
2830     * SREC have run out of data but the underlying recognizer might have some frames
2831     * queued for processing.
2832     */
2833    if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0)
2834    {
2835      /* EOI means end of input */
2836#ifdef SREC_ENGINE_VERBOSE_LOGGING
2837      PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2838#endif
2839      impl->isRecognizing = ESR_FALSE;
2840      impl->recogLogTimings.EOSD = impl->frames;
2841      impl->eos_reason = L("EOI");
2842      impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2843      if (impl->eventLog != NULL)
2844      {
2845        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOI")));
2846        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2847        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2848        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2849      }
2850    }
2851    else
2852    {
2853#ifdef SREC_ENGINE_VERBOSE_LOGGING
2854      PLogMessage("L: Voicing END (EOF) at %d frames (%d processed)", impl->frames, impl->processed);
2855#endif
2856
2857      impl->isRecognizing = ESR_FALSE;
2858      impl->recogLogTimings.EOSD = impl->frames;
2859      impl->eos_reason = L("EOF");
2860      impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2861      if (impl->eventLog != NULL)
2862      {
2863        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOS")));
2864        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2865        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2866        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2867      }
2868      *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2869      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2870      passert(impl->processed == impl->frames);
2871      if (impl->lockFunction)
2872        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2873      return ESR_SUCCESS;
2874    }
2875  }
2876  if (impl->lockFunction)
2877    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2878
2879  /* Check for leaked state */
2880  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2881  return ESR_CONTINUE_PROCESSING;
2882CLEANUP:
2883  return rc;
2884}
2885
2886/**
2887 * Same as generatePatternFromFrame() only the buffer is known to be empty.
2888 *
2889 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI
2890 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2891 */
2892PINLINE ESR_ReturnCode generatePatternFromFrameEOI(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2893    SR_RecognizerResultType* type,
2894    SR_RecognizerResult* result)
2895{
2896  SR_AcousticModelsImpl* modelsImpl;
2897  ESR_ReturnCode rc;
2898
2899  /* Run the search */
2900  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2901
2902  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2903  {
2904    passert(impl->processed == impl->frames);
2905    *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2906    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2907    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2908    return ESR_SUCCESS;
2909  }
2910
2911  if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2912  {
2913    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2914    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2915    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2916    if (impl->eventLog != NULL)
2917    {
2918      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2919      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2920      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2921      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2922    }
2923    PLogError(L("ESR_INVALID_STATE"));
2924    return ESR_INVALID_STATE;
2925  }
2926  if (!CA_AdvanceUtteranceFrame(impl->utterance))
2927  {
2928    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2929    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2930    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2931    if (impl->eventLog != NULL)
2932    {
2933      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2934      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2935      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2936      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2937    }
2938    PLogError(L("ESR_INVALID_STATE"));
2939    return ESR_INVALID_STATE;
2940  }
2941  CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2942  ++impl->processed;
2943
2944  if (impl->lockFunction)
2945    impl->lockFunction(ESR_LOCK, impl->lockData);
2946
2947  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2948  {
2949    passert(impl->processed == impl->frames);
2950    *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2951    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2952    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2953    if (impl->eventLog != NULL)
2954    {
2955      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_EOS")));
2956      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2957      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2958      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2959    }
2960    if (impl->lockFunction)
2961      impl->lockFunction(ESR_UNLOCK, impl->lockData);
2962    return ESR_SUCCESS;
2963  }
2964  if (impl->lockFunction)
2965    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2966
2967  /* Check for leaked state */
2968  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2969  return ESR_CONTINUE_PROCESSING;
2970CLEANUP:
2971  if (impl->lockFunction)
2972    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2973  return rc;
2974}
2975
2976
2977/**
2978 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2979 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2980 */
2981ESR_ReturnCode detectEndOfSpeech(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2982                                 SR_RecognizerResultType* type,
2983                                 SR_RecognizerResult* result)
2984{
2985  EOSrc eos; /* eos means end of speech */
2986  int eos_by_level; /* eos means end of speech */
2987  PTimeStamp timestamp;
2988  ESR_ReturnCode rc;
2989  ESR_BOOL enableGetWaveform = ESR_FALSE;
2990
2991  eos_by_level = CA_UtteranceHasEnded(impl->utterance);
2992  if (eos_by_level)
2993  {
2994    eos = SPEECH_ENDED_BY_LEVEL_TIMEOUT;
2995  }
2996  else
2997  {
2998    eos = CA_IsEndOfUtteranceByResults(impl->recognizer);
2999  }
3000
3001  ESR_SessionGetBool(L("enableGetWaveform"), &enableGetWaveform);
3002  //impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
3003
3004  if (eos == VALID_SPEECH_CONTINUING && enableGetWaveform && impl->waveformBuffer->overflow_count > 0)
3005  {
3006    size_t bufferSize;
3007    CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &bufferSize));
3008    PLogMessage("Forcing EOS due to wfbuf overflow (fr=%d,sz=%d,of=%d)", impl->frames, bufferSize, impl->waveformBuffer->overflow_count);
3009    eos = SPEECH_TOO_LONG;
3010  }
3011
3012  if (eos != VALID_SPEECH_CONTINUING)
3013  {
3014    switch (eos)
3015    {
3016      case SPEECH_ENDED:
3017        /* normal */
3018        impl->eos_reason = L("itimeout");
3019        break;
3020
3021      case SPEECH_ENDED_WITH_ERROR:
3022        /* error */
3023        impl->eos_reason = L("err");
3024        break;
3025
3026      case SPEECH_TOO_LONG:
3027        /* timeout*/
3028        impl->eos_reason = L("ctimeout");
3029        break;
3030
3031      case SPEECH_MAYBE_ENDED:
3032        /* normal */
3033        impl->eos_reason = L("itimeout");
3034        break;
3035      case SPEECH_ENDED_BY_LEVEL_TIMEOUT:
3036        /* normal */
3037        impl->eos_reason = L("levelTimeout");
3038        break;
3039
3040      default:
3041        /* error */
3042        impl->eos_reason = L("err");
3043    }
3044
3045#ifdef SREC_ENGINE_VERBOSE_LOGGING
3046    PLogMessage("L: Voicing END (EOS) at %d frames, %d processed (reason: %s)\n", impl->frames, impl->processed, impl->eos_reason);
3047#endif
3048
3049    impl->recogLogTimings.EOSD = impl->frames; /* how many frames have been sent prior to detect EOS */
3050    PTimeStampSet(&timestamp); /* time it took to detect EOS (in millisec) */
3051    impl->recogLogTimings.EOST = PTimeStampDiff(&timestamp, &impl->timestamp);
3052
3053    *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
3054    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3055    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
3056    if (impl->eventLog != NULL)
3057    {
3058      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("detectEndOfSpeech() -> SR_RECOGNIZER_INTERNAL_EOS")));
3059      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("reason"), impl->eos_reason));
3060      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
3061      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
3062      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
3063    }
3064    impl->isRecognizing = ESR_FALSE;
3065    return ESR_SUCCESS;
3066  }
3067
3068  /* Check for leaked state */
3069  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
3070  return ESR_CONTINUE_PROCESSING;
3071CLEANUP:
3072  return rc;
3073}
3074
/**
 * Runs one step of beginning-of-speech (BOS) detection.
 *
 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION
 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOI
 *                (this function itself also assigns SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH
 *                and SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT)
 *
 * The session flag "cmdline.gatedmode" selects the behaviour:
 *  - gated mode, or non-gated mode with fewer than impl->bgsniff frames seen:
 *    optionally push audio, advance the utterance by one frame and test for voicing;
 *  - non-gated mode with at least impl->bgsniff frames: start recognizing immediately.
 *
 * @param impl   Recognizer whose state is advanced
 * @param status [out] Event reported to the caller
 * @param type   [out] Result type reported to the caller
 * @param result Forwarded to pushAudioIntoRecognizer() and generateFrameFromAudio()
 * @return ESR_SUCCESS once status/type have been set here; any value other than
 *         ESR_CONTINUE_PROCESSING returned by pushAudioIntoRecognizer() or
 *         generateFrameFromAudio() is passed through; ESR_INVALID_STATE if the utterance
 *         frame cannot be advanced; otherwise the error code of the failing call
 */
ESR_ReturnCode detectBeginningOfSpeech(SR_RecognizerImpl* impl,
                                       SR_RecognizerStatus* status,
                                       SR_RecognizerResultType* type,
                                       SR_RecognizerResult* result)
{
  ESR_ReturnCode rc;
  ESR_BOOL gatedMode;
  size_t num_windback_bytes, num_windback_frames;
  waveform_buffering_state_t buffering_state;

  CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &gatedMode));

  if (gatedMode || (!gatedMode && impl->frames < impl->bgsniff))
  {
    ESR_BOOL pushable = ESR_FALSE;

    /*
     * rc == ESR_SUCCESS leaves pushable at ESR_FALSE (no audio is pushed in this step);
     * rc == ESR_CONTINUE_PROCESSING pushes audio unless the state has become EOI;
     * any other rc is returned to the caller unchanged.
     */
    rc = canPushAudioIntoRecognizer(impl);
    if (rc == ESR_SUCCESS)
    {
      /* Not enough samples to process one frame */
      if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
      {
        *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
        *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
        return ESR_SUCCESS;
      }
    }
    else if (rc != ESR_CONTINUE_PROCESSING)
      return rc;
    else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
    {
      /* Got end of input before beginning of speech */
      *status = SR_RECOGNIZER_EVENT_NO_MATCH;
      *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
      impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH;
      CHKLOG(rc, impl->Interface.stop(&impl->Interface));
      return ESR_SUCCESS;
    }
    else
      pushable = ESR_TRUE;
    if (pushable)
    {
      rc = pushAudioIntoRecognizer(impl, status, type, result);
      /* OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI */
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /* Not enough samples to process one frame */
        return rc;
      }
      rc = generateFrameFromAudio(impl, status, type, result);
      /* OUTPUT STATES: same */
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /*
         * The internal recognizer needs a minimum amount of audio before
         * it begins generating frames.
         */
        return rc;
      }
    }
    /* Consume one frame of the utterance before testing it for voicing */
    if (!CA_AdvanceUtteranceFrame(impl->utterance))
    {
      PLogError(L("ESR_INVALID_STATE: Failed Advancing Utt Frame %d"), impl->frames);
      return ESR_INVALID_STATE;
    }
    if (CA_UtteranceHasVoicing(impl->utterance))
    {
      /* Utterance stats for Lombard if enough frames */
      if (impl->frames > impl->bgsniff)
      {
#ifdef SREC_ENGINE_VERBOSE_LOGGING
        PLogMessage("L:  Voicing START at %d frames", impl->frames);
#endif
        /* OSI log the endpointed data */

        CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BTIM"), impl->frames * MSEC_PER_FRAME));
        CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BRGN"), 0)); /* Barge-in not supported */
        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIendp")));

        CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSD"), impl->frames));
        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRbosd")));

        /* Gated mode passes the range (-frames, 0); non-gated mode passes (0, frames) */
        if (gatedMode)
          CA_CalculateUtteranceStatistics(impl->utterance, (int)(impl->frames * -1), 0);
        else
          CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->frames);
      }

      /* OK, we've got voicing or the end of input has occurred
      ** (or both, I suppose).  If we had voicing then progress
      ** the recognizer, otherwise skip to the end.
      ** Of course, we could be running outside 'Gated Mode'
      ** so we won't have any frames processed at all yet -
      ** in this case start the recognizer anyway.
      */

      /*************************************
       ** Run recognition until endOfInput **
       *************************************/

      /*
       * Initialize both recognizers first
       * and disable reporting of results
       */
      if (gatedMode)
      {
        /*
         * We're in Gated Mode -
         * Because we'll have had voicing we wind-back
         * until the start of voicing (unsure region)
         */
        num_windback_frames = CA_SeekStartOfUtterance(impl->utterance);
        impl->beginningOfSpeechOffset = impl->frames - num_windback_frames;
        num_windback_bytes = num_windback_frames * impl->FRAME_SIZE * 2 /* due to skip even frames */;

        /* pfprintf(PSTDOUT,L("audio buffer windback %d frames == %d bytes\n"), num_windback_frames, num_windback_bytes); */
        /* The waveform buffer is only wound back when buffering is enabled */
        CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
        if (buffering_state != WAVEFORM_BUFFERING_OFF)
          CHKLOG(rc, WaveformBuffer_WindBack(impl->waveformBuffer, num_windback_bytes));

        /*
         * Only transition to linear if it was previously circular (in other words if
         * buffering was active in the first place)
         */
        if (buffering_state == WAVEFORM_BUFFERING_ON_CIRCULAR)
          CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_LINEAR));
        /* Frame counter restarts at the number of frames still unprocessed after the seek */
        impl->frames = CA_GetUnprocessedFramesInUtterance(impl->utterance);
      }
      else
        impl->frames = 0;
      /* reset the frames */
      impl->processed = 0;
      CHKLOG(rc, beginRecognizing(impl));
      impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
      *status = SR_RECOGNIZER_EVENT_START_OF_VOICING;
      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
      return ESR_SUCCESS;
    }
    else
    {
      /* No voicing yet: give up once more than utterance_timeout frames have been seen */
      if (impl->frames > impl->utterance_timeout)
      {
        /* beginning of speech timeout */
        impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT;
        *status = SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT;
        *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
        CHKLOG(rc, impl->Interface.stop(&impl->Interface));
        return ESR_SUCCESS;
      }
    }
  }
  else if (!gatedMode && impl->frames >= impl->bgsniff)
  {
    /*
    * If not gated mode and I have processed enough frames, then start the recognizer
    * right away.
    */
    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
    *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;

    /* reset the frames */
    impl->frames = impl->processed = 0;
    CHKLOG(rc, beginRecognizing(impl));
    return ESR_SUCCESS;
  }
  /* Still waiting for voicing: stay in the current state and report progress */
  *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
  *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
  return ESR_SUCCESS;

CLEANUP:
  return rc;
}
3252
/**
 * Advances the recognizer state machine by one step.
 *
 * Dispatches on impl->internalState. Helper calls that return
 * ESR_CONTINUE_PROCESSING let the step continue; any other return value is
 * handed straight back to the caller. Some branches jump back to
 * MOVE_TO_NEXT_STATE to re-dispatch after the internal state was changed.
 *
 * @param self   Recognizer handle (cast to SR_RecognizerImpl)
 * @param status [out] Event describing what happened in this step
 * @param type   [out] Kind of result available
 * @param result [out] Receives impl->result, which is created on first use
 * @return ESR_INVALID_ARGUMENT if status, type or result is NULL;
 *         ESR_INVALID_STATE for an unhandled internal state;
 *         otherwise ESR_SUCCESS or the return code of the helper that ended the step
 */
ESR_ReturnCode SR_RecognizerAdvanceImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
                                        SR_RecognizerResultType* type,
                                        SR_RecognizerResult** result)
{
  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
  ESR_BOOL pushable;
  ESR_ReturnCode rc;

  if (status == NULL || type == NULL || result == NULL)
  {
    PLogError(L("ESR_INVALID_ARGUMENT"));
    return ESR_INVALID_ARGUMENT;
  }

  /* create the result holder and save the pointer */
  /* creation only happens once (due to the if condition) */
  if (impl->result == NULL)
    CHKLOG(rc, SR_RecognizerResult_Create(&impl->result, impl));
  *result = impl->result;

  /*
   * The following two lines are used to detect bugs whereby we forget to set
   * status or type before returning
   */
  *status = SR_RECOGNIZER_EVENT_INVALID;
  *type = SR_RECOGNIZER_RESULT_TYPE_INVALID;

MOVE_TO_NEXT_STATE:
  switch (impl->internalState)
  {
    case SR_RECOGNIZER_INTERNAL_BEGIN:
      /* First call after start: only report STARTED and move on to BOS detection */
      impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_DETECTION;
      *status = SR_RECOGNIZER_EVENT_STARTED;
      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
      return ESR_SUCCESS;

    case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
      rc = detectBeginningOfSpeech(impl, status, type, impl->result);
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /*
         * SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION, or
         * SR_RECOGNIZER_INTERNAL_EOI
         */
        return rc;
      }
      /* Leaked state */
      passert(0);
      break;

    case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
      /*
       * rc == ESR_SUCCESS leaves pushable at ESR_FALSE (no audio is pushed in this step);
       * rc == ESR_CONTINUE_PROCESSING pushes audio unless the state has become EOI;
       * any other rc is returned to the caller unchanged.
       */
      pushable = ESR_FALSE;
      rc = canPushAudioIntoRecognizer(impl);
      if (rc == ESR_SUCCESS)
      {
        /* Not enough samples to process one frame */
        if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
        {
          *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
          *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
          return ESR_SUCCESS;
        }
      }
      else if (rc != ESR_CONTINUE_PROCESSING)
        return rc;
      else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
        goto MOVE_TO_NEXT_STATE;
      else
        pushable = ESR_TRUE;
      if (pushable)
      {
        rc = pushAudioIntoRecognizer(impl, status, type, impl->result);
        if (rc != ESR_CONTINUE_PROCESSING)
        {
          /* Not enough samples to process one frame */
          return rc;
        }
        /* Re-dispatch if the state is EOI after the push */
        if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
          goto MOVE_TO_NEXT_STATE;
        rc = generateFrameFromAudio(impl, status, type, impl->result);
        if (rc != ESR_CONTINUE_PROCESSING)
        {
          /*
           * The internal recognizer needs a minimum amount of audio before
           * it begins generating frames.
           */
          return rc;
        }
      }
      rc = generateFrameStats(impl, status, type, impl->result);
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /* Not enough frames to calculate stats */
        return rc;
      }
      rc = generatePatternFromFrame(impl, status, type, impl->result);
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /* End of speech detected */
        return rc;
      }
      /* Re-dispatch if the state is END after pattern generation */
      if (impl->internalState == SR_RECOGNIZER_INTERNAL_END)
        goto MOVE_TO_NEXT_STATE;
      rc = detectEndOfSpeech(impl, status, type, impl->result);
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /* End of speech detected */
        return rc;
      }
      *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
      return ESR_SUCCESS;

    case SR_RECOGNIZER_INTERNAL_EOI:
      /*
       * On EOI (end of input), we need to process the remaining frames that had not
       * been processed when PutAudio set the gotLastFrame flag
       */
      rc = generatePatternFromFrameEOI(impl, status, type, impl->result);
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /* End of speech detected */
        return rc;
      }
      rc = detectEndOfSpeech(impl, status, type, impl->result);
      if (rc != ESR_CONTINUE_PROCESSING)
      {
        /* End of speech detected */
        return rc;
      }
      *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
      return ESR_SUCCESS;

    case SR_RECOGNIZER_INTERNAL_EOS:
      /* On EOS (end of speech detected - not due to end of input), create the result */
      /* The audio buffer reset is bracketed by the optional user-supplied lock callback */
      if (impl->lockFunction)
        impl->lockFunction(ESR_LOCK, impl->lockData);
      CircularBufferReset(impl->buffer);
      if (impl->lockFunction)
        impl->lockFunction(ESR_UNLOCK, impl->lockData);
      CHKLOG(rc, SR_RecognizerCreateResultImpl((SR_Recognizer*) impl, status, type));
      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
      return ESR_SUCCESS;

    case SR_RECOGNIZER_INTERNAL_END:
      /* Nothing left to do; status and type keep the INVALID markers set above */
      return ESR_SUCCESS;
    default:
      PLogError(L("ESR_INVALID_STATE"));
      return ESR_INVALID_STATE;
  }
CLEANUP:
  return rc;
}
3407
3408
3409
3410ESR_ReturnCode SR_RecognizerLoadUtteranceImpl(SR_Recognizer* self, const LCHAR* filename)
3411{
3412  /* TODO: complete */
3413  return ESR_SUCCESS;
3414}
3415
3416ESR_ReturnCode SR_RecognizerLoadWaveFileImpl(SR_Recognizer* self, const LCHAR* filename)
3417{
3418  /* TODO: complete */
3419  return ESR_SUCCESS;
3420}
3421
3422ESR_ReturnCode SR_RecognizerLogEventImpl(SR_Recognizer* self, const LCHAR* event)
3423{
3424  ESR_ReturnCode rc;
3425  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3426  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, event));
3427  return ESR_SUCCESS;
3428CLEANUP:
3429  return rc;
3430}
3431
3432ESR_ReturnCode SR_RecognizerLogTokenImpl(SR_Recognizer* self, const LCHAR* token, const LCHAR* value)
3433{
3434  ESR_ReturnCode rc;
3435  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3436  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3437  return ESR_SUCCESS;
3438CLEANUP:
3439  return rc;
3440}
3441
3442ESR_ReturnCode SR_RecognizerLogTokenIntImpl(SR_Recognizer* self, const LCHAR* token, int value)
3443{
3444  ESR_ReturnCode rc;
3445  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3446  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3447  return ESR_SUCCESS;
3448CLEANUP:
3449  return rc;
3450}
3451
3452ESR_ReturnCode SR_RecognizerLogSessionStartImpl(SR_Recognizer* self, const LCHAR* sessionName)
3453{
3454  ESR_ReturnCode rc;
3455  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3456  /**
3457  * OSI Platform logging.
3458  * In OSR, these events are logged by the platform. We have no platform in ESR, so we
3459   * log them here.
3460  */
3461
3462  /* call (session) start, tokens optional */
3463  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclst")));
3464
3465  /* service start, in this case SRecTest service */
3466  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SVNM"), sessionName));
3467  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIsvst")));
3468  if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3469    CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3470
3471  return ESR_SUCCESS;
3472CLEANUP:
3473  return rc;
3474}
3475
3476ESR_ReturnCode SR_RecognizerLogSessionEndImpl(SR_Recognizer* self)
3477{
3478  ESR_ReturnCode rc;
3479  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3480
3481  /* OSI log end of call (session) */
3482  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclnd")));
3483  if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3484    CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3485  return ESR_SUCCESS;
3486CLEANUP:
3487  return rc;
3488}
3489
3490
3491ESR_ReturnCode SR_RecognizerLogWaveformDataImpl(SR_Recognizer* self, const LCHAR* waveformFilename,
3492    const LCHAR* transcription, const double bos,
3493    const double eos, ESR_BOOL isInvocab)
3494{
3495  ESR_ReturnCode rc;
3496  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3497  LCHAR num[P_PATH_MAX];
3498  int frame;
3499
3500  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("FILE"), waveformFilename));
3501  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("TRANS"), transcription));
3502  sprintf(num, L("%.2f"), bos);
3503  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_SEC"), num));
3504  sprintf(num, L("%.2f"), eos);
3505  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_SEC"), num));
3506  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("FRAMESIZE"), impl->FRAME_SIZE));
3507  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("SAMPLERATE"), impl->sampleRate));
3508  frame = (int)(bos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3509  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_FR"), frame));
3510  frame = (int)(eos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3511  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_FR"), frame));
3512  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("INVOCAB"), isInvocab));
3513  CHKLOG(rc, SR_EventLogEvent_AUDIO(impl->eventLog, impl->osi_log_level, L("ESRwfrd")));
3514  return ESR_SUCCESS;
3515CLEANUP:
3516  return rc;
3517}
3518
3519ESR_ReturnCode SR_RecognizerSetLockFunctionImpl(SR_Recognizer* self, SR_RecognizerLockFunction function, void* data)
3520{
3521  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3522
3523  impl->lockFunction = function;
3524  impl->lockData = data;
3525  return ESR_SUCCESS;
3526}
3527
3528static ESR_ReturnCode doSignalQualityInit(SR_RecognizerImpl* impl)
3529{
3530  CA_DoSignalCheck(impl->wavein, &impl->isSignalClipping, &impl->isSignalDCOffset,
3531                   &impl->isSignalNoisy, &impl->isSignalTooQuiet, &impl->isSignalTooFewSamples,
3532                   &impl->isSignalTooManySamples);
3533  impl->isSignalQualityInitialized = ESR_TRUE;
3534  return ESR_SUCCESS;
3535}
3536
3537ESR_ReturnCode SR_RecognizerIsSignalClippingImpl(SR_Recognizer* self, ESR_BOOL* isClipping)
3538{
3539  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3540  ESR_ReturnCode rc;
3541
3542  if (isClipping == NULL)
3543  {
3544    PLogError("SR_RecognizerIsSignalClippingImpl", ESR_INVALID_ARGUMENT);
3545    return ESR_INVALID_ARGUMENT;
3546  }
3547  if (!impl->isSignalQualityInitialized)
3548    CHKLOG(rc, doSignalQualityInit(impl));
3549  *isClipping = impl->isSignalClipping;
3550  return ESR_SUCCESS;
3551CLEANUP:
3552  return rc;
3553}
3554
3555ESR_ReturnCode SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer* self, ESR_BOOL* isDCOffset)
3556{
3557  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3558  ESR_ReturnCode rc;
3559
3560  if (isDCOffset == NULL)
3561  {
3562    PLogError("SR_RecognizerIsSignalDCOffsetImpl", ESR_INVALID_ARGUMENT);
3563    return ESR_INVALID_ARGUMENT;
3564  }
3565  if (!impl->isSignalQualityInitialized)
3566    CHKLOG(rc, doSignalQualityInit(impl));
3567  *isDCOffset = impl->isSignalDCOffset;
3568  return ESR_SUCCESS;
3569CLEANUP:
3570  return rc;
3571}
3572
3573ESR_ReturnCode SR_RecognizerIsSignalNoisyImpl(SR_Recognizer* self, ESR_BOOL* isNoisy)
3574{
3575  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3576  ESR_ReturnCode rc;
3577
3578  if (isNoisy == NULL)
3579  {
3580    PLogError("SR_RecognizerIsSignalNoisyImpl", ESR_INVALID_ARGUMENT);
3581    return ESR_INVALID_ARGUMENT;
3582  }
3583  if (!impl->isSignalQualityInitialized)
3584    CHKLOG(rc, doSignalQualityInit(impl));
3585  *isNoisy = impl->isSignalNoisy;
3586  return ESR_SUCCESS;
3587CLEANUP:
3588  return rc;
3589}
3590
3591ESR_ReturnCode SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer* self, ESR_BOOL* isTooQuiet)
3592{
3593  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3594  ESR_ReturnCode rc;
3595
3596  if (isTooQuiet == NULL)
3597  {
3598    PLogError("SR_RecognizerIsSignalTooQuietImpl", ESR_INVALID_ARGUMENT);
3599    return ESR_INVALID_ARGUMENT;
3600  }
3601  if (!impl->isSignalQualityInitialized)
3602    CHKLOG(rc, doSignalQualityInit(impl));
3603  *isTooQuiet = impl->isSignalTooQuiet;
3604  return ESR_SUCCESS;
3605CLEANUP:
3606  return rc;
3607}
3608
3609ESR_ReturnCode SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooFewSamples)
3610{
3611  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3612  ESR_ReturnCode rc;
3613
3614  if (isTooFewSamples == NULL)
3615  {
3616    PLogError("SR_RecognizerIsSignalTooFewSamplesImpl", ESR_INVALID_ARGUMENT);
3617    return ESR_INVALID_ARGUMENT;
3618  }
3619  if (!impl->isSignalQualityInitialized)
3620    CHKLOG(rc, doSignalQualityInit(impl));
3621  *isTooFewSamples = impl->isSignalTooFewSamples;
3622  return ESR_SUCCESS;
3623CLEANUP:
3624  return rc;
3625}
3626
3627ESR_ReturnCode SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooManySamples)
3628{
3629  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3630  ESR_ReturnCode rc;
3631
3632  if (isTooManySamples == NULL)
3633  {
3634    PLogError("SR_RecognizerIsSignalTooManySamplesImpl", ESR_INVALID_ARGUMENT);
3635    return ESR_INVALID_ARGUMENT;
3636  }
3637  if (!impl->isSignalQualityInitialized)
3638    CHKLOG(rc, doSignalQualityInit(impl));
3639  *isTooManySamples = impl->isSignalTooManySamples;
3640  return ESR_SUCCESS;
3641CLEANUP:
3642  return rc;
3643}
3644
3645
3646
3647/**************************************/
3648/* Waveform Buffer stuff              */
3649/**************************************/
3650ESR_ReturnCode WaveformBuffer_Create(WaveformBuffer** waveformBuffer, size_t frame_size)
3651{
3652  ESR_ReturnCode rc;
3653  WaveformBuffer *buf;
3654  size_t val_size_t;
3655  int    val_int;
3656  ESR_BOOL   exists;
3657
3658  buf = NEW(WaveformBuffer, L("SR_RecognizerImpl.wvfmbuf"));
3659  if (buf == NULL)
3660  {
3661    rc = ESR_OUT_OF_MEMORY;
3662    PLogError(L("%s: could not create WaveformBuffer"), ESR_rc2str(rc));
3663    goto CLEANUP;
3664  }
3665
3666  ESR_SessionContains(L("SREC.voice_enroll.bufsz_kB"), &exists);
3667  if (exists)
3668    ESR_SessionGetSize_t(L("SREC.voice_enroll.bufsz_kB"), &val_size_t);
3669  else
3670    val_size_t = DEFAULT_WAVEFORM_BUFFER_MAX_SIZE;
3671  val_size_t *= 1024; /* convert to kB*/
3672  CHKLOG(rc, CircularBufferCreate(val_size_t, L("SR_RecognizerImpl.wvfmbuf.cbuffer"), &buf->cbuffer));
3673
3674  ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
3675  if (exists)
3676    ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &val_int);
3677  else
3678    val_int = DEFAULT_WAVEFORM_WINDBACK_FRAMES;
3679  val_int *= frame_size; /* convert frames to bytes */
3680  buf->windback_buffer_sz = (size_t) val_int;
3681  buf->windback_buffer = MALLOC(buf->windback_buffer_sz, L("SR_RecognizerImpl.wvfmbuf.windback"));
3682  if (buf->windback_buffer == NULL)
3683  {
3684    rc = ESR_OUT_OF_MEMORY;
3685    PLogError(L("%s: could not create Waveform windback buffer"), ESR_rc2str(rc));
3686    goto CLEANUP;
3687  }
3688
3689
3690  ESR_SessionContains(L("SREC.voice_enroll.eos_comfort_frames"), &exists);
3691  if (exists)
3692    ESR_SessionGetSize_t(L("SREC.voice_enroll.eos_comfort_frames"), &val_size_t);
3693  else
3694    val_size_t = DEFAULT_EOS_COMFORT_FRAMES;
3695  buf->eos_comfort_frames = val_size_t;
3696
3697  ESR_SessionContains(L("SREC.voice_enroll.bos_comfort_frames"), &exists);
3698  if (exists)
3699    ESR_SessionGetSize_t(L("SREC.voice_enroll.bos_comfort_frames"), &val_size_t);
3700  else
3701    val_size_t = DEFAULT_BOS_COMFORT_FRAMES;
3702  buf->bos_comfort_frames = val_size_t;
3703
3704  /* initially off */
3705  buf->state = WAVEFORM_BUFFERING_OFF;
3706
3707  *waveformBuffer = buf;
3708  return ESR_SUCCESS;
3709CLEANUP:
3710  WaveformBuffer_Destroy(buf);
3711  return rc;
3712}
3713
3714ESR_ReturnCode WaveformBuffer_Write(WaveformBuffer* waveformBuffer, void *data, size_t num_bytes)
3715{
3716  size_t available_bytes;
3717  size_t done_bytes;
3718
3719  /* do nothing if not active */
3720  switch (waveformBuffer->state)
3721  {
3722    case WAVEFORM_BUFFERING_OFF:
3723      return ESR_SUCCESS;
3724
3725    case WAVEFORM_BUFFERING_ON_CIRCULAR:
3726      available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3727      if (available_bytes < num_bytes)
3728      {
3729        done_bytes = CircularBufferSkip(waveformBuffer->cbuffer, num_bytes - available_bytes);
3730        if (done_bytes != num_bytes - available_bytes)
3731        {
3732          PLogError("WaveformBuffer_Write: error when skipping bytes");
3733          return ESR_INVALID_STATE;
3734        }
3735      }
3736      done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3737      if (done_bytes != num_bytes)
3738      {
3739        PLogError("WaveformBuffer_Write: error when writing bytes");
3740        return ESR_INVALID_STATE;
3741      }
3742      return ESR_SUCCESS;
3743
3744    case WAVEFORM_BUFFERING_ON_LINEAR:
3745      available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3746      if (available_bytes < num_bytes)
3747      {
3748        waveformBuffer->overflow_count += num_bytes;
3749        return ESR_BUFFER_OVERFLOW;
3750      }
3751      done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3752      if (done_bytes != num_bytes)
3753      {
3754        PLogError("WaveformBuffer_Write: error when writing bytes");
3755        return ESR_INVALID_STATE;
3756      }
3757      return ESR_SUCCESS;
3758
3759    default:
3760      PLogError("WaveformBuffer_Write: bad control path");
3761      return ESR_INVALID_STATE;
3762  }
3763}
3764
3765ESR_ReturnCode WaveformBuffer_Read(WaveformBuffer* waveformBuffer, void *data, size_t* num_bytes)
3766{
3767  size_t bytes_to_read;
3768  ESR_ReturnCode rc;
3769
3770  if (num_bytes == NULL)
3771  {
3772    rc = ESR_INVALID_ARGUMENT;
3773    PLogError(ESR_rc2str(rc));
3774    goto CLEANUP;
3775  }
3776  if (waveformBuffer->overflow_count > 0)
3777  {
3778    memset(data, 0, *num_bytes);
3779    *num_bytes = 0;
3780    PLogError(L("WaveformBuffer_Read: previous overflow causes read to return NULL"));
3781    return ESR_SUCCESS;
3782  }
3783
3784  if (waveformBuffer->read_size != 0 && *num_bytes > waveformBuffer->read_size)
3785  {
3786    PLogError(L("ESR_OUT_OF_MEMORY: waveform buffer too small for read, increase from %d to %d"), *num_bytes, waveformBuffer->read_size);
3787    return ESR_OUT_OF_MEMORY;
3788  }
3789
3790  if (waveformBuffer->read_size == 0)
3791    bytes_to_read = *num_bytes;
3792  else
3793    bytes_to_read = MIN(waveformBuffer->read_size, *num_bytes);
3794  waveformBuffer->read_size -= bytes_to_read;
3795  *num_bytes = CircularBufferRead(waveformBuffer->cbuffer, data, bytes_to_read);
3796  if (*num_bytes != bytes_to_read)
3797  {
3798    PLogError("WaveformBuffer_Read: error reading buffer");
3799    return ESR_INVALID_STATE;
3800  }
3801  return ESR_SUCCESS;
3802CLEANUP:
3803  return rc;
3804}
3805
3806/* WindBack will save the last num_bytes recorded, reset the buffer, and then load the
3807   saved bytes at the beginning of the buffer */
3808ESR_ReturnCode WaveformBuffer_WindBack(WaveformBuffer* waveformBuffer, const size_t num_bytes)
3809{
3810  ESR_ReturnCode rc;
3811  size_t bufferSize;
3812
3813  if (num_bytes <= 0)
3814  {
3815    CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3816    return ESR_SUCCESS;
3817  }
3818
3819  /* make sure windback buffer is big enough */
3820  if (num_bytes > waveformBuffer->windback_buffer_sz)
3821  {
3822    rc = ESR_OUT_OF_MEMORY;
3823    PLogError(L("%s: windback buffer is too small (needed=%d, had=%d)"), ESR_rc2str(rc), num_bytes, waveformBuffer->windback_buffer_sz);
3824    goto CLEANUP;
3825  }
3826
3827  CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3828  /* skip the first few bytes written */
3829  if (bufferSize < num_bytes)
3830  {
3831    PLogError("bufferSize %d num_bytes %d (ESR_INVALID_STATE)\n", bufferSize, num_bytes);
3832    bufferSize = 0;
3833  }
3834  else
3835  {
3836    bufferSize -= num_bytes;
3837  }
3838  CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, bufferSize));
3839  /* read the last few bytes written */
3840  bufferSize = num_bytes;
3841  CHKLOG(rc, WaveformBuffer_Read(waveformBuffer, waveformBuffer->windback_buffer, &bufferSize));
3842
3843  /* reset buffer */
3844  CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3845
3846  /* rewrite the saved bytes at the beginning */
3847  CHKLOG(rc, WaveformBuffer_Write(waveformBuffer, waveformBuffer->windback_buffer, bufferSize));
3848  return ESR_SUCCESS;
3849CLEANUP:
3850  return rc;
3851}
3852
3853ESR_ReturnCode WaveformBuffer_Destroy(WaveformBuffer* waveformBuffer)
3854{
3855  if (waveformBuffer->cbuffer)
3856    FREE(waveformBuffer->cbuffer);
3857  if (waveformBuffer->windback_buffer)
3858    FREE(waveformBuffer->windback_buffer);
3859  if (waveformBuffer)
3860    FREE(waveformBuffer);
3861  return ESR_SUCCESS;
3862}
3863
3864ESR_ReturnCode WaveformBuffer_SetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t state)
3865{
3866  waveformBuffer->state = state;
3867  return ESR_SUCCESS;
3868}
3869
3870ESR_ReturnCode WaveformBuffer_GetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t* state)
3871{
3872  *state = waveformBuffer->state;
3873  return ESR_SUCCESS;
3874}
3875
3876/**
3877 * @return ESR_BUFFER_OVERFLOW if nametag EOS occured beyond end of buffer
3878 */
3879ESR_ReturnCode WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer* waveformBuffer, const LCHAR* end_pointed_result, const size_t bytes_per_frame)
3880{
3881  const LCHAR *p;
3882  size_t bos_frame, eos_frame, bufferSize, read_start_offset;
3883  ESR_ReturnCode rc;
3884
3885  /* potential end pointed results
3886
3887     -pau-@19 tape@36 scan@64 down@88 -pau2-@104
3888     -pau-@19 tape@34 off@55 -pau2-@78
3889     -pau-@19 tape@47 help@66 -pau2-@80
3890     -pau-@16 tape@36 reverse@71 -pau2-@91
3891     -pau-@21 tape@42 scan@59 down@80 -pau2-@91
3892
3893     what I need to extract is the integer between "-pau-@" and ' '
3894     and the integer between '@' and " -pau2-"
3895  */
3896
3897
3898  p = LSTRSTR( end_pointed_result, PREFIX_WORD);
3899  if(p) p+=PREFIX_WORD_LEN; while(p && *p == '@') p++;
3900  rc = p ? lstrtoui(p, &bos_frame, 10) : ESR_INVALID_ARGUMENT;
3901  if (rc == ESR_INVALID_ARGUMENT)
3902  {
3903    PLogError(L("%s: extracting bos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3904    goto CLEANUP;
3905  }
3906  else if (rc != ESR_SUCCESS)
3907    goto CLEANUP;
3908
3909  p = LSTRSTR( end_pointed_result, SUFFIX_WORD);
3910  while(p && p>end_pointed_result && p[-1]!='@') --p;
3911  rc = p ? lstrtoui(p, &eos_frame, 10) : ESR_INVALID_ARGUMENT;
3912  if (rc == ESR_INVALID_ARGUMENT)
3913  {
3914    PLogError(L("%s: extracting eos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3915    goto CLEANUP;
3916  }
3917  else if (rc != ESR_SUCCESS)
3918    goto CLEANUP;
3919
3920  bos_frame -= (bos_frame > waveformBuffer->bos_comfort_frames ? waveformBuffer->bos_comfort_frames : 0);
3921  eos_frame += waveformBuffer->eos_comfort_frames;
3922
3923  /*
3924   * I know where speech started, so I want to skip frames 0 to bos_frame.
3925   * I also know where speech ended so I want to set the amount of frames(bytes) to read for
3926   * the nametag audio buffer (i.e. the read_size)
3927   */
3928
3929  read_start_offset = bos_frame * bytes_per_frame * 2 /* times 2 because of skip even frames */;
3930  waveformBuffer->read_size = (eos_frame - bos_frame) * bytes_per_frame * 2 /* times 2 because of skip even frames */;
3931
3932  CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3933  if (read_start_offset + waveformBuffer->read_size > bufferSize)
3934  {
3935    waveformBuffer->overflow_count += read_start_offset + waveformBuffer->read_size - bufferSize;
3936    passert(waveformBuffer->overflow_count > 0);
3937    PLogMessage(L("Warning: Voice Enrollment audio buffer overflow (spoke too much, over by %d bytes)"),
3938                waveformBuffer->overflow_count);
3939    return ESR_BUFFER_OVERFLOW;
3940  }
3941  CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, read_start_offset));
3942#ifdef SREC_ENGINE_VERBOSE_LOGGING
3943  PLogMessage(L("Voice Enrollment: bos@%d, eos@%d, therefore sizeof(waveform) should be %d"), bos_frame, eos_frame, waveformBuffer->read_size);
3944#endif
3945  return ESR_SUCCESS;
3946CLEANUP:
3947  return rc;
3948}
3949
3950
3951ESR_ReturnCode WaveformBuffer_Reset(WaveformBuffer* waveformBuffer)
3952{
3953  CircularBufferReset(waveformBuffer->cbuffer);
3954  waveformBuffer->overflow_count = 0;
3955  waveformBuffer->read_size = 0;
3956  return ESR_SUCCESS;
3957}
3958
3959ESR_ReturnCode WaveformBuffer_GetSize(WaveformBuffer* waveformBuffer, size_t* size)
3960{
3961  *size = CircularBufferGetSize(waveformBuffer->cbuffer);
3962  return ESR_SUCCESS;
3963}
3964
3965ESR_ReturnCode WaveformBuffer_Skip(WaveformBuffer* waveformBuffer, const size_t bytes)
3966{
3967  if (CircularBufferSkip(waveformBuffer->cbuffer, bytes) != (int) bytes)
3968    return ESR_INVALID_STATE;
3969  return ESR_SUCCESS;
3970}
3971
3972
3973
3974static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl )
3975    {
3976    ESR_ReturnCode  reset_status;
3977
3978    FREE ( impl->audioBuffer );
3979    impl->audioBuffer = NULL;
3980    impl->audioBuffer = MALLOC ( impl->FRAME_SIZE, MTAG );
3981
3982    if ( impl->audioBuffer != NULL )
3983        {
3984        WaveformBuffer_Destroy ( impl->waveformBuffer );
3985        impl->waveformBuffer = NULL;
3986        reset_status = WaveformBuffer_Create ( &impl->waveformBuffer, impl->FRAME_SIZE );
3987        }
3988    else
3989        {
3990        reset_status = ESR_OUT_OF_MEMORY;
3991        }
3992    return ( reset_status );
3993    }
3994
3995
3996
3997static ESR_ReturnCode SR_Recognizer_Validate_Sample_Rate ( size_t sample_rate )
3998    {
3999    ESR_ReturnCode  validate_status;
4000
4001    switch ( sample_rate )
4002        {
4003        case 8000:
4004        case 11025:
4005        case 16000:
4006        case 22050:
4007            validate_status = ESR_SUCCESS;
4008            break;
4009
4010        default:
4011            validate_status = ESR_INVALID_ARGUMENT;
4012            break;
4013        }
4014    return ( validate_status );
4015    }
4016
4017
4018
4019static ESR_ReturnCode SR_Recognizer_Sample_Rate_Needs_Change ( size_t new_sample_rate, ESR_BOOL *needs_changing )
4020    {
4021    ESR_ReturnCode  validate_status;
4022    size_t          current_sample_rate;
4023
4024    validate_status = ESR_SessionGetSize_t ( "CREC.Frontend.samplerate", &current_sample_rate );
4025
4026    if ( validate_status == ESR_SUCCESS )
4027        {
4028        if ( new_sample_rate != current_sample_rate )
4029            *needs_changing = ESR_TRUE;
4030        else
4031            *needs_changing = ESR_TRUE;
4032        }
4033    return ( validate_status );
4034    }
4035
4036
4037
4038static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( void )
4039    {
4040    ESR_ReturnCode  change_status;
4041    LCHAR           model_filenames [P_PATH_MAX];
4042    LCHAR           lda_filename [P_PATH_MAX];
4043    size_t          filename_length;
4044
4045    filename_length = P_PATH_MAX;
4046    change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles8"), model_filenames, &filename_length );
4047
4048    if ( change_status == ESR_SUCCESS )
4049        {
4050        filename_length = P_PATH_MAX;
4051        change_status = ESR_SessionGetLCHAR ( L("cmdline.lda8"), lda_filename, &filename_length );
4052
4053/* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4054        if ( change_status == ESR_SUCCESS )
4055            {
4056            change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", 8000 );
4057            if ( change_status == ESR_SUCCESS )
4058                {
4059                change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 4000 );
4060
4061                if ( change_status == ESR_SUCCESS )
4062                    {
4063                    change_status =  ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4064
4065                    if ( change_status == ESR_SUCCESS )
4066                        change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4067                    }
4068                }
4069            }
4070        else
4071            {
4072            PLogError (L("\nMissing Parameter lda8\n"));
4073            }
4074        }
4075    else
4076        {
4077        PLogError (L("\nMissing Parameter models8\n"));
4078        }
4079    return ( change_status );
4080    }
4081
4082
4083
4084static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( size_t sample_rate )
4085    {
4086    ESR_ReturnCode  change_status;
4087    LCHAR           model_filenames [P_PATH_MAX];
4088    LCHAR           lda_filename [P_PATH_MAX];
4089    size_t          filename_length;
4090
4091    filename_length = P_PATH_MAX;
4092    change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles11"), model_filenames, &filename_length );
4093
4094    if ( change_status == ESR_SUCCESS )
4095        {
4096        filename_length = P_PATH_MAX;
4097        change_status = ESR_SessionGetLCHAR ( L("cmdline.lda11"), lda_filename, &filename_length );
4098
4099/* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4100        if ( change_status == ESR_SUCCESS )
4101            {
4102            change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", sample_rate );
4103
4104            if ( change_status == ESR_SUCCESS )
4105                {
4106                change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 5500 );
4107
4108                if ( change_status == ESR_SUCCESS )
4109                    {
4110                    change_status =  ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4111
4112                    if ( change_status == ESR_SUCCESS )
4113                        change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4114                    }
4115                }
4116            }
4117        else
4118            {
4119            PLogError (L("\nMissing Parameter lda11\n"));
4120            }
4121        }
4122    else
4123        {
4124        PLogError (L("\nMissing Parameter models11\n"));
4125        }
4126    return ( change_status );
4127    }
4128
4129
4130
4131static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params ( size_t new_sample_rate )
4132    {
4133    ESR_ReturnCode  change_status;
4134
4135    if ( new_sample_rate == 8000 )
4136        change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( );
4137    else
4138        change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( new_sample_rate );
4139
4140    return ( change_status );
4141    }
4142
4143
4144
4145ESR_ReturnCode SR_Recognizer_Change_Sample_RateImpl ( SR_Recognizer *recognizer, size_t new_sample_rate )
4146    {
4147    ESR_ReturnCode          change_status;
4148    ESR_BOOL                rate_needs_changing;
4149    SR_RecognizerImpl       *impl;
4150    CA_FrontendInputParams  *frontendParams;
4151
4152    change_status = SR_Recognizer_Validate_Sample_Rate ( new_sample_rate );
4153
4154    if ( change_status == ESR_SUCCESS )
4155        {
4156        change_status = SR_Recognizer_Sample_Rate_Needs_Change ( new_sample_rate, &rate_needs_changing );
4157
4158        if ( change_status == ESR_SUCCESS )
4159            {
4160            if ( rate_needs_changing == ESR_TRUE )
4161                {
4162                change_status = SR_Recognizer_Change_Sample_Rate_Session_Params ( new_sample_rate );
4163
4164                if ( change_status == ESR_SUCCESS )
4165                    { // SR_RecognizerCreateFrontendImpl
4166                    impl = (SR_RecognizerImpl *)recognizer;
4167                    change_status = SR_RecognizerUnsetupImpl( recognizer );
4168
4169                    if ( change_status == ESR_SUCCESS )
4170                        {
4171                        CA_UnconfigureFrontend ( impl->frontend );
4172                        frontendParams = CA_AllocateFrontendParameters ( );
4173
4174                        if ( frontendParams != NULL )
4175                            {
4176                            change_status = SR_RecognizerGetFrontendLegacyParametersImpl ( frontendParams );
4177
4178                            if ( change_status == ESR_SUCCESS )
4179                                {
4180                                CA_ConfigureFrontend ( impl->frontend, frontendParams );
4181                                CA_UnconfigureWave ( impl->wavein );
4182                                CA_ConfigureWave ( impl->wavein, impl->frontend );
4183                                impl->sampleRate = new_sample_rate;
4184                                impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
4185                                change_status = SR_Recognizer_Reset_Buffers ( impl );
4186
4187                                if ( change_status == ESR_SUCCESS )
4188                                    {
4189                                    change_status = SR_RecognizerSetupImpl( recognizer );
4190
4191                                    if ( change_status == ESR_SUCCESS )
4192                                        change_status = SR_AcousticStateReset ( recognizer );
4193                                    }
4194                                else
4195                                    {
4196                                    SR_RecognizerSetupImpl( recognizer );   /* Otherwise recognizer is in bad state */
4197                                    }
4198                                }
4199                            CA_FreeFrontendParameters ( frontendParams );
4200                            }
4201                        else
4202                            {
4203                            SR_RecognizerSetupImpl( recognizer );   /* Otherwise recognizer is in bad state */
4204                            change_status = ESR_OUT_OF_MEMORY;
4205                            }
4206                        }
4207                    }
4208                }
4209            }
4210        }
4211    return ( change_status );
4212    }
4213
4214
4215