RecognizerImpl.c revision 8fc5a7f51e62cb4ae44a27bdf4176d04adc80ede
1/*---------------------------------------------------------------------------*
2 *  RecognizerImpl.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20/*#define SREC_MEASURE_LATENCY    1*/
21
22#ifdef SREC_MEASURE_LATENCY
23#include <sys/time.h>
24
25struct timeval latency_start;
26#endif
27
28
29#include "ESR_Session.h"
30#include "ESR_SessionTypeImpl.h"
31#include "IntArrayList.h"
32#include "LCHAR.h"
33#include "passert.h"
34#include "plog.h"
35#include "pstdio.h"
36#include "pmemory.h"
37#include "ptimestamp.h"
38#include "SR_AcousticModelsImpl.h"
39#include "SR_AcousticStateImpl.h"
40#include "SR_GrammarImpl.h"
41#include "SR_SemprocDefinitions.h"
42#include "SR_SemanticResult.h"
43#include "SR_SemanticResultImpl.h"
44#include "SR_Recognizer.h"
45#include "SR_RecognizerImpl.h"
46#include "SR_RecognizerResultImpl.h"
47#include "SR_SemanticResultImpl.h"
48#include "SR_EventLog.h"
49#include "srec.h"
50
51#define MTAG NULL
52#define FILTER_NBEST_BY_SEM_RESULT 1
53#define AUDIO_CIRC_BUFFER_SIZE 20000
54#define SEMPROC_ACTIVE 1
55#define SAMPLE_SIZE (16 / CHAR_BIT) /* 16-bits / sample */
56
57/* milliseconds per FRAME = 1/FRAMERATE * 1000 */
58/* We multiply by 2 because we skip even frames */
59#define MSEC_PER_FRAME (2000/FRAMERATE)
60#define MAX_ENTRY_LENGTH 512
61#define PREFIX_WORD     "-pau-"
62#define PREFIX_WORD_LEN 5
63#define SUFFIX_WORD     "-pau2-"
64#define SUFFIX_WORD_LEN 6
65
66#ifdef MEASURE_SAMPLE_TIMES
67#include <sys/time.h>
68#include <stdio.h>
69
70#define MAX_SAMPLES_TO_MEASURE      500
71
72static long sample_buffers_received = 0;
73static long total_samples_received = 0;
74static long samples_in_buffer [MAX_SAMPLES_TO_MEASURE];
75static long seconds_buffer_received [MAX_SAMPLES_TO_MEASURE];
76static long micro_seconds_buffer_received [MAX_SAMPLES_TO_MEASURE];
77static struct timeval buffer_received_time;
78
79static void SR_Recognizer_Log_Samples_Received ( void );
80
81static void SR_Recognizer_Log_Samples_Received ( void )
82{
83    FILE *log_file;
84    char file_name [256];
85    char log_buffer [256];
86    long loop_counter;
87
88    if ( sample_buffers_received > 0 )
89        {
90        gettimeofday ( &buffer_received_time, NULL );
91        sprintf ( file_name, "reco_recvd_%ld_%ld.txt", buffer_received_time.tv_sec, buffer_received_time.tv_usec );
92        log_file = fopen ( file_name, "w" );
93
94        if ( log_file != NULL )
95            {
96            for ( loop_counter = 0; loop_counter < sample_buffers_received; loop_counter++ )
97                {
98                sprintf ( log_buffer, "%ld %ld  %ld  %ld\n", loop_counter + 1, samples_in_buffer [loop_counter],
99                seconds_buffer_received [loop_counter], micro_seconds_buffer_received [loop_counter] );
100                fwrite ( log_buffer, 1, strlen ( log_buffer ), log_file );
101                }
102            fclose ( log_file );
103	    }
104	sample_buffers_received = 0;
105        }
106    }
107#endif
108
109
110static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl );
111
112/**
113 * Initializes recognizer properties to default values.
114 *
115 * Replaces setup_recognition_parameters()
116 */
117ESR_ReturnCode SR_RecognizerToSessionImpl()
118{
119  ESR_ReturnCode rc;
120
121  /* Old comment: remember to keep "ca_rip.h" up to date with these parameters... */
122
123  /* CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_acoustic_models", 2)); */
124  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Recognizer.partial_results", ESR_FALSE));
125  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.NBest", 1));
126  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.eou_threshold", 100));
127  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_altword_tokens", 400));
128  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_frames", 1000));
129  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_arcs", 3000));
130  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_nodes", 3000));
131  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsmnode_tokens", 1000));
132  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_hmm_tokens", 1000));
133  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_model_states", 1000));
134  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_searches", 2));
135  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_word_tokens", 1000));
136  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.non_terminal_timeout", 50));
137  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.num_wordends_per_frame", 10));
138  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.often", 10));
139  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.optional_terminal_timeout", 30));
140  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.reject", 500));
141  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.terminal_timeout", 10));
142  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.viterbi_prune_thresh", 5000));
143  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.wordpen", 0));
144
145  CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("SREC.Recognizer.utterance_timeout", 400));
146
147  return ESR_SUCCESS;
148CLEANUP:
149  return rc;
150}
151
152/**
153 * Initializes frontend properties to default values.
154 *
155 * Replaces load_up_parameter_list()
156 */
157ESR_ReturnCode SR_RecognizerFrontendToSessionImpl()
158{
159  IntArrayList* intList = NULL;
160  ESR_ReturnCode rc;
161  ESR_BOOL exists;
162  size_t i;
163
164  /* Old comment: Remember to keep "ca_pip.h" up to date with these parameters... */
165
166  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.mel_dim", 12));
167  CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("CREC.Frontend.samplerate", 8000));
168  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.premel", 0.98f));
169  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lowcut", 260));  /* Hz */
170  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.highcut", 4000)); /* Hz */
171  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.window_factor", 2.0)); /* times the frame size */
172  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_skip_even_frames", ESR_FALSE)); /* 10/20 ms rate */
173  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.offset", 0)); /* additional */
174  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.ddmel", ESR_FALSE)); /* delta-delta mel pars */
175  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.forgetfactor", 40));
176  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.sv6_margin", 10));
177  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rasta", ESR_FALSE));
178  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rastac0", ESR_FALSE));
179  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.spectral_subtraction", ESR_FALSE));
180  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.spec_sub_dur", 0));
181  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.spec_sub_scale", 1.0));
182  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_dump", ESR_FALSE)); /* Output is filterbank (30 floats) */
183  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_input", ESR_FALSE)); /* Input is filterbank (30 floats) in place of audio samples */
184  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_smooth_c0", ESR_TRUE));
185  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.plp", ESR_FALSE)); /* Do PLP instead of MEL */
186  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lpcorder", 12)); /* order of lpc analysis in plp processing */
187  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.warp_scale", 1.0));
188  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.piecewise_start", 1.0));
189  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecayup", -1.0)); /* If +ve, decay factor on peakpicker (low to high) */
190  CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecaydown", -1.0)); /* If +ve, decay factor on peakpicker (high to low) */
191  CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.cuberoot", ESR_FALSE)); /* Use cube root instead of log */
192
193  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_offset", &exists));
194  if (!exists)
195  {
196    CHKLOG(rc, IntArrayListCreate(&intList));
197    for (i = 0; i < 32; ++i)
198      CHKLOG(rc, IntArrayListAdd(intList, 0));
199    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_offset", intList, TYPES_INTARRAYLIST));
200    intList = NULL;
201  }
202
203  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_loop", &exists));
204  if (!exists)
205  {
206    CHKLOG(rc, IntArrayListCreate(&intList));
207    for (i = 0; i < 32; ++i)
208      CHKLOG(rc, IntArrayListAdd(intList, 1));
209    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_loop", intList, TYPES_INTARRAYLIST));
210    intList = NULL;
211  }
212
213  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melA", &exists));
214  if (!exists)
215  {
216    CHKLOG(rc, IntArrayListCreate(&intList));
217    CHKLOG(rc, IntArrayListAdd(intList, (int) 13.2911));
218    CHKLOG(rc, IntArrayListAdd(intList, (int) 47.2229));
219    CHKLOG(rc, IntArrayListAdd(intList, (int) 79.2485));
220    CHKLOG(rc, IntArrayListAdd(intList, (int) 92.1967));
221    CHKLOG(rc, IntArrayListAdd(intList, (int) 136.3855));
222    CHKLOG(rc, IntArrayListAdd(intList, (int) 152.2896));
223    CHKLOG(rc, IntArrayListAdd(intList, (int) 183.3601));
224    CHKLOG(rc, IntArrayListAdd(intList, (int) 197.4200));
225    CHKLOG(rc, IntArrayListAdd(intList, (int) 217.8278));
226    CHKLOG(rc, IntArrayListAdd(intList, (int) 225.6556));
227    CHKLOG(rc, IntArrayListAdd(intList, (int) 263.3073));
228    CHKLOG(rc, IntArrayListAdd(intList, (int) 277.193));
229    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melA", intList, TYPES_INTARRAYLIST));
230    intList = NULL;
231  }
232
233  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melB", &exists));
234  if (!exists)
235  {
236    CHKLOG(rc, IntArrayListCreate(&intList));
237    CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0847));
238    CHKLOG(rc, IntArrayListAdd(intList, (int) 91.3289));
239    CHKLOG(rc, IntArrayListAdd(intList, (int) 113.9995));
240    CHKLOG(rc, IntArrayListAdd(intList, (int) 123.0336));
241    CHKLOG(rc, IntArrayListAdd(intList, (int) 131.2704));
242    CHKLOG(rc, IntArrayListAdd(intList, (int) 128.9942));
243    CHKLOG(rc, IntArrayListAdd(intList, (int) 120.5267));
244    CHKLOG(rc, IntArrayListAdd(intList, (int) 132.0079));
245    CHKLOG(rc, IntArrayListAdd(intList, (int) 129.8076));
246    CHKLOG(rc, IntArrayListAdd(intList, (int) 126.5029));
247    CHKLOG(rc, IntArrayListAdd(intList, (int) 121.8519));
248    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melB", intList, TYPES_INTARRAYLIST));
249    intList = NULL;
250  }
251
252  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelA", &exists));
253  if (!exists)
254  {
255    CHKLOG(rc, IntArrayListCreate(&intList));
256    CHKLOG(rc, IntArrayListAdd(intList, (int) 91.6305));
257    CHKLOG(rc, IntArrayListAdd(intList, (int) 358.3790));
258    CHKLOG(rc, IntArrayListAdd(intList, (int) 527.5946));
259    CHKLOG(rc, IntArrayListAdd(intList, (int) 536.3163));
260    CHKLOG(rc, IntArrayListAdd(intList, (int) 731.2385));
261    CHKLOG(rc, IntArrayListAdd(intList, (int) 757.8382));
262    CHKLOG(rc, IntArrayListAdd(intList, (int) 939.4460));
263    CHKLOG(rc, IntArrayListAdd(intList, (int) 1028.4136));
264    CHKLOG(rc, IntArrayListAdd(intList, (int) 1071.3193));
265    CHKLOG(rc, IntArrayListAdd(intList, (int) 1183.7922));
266    CHKLOG(rc, IntArrayListAdd(intList, (int) 1303.1014));
267    CHKLOG(rc, IntArrayListAdd(intList, (int) 1447.7766));
268    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelA", intList, TYPES_INTARRAYLIST));
269    intList = NULL;
270  }
271
272  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelB", &exists));
273  if (!exists)
274  {
275    CHKLOG(rc, IntArrayListCreate(&intList));
276    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4785));
277    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3878));
278    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4029));
279    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3182));
280    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3706));
281    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5394));
282    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5150));
283    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4270));
284    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4871));
285    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4088));
286    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4361));
287    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5449));
288    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelB", intList, TYPES_INTARRAYLIST));
289    intList = NULL;
290  }
291
292  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelA", &exists));
293  if (!exists)
294  {
295    CHKLOG(rc, IntArrayListCreate(&intList));
296    CHKLOG(rc, IntArrayListAdd(intList, (int) 10.7381));
297    CHKLOG(rc, IntArrayListAdd(intList, (int) 32.6775));
298    CHKLOG(rc, IntArrayListAdd(intList, (int) 46.2301));
299    CHKLOG(rc, IntArrayListAdd(intList, (int) 51.5438));
300    CHKLOG(rc, IntArrayListAdd(intList, (int) 57.6636));
301    CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0581));
302    CHKLOG(rc, IntArrayListAdd(intList, (int) 65.3696));
303    CHKLOG(rc, IntArrayListAdd(intList, (int) 70.1910));
304    CHKLOG(rc, IntArrayListAdd(intList, (int) 71.6751));
305    CHKLOG(rc, IntArrayListAdd(intList, (int) 78.2364));
306    CHKLOG(rc, IntArrayListAdd(intList, (int) 83.2440));
307    CHKLOG(rc, IntArrayListAdd(intList, (int) 89.6261));
308    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelA", intList, TYPES_INTARRAYLIST));
309    intList = NULL;
310  }
311
312  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelB", &exists));
313  if (!exists)
314  {
315    CHKLOG(rc, IntArrayListCreate(&intList));
316    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5274));
317    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5098));
318    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5333));
319    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5963));
320    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5132));
321    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5282));
322    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5530));
323    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5682));
324    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4662));
325    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4342));
326    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5235));
327    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4061));
328    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelB", intList, TYPES_INTARRAYLIST));
329    intList = NULL;
330  }
331
332  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaA", &exists));
333  if (!exists)
334  {
335    CHKLOG(rc, IntArrayListCreate(&intList));
336    CHKLOG(rc, IntArrayListAdd(intList, (int) 7.80));
337    CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0));
338    CHKLOG(rc, IntArrayListAdd(intList, (int) 54.0));
339    CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0));
340    CHKLOG(rc, IntArrayListAdd(intList, (int) 84.0));
341    CHKLOG(rc, IntArrayListAdd(intList, (int) 86.5));
342    CHKLOG(rc, IntArrayListAdd(intList, (int) 98.1));
343    CHKLOG(rc, IntArrayListAdd(intList, (int) 127.0));
344    CHKLOG(rc, IntArrayListAdd(intList, (int) 153.0));
345    CHKLOG(rc, IntArrayListAdd(intList, (int) 160.0));
346    CHKLOG(rc, IntArrayListAdd(intList, (int) 188.0));
347    CHKLOG(rc, IntArrayListAdd(intList, (int) 199.0));
348    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaA", intList, TYPES_INTARRAYLIST));
349    intList = NULL;
350  }
351
352  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaB", &exists));
353  if (!exists)
354  {
355    CHKLOG(rc, IntArrayListCreate(&intList));
356    CHKLOG(rc, IntArrayListAdd(intList, 117));
357    CHKLOG(rc, IntArrayListAdd(intList, 121));
358    CHKLOG(rc, IntArrayListAdd(intList, 114));
359    CHKLOG(rc, IntArrayListAdd(intList, 111));
360    CHKLOG(rc, IntArrayListAdd(intList, 113));
361    CHKLOG(rc, IntArrayListAdd(intList, 126));
362    CHKLOG(rc, IntArrayListAdd(intList, 134));
363    CHKLOG(rc, IntArrayListAdd(intList, 130));
364    CHKLOG(rc, IntArrayListAdd(intList, 135));
365    CHKLOG(rc, IntArrayListAdd(intList, 129));
366    CHKLOG(rc, IntArrayListAdd(intList, 139));
367    CHKLOG(rc, IntArrayListAdd(intList, 138));
368    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaB", intList, TYPES_INTARRAYLIST));
369    intList = NULL;
370  }
371
372  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_detect", 18));
373  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_above", 18));
374  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.ambient_within", 12));
375  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.start_windback", 50));
376  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.utterance_allowance", 40));
377  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_duration", 6));
378  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.quiet_duration", 20));
379
380  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_clip", 32767));
381  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_clip", -32768));
382  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_per10000_clip", 10));
383  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_dc_offset", 1000));
384  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_noise_level_bit", 11));
385  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_speech_level_bit", 11));
386  CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.min_samples", 10000));
387
388  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_freq", &exists));
389  if (!exists)
390  {
391    CHKLOG(rc, IntArrayListCreate(&intList));
392    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_freq", intList, TYPES_INTARRAYLIST));
393    intList = NULL;
394  }
395  CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_spread", &exists));
396  if (!exists)
397  {
398    CHKLOG(rc, IntArrayListCreate(&intList));
399    CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_spread", intList, TYPES_INTARRAYLIST));
400    intList = NULL;
401  }
402  return ESR_SUCCESS;
403CLEANUP:
404  if (intList != NULL)
405    intList->destroy(intList);
406  return rc;
407}
408
409/**
410 * Generate legacy frontend parameter structure from ESR_Session.
411 *
412 * @param impl SR_RecognizerImpl handle
413 * @param params Resulting structure
414 */
415ESR_ReturnCode SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams* params)
416{
417  ESR_ReturnCode rc;
418  IntArrayList* intList;
419  size_t size, i, size_tValue;
420  int iValue;
421
422  passert(params != NULL);
423  params->is_loaded = ESR_FALSE;
424  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.mel_dim", &params->mel_dim));
425  CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &size_tValue));
426  params->samplerate = (int) size_tValue;
427  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.premel", &params->pre_mel));
428  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lowcut", &params->low_cut));
429  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.highcut", &params->high_cut));
430  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.window_factor", &params->window_factor));
431  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_skip_even_frames", &params->do_skip_even_frames));
432  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.offset", &params->offset));
433  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.ddmel", &params->do_dd_mel));
434  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.forgetfactor", &params->forget_factor));
435  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.sv6_margin", &params->sv6_margin));
436  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.rastac0", &params->do_rastac0));
437  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.spectral_subtraction", &params->do_spectral_sub));
438  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.spec_sub_dur", &params->spectral_sub_frame_dur));
439  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.spec_sub_scale", &params->spec_sub_scale));
440  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_dump", &params->do_filterbank_input));
441  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_input", &params->do_filterbank_input));
442  CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_smooth_c0", &params->do_smooth_c0));
443  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lpcorder", &params->lpc_order));
444  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.warp_scale", &params->warp_scale));
445  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.piecewise_start", &params->piecewise_start));
446  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecayup", &params->peakpickup));
447  CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecaydown", &params->peakpickdown));
448
449  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_offset", (void **)&intList, TYPES_INTARRAYLIST));
450  if (intList == NULL)
451  {
452    PLogError(L("ESR_INVALID_STATE"));
453    return ESR_INVALID_STATE;
454  }
455  CHKLOG(rc, IntArrayListGetSize(intList, &size));
456  for (i = 0; i < size; ++i)
457    CHKLOG(rc, IntArrayListGet(intList, i, &params->mel_offset[i]));
458
459  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_loop", (void **)&intList, TYPES_INTARRAYLIST));
460  if (intList == NULL)
461  {
462    PLogError(L("ESR_INVALID_STATE"));
463    return ESR_INVALID_STATE;
464  }
465  CHKLOG(rc, IntArrayListGetSize(intList, &size));
466  for (i = 0; i < size; ++i)
467    CHKLOG(rc, IntArrayListGet(intList, i, &params->mel_loop[i]));
468
469  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melA", (void **)&intList, TYPES_INTARRAYLIST));
470  CHKLOG(rc, IntArrayListGetSize(intList, &size));
471  for (i = 0; i < size; ++i)
472    CHKLOG(rc, IntArrayListGet(intList, i, &params->melA_scale[i]));
473
474  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melB", (void **)&intList, TYPES_INTARRAYLIST));
475  CHKLOG(rc, IntArrayListGetSize(intList, &size));
476  for (i = 0; i < size; ++i)
477    CHKLOG(rc, IntArrayListGet(intList, i, &params->melB_scale[i]));
478
479  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelA", (void **)&intList, TYPES_INTARRAYLIST));
480  CHKLOG(rc, IntArrayListGetSize(intList, &size));
481  for (i = 0; i < size; ++i)
482    CHKLOG(rc, IntArrayListGet(intList, i, &params->dmelA_scale[i]));
483
484  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelB", (void **)&intList, TYPES_INTARRAYLIST));
485  CHKLOG(rc, IntArrayListGetSize(intList, &size));
486  for (i = 0; i < size; ++i)
487    CHKLOG(rc, IntArrayListGet(intList, i, &params->dmelB_scale[i]));
488
489  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelA", (void **)&intList, TYPES_INTARRAYLIST));
490  CHKLOG(rc, IntArrayListGetSize(intList, &size));
491  for (i = 0; i < size; ++i)
492    CHKLOG(rc, IntArrayListGet(intList, i, &params->ddmelA_scale[i]));
493
494  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelB", (void **)&intList, TYPES_INTARRAYLIST));
495  CHKLOG(rc, IntArrayListGetSize(intList, &size));
496  for (i = 0; i < size; ++i)
497    CHKLOG(rc, IntArrayListGet(intList, i, &params->ddmelB_scale[i]));
498
499  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaA", (void **)&intList, TYPES_INTARRAYLIST));
500  CHKLOG(rc, IntArrayListGetSize(intList, &size));
501  for (i = 0; i < size; ++i)
502    CHKLOG(rc, IntArrayListGet(intList, i, &params->rastaA_scale[i]));
503
504  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaB", (void **)&intList, TYPES_INTARRAYLIST));
505  CHKLOG(rc, IntArrayListGetSize(intList, &size));
506  for (i = 0; i < size; ++i)
507    CHKLOG(rc, IntArrayListGet(intList, i, &params->rastaB_scale[i]));
508
509  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_detect", &params->voice_margin));
510  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_above", &params->fast_voice_margin));
511  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.ambient_within", &params->tracker_margin));
512  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.start_windback", &params->start_windback));
513  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.utterance_allowance", &params->unsure_duration));
514  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_duration", &params->voice_duration));
515  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.quiet_duration", &params->quiet_duration));
516
517  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_clip", &params->high_clip));
518  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_clip", &params->low_clip));
519  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_per10000_clip", &params->max_per10000_clip));
520  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_dc_offset", &params->max_dc_offset));
521  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_noise_level_bit", &params->high_noise_level_bit));
522  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_speech_level_bit", &params->low_speech_level_bit));
523  CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.min_samples", &params->min_samples));
524
525  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_freq", (void **)&intList, TYPES_INTARRAYLIST));
526  if (intList == NULL)
527  {
528    PLogError(L("ESR_INVALID_STATE"));
529    return ESR_INVALID_STATE;
530  }
531  CHKLOG(rc, IntArrayListGetSize(intList, &size));
532  for (i = 0; i < size; ++i)
533  {
534    CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
535    params->spectrum_filter_freq[i] = iValue;
536  }
537
538  CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_spread", (void **)&intList, TYPES_INTARRAYLIST));
539  if (intList == NULL)
540  {
541    PLogError(L("ESR_INVALID_STATE"));
542    return ESR_INVALID_STATE;
543  }
544  CHKLOG(rc, IntArrayListGetSize(intList, &size));
545  for (i = 0; i < size; ++i)
546  {
547    CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
548    params->spectrum_filter_spread[i] = iValue;
549  }
550  params->is_loaded = ESR_TRUE;
551  return ESR_SUCCESS;
552CLEANUP:
553  return rc;
554}
555
556/**
557 * Creates frontend components of SR_Recognizer.
558 *
559 * @param impl SR_RecognizerImpl handle
560 */
561ESR_ReturnCode SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl* impl)
562{
563  ESR_ReturnCode rc;
564  CA_FrontendInputParams* frontendParams;
565
566  /* Create a frontend object */
567  impl->frontend = CA_AllocateFrontend(1, 0, 1);
568  frontendParams = CA_AllocateFrontendParameters();
569  CHKLOG(rc, SR_RecognizerGetFrontendLegacyParametersImpl(frontendParams));
570
571  CA_ConfigureFrontend(impl->frontend, frontendParams);
572
573  /* Create a wave object */
574  impl->wavein = CA_AllocateWave('N');
575  if (impl->wavein == NULL)
576  {
577    rc = ESR_OUT_OF_MEMORY;
578    PLogError(ESR_rc2str(rc));
579    goto CLEANUP;
580  }
581  CA_ConfigureWave(impl->wavein, impl->frontend);
582  CA_ConfigureVoicingAnalysis(impl->wavein, frontendParams);
583
584  CA_LoadCMSParameters(impl->wavein, NULL, frontendParams);
585
586  /* Create an utterance object */
587  impl->utterance = CA_AllocateUtterance();
588  if (impl->utterance == NULL)
589  {
590    rc = ESR_OUT_OF_MEMORY;
591    PLogError(ESR_rc2str(rc));
592    goto CLEANUP;
593  }
594  CA_InitUtteranceForFrontend(impl->utterance, frontendParams);
595  CA_AttachCMStoUtterance(impl->wavein, impl->utterance);
596  CA_FreeFrontendParameters(frontendParams);
597  return ESR_SUCCESS;
598
599CLEANUP:
600  if (impl->frontend != NULL)
601  {
602    CA_UnconfigureFrontend(impl->frontend);
603    CA_FreeFrontend(impl->frontend);
604    impl->frontend = NULL;
605  }
606  if (impl->wavein != NULL)
607  {
608    CA_UnconfigureWave(impl->wavein);
609    CA_FreeWave(impl->wavein);
610    impl->wavein = NULL;
611  }
612  if (impl->utterance != NULL)
613  {
614    CA_ClearUtterance(impl->utterance);
615    CA_FreeUtterance(impl->utterance);
616    impl->utterance = NULL;
617  }
618  if (frontendParams != NULL)
619    CA_FreeFrontendParameters(frontendParams);
620  return rc;
621}
622
623/**
624 * Populates legacy recognizer parameters from the session.
625 *
626 * Replaces setup_pattern_parameters()
627 */
628ESR_ReturnCode SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams* params)
629{
630  ESR_ReturnCode rc;
631
632  passert(params != NULL);
633  params->is_loaded = ESR_FALSE;
634  CHKLOG(rc, ESR_SessionGetBool("CREC.Recognizer.partial_results", &params->do_partial));
635  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.NBest", &params->top_choices));
636  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.eou_threshold", &params->eou_threshold));
637  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_altword_tokens", &params->max_altword_tokens));
638  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_frames", &params->max_frames));
639  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_arcs", &params->max_fsm_arcs));
640  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_nodes", &params->max_fsm_nodes));
641  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsmnode_tokens", &params->max_fsmnode_tokens));
642  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_hmm_tokens", &params->max_hmm_tokens));
643  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_model_states", &params->max_model_states));
644  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_searches", &params->max_searches));
645  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_word_tokens", &params->max_word_tokens));
646  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.non_terminal_timeout", &params->non_terminal_timeout));
647  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.num_wordends_per_frame", &params->num_wordends_per_frame));
648  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.often", &params->traceback_freq));
649  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.optional_terminal_timeout", &params->optional_terminal_timeout));
650  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.reject", &params->reject_score));
651  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.terminal_timeout", &params->terminal_timeout));
652  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.viterbi_prune_thresh", &params->viterbi_prune_thresh));
653  CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.wordpen", &params->word_penalty));
654  params->is_loaded = ESR_TRUE;
655
656  return ESR_SUCCESS;
657CLEANUP:
658  return rc;
659}
660
661ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self)
662{
663  SR_RecognizerImpl* impl;
664  CA_RecInputParams* recogParams = NULL;
665  ESR_ReturnCode rc;
666  LCHAR recHandle[20] = { 0 };
667
668  if (self == NULL)
669  {
670    PLogError(L("ESR_INVALID_ARGUMENT"));
671    return ESR_INVALID_ARGUMENT;
672  }
673  impl = NEW(SR_RecognizerImpl, MTAG);
674  if (impl == NULL)
675  {
676    PLogError(L("ESR_OUT_OF_MEMORY"));
677    return ESR_OUT_OF_MEMORY;
678  }
679
680  impl->Interface.start = &SR_RecognizerStartImpl;
681  impl->Interface.stop = &SR_RecognizerStopImpl;
682  impl->Interface.destroy = &SR_RecognizerDestroyImpl;
683  impl->Interface.setup = &SR_RecognizerSetupImpl;
684  impl->Interface.unsetup = &SR_RecognizerUnsetupImpl;
685  impl->Interface.isSetup = &SR_RecognizerIsSetupImpl;
686  impl->Interface.getParameter = &SR_RecognizerGetParameterImpl;
687  impl->Interface.getSize_tParameter = &SR_RecognizerGetSize_tParameterImpl;
688  impl->Interface.getBoolParameter = &SR_RecognizerGetBoolParameterImpl;
689  impl->Interface.setParameter = &SR_RecognizerSetParameterImpl;
690  impl->Interface.setSize_tParameter = &SR_RecognizerSetSize_tParameterImpl;
691  impl->Interface.setBoolParameter = &SR_RecognizerSetBoolParameterImpl;
692  impl->Interface.setLockFunction = &SR_RecognizerSetLockFunctionImpl;
693  impl->Interface.hasSetupRules = &SR_RecognizerHasSetupRulesImpl;
694  impl->Interface.activateRule = &SR_RecognizerActivateRuleImpl;
695  impl->Interface.deactivateRule = &SR_RecognizerDeactivateRuleImpl;
696  impl->Interface.deactivateAllRules = &SR_RecognizerDeactivateAllRulesImpl;
697  impl->Interface.isActiveRule = &SR_RecognizerIsActiveRuleImpl;
698  impl->Interface.setWordAdditionCeiling = &SR_RecognizerSetWordAdditionCeilingImpl;
699  impl->Interface.checkGrammarConsistency = &SR_RecognizerCheckGrammarConsistencyImpl;
700  impl->Interface.getModels = &SR_RecognizerGetModelsImpl;
701  impl->Interface.putAudio = &SR_RecognizerPutAudioImpl;
702  impl->Interface.advance = &SR_RecognizerAdvanceImpl;
703  impl->Interface.loadUtterance = &SR_RecognizerLoadUtteranceImpl;
704  impl->Interface.loadWaveFile = &SR_RecognizerLoadWaveFileImpl;
705  impl->Interface.logEvent = &SR_RecognizerLogEventImpl;
706  impl->Interface.logToken = &SR_RecognizerLogTokenImpl;
707  impl->Interface.logTokenInt = &SR_RecognizerLogTokenIntImpl;
708  impl->Interface.logSessionStart = &SR_RecognizerLogSessionStartImpl;
709  impl->Interface.logSessionEnd = &SR_RecognizerLogSessionEndImpl;
710  impl->Interface.logWaveformData = &SR_RecognizerLogWaveformDataImpl;
711  impl->Interface.isSignalClipping = &SR_RecognizerIsSignalClippingImpl;
712  impl->Interface.isSignalDCOffset = &SR_RecognizerIsSignalDCOffsetImpl;
713  impl->Interface.isSignalNoisy = &SR_RecognizerIsSignalNoisyImpl;
714  impl->Interface.isSignalTooFewSamples = &SR_RecognizerIsSignalTooFewSamplesImpl;
715  impl->Interface.isSignalTooManySamples = &SR_RecognizerIsSignalTooManySamplesImpl;
716  impl->Interface.isSignalTooQuiet = &SR_RecognizerIsSignalTooQuietImpl;
717
718  impl->frontend = NULL;
719  impl->wavein = NULL;
720  impl->utterance = NULL;
721  impl->confidenceScorer = NULL;
722  impl->recognizer = NULL;
723  impl->models = NULL;
724  impl->grammars = NULL;
725  impl->result = NULL;
726  impl->parameters = NULL;
727  impl->acousticState = NULL;
728  impl->audioBuffer = NULL;
729  impl->buffer = NULL;
730  impl->frames = impl->processed;
731  impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
732  impl->isStarted = ESR_FALSE;
733  impl->isRecognizing = ESR_FALSE;
734  impl->gotLastFrame = ESR_FALSE;
735  impl->sampleRate = 0;
736  impl->lockFunction = NULL;
737  impl->lockData = NULL;
738  impl->eventLog = NULL;
739  impl->osi_log_level = 0;
740  impl->waveformBuffer = NULL;
741  impl->isSignalQualityInitialized = ESR_FALSE;
742  impl->beginningOfSpeechOffset = 0;
743  impl->gatedMode = ESR_TRUE;
744  impl->bgsniff = 0;
745  impl->isSignalClipping       = ESR_FALSE;
746  impl->isSignalDCOffset       = ESR_FALSE;
747  impl->isSignalNoisy          = ESR_FALSE;
748  impl->isSignalTooFewSamples  = ESR_FALSE;
749  impl->isSignalTooManySamples = ESR_FALSE;
750  impl->isSignalTooQuiet       = ESR_FALSE;
751
752  CHKLOG(rc, ESR_SessionTypeCreate(&impl->parameters));
753  CHKLOG(rc, SR_RecognizerToSessionImpl());
754  CHKLOG(rc, ESR_SessionGetSize_t(L("SREC.Recognizer.osi_log_level"), &impl->osi_log_level));
755
756  /* create the event log */
757  if (impl->osi_log_level) /* do some logging if non-zero val */
758    CHKLOG(rc, ESR_SessionGetProperty(L("eventlog"), (void **)&impl->eventLog, TYPES_SR_EVENTLOG));
759
760  /* Record the OSI log event */
761  psprintf(recHandle, L("%p"), impl);
762  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
763  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrst")));
764
765  CHKLOG(rc, SR_RecognizerFrontendToSessionImpl());
766  CHKLOG(rc, SR_RecognizerCreateFrontendImpl(impl));
767  rc = ESR_SessionGetProperty("recognizer.confidenceScorer", (void **)&impl->confidenceScorer, TYPES_CONFIDENCESCORER);
768  if (rc == ESR_NO_MATCH_ERROR)
769  {
770    impl->confidenceScorer = CA_AllocateConfidenceScorer();
771
772    if (!CA_LoadConfidenceScorer(impl->confidenceScorer)) {
773      rc = ESR_INVALID_STATE;
774      PLogError(ESR_rc2str(rc));
775      goto CLEANUP;
776    }
777    CHKLOG(rc, ESR_SessionSetProperty("recognizer.confidenceScorer", impl->confidenceScorer, TYPES_CONFIDENCESCORER));
778  }
779  else if (rc != ESR_SUCCESS)
780  {
781    PLogError(ESR_rc2str(rc));
782    goto CLEANUP;
783  }
784
785  recogParams = CA_AllocateRecognitionParameters();
786  if (recogParams == NULL)
787  {
788    rc = ESR_OUT_OF_MEMORY;
789    PLogError(ESR_rc2str(rc));
790    goto CLEANUP;
791  }
792  CHKLOG(rc, SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams));
793  impl->recognizer = CA_AllocateRecognition();
794  if (impl->recognizer == NULL)
795  {
796    PLogError(ESR_rc2str(rc));
797    goto CLEANUP;
798  }
799  CA_ConfigureRecognition(impl->recognizer, recogParams);
800  CA_FreeRecognitionParameters(recogParams);
801  CHKLOG(rc, HashMapCreate(&impl->grammars));
802  CHKLOG(rc, CircularBufferCreate(sizeof(asr_int16_t) * AUDIO_CIRC_BUFFER_SIZE, MTAG, &impl->buffer));
803  CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &impl->sampleRate));
804
805  impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
806
807  if ((impl->audioBuffer = MALLOC(impl->FRAME_SIZE, MTAG)) == NULL)
808  {
809    rc = ESR_OUT_OF_MEMORY;
810    goto CLEANUP;
811  }
812
813  /* create the waveform buffer */
814  CHKLOG(rc, WaveformBuffer_Create(&impl->waveformBuffer, impl->FRAME_SIZE));
815
816  CHKLOG(rc, ESR_SessionGetSize_t("SREC.Recognizer.utterance_timeout", &impl->utterance_timeout));
817
818  /* OSI logging (SUCCESS) */
819  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
820  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
821  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd")));
822
823  CHKLOG(rc, SR_AcousticStateCreateImpl(&impl->Interface));
824
825  CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.bgsniff"), &impl->bgsniff));
826  /* gated mode == beginning of speech detection */
827  CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &impl->gatedMode));
828
829  *self = (SR_Recognizer*) impl;
830  return ESR_SUCCESS;
831CLEANUP:
832  /* OSI logging (FAILURE) */
833  if (impl->eventLog != NULL)
834  {
835    SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle);
836    SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("FAILURE"), ESR_rc2str(rc));
837    SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd"));
838  }
839
840  if (recogParams != NULL)
841    CA_FreeRecognitionParameters(recogParams);
842  impl->Interface.destroy(&impl->Interface);
843  return rc;
844}
845
846ESR_ReturnCode SR_RecognizerDestroyImpl(SR_Recognizer* self)
847{
848  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
849  ESR_BOOL exists; // isSetup;
850  ESR_ReturnCode rc;
851  LCHAR recHandle[20] = { 0 };
852
853  if (impl->result != NULL)
854  {
855    SR_RecognizerResult_Destroy(impl->result);
856    impl->result = NULL;
857  }
858
859  if (impl->eventLog != NULL)
860  {
861    /* Record the OSI log event */
862    psprintf(recHandle, L("%p"), impl);
863    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
864    CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesst")));
865  }
866
867  /* Clean session */
868  CHKLOG(rc, ESR_SessionContains("recognizer.confidenceScorer", &exists));
869  if (exists)
870    CHKLOG(rc, ESR_SessionRemoveProperty("recognizer.confidenceScorer"));
871
872  if (impl->confidenceScorer != NULL)
873  {
874    CA_FreeConfidenceScorer(impl->confidenceScorer);
875    impl->confidenceScorer = NULL;
876  }
877
878  /* Clear CMS, CRS_RecognizerClose() */
879  if (impl->wavein != NULL)
880  {
881    ESR_BOOL isAttached, isConfigured;
882
883    CHKLOG(rc, CA_IsCMSAttachedtoUtterance(impl->wavein, &isAttached));
884    if (isAttached)
885      CA_DetachCMSfromUtterance(impl->wavein, impl->utterance);
886
887    CHKLOG(rc, CA_IsConfiguredForAgc(impl->wavein, &isConfigured));
888    if (isConfigured)
889      CA_ClearCMSParameters(impl->wavein);
890  }
891
892  /* Free Utterance */
893  if (impl->utterance != NULL)
894  {
895    CA_ClearUtterance(impl->utterance);
896    CA_FreeUtterance(impl->utterance);
897    impl->utterance = NULL;
898  }
899
900  /* Free WaveformBuffer */
901  if (impl->waveformBuffer != NULL)
902  {
903    WaveformBuffer_Destroy(impl->waveformBuffer);
904    impl->waveformBuffer = NULL;
905  }
906
907  /* Free recognizer */
908/*  CHKLOG(rc, self->isSetup(self, &isSetup));
909  if (isSetup)
910    CHKLOG(rc, self->unsetup(self));*/
911  if (impl->grammars != NULL)
912    CHKLOG(rc, self->deactivateAllRules(self));
913  if (impl->recognizer != NULL)
914  {
915    CA_UnloadRecognitionModels(impl->recognizer);
916    CA_UnconfigureRecognition(impl->recognizer);
917    CA_FreeRecognition(impl->recognizer);
918    impl->recognizer = NULL;
919  }
920
921  if (impl->grammars != NULL)
922  {
923    CHKLOG(rc, HashMapDestroy(impl->grammars));
924    impl->grammars = NULL;
925  }
926
927  if (impl->buffer != NULL)
928  {
929    FREE(impl->buffer);
930    impl->buffer = NULL;
931  }
932
933  if (impl->audioBuffer != NULL)
934  {
935    FREE(impl->audioBuffer);
936    impl->audioBuffer = NULL;
937  }
938
939  /* Free frontend */
940  if (impl->frontend)
941  {
942    CA_UnconfigureFrontend(impl->frontend);
943    CA_FreeFrontend(impl->frontend);
944    impl->frontend = NULL;
945  }
946
947  /* Free wave */
948  if (impl->wavein)
949  {
950    CA_UnconfigureWave(impl->wavein);
951    CA_FreeWave(impl->wavein);
952    impl->wavein = NULL;
953  }
954
955  if (impl->parameters != NULL)
956    CHKLOG(rc, impl->parameters->destroy(impl->parameters));
957
958  if (impl->eventLog != NULL)
959  {
960    /* OSI logging (SUCCESS) */
961    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
962    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
963    CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesnd")));
964    impl->eventLog = NULL;
965  }
966
967  if (impl->acousticState != NULL)
968  {
969    impl->acousticState->destroy(self);
970    impl->acousticState = NULL;
971  }
972  FREE(impl);
973  return ESR_SUCCESS;
974CLEANUP:
975  return rc;
976}
977
978ESR_ReturnCode beginRecognizing(SR_RecognizerImpl* impl)
979{
980  CA_RecInputParams* recogParams;
981  LCHAR tok[80];
982  LCHAR* val;
983  PTimeStamp BORT;
984  size_t i, grammarSize;
985  ESR_ReturnCode rc;
986
987  /* Setup recognizer for new utterance */
988  recogParams = CA_AllocateRecognitionParameters();
989  if (recogParams == NULL)
990  {
991    rc = ESR_OUT_OF_MEMORY;
992    PLogError(ESR_rc2str(rc));
993    goto CLEANUP;
994  }
995  SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams);
996  CA_BeginRecognition(impl->recognizer, NULL, 1, recogParams);
997  CA_FreeRecognitionParameters(recogParams);
998  impl->isRecognizing = ESR_TRUE;
999
1000  /* OSI log the  grammars */
1001  CHKLOG(rc, HashMapGetSize(impl->grammars, &grammarSize));
1002  for (i = 0; i < grammarSize; ++i)
1003  {
1004    psprintf(tok, L("GURI%d"), i);
1005    /* use the key as the grammar URI */
1006    CHKLOG(rc, HashMapGetKeyAtIndex(impl->grammars, i, &val));
1007    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, tok, val));
1008  }
1009  /* OSI ACST acoustic state reset */
1010  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("ACST"), 0));
1011  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("LANG"), L("en-us")));
1012
1013  /* OSI log the start of recognition */
1014  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcst")));
1015
1016  /* save the BORT timing (begin of recog) */
1017  PTimeStampSet(&BORT);
1018  impl->recogLogTimings.BORT = PTimeStampDiff(&BORT, &impl->timestamp);
1019
1020  return ESR_SUCCESS;
1021CLEANUP:
1022  if (recogParams != NULL)
1023    CA_FreeRecognitionParameters(recogParams);
1024  return rc;
1025}
1026
1027ESR_ReturnCode SR_RecognizerStartImpl(SR_Recognizer* self)
1028{
1029  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1030  size_t silence_duration_in_frames;
1031  size_t end_of_utterance_hold_off_in_frames;
1032  size_t grammarCount;
1033  ESR_ReturnCode rc;
1034  ESR_BOOL enableGetWaveform = ESR_FALSE;
1035
1036  CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarCount));
1037  if (impl->models == NULL)
1038  {
1039    PLogError("ESR_INVALID_STATE: No rule has been set up");
1040    return ESR_INVALID_STATE;
1041  }
1042  if (grammarCount < 1)
1043  {
1044    PLogError("ESR_INVALID_STATE: No rule has been activated");
1045    return ESR_INVALID_STATE;
1046  }
1047
1048  if (!CA_OpenWaveFromDevice(impl->wavein, DEVICE_RAW_PCM, impl->frontend->samplerate, 0, WAVE_DEVICE_RAW))
1049  {
1050    rc = ESR_INVALID_STATE;
1051    PLogError(ESR_rc2str(rc));
1052    goto CLEANUP;
1053  }
1054
1055  /* Setup utterance */
1056  CA_UnlockUtteranceForInput(impl->utterance);
1057
1058  /* Setup utterance */
1059  CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.silence_duration_in_frames"), &silence_duration_in_frames));
1060  CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.end_of_utterance_hold_off_in_frames"), &end_of_utterance_hold_off_in_frames));
1061  CA_SetEndOfUtteranceByLevelTimeout(impl->utterance, silence_duration_in_frames, end_of_utterance_hold_off_in_frames);
1062
1063  CA_ResetVoicing(impl->utterance);
1064
1065  /*
1066   * NOTE: We don't actually begin the recognizer here, the beginning of speech
1067   * detector will do that.
1068   */
1069
1070  impl->gotLastFrame = ESR_FALSE;
1071  impl->isStarted = ESR_TRUE;
1072  impl->isRecognizing = ESR_FALSE;
1073  impl->isSignalQualityInitialized = ESR_FALSE;
1074  impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
1075  PTimeStampSet(&impl->timestamp);
1076
1077  /* reset waveform buffer at start of every recognition */
1078  CHKLOG(rc, WaveformBuffer_Reset(impl->waveformBuffer));
1079
1080  /* is waveform buffering active? */
1081  rc = impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
1082  if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR)
1083  {
1084    PLogError(L("%s: could determine whether VoiceEnrollment active or not"), ESR_rc2str(rc));
1085    goto CLEANUP;
1086  }
1087  if (enableGetWaveform)
1088    CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_CIRCULAR));
1089  else
1090    CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_OFF));
1091
1092  /* I am going to try to open the audio waveform file here */
1093  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1094  {
1095    /* open a new audio waveform file */
1096    rc = SR_EventLogAudioOpen(impl->eventLog, L("audio/L16"), impl->sampleRate, SAMPLE_SIZE);
1097    if (rc != ESR_SUCCESS)
1098    {
1099      PLogError(L("%s: could not open the RIFF audio file"), ESR_rc2str(rc));
1100      goto CLEANUP;
1101    }
1102  }
1103  impl->frames = impl->processed = 0;
1104  return ESR_SUCCESS;
1105CLEANUP:
1106/*  self->stop(self);*/
1107  return rc;
1108}
1109
1110ESR_ReturnCode SR_RecognizerStopImpl(SR_Recognizer* self)
1111{
1112  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1113  SR_AcousticModelsImpl* modelsImpl;
1114  ESR_ReturnCode rc;
1115
1116#ifdef MEASURE_SAMPLE_TIMES
1117    SR_Recognizer_Log_Samples_Received ( );
1118#endif
1119
1120  PLOG_DBG_API_ENTER();
1121  if (!impl->isStarted)
1122  {
1123    /* In case the user calls stop() twice */
1124    return ESR_SUCCESS;
1125  }
1126  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1127
1128  /* Clean-up recognizer and utterance */
1129  switch (impl->internalState)
1130  {
1131    case SR_RECOGNIZER_INTERNAL_BEGIN:
1132      /* Recognizer was never started */
1133      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BEGIN")));
1134      CA_LockUtteranceFromInput(impl->utterance);
1135      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1136      if (impl->eventLog != NULL)
1137      {
1138        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BEGIN -> SR_RECOGNIZER_INTERNAL_END")));
1139        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1140        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1141        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1142      }
1143      break;
1144
1145    case SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT:
1146      /* Recognizer was never started */
1147      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_TIMEOUT")));
1148      CA_LockUtteranceFromInput(impl->utterance);
1149      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1150      if (impl->eventLog != NULL)
1151      {
1152        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT -> SR_RECOGNIZER_INTERNAL_END")));
1153        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1154        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1155        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1156      }
1157      break;
1158
1159    case SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH:
1160      /* Recognizer was never started */
1161      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_NO_MATCH")));
1162      CA_LockUtteranceFromInput(impl->utterance);
1163      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1164      if (impl->eventLog != NULL)
1165      {
1166        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH -> SR_RECOGNIZER_INTERNAL_END")));
1167        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1168        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1169        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1170      }
1171      break;
1172
1173    case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
1174      /* Recognizer was never started */
1175      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_DETECTION")));
1176      CA_LockUtteranceFromInput(impl->utterance);
1177      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1178      if (impl->eventLog != NULL)
1179      {
1180        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1181        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1182        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1183        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1184      }
1185      break;
1186
1187    case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
1188      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS_DETECTION")));
1189      CA_LockUtteranceFromInput(impl->utterance);
1190      if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1191      {
1192        rc = ESR_INVALID_STATE;
1193        PLogError(ESR_rc2str(rc));
1194        goto CLEANUP;
1195      }
1196      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1197      if (impl->eventLog != NULL)
1198      {
1199        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1200        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1201        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1202        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1203      }
1204      break;
1205
1206    case SR_RECOGNIZER_INTERNAL_EOI:
1207      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOI")));
1208      CA_LockUtteranceFromInput(impl->utterance);
1209      if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1210      {
1211        rc = ESR_INVALID_STATE;
1212        PLogError(ESR_rc2str(rc));
1213        goto CLEANUP;
1214      }
1215      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1216      if (impl->eventLog != NULL)
1217      {
1218        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOI -> SR_RECOGNIZER_INTERNAL_END")));
1219        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1220        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1221        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1222      }
1223      break;
1224
1225    case SR_RECOGNIZER_INTERNAL_EOS:
1226      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS")));
1227      CA_LockUtteranceFromInput(impl->utterance);
1228      if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1229      {
1230        rc = ESR_INVALID_STATE;
1231        PLogError(ESR_rc2str(rc));
1232        goto CLEANUP;
1233      }
1234      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1235      if (impl->eventLog != NULL)
1236      {
1237        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS -> SR_RECOGNIZER_INTERNAL_END")));
1238        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1239        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1240        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1241      }
1242      break;
1243
1244    case SR_RECOGNIZER_INTERNAL_END:
1245      /* Recognizer already shut down */
1246      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("END")));
1247      break;
1248
1249    default:
1250      /* Shut down recognizer */
1251      CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), impl->internalState));
1252      if (impl->eventLog != NULL)
1253      {
1254        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("unknown state -> SR_RECOGNIZER_INTERNAL_END")));
1255        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1256        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1257        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1258      }
1259      CA_LockUtteranceFromInput(impl->utterance);
1260      if (impl->isRecognizing)
1261      {
1262        if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1263        {
1264          rc = ESR_INVALID_STATE;
1265          PLogError(ESR_rc2str(rc));
1266          goto CLEANUP;
1267        }
1268      }
1269      rc = ESR_INVALID_STATE;
1270      PLogError(L("%s: %d"), ESR_rc2str(rc), impl->internalState);
1271      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1272      goto CLEANUP;
1273  }
1274  if (impl->eventLog != NULL)
1275  {
1276    int n;
1277    LCHAR result[MAX_ENTRY_LENGTH];
1278    result[0] = L('\0');
1279
1280    n = CA_GetUnprocessedFramesInUtterance(impl->utterance);
1281    CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CA_GetUnprocessedFramesInUtterance() (x10ms)"), n));
1282    CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
1283    CHKLOG(rc, SR_EventLogToken(impl->eventLog, L("CA_FullResultLabel() (x20ms)"), result));
1284    n = CircularBufferGetSize(impl->buffer);
1285    CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CircularBufferGetSize() (samples)"), n / SAMPLE_SIZE));
1286  }
1287  if (impl->lockFunction)
1288    impl->lockFunction(ESR_LOCK, impl->lockData);
1289  CircularBufferReset(impl->buffer);
1290  if (impl->lockFunction)
1291    impl->lockFunction(ESR_UNLOCK, impl->lockData);
1292  if (CA_RecognitionHasResults(impl->recognizer))
1293    CA_ClearResults(impl->recognizer);
1294  CA_FlushUtteranceFrames(impl->utterance);
1295  CA_CalculateCMSParameters(impl->wavein);
1296  CA_CloseDevice(impl->wavein);
1297
1298  /* record the OSI event */
1299  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIstop")));
1300
1301  if (impl->result != NULL)
1302  {
1303    CHKLOG(rc, SR_RecognizerResult_Destroy(impl->result));
1304    impl->result = NULL;
1305  }
1306
1307  if (impl->lockFunction)
1308    impl->lockFunction(ESR_LOCK, impl->lockData);
1309  impl->gotLastFrame = ESR_TRUE;
1310  PLOG_DBG_TRACE((L("SR_Recognizer shutdown occured")));
1311  impl->isStarted = ESR_FALSE;
1312  impl->isRecognizing = ESR_FALSE;
1313  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1314    SR_EventLogAudioClose(impl->eventLog);
1315
1316  impl->recogLogTimings.BORT = 0;
1317  impl->recogLogTimings.DURS = 0;
1318  impl->recogLogTimings.EORT = 0;
1319  impl->recogLogTimings.EOSD = 0;
1320  impl->recogLogTimings.EOSS = 0;
1321  impl->recogLogTimings.BOSS = 0;
1322  impl->recogLogTimings.EOST = 0;
1323  impl->eos_reason = L("undefined");
1324
1325  if (impl->lockFunction)
1326    impl->lockFunction(ESR_UNLOCK, impl->lockData);
1327  PLOG_DBG_API_EXIT(rc);
1328  return rc;
1329CLEANUP:
1330  PLOG_DBG_API_EXIT(rc);
1331  return rc;
1332}
1333
1334ESR_ReturnCode SR_RecognizerSetupImpl(SR_Recognizer* self)
1335{
1336  ESR_ReturnCode rc;
1337  CA_AcoustInputParams* acousticParams = NULL;
1338  SR_AcousticModelsImpl* modelsImpl;
1339  SR_AcousticModels* models;
1340  SR_RecognizerImpl* recogImpl = NULL;
1341  CA_Acoustic* acoustic;
1342  size_t size, i;
1343  LCHAR           filenames[P_PATH_MAX];
1344  size_t          len;
1345
1346  len = P_PATH_MAX;
1347  CHKLOG(rc, ESR_SessionGetLCHAR ( L("cmdline.modelfiles"), filenames, &len ));
1348
1349  CHKLOG(rc, SR_AcousticModelsLoad ( filenames, &models ));
1350
1351  if (models == NULL)
1352    {
1353      PLogError(L("ESR_INVALID_STATE while finding cmdline.modelfiles"));
1354      return ESR_INVALID_STATE;
1355    }
1356  modelsImpl = (SR_AcousticModelsImpl*) models;
1357  recogImpl = (SR_RecognizerImpl*) self;
1358  acousticParams = NULL;
1359
1360  CHKLOG(rc, SR_AcousticModelsGetCount(models, &size));
1361  acousticParams = CA_AllocateAcousticParameters();
1362  if (acousticParams == NULL)
1363      {
1364      rc = ESR_OUT_OF_MEMORY;
1365      PLogError(ESR_rc2str(rc));
1366      goto CLEANUP;
1367      }
1368    CHKLOG(rc, modelsImpl->getLegacyParameters(acousticParams));
1369    CHKLOG(rc, ArrayListGetSize(modelsImpl->acoustic, &size));
1370    for (i = 0; i < size; ++i)
1371      {
1372      CHKLOG(rc, ArrayListGet(modelsImpl->acoustic, i, (void **)&acoustic));
1373      CA_LoadModelsInAcoustic(recogImpl->recognizer, acoustic, acousticParams);
1374      }
1375  CA_FreeAcousticParameters(acousticParams);
1376
1377  recogImpl->models = models;
1378  CHKLOG(rc, modelsImpl->setupPattern(recogImpl->models, self));
1379  return ESR_SUCCESS;
1380 CLEANUP:
1381  if (acousticParams != NULL)
1382    CA_FreeAcousticParameters(acousticParams);
1383  if (recogImpl != NULL)
1384    CA_UnloadRecognitionModels(recogImpl->recognizer);
1385  return rc;
1386}
1387
1388ESR_ReturnCode SR_RecognizerUnsetupImpl(SR_Recognizer* self)
1389{
1390  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1391  SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1392  ESR_ReturnCode rc;
1393
1394  CHKLOG(rc, modelsImpl->unsetupPattern(impl->models));
1395  CA_UnloadRecognitionModels(impl->recognizer);
1396  CHKLOG(rc, SR_AcousticModelsDestroy ( impl->models ));
1397  impl->models = NULL;
1398  return ESR_SUCCESS;
1399 CLEANUP:
1400  return rc;
1401}
1402
1403ESR_ReturnCode SR_RecognizerIsSetupImpl(SR_Recognizer* self, ESR_BOOL* isSetup)
1404{
1405  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1406
1407  if (isSetup == NULL)
1408  {
1409    PLogError(L("ESR_INVALID_ARGUMENT"));
1410    return ESR_INVALID_ARGUMENT;
1411  }
1412  *isSetup = impl->models != NULL;
1413  return ESR_SUCCESS;
1414}
1415
1416ESR_ReturnCode SR_RecognizerGetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1417    LCHAR* value, size_t* len)
1418{
1419  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1420  ESR_ReturnCode rc;
1421
1422  rc = impl->parameters->getLCHAR(impl->parameters, key, value, len);
1423  if (rc == ESR_NO_MATCH_ERROR)
1424  {
1425    CHKLOG(rc, ESR_SessionGetLCHAR(key, value, len));
1426    return ESR_SUCCESS;
1427  }
1428  else if (rc != ESR_SUCCESS)
1429  {
1430    PLogError(ESR_rc2str(rc));
1431    goto CLEANUP;
1432  }
1433  return ESR_SUCCESS;
1434CLEANUP:
1435  return rc;
1436}
1437
1438/*
1439 * The get / set code is a mess. Since we only use size_t parameters, that's all
1440 * that I am going to make work. The impl->parameters don't work so you always
1441 * have to get them from the session. The impl always logs an error. SteveR
1442 */
1443
1444ESR_ReturnCode SR_RecognizerGetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1445    size_t* value)
1446{
1447  ESR_ReturnCode rc;
1448
1449  CHKLOG(rc, ESR_SessionGetSize_t(key, value));
1450  return ESR_SUCCESS;
1451CLEANUP:
1452  return rc;
1453}
1454
1455ESR_ReturnCode SR_RecognizerGetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value)
1456{
1457  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1458  ESR_ReturnCode rc;
1459
1460  rc = impl->parameters->getBool(impl->parameters, key, value);
1461  if (rc == ESR_NO_MATCH_ERROR)
1462  {
1463    CHKLOG(rc, ESR_SessionGetBool(key, value));
1464    return ESR_SUCCESS;
1465  }
1466  else if (rc != ESR_SUCCESS)
1467  {
1468    PLogError(ESR_rc2str(rc));
1469    goto CLEANUP;
1470  }
1471  return ESR_SUCCESS;
1472CLEANUP:
1473  return rc;
1474}
1475
1476ESR_ReturnCode SR_RecognizerSetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1477    LCHAR* value)
1478{
1479  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1480  LCHAR temp[256];
1481  ESR_ReturnCode rc;
1482  size_t len = 256;
1483
1484  rc = impl->parameters->getLCHAR(impl->parameters, key, temp, &len);
1485  if (rc == ESR_SUCCESS)
1486  {
1487    if (LSTRCMP(temp, value) == 0)
1488      return ESR_SUCCESS;
1489    CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1490  }
1491  else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1492  {
1493    PLogError(ESR_rc2str(rc));
1494    goto CLEANUP;
1495  }
1496
1497  CHKLOG(rc, impl->parameters->setLCHAR(impl->parameters, key, value));
1498  return ESR_SUCCESS;
1499CLEANUP:
1500  return rc;
1501}
/*
 * The only set-param function that is working is the one for size_t parameters, and
 * not all of them are working: only the ones specified in the function itself. There
 * are two reasons for this: first, most of the set functions just put the value in an
 * unused table that has no effect; second, many of the changes need to be propagated
 * to a specific part of the code. This needs to be evaluated on a per-parameter basis.
 * SteveR
 */
1509
/*
 * This function will be used to set parameters in the session. We need to go through
 * the recognizer so as to propagate the values into the recognizer. We will rely on
 * the session to do the right thing. SteveR
 */
1515
1516ESR_ReturnCode SR_RecognizerSetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1517    size_t value)
1518{
1519  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1520  ESR_ReturnCode rc;
1521
1522  rc = ESR_SessionSetSize_t ( key, value );
1523
1524  if (rc == ESR_SUCCESS)
1525  {
1526    if  ( LSTRCMP ( L("SREC.Recognizer.utterance_timeout"), key ) == 0 )
1527    {
1528      impl->utterance_timeout = value;
1529    }
1530    else if  ( LSTRCMP ( L("CREC.Recognizer.terminal_timeout"), key ) == 0 )
1531    {
1532      impl->recognizer->eosd_parms->endnode_timeout = value;
1533    }
1534    else if  ( LSTRCMP ( L("CREC.Recognizer.optional_terminal_timeout"), key ) == 0 )
1535    {
1536      impl->recognizer->eosd_parms->optendnode_timeout = value;
1537    }
1538    else if  ( LSTRCMP ( L("CREC.Recognizer.non_terminal_timeout"), key ) == 0 )
1539    {
1540      impl->recognizer->eosd_parms->internalnode_timeout = value;
1541    }
1542    else if  ( LSTRCMP ( L("CREC.Recognizer.eou_threshold"), key ) == 0 )
1543    {
1544      impl->recognizer->eosd_parms->eos_costdelta = (frameID)value;
1545      impl->recognizer->eosd_parms->opt_eos_costdelta = (frameID)value;
1546    }
1547    else
1548    {
1549      PLogError(L("ESR_INVALID_ARGUMENT"));
1550      rc = ESR_INVALID_ARGUMENT;
1551    }
1552  }
1553  return rc;
1554}
1555
1556
1557ESR_ReturnCode SR_RecognizerSetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value)
1558{
1559  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1560  ESR_BOOL temp;
1561  ESR_ReturnCode rc;
1562
1563  rc = impl->parameters->getBool(impl->parameters, key, &temp);
1564  if (rc == ESR_SUCCESS)
1565  {
1566    if (temp == value)
1567      return ESR_SUCCESS;
1568    CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1569  }
1570  else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1571    return rc;
1572
1573  CHKLOG(rc, impl->parameters->setBool(impl->parameters, key, value));
1574  return ESR_SUCCESS;
1575CLEANUP:
1576  return rc;
1577}
1578
1579ESR_ReturnCode SR_RecognizerHasSetupRulesImpl(SR_Recognizer* self, ESR_BOOL* hasSetupRules)
1580{
1581  SR_RecognizerImpl* recogImpl = (SR_RecognizerImpl*) self;
1582  size_t size;
1583  ESR_ReturnCode rc;
1584
1585  if (hasSetupRules == NULL)
1586  {
1587    PLogError(L("ESR_INVALID_ARGUMENT"));
1588    return ESR_INVALID_ARGUMENT;
1589  }
1590  CHKLOG(rc, HashMapGetSize(recogImpl->grammars, &size));
1591  *hasSetupRules = size > 0;
1592  return ESR_SUCCESS;
1593CLEANUP:
1594  return rc;
1595}
1596
1597ESR_ReturnCode SR_RecognizerActivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1598    const LCHAR* ruleName, unsigned int weight)
1599{
1600  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1601  SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1602  SR_AcousticModelsImpl* modelsImpl;
1603  LCHAR grammarID[80];
1604  ESR_ReturnCode rc;
1605  char *failure_reason = NULL;
1606
1607  if (grammar == NULL)
1608  {
1609    if (impl->eventLog)
1610      failure_reason = "badinput";
1611    rc = ESR_INVALID_ARGUMENT;
1612    PLogError(L("ESR_INVALID_ARGUMENT"));
1613    goto CLEANUP;
1614  }
1615
1616  if (impl->models == NULL)
1617  {
1618    failure_reason = "nomodels";
1619    rc = ESR_INVALID_STATE;
1620    PLogError(L("acoustic models must be configured"));
1621    goto CLEANUP;
1622  }
1623
1624  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1625
1626  if (ruleName == NULL)
1627    psprintf(grammarID, L("%p"), grammar);
1628  else
1629  {
1630    if (LSTRLEN(ruleName) > 80)
1631    {
1632      rc = ESR_BUFFER_OVERFLOW;
1633      PLogError(ESR_rc2str(rc));
1634      goto CLEANUP;
1635    }
1636    LSTRCPY(grammarID, ruleName);
1637  }
1638
1639  CHKLOG(rc, HashMapPut(impl->grammars, grammarID, grammar));
1640  if (CA_SetupSyntaxForRecognizer(grammarImpl->syntax, impl->recognizer))
1641  {
1642    failure_reason = "cafailed";
1643    rc = ESR_INVALID_STATE;
1644    PLogError(L("ESR_INVALID_STATE"));
1645    goto CLEANUP;
1646  }
1647
1648   CHKLOG(rc, SR_Grammar_SetupRecognizer(grammar, self));
1649  grammarImpl->isActivated = ESR_TRUE;
1650
1651  /*
1652   * If we want to log dynamically added words, then we must give the grammar a reference
1653   * to our event log. The grammar logs word additions if and only if its reference to
1654   * eventLog is non-null.
1655   */
1656  if (impl->osi_log_level & OSI_LOG_LEVEL_ADDWD)
1657    grammarImpl->eventLog = impl->eventLog;
1658  else
1659    grammarImpl->eventLog = NULL;
1660
1661  rc = ESR_SUCCESS;
1662
1663CLEANUP:
1664  if (impl->eventLog)
1665  {
1666    if (failure_reason)
1667    {
1668      SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar);
1669      SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1670      SR_EventLogToken(impl->eventLog, L("rslt"), "fail");
1671      SR_EventLogToken(impl->eventLog, L("reason"), failure_reason);
1672      SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1673    }
1674    else
1675    {
1676      SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar);
1677      SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1678      SR_EventLogToken(impl->eventLog, L("rslt"), "ok");
1679      SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1680    }
1681  }
1682  return rc;
1683}
1684
1685ESR_ReturnCode SR_RecognizerDeactivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1686    const LCHAR* ruleName)
1687{
1688  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1689  SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1690  LCHAR grammarID[MAX_INT_DIGITS+1];
1691  ESR_ReturnCode rc;
1692
1693  if (ruleName == NULL)
1694  {
1695    psprintf(grammarID, L("%p"), grammar);
1696    CHKLOG(rc, HashMapRemove(impl->grammars, grammarID));
1697  }
1698  else
1699    CHKLOG(rc, HashMapRemove(impl->grammars, ruleName));
1700  grammarImpl->isActivated = ESR_FALSE;
1701  return ESR_SUCCESS;
1702CLEANUP:
1703  return rc;
1704}
1705
1706ESR_ReturnCode SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer* self)
1707{
1708  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1709  ESR_ReturnCode rc;
1710
1711  CHKLOG(rc, HashMapRemoveAll(impl->grammars));
1712  CA_ClearSyntaxForRecognizer(0, impl->recognizer);
1713  return ESR_SUCCESS;
1714CLEANUP:
1715  return rc;
1716}
1717
1718ESR_ReturnCode SR_RecognizerIsActiveRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1719    const LCHAR* ruleName, ESR_BOOL* isActiveRule)
1720{
1721  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1722  LCHAR grammarID[MAX_INT_DIGITS+1];
1723  ESR_ReturnCode rc;
1724
1725  psprintf(grammarID, L("%p"), grammar);
1726  CHKLOG(rc, HashMapContainsKey(impl->grammars, (LCHAR*) &grammarID, isActiveRule));
1727  return ESR_SUCCESS;
1728CLEANUP:
1729  return rc;
1730}
1731
1732ESR_ReturnCode SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer* self, SR_Grammar* grammar)
1733{
1734  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1735  SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*)grammar;
1736  int iRc;
1737
1738  if(!impl || !grammarImpl)
1739    return ESR_INVALID_ARGUMENT;
1740  iRc = CA_CeilingSyntaxForRecognizer( grammarImpl->syntax, impl->recognizer);
1741  if(iRc) return ESR_INVALID_STATE;
1742
1743  return ESR_SUCCESS;
1744}
1745
1746ESR_ReturnCode SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer* self, SR_Grammar* grammar,
1747    ESR_BOOL* isConsistent)
1748{
1749  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1750  SR_GrammarImpl* grammarImpl;
1751  SR_RecognizerImpl* impl2;
1752
1753
1754  grammarImpl = (SR_GrammarImpl*) grammar;
1755  impl2 = (SR_RecognizerImpl*)grammarImpl->recognizer;
1756  // *isConsistent = grammarImpl->models == impl->models;
1757  *isConsistent = (impl2->models == impl->models);
1758  return ESR_SUCCESS;
1759}
1760
1761ESR_ReturnCode SR_RecognizerGetModelsImpl(SR_Recognizer* self, SR_AcousticModels** pmodels)
1762{
1763  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1764  *pmodels = impl->models;
1765  return ESR_SUCCESS;
1766}
1767
1768ESR_ReturnCode SR_RecognizerPutAudioImpl(SR_Recognizer* self, asr_int16_t* buffer, size_t* bufferSize,
1769    ESR_BOOL isLast)
1770{
1771  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1772  ESR_ReturnCode rc;
1773  int    rcBufWrite;
1774  size_t nbWritten;
1775
1776#ifdef MEASURE_SAMPLE_TIMES
1777    if ( sample_buffers_received < MAX_SAMPLES_TO_MEASURE )
1778        {
1779        gettimeofday ( &buffer_received_time, NULL );
1780        seconds_buffer_received [sample_buffers_received] = buffer_received_time.tv_sec;
1781        micro_seconds_buffer_received [sample_buffers_received] = buffer_received_time.tv_usec;
1782        samples_in_buffer [sample_buffers_received] = *bufferSize;
1783        total_samples_received += *bufferSize;
1784        sample_buffers_received++;
1785        }
1786#endif
1787
1788  if (isLast == ESR_FALSE && (buffer == NULL || bufferSize == NULL))
1789  {
1790    PLogError(L("ESR_INVALID_ARGUMENT"));
1791    return ESR_INVALID_ARGUMENT;
1792  }
1793
1794  if (impl->lockFunction)
1795    impl->lockFunction(ESR_LOCK, impl->lockData);
1796  if (!impl->isStarted)
1797  {
1798    if (impl->lockFunction)
1799      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1800    PLogMessage(L("ESR_INVALID_STATE: Tried pushing audio while recognizer was offline"));
1801    return ESR_INVALID_STATE;
1802  }
1803  if (impl->gotLastFrame)
1804  {
1805    if (impl->lockFunction)
1806      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1807    PLogMessage(L("ESR_INVALID_STATE: isLast=TRUE"));
1808    return ESR_INVALID_STATE;
1809  }
1810  if (buffer == NULL && isLast == ESR_FALSE)
1811  {
1812    if (impl->lockFunction)
1813      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1814    PLogError(L("ESR_INVALID_ARGUMENT: got NULL  buffer on non-terminal frame"));
1815    return ESR_INVALID_ARGUMENT;
1816  }
1817
1818  rcBufWrite = CircularBufferWrite(impl->buffer, buffer, *bufferSize * SAMPLE_SIZE);
1819  if (rcBufWrite < 0)
1820  {
1821    rc = ESR_INVALID_STATE;
1822    PLogError(L("%s: error writing to buffer (buffer=%d, available=%u)"), ESR_rc2str(rc), (int) impl->buffer, CircularBufferGetAvailable(impl->buffer));
1823    goto CLEANUP;
1824  }
1825
1826  nbWritten = (size_t)rcBufWrite;
1827  if (nbWritten % SAMPLE_SIZE != 0)
1828  {
1829    size_t amountUnwritten;
1830
1831    /* The buffer is byte-based while we're sample based. Make sure we write entire samples or not at all */
1832    amountUnwritten = CircularBufferUnwrite(impl->buffer, nbWritten % SAMPLE_SIZE);
1833    passert(amountUnwritten == nbWritten % SAMPLE_SIZE);
1834    nbWritten -= amountUnwritten;
1835  }
1836  passert(nbWritten % 2 == 0); /* make sure CircularBufferSize is divisible by 2 */
1837
1838  if (nbWritten < *bufferSize * SAMPLE_SIZE)
1839  {
1840    rc = ESR_BUFFER_OVERFLOW;
1841#ifndef NDEBUG
1842    PLOG_DBG_TRACE((L("%s: writing to circular buffer"), ESR_rc2str(rc)));
1843#endif
1844    *bufferSize = nbWritten / SAMPLE_SIZE;
1845    if (impl->lockFunction)
1846      impl->lockFunction(ESR_UNLOCK, impl->lockData);
1847    goto CLEANUP;
1848  }
1849  if (impl->lockFunction)
1850    impl->lockFunction(ESR_UNLOCK, impl->lockData);
1851
1852  if (isLast)
1853    impl->gotLastFrame = ESR_TRUE;
1854  return ESR_SUCCESS;
1855CLEANUP:
1856  return rc;
1857}
1858
1859/* utility function to sort the ArrayList of nbest list results by the score of the first
1860   semantic result */
1861ESR_ReturnCode SemanticResults_SortByScore(ArrayList *results, size_t nbestSize)
1862{
1863  ESR_ReturnCode rc;
1864  ArrayList* semanticResultList;
1865  ArrayList* semanticResultList_swap;
1866  SR_SemanticResult* semanticResult_i;
1867  SR_SemanticResult* semanticResult_j;
1868  size_t i, j;
1869  LCHAR scoreStr[MAX_ENTRY_LENGTH] ;
1870  size_t scoreStrLen = MAX_ENTRY_LENGTH ;
1871  int score_i, score_j;
1872
1873  /* bubble sort */
1874  for (i = 0; i < (size_t)nbestSize; ++i)
1875  {
1876    for (j = i + 1; j < (size_t)nbestSize; ++j)
1877    {
1878      /* get for i */
1879      CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList)); /* nbest index */
1880      CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_i));      /* semresult 0 */
1881
1882      /* get for j */
1883      CHKLOG(rc, ArrayListGet(results, j, (void **)&semanticResultList)); /* nbest index */
1884      CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_j));      /* semresult 0 */
1885
1886      scoreStrLen = MAX_ENTRY_LENGTH ;
1887      CHKLOG(rc, semanticResult_i->getValue(semanticResult_i, "raws", scoreStr, &scoreStrLen));
1888      CHKLOG(rc, lstrtoi(scoreStr, &score_i, 10));
1889      scoreStrLen = MAX_ENTRY_LENGTH ;
1890      CHKLOG(rc, semanticResult_j->getValue(semanticResult_j, "raws", scoreStr, &scoreStrLen));
1891      CHKLOG(rc, lstrtoi(scoreStr, &score_j, 10));
1892
1893      if (score_j < score_i)
1894      {
1895        /* need to swap */
1896        CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList_swap)); /* put i in swap */
1897        CHKLOG(rc, ArrayListSet(results, i, semanticResultList));       /* put j in i    */
1898        CHKLOG(rc, ArrayListSet(results, j, semanticResultList_swap));  /* put swap in j */
1899      }
1900    }
1901  }
1902  return ESR_SUCCESS;
1903CLEANUP:
1904  return rc;
1905}
1906
1907ESR_ReturnCode filter_CA_FullResultLabel(const LCHAR* label, LCHAR *filtered_label, size_t* boss, size_t* eoss)
1908{
1909  ESR_ReturnCode rc;
1910  enum
1911  {
1912    NO_COPY,
1913    FRAME,
1914    WORD,
1915  } filter_state = WORD;
1916  LCHAR *dst = filtered_label;
1917  LCHAR eosBuf[16]; /* max 9999 + '\0' */
1918  LCHAR bosBuf[16]; /* max 9999 + '\0' */
1919  LCHAR* pBuf = NULL;
1920
1921  /**
1922   * example: you want to filter this:
1923   *
1924   * "-pau-@23 clock@97 twenty_four@125 hour@145  "
1925   *        ^boss = 23                       ^ eoss = 145
1926   * and get this:
1927   *
1928   * "clock twenty_four hour"
1929   */
1930
1931  passert(LSTRLEN(label) > 0);
1932  while (*label)
1933  {
1934    switch (filter_state)
1935    {
1936      case NO_COPY:
1937        if (*label == L(' '))
1938          filter_state = WORD;
1939        else if (*label == L('@'))
1940        {
1941          filter_state = FRAME;
1942          if (pBuf == NULL)
1943            pBuf = bosBuf;
1944          else
1945          {
1946            *pBuf = 0;
1947            pBuf = eosBuf;
1948          }
1949        }
1950        break;
1951      case WORD:
1952        if (*label == L('@'))
1953        {
1954          *dst = L(' '); /* insert space */
1955          dst++;
1956          filter_state = FRAME;
1957          if (pBuf == NULL)
1958            pBuf = bosBuf;
1959          else
1960          {
1961            *pBuf = 0;
1962            pBuf = eosBuf;
1963          }
1964        }
1965        else
1966        {
1967          *dst = *label;
1968          dst++;
1969        }
1970        break;
1971      case FRAME:
1972        if (*label == L(' '))
1973          filter_state = WORD;
1974        else
1975        {
1976          *pBuf = *label;
1977          pBuf++;
1978        }
1979        break;
1980    }
1981    label++;
1982  }
1983  *dst = 0; /* term the string */
1984  *pBuf = 0; /* term the string */
1985
1986  /* trim the end spaces */
1987  dst--;
1988  while (*dst == ' ')
1989    *dst-- = '\0';
1990
1991  /* set the eos signal indicated by the end pointed data */
1992  if (eosBuf[0] != 0)
1993    CHKLOG(rc, lstrtoui(eosBuf, eoss, 10));
1994  else
1995    eoss = 0;
1996
1997  if (bosBuf[0] != 0)
1998    CHKLOG(rc, lstrtoui(bosBuf, boss, 10));
1999  else
2000    boss = 0;
2001
2002  return ESR_SUCCESS;
2003CLEANUP:
2004  return rc;
2005}
2006
/**
 * Populates the recognizer result if it can; otherwise it returns NO MATCH because no results exist.
 *
 * INPUT STATE: SR_RECOGNIZER_INTERNAL_EOS
 *
 * @param self SR_Recognizer handle
 * @todo break up into smaller functions
 */
2015ESR_ReturnCode SR_RecognizerCreateResultImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
2016    SR_RecognizerResultType* type)
2017{
2018  LCHAR label[MAX_ENTRY_LENGTH * 2];  /* run out of buffer */
2019#define WORDID_COUNT 48 /* can be quite high for voice enrollment! */
2020  wordID wordIDs[WORDID_COUNT];
2021  LCHAR tok[80];
2022  LCHAR waveformFilename[P_PATH_MAX];
2023  LCHAR* pkey;
2024  SR_GrammarImpl* pgrammar;
2025  asr_int32_t raws; /* raw score */
2026  size_t iBest, nbestSize, jBest, k, grammarSize, semanticResultsSize, grammarIndex_for_iBest;
2027  LCHAR* lValue;
2028  LCHAR* lValue2;
2029  int confValue;
2030  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
2031  SR_RecognizerResultImpl* resultImpl = (SR_RecognizerResultImpl*) impl->result;
2032  ESR_BOOL containsKey;
2033  int valid, score, recogID;
2034  LCHAR result[MAX_ENTRY_LENGTH];
2035  size_t len, size;
2036  size_t locale;
2037  int current_choice;
2038
2039  /**
2040   * Semantic result stuff
2041   */
2042  /* a temp buffer to hold semantic results of a parse (there may be several results) */
2043  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
2044  ArrayList* semanticList;
2045  ArrayList* semanticList2;
2046  SR_SemanticResultImpl* semanticImpl;
2047  SR_SemanticResultImpl* semanticImpl2;
2048  SR_SemanticResult* semanticResult;
2049  SR_SemanticResult* semanticResult2;
2050  waveform_buffering_state_t buffering_state;
2051
2052  SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2053  ESR_ReturnCode rc;
2054  PTimeStamp EORT;
2055
2056  CA_LockUtteranceFromInput(impl->utterance);
2057  if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
2058  {
2059    PLogError(L("ESR_INVALID_STATE"));
2060    return ESR_INVALID_STATE;
2061  }
2062
2063  /* check if the forward search was successful */
2064  valid = CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
2065  CA_GetRecogID(impl->recognizer, &recogID);
2066  CA_FullResultScore(impl->recognizer, &score, 1);
2067#ifdef SREC_ENGINE_VERBOSE_LOGGING
2068  PLogMessage(L("R: %s type %d score %d from recognizer%d"), result, type, score, valid, recogID);
2069  PLogMessage(L("R: %s score %d from recognizer%d"), result, score, valid, recogID);
2070#endif
2071#ifdef _WIN32
2072  //pfprintf(PSTDOUT, ("R: %s type %d score %d from recognizer%d\n"), result, type, score, valid, recogID);
2073#endif
2074
2075
2076  switch (valid)
2077  {
2078    case FULL_RESULT:
2079      CHKLOG(rc, filter_CA_FullResultLabel(result, label, &impl->recogLogTimings.BOSS, &impl->recogLogTimings.EOSS));
2080#ifdef SREC_ENGINE_VERBOSE_LOGGING
2081      PLogMessage("R: %s", result);
2082#endif
2083      CA_FullResultScore(impl->recognizer, (int*) &raws, 0);
2084#ifdef SREC_ENGINE_VERBOSE_LOGGING
2085      PLogMessage("S: %d", raws);
2086#endif
2087
2088      /* now that we have an endpointed result, we can parse the result transcription
2089         to see where speech started and ended. Then we can trim off excess parts of the
2090         recorded audio waveform (if exists) so that nametags are just the right amount of
2091         audio
2092      */
2093      CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
2094      if (buffering_state != WAVEFORM_BUFFERING_OFF)
2095      {
2096        CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &size));
2097        if (size > 0)
2098        {
2099          rc = WaveformBuffer_ParseEndPointedResultAndTrim(impl->waveformBuffer, result, impl->FRAME_SIZE);
2100          if (rc == ESR_BUFFER_OVERFLOW)
2101          {
2102            /* Nametag EOS occured beyond end of buffer */
2103          }
2104          else if (rc != ESR_SUCCESS)
2105          {
2106            PLogError(ESR_rc2str(rc));
2107            goto CLEANUP;
2108          }
2109        }
2110      }
2111      break;
2112
2113    case REJECT_RESULT:
2114#ifdef SREC_ENGINE_VERBOSE_LOGGING
2115      PLogMessage(L("R: <REJECTED>"));
2116#endif
2117      break;
2118    default:
2119#ifdef SREC_ENGINE_VERBOSE_LOGGING
2120      PLogMessage(L("E: No results available"));
2121      PLogMessage(L("R: <FAILED>"));
2122#endif
2123      break;
2124  }
2125
2126
2127  if (valid == FULL_RESULT)
2128  {
2129    /* Populate SR_RecognizerResult */
2130    resultImpl->nbestList = CA_PrepareNBestList(impl->recognizer, 10, &raws);
2131    if (resultImpl->nbestList == NULL)
2132    {
2133      /*
2134       * This is not a failure. It simply means that I have not advanced far
2135       * enough in recognition in order to obtain results (no paths in
2136       * graph). This occurs, for instance, when a eof is reached (no more data)
2137       * and I have not even created any paths in my graph.
2138       */
2139
2140      *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2141      *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2142      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2143      if (impl->eventLog != NULL)
2144      {
2145        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2146        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2147        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2148        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2149      }
2150      passert(0);
2151      return ESR_SUCCESS;
2152    }
2153
2154    nbestSize = CA_NBestListCount(resultImpl->nbestList);
2155  }
2156  else
2157    nbestSize = 0;
2158
2159  if (resultImpl->results != NULL)
2160    ArrayListRemoveAll(resultImpl->results);
2161  else
2162    CHKLOG(rc, ArrayListCreate(&resultImpl->results));
2163  if (nbestSize == 0)
2164  {
2165    /*
2166     * Got empty n-best list even though the recognition was successful.
2167     * We handle this in the same way that recog_startpt does... we consider it a no match.
2168     * We could adjust the CREC.Recognizer.viterbi_prune_thresh to a higher level, but that
2169     * may not fix the problem completely. We need to fix the bug in the astar search!!!
2170     */
2171    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2172    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2173    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2174    if (impl->eventLog != NULL)
2175    {
2176      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2177      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2178      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2179      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2180    }
2181#ifdef SREC_ENGINE_VERBOSE_LOGGING
2182    PLogMessage(L("ESR_INVALID_STATE: got empty n-best list even though the recognition was successful"));
2183#endif
2184    return ESR_SUCCESS; /* we do not want to halt the app in this case */
2185  }
2186  else
2187  {
2188    *status = SR_RECOGNIZER_EVENT_RECOGNITION_RESULT;
2189    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2190    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2191    if (impl->eventLog != NULL)
2192    {
2193      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2194      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2195      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2196      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2197    }
2198  }
2199
2200  /**
2201   * All grammars associated with the recognizer are considered to be active
2202   * and therefore, I do a semantic parse on each. On the first grammar that
2203   * gives one or more semantic results, I stop parsing the other grammars.
2204   */
2205  CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarSize));
2206  ASSERT( grammarSize == 1);
2207
2208  for (iBest = 0; iBest < nbestSize; ++iBest)
2209  {
2210    len = WORDID_COUNT;
2211    if (CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) != ESR_SUCCESS)
2212    {
2213      *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2214      *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2215      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2216      if (impl->eventLog != NULL)
2217      {
2218        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2219        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2220        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2221        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2222      }
2223      PLogError(L("ESR_INVALID_STATE: got bad n-best list entry %d"), iBest);
2224      return ESR_INVALID_STATE;
2225    }
2226
2227    CHKLOG(rc, ArrayListCreate(&semanticList));
2228    CHKLOG(rc, resultImpl->results->add(resultImpl->results, semanticList));
2229
2230    grammarIndex_for_iBest = 0;
2231    CHKLOG(rc, impl->grammars->getKeyAtIndex(impl->grammars, grammarIndex_for_iBest, &pkey));
2232    CHKLOG(rc, impl->grammars->get(impl->grammars, pkey, (void **)&pgrammar));
2233
2234    CHKLOG(rc, SR_GrammarGetSize_tParameter((SR_Grammar*) pgrammar, L("locale"), &locale));
2235    resultImpl->locale = locale;
2236
2237    /* I need to manage my semantic results external to the check parse function */
2238    for (k = 0; k < MAX_SEM_RESULTS; ++k)
2239      SR_SemanticResultCreate(&semanticResults[k]);
2240
2241    /*
2242       The code here tries to make the voice-enrollment more effective.
2243       The VE grammar decodes a sequence of best phonemes, but the nbest
2244       processing may find a better score for an alternative choice than
2245       the score of the viterbi best choice.  The reason for this is that
2246       alternative choices don't honor cross-word context-dependency quite
2247       accurately.  If we choose an alternative choice then the sequence of
2248       phoneme decoded does not correspond to the sequence of models decoded.
2249       To counter this, we FORCIBLY make sure the top choice here is the
2250       VITERBI top choice.
2251    */
2252
2253    if (iBest == 0)
2254      {
2255        if (CA_IsEnrollmentSyntax( pgrammar->syntax)) {
2256          /* this was voice enrollment, so let's try to replace */
2257          // 	char* word1 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[0]);
2258          // char* word2 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[1]);
2259          // if (!strncmp(word1,voice_enroll_word_prefix,VEWPLEN)&&!strncmp(word2,voice_enroll_word_prefix,VEWPLEN))
2260          len = WORDID_COUNT;
2261          rc = CA_FullResultWordIDs(impl->recognizer, wordIDs, &len);
2262          if (rc != ESR_SUCCESS)
2263            {
2264              /* in case of problem with viterbi path choice, we revert back */
2265              len = WORDID_COUNT;
2266              rc = CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) ;
2267            }
2268        }
2269      }
2270
2271    LSTRCPY(label, L(""));
2272    for (k = 0; wordIDs[k] != MAXwordID; ++k)
2273      {
2274        LCHAR* wordk = NULL;
2275        wordk = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[k]);
2276        LSTRCAT(label, wordk);
2277        LSTRCAT(label, L(" "));
2278      }
2279    CHKLOG(rc, CA_ResultStripSlotMarkers(label));
2280    passert(LSTRCMP(label, L("")) != 0);
2281
2282    /* strip the trailing blank */
2283    k = LSTRLEN(label) - 1;
2284    if (k > 0 && label[k] == L(' '))
2285      label[k] = 0;
2286
2287    semanticResultsSize = MAX_SEM_RESULTS;
2288
2289#if SEMPROC_ACTIVE
2290
2291    /* set the literal prior to processing so that semproc can read the value
2292       during processing */
2293    CHKLOG(rc, pgrammar->semproc->flush(pgrammar->semproc));
2294    CHKLOG(rc, pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), label));
2295
2296    rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
2297                                               wordIDs, semanticResults, &semanticResultsSize);
2298
2299    /* rc = pgrammar->semproc->checkParse(pgrammar->semproc, pgrammar->semgraph,
2300       label, semanticResults, &semanticResultsSize); */
2301
2302    if (rc != ESR_SUCCESS)
2303      {
2304        for (k = 0; k < MAX_SEM_RESULTS; ++k)
2305          {
2306            semanticResults[k]->destroy(semanticResults[k]);
2307            semanticResults[k] = NULL;
2308          }
2309        goto CLEANUP;
2310      }
2311#else
2312    semanticResultsSize = 0;
2313#endif
2314    /* cleanup the empty ones */
2315    for (k = semanticResultsSize; k < MAX_SEM_RESULTS; ++k)
2316      {
2317        CHKLOG(rc, semanticResults[k]->destroy(semanticResults[k]));
2318        semanticResults[k] = NULL;
2319      }
2320
2321    /* save the good ones */
2322    for (k = 0; k < semanticResultsSize; ++k)
2323      {
2324        /*
2325         * Save the pointer to the semantic result that was created.
2326         * Remember that the semantic result array only holds pointers
2327         * and for each time that the function is called, new semantic results
2328         * are created, and the pointers overwrite old values in the array
2329         */
2330        CHKLOG(rc, semanticList->add(semanticList, semanticResults[k]));
2331      }
2332
2333#if SEMPROC_ACTIVE
2334    if (semanticResultsSize > 0)
2335      {
2336        /* OSI log the grammar(s) that was used in recognizing */
2337        psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2338        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2339      }
2340#else
2341    /* OSI log the grammar(s) that was used in recognizing */
2342    psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2343    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2344#endif
2345
2346    /* Populate semantic results for each nbest list entry */
2347    CHKLOG(rc, semanticList->getSize(semanticList, &semanticResultsSize));
2348    if (semanticResultsSize == 0)
2349    {
2350      /*
2351       * If there was no semantic result... then I need to create one so that I can store
2352       * literal, conf, meaning which are default keys that must ALWAYS exist
2353       */
2354      CHKLOG(rc, SR_SemanticResultCreate(&semanticResult));
2355      CHKLOG(rc, semanticList->add(semanticList, semanticResult));
2356      semanticResultsSize = 1;
2357    }
2358
2359    for (k = 0; k < semanticResultsSize;++k)
2360    {
2361      CHKLOG(rc, semanticList->get(semanticList, k, (void **)&semanticResult));
2362      if (semanticResult == NULL)
2363      {
2364        PLogError(L("nbest entry contained NULL semanticResult"), ESR_INVALID_STATE);
2365        return ESR_INVALID_STATE;
2366      }
2367
2368      semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2369
2370      /* put in the literal */
2371      lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2372      if (lValue == NULL)
2373      {
2374        PLogError(L("ESR_OUT_OF_MEMORY"));
2375        return ESR_OUT_OF_MEMORY;
2376      }
2377      LSTRCPY(lValue, label);
2378      CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("literal"), lValue));
2379
2380      /* if the meaning is not set, then put in the meaning which will be the literal */
2381      CHKLOG(rc, semanticImpl->results->containsKey(semanticImpl->results, L("meaning"), &containsKey));
2382      if (!containsKey)
2383      {
2384        lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2385        if (lValue == NULL)
2386        {
2387          PLogError(L("ESR_OUT_OF_MEMORY"));
2388          return ESR_OUT_OF_MEMORY;
2389        }
2390        LSTRCPY(lValue, label);
2391        CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("meaning"), lValue));
2392      }
2393
2394      /* put in the raw score */
2395      psprintf(label, L("%d"), raws);
2396      lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2397      if (lValue == NULL)
2398      {
2399        PLogError(L("ESR_OUT_OF_MEMORY"));
2400        return ESR_OUT_OF_MEMORY;
2401      }
2402      LSTRCPY(lValue, label);
2403      CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("raws"), lValue));
2404    }
2405  }
2406
2407  /* Now I have an nBest list where each entry has at least one semantic result */
2408  /* What I need to do is filter out the nBest list entries which have matching
2409     semantic results for 'meaning' */
2410  /* Once I have filtered out the nBest list based on this criteria, I can calculate the confidence
2411     score and populate the result of the first entry with the raw score */
2412
2413#if FILTER_NBEST_BY_SEM_RESULT
2414
2415  for (iBest = nbestSize-1; iBest>0; iBest--) /* do not filter out nBest entry 0 */
2416  {
2417    /**
2418     * This is the entry (indexed by i) targeted for removal
2419     *
2420     */
2421
2422    /* get the nBest entry which you wish to remove (if duplicate found) */
2423    CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void **)&semanticList));
2424
2425    /* get the first sem_result for the entry */
2426    CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2427    semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2428
2429    /* get the meaning */
2430    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2431
2432    /* get the other entries to check against (start with 0, end on the current i entry) */
2433    for (jBest = 0; jBest < iBest; ++jBest)
2434    {
2435      /*
2436       * This is the entry (indexed by jBest) that we will compare with
2437       */
2438
2439      /* get the nBest entry which you wish to compare with */
2440      CHKLOG(rc, ArrayListGet(resultImpl->results, jBest, (void **)&semanticList2));
2441
2442      CHKLOG(rc, ArrayListGet(semanticList2, 0, (void **)&semanticResult2));
2443      semanticImpl2 = (SR_SemanticResultImpl*) semanticResult2;
2444
2445      CHKLOG(rc, semanticImpl2->results->get(semanticImpl2->results, L("meaning"), (void **)&lValue2));
2446      if (LSTRCMP(lValue, lValue2) == 0)
2447      {
2448        /* pfprintf(PSTDOUT,"duplicate sem result found %d == %d\n", iBest, jBest);
2449        pfprintf(PSTDOUT,"removing %d\n", iBest); */
2450
2451        /* removing from the list indexed by iBest */
2452        CHKLOG(rc, semanticList->remove(semanticList, semanticResult));
2453        CHKLOG(rc, semanticResult->destroy(semanticResult));
2454
2455        CHKLOG(rc, resultImpl->results->remove(resultImpl->results, semanticList));
2456        CHKLOG(rc, semanticList->destroy(semanticList));
2457
2458        if (!CA_NBestListRemoveResult(resultImpl->nbestList, iBest))
2459          return ESR_ARGUMENT_OUT_OF_BOUNDS;
2460        break;
2461      }
2462    }
2463  }
2464  nbestSize = CA_NBestListCount(resultImpl->nbestList);
2465#endif
2466
2467  CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2468
2469  if (nbestSize)
2470  {
2471   if(CA_ComputeConfidenceValues(impl->confidenceScorer, impl->recognizer, resultImpl->nbestList))
2472        return ESR_INVALID_STATE;
2473
2474   for(current_choice=nbestSize-1;current_choice>=0;current_choice--)
2475   {
2476    /* get the nBest entry you want to deal with */
2477    CHKLOG(rc, ArrayListGet(resultImpl->results, current_choice, (void **)&semanticList));
2478    /* get the first sem_result for that entry */
2479    CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2480    semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2481
2482    /* put in the conf value for that nBest entry */
2483    if(!CA_NBestListGetResultConfidenceValue( resultImpl->nbestList, current_choice, &confValue))
2484      return ESR_ARGUMENT_OUT_OF_BOUNDS;
2485
2486    psprintf(label, L("%d"), confValue);
2487    lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2488      if (lValue == NULL)
2489      {
2490        PLogError(L("ESR_OUT_OF_MEMORY"));
2491        return ESR_OUT_OF_MEMORY;
2492      }
2493      LSTRCPY(lValue, label);
2494      CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("conf"),lValue));
2495    }
2496  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("CMPT"), 0));
2497  }
2498
2499  /* OSI log the end of recognition and all bufferred tokens */
2500
2501  /* OSI log end of recognition time */
2502  PTimeStampSet(&EORT);
2503  impl->recogLogTimings.EORT = PTimeStampDiff(&EORT, &impl->timestamp);
2504  impl->recogLogTimings.DURS = impl->processed * MSEC_PER_FRAME;
2505
2506  /*****************************************/
2507  /* OSI Logging stuff */
2508  /*****************************************/
2509if( impl->osi_log_level != 0)
2510 {
2511  /* get the nBest size (this size may have changed since previous set cuz of nbest list filtering) */
2512  CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2513  /* OSI log the nBest list size */
2514  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("NBST"), nbestSize));
2515
2516
2517  for (iBest = 0; iBest < nbestSize; iBest++) /* loop */
2518  {
2519    /* get the nBest entry */
2520    CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void**)&semanticList));
2521
2522    /* get the first sem_result for the entry (ther emay be many, but ignore others) */
2523    CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2524    semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2525
2526    /* get the meaning and OSI log it */
2527    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2528    /* OSI log RSLT (meaning) for nbest item */
2529    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSLT"), lValue));
2530
2531    /* get the literal and OSI log it */
2532    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("literal"), (void **)&lValue));
2533    /* OSI log RAWT SPOK (literal) for nbest item */
2534    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWT"), lValue));
2535    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SPOK"), lValue));
2536
2537    /* get the score and OSI log it */
2538    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("raws"), (void **)&lValue));
2539    /* OSI log RAWS (score) for nbest item */
2540    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWS"), lValue));
2541    /* get the confidence value and OSI log it */
2542    CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("conf"), (void **)&lValue));
2543    /* OSI log CONF (values) for nbest item */
2544    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("CONF"), lValue));
2545  }
2546
2547  /* log the values */
2548  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BORT"), impl->recogLogTimings.BORT));
2549  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("DURS"), impl->recogLogTimings.DURS));
2550  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EORT"), impl->recogLogTimings.EORT));
2551  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSD"), impl->recogLogTimings.EOSD));
2552  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSS"), impl->recogLogTimings.EOSS));
2553  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOST"), impl->recogLogTimings.EOST));
2554  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2555  {
2556    len = P_PATH_MAX;
2557    CHKLOG(rc, SR_EventLogAudioGetFilename(impl->eventLog, waveformFilename, &len));
2558    CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("WVNM"), waveformFilename));
2559  }
2560  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSTT"), L("ok")));
2561  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RENR"), L("ok")));
2562  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("ENDR"), impl->eos_reason));
2563  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcnd")));
2564
2565  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSS"), impl->recogLogTimings.BOSS)); /* extra not in OSI spec */
2566  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRboss")));
2567
2568  /*
2569   * Record which recognizer was the successful one (male or female)
2570   * this index refers to the order in the swimdllist file.
2571   */
2572  CHKLOG(rc, CA_GetRecogID(impl->recognizer, &recogID));
2573  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("RECOG"), recogID));
2574  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRrcid")));
2575
2576  /* Record semantic results returned by top nbestlist entry */
2577  if (1)
2578  {
2579#define MAX_SEMANTIC_KEYS 50
2580    LCHAR* semanticKeys[MAX_SEMANTIC_KEYS];
2581#define SEMANTIC_VALUE_SIZE 512
2582    LCHAR semanticValue[SEMANTIC_VALUE_SIZE];
2583    size_t num_semanticKeys;
2584
2585    rc = resultImpl->results->getSize(resultImpl->results, &nbestSize);
2586    if (rc != ESR_SUCCESS)
2587    {
2588      PLogError(ESR_rc2str(rc));
2589      goto DONE;
2590    }
2591    for (iBest = 0; iBest < nbestSize; ++iBest) /* loop2 */
2592    {
2593      rc = resultImpl->results->get(resultImpl->results, iBest, (void **)&semanticList);
2594      if (rc != ESR_SUCCESS)
2595      {
2596        PLogError(ESR_rc2str(rc));
2597        goto DONE;
2598      }
2599
2600	  /* semanticResultsSize is the number of semantic meanings, although
2601		 ambiguous parses are not entirely supported
2602		 num_semanticKeys    is associated to a particular parse         */
2603
2604      rc = semanticList->getSize(semanticList, &semanticResultsSize);
2605      if (rc != ESR_SUCCESS)
2606      {
2607        PLogError(ESR_rc2str(rc));
2608        goto DONE;
2609      }
2610      for (k = 0; k < semanticResultsSize; ++k)
2611      {
2612		size_t iKey;
2613        rc = semanticList->get(semanticList, k, (void **)&semanticResult);
2614        if (rc != ESR_SUCCESS)
2615        {
2616          PLogError(ESR_rc2str(rc));
2617          goto DONE;
2618        }
2619        num_semanticKeys = MAX_SEMANTIC_KEYS;
2620        rc = semanticResult->getKeyList(semanticResult, (LCHAR**) & semanticKeys, &num_semanticKeys);
2621        if (rc != ESR_SUCCESS)
2622        {
2623          PLogError(ESR_rc2str(rc));
2624          goto DONE;
2625        }
2626
2627        for (iKey=0; iKey<num_semanticKeys; ++iKey)
2628        {
2629          len = SEMANTIC_VALUE_SIZE;
2630          rc = semanticResult->getValue(semanticResult, semanticKeys[iKey], (LCHAR*) &semanticValue, &len);
2631          if (rc != ESR_SUCCESS)
2632          {
2633            PLogError(ESR_rc2str(rc));
2634            goto DONE;
2635          }
2636
2637          rc = SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, semanticKeys[iKey], semanticValue);
2638          if (rc != ESR_SUCCESS)
2639          {
2640            PLogError(ESR_rc2str(rc));
2641            goto DONE;
2642          }
2643        }
2644      }
2645    }
2646    rc = SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESR_SemanticResult[0]"));
2647    if (rc != ESR_SUCCESS)
2648    {
2649      PLogError(ESR_rc2str(rc));
2650      goto DONE;
2651    }
2652  }
2653}
2654DONE:
2655  return ESR_SUCCESS;
2656CLEANUP:
2657  impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2658  return rc;
2659}
2660
2661/**
2662 * Indicates if it is possible to push data from SREC into the internal recognizer.
2663 * If data can be pushed, ESR_CONTINUE_PROCESSING is returned.
2664 *
2665 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2666 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI
2667 */
2668PINLINE ESR_ReturnCode canPushAudioIntoRecognizer(SR_RecognizerImpl* impl)
2669{
2670  ESR_ReturnCode rc;
2671
2672  if (impl->lockFunction)
2673    impl->lockFunction(ESR_LOCK, impl->lockData);
2674
2675  /* do I have enough to make a frame ? */
2676  if (CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2677  {
2678    /* Not enough data */
2679    if (!impl->gotLastFrame)
2680    {
2681      /* not last frame, so ask for more audio */
2682      if (impl->lockFunction)
2683        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2684      return ESR_SUCCESS;
2685    }
2686    else
2687    {
2688      /* last frame, make do with what you have */
2689      if (impl->lockFunction)
2690        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2691#ifdef SREC_ENGINE_VERBOSE_LOGGING
2692      PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2693#endif
2694      impl->isRecognizing = ESR_FALSE;
2695      impl->recogLogTimings.EOSD = impl->frames;
2696      impl->eos_reason = L("EOI");
2697      impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2698      if (impl->eventLog != NULL)
2699      {
2700        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_EOI")));
2701        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2702        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2703        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2704      }
2705      return ESR_CONTINUE_PROCESSING;
2706    }
2707  }
2708  if (impl->lockFunction)
2709    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2710  return ESR_CONTINUE_PROCESSING;
2711CLEANUP:
2712  return rc;
2713}
2714
2715/**
2716 * Pushes data from SREC into the internal recognizer.
2717 *
2718 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2719 * OUTPUT STATES: same
2720 */
2721PINLINE ESR_ReturnCode pushAudioIntoRecognizer(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2722    SR_RecognizerResultType* type,
2723    SR_RecognizerResult* result)
2724{
2725  size_t count;
2726  ESR_ReturnCode rc;
2727
2728  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2729  {
2730    /* Don't push frames unless they're needed */
2731
2732    /* Check for leaked state */
2733    passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
2734    return ESR_CONTINUE_PROCESSING;
2735  }
2736  if (impl->lockFunction)
2737    impl->lockFunction(ESR_LOCK, impl->lockData);
2738  count = CircularBufferRead(impl->buffer, impl->audioBuffer, impl->FRAME_SIZE);
2739  if (impl->lockFunction)
2740    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2741
2742  WaveformBuffer_Write(impl->waveformBuffer, impl->audioBuffer, count);
2743  if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2744  {
2745    rc = SR_EventLogAudioWrite(impl->eventLog, impl->audioBuffer, count);
2746    if (rc == ESR_BUFFER_OVERFLOW)
2747      rc = ESR_INVALID_STATE;
2748    if (rc != ESR_SUCCESS)
2749    {
2750      PLogError(ESR_rc2str(rc));
2751      if (impl->lockFunction)
2752        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2753      goto CLEANUP;
2754    }
2755  }
2756  if (count < impl->FRAME_SIZE)
2757  {
2758    rc = ESR_INVALID_STATE;
2759    PLogError(L("%s: error reading buffer data (count=%d, frameSize=%d)"), ESR_rc2str(rc), count, impl->FRAME_SIZE);
2760    goto CLEANUP;
2761  }
2762  if (!CA_LoadSamples(impl->wavein, impl->audioBuffer, impl->sampleRate / FRAMERATE))
2763  {
2764    PLogError(L("ESR_INVALID_STATE"));
2765    rc = ESR_INVALID_STATE;
2766    goto CLEANUP;
2767  }
2768
2769  CA_ConditionSamples(impl->wavein);
2770  /* Check for leaked state */
2771  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
2772  return ESR_CONTINUE_PROCESSING;
2773CLEANUP:
2774  return rc;
2775}
2776
2777/**
2778 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2779 * OUTPUT STATES: same
2780 */
2781PINLINE ESR_ReturnCode generateFrameFromAudio(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2782    SR_RecognizerResultType* type,
2783    SR_RecognizerResult* result)
2784{
2785  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2786  {
2787    /* Don't create frames unless they're needed */
2788
2789    /* Check for leaked state */
2790    passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
2791    return ESR_CONTINUE_PROCESSING;
2792  }
2793
2794  /* Try processing one frame */
2795  if (!CA_MakeFrame(impl->frontend, impl->utterance, impl->wavein))
2796  {
2797    /*
2798    * One of three cases occured:
2799    *
2800    * - We don't have enough samples to process one frame. This should be impossible because
2801    * pushAudioIntoRecognizer() is always called before us and will not continue if we don't
2802    * have enough samples.
2803    *
2804    * - The internal recognizer needs a minimum amount of audio before it'll begin generating
2805    *   frames. This is normal and we return with a success value.
2806    *
2807    * - The recognizer skips every even frame number (for performance reasons). This is normal
2808    *   and we return with a success value.
2809    */
2810    *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2811    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2812    return ESR_SUCCESS;
2813  }
2814  ++impl->frames;
2815  /* Check for leaked state */
2816  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
2817  return ESR_CONTINUE_PROCESSING;
2818}
2819
2820/**
2821 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2822 * OUTPUT STATES: same
2823 */
2824PINLINE ESR_ReturnCode generateFrameStats(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2825                           SR_RecognizerResultType* type,
2826                           SR_RecognizerResult* result)
2827{
2828  if (impl->frames < impl->bgsniff)
2829  {
2830    /* Wait until we have enough frames to estimate background stats */
2831    *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2832    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2833    return ESR_SUCCESS;
2834  }
2835  else if (impl->frames == impl->bgsniff)
2836    CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->bgsniff);
2837
2838  /* Check for leaked state */
2839  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
2840  return ESR_CONTINUE_PROCESSING;
2841}
2842
2843/**
2844 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2845 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS
2846 */
2847PINLINE ESR_ReturnCode generatePatternFromFrame(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2848    SR_RecognizerResultType* type,
2849    SR_RecognizerResult* result)
2850{
2851  SR_AcousticModelsImpl* modelsImpl;
2852  ESR_ReturnCode rc;
2853
2854  /* Run the search */
2855  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2856  if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2857  {
2858    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2859    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2860    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2861    if (impl->eventLog != NULL)
2862    {
2863      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_END")));
2864      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2865      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2866      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2867    }
2868    PLogError(L("ESR_INVALID_STATE"));
2869    return ESR_INVALID_STATE;
2870  }
2871  if (!CA_AdvanceUtteranceFrame(impl->utterance))
2872  {
2873    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2874    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2875    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2876    if (impl->eventLog != NULL)
2877    {
2878      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_END")));
2879      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2880      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2881      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2882    }
2883    PLogError(L("ESR_INVALID_STATE"));
2884    return ESR_INVALID_STATE;
2885  }
2886  CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2887  ++impl->processed;
2888
2889  if (impl->lockFunction)
2890    impl->lockFunction(ESR_LOCK, impl->lockData);
2891  if (impl->gotLastFrame && CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2892  {
2893    /*
2894     * SREC have run out of data but the underlying recognizer might have some frames
2895     * queued for processing.
2896     */
2897    if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0)
2898    {
2899      /* EOI means end of input */
2900#ifdef SREC_ENGINE_VERBOSE_LOGGING
2901      PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2902#endif
2903      impl->isRecognizing = ESR_FALSE;
2904      impl->recogLogTimings.EOSD = impl->frames;
2905      impl->eos_reason = L("EOI");
2906      impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2907      if (impl->eventLog != NULL)
2908      {
2909        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOI")));
2910        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2911        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2912        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2913      }
2914    }
2915    else
2916    {
2917#ifdef SREC_ENGINE_VERBOSE_LOGGING
2918      PLogMessage("L: Voicing END (EOF) at %d frames (%d processed)", impl->frames, impl->processed);
2919#endif
2920
2921      impl->isRecognizing = ESR_FALSE;
2922      impl->recogLogTimings.EOSD = impl->frames;
2923      impl->eos_reason = L("EOF");
2924      impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2925      if (impl->eventLog != NULL)
2926      {
2927        CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOS")));
2928        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2929        CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2930        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2931      }
2932      *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2933      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2934      passert(impl->processed == impl->frames);
2935      if (impl->lockFunction)
2936        impl->lockFunction(ESR_UNLOCK, impl->lockData);
2937      return ESR_SUCCESS;
2938    }
2939  }
2940  if (impl->lockFunction)
2941    impl->lockFunction(ESR_UNLOCK, impl->lockData);
2942
2943  /* Check for leaked state */
2944  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
2945  return ESR_CONTINUE_PROCESSING;
2946CLEANUP:
2947  return rc;
2948}
2949
2950/**
2951 * Same as generatePatternFromFrame() only the buffer is known to be empty.
2952 *
2953 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI
2954 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2955 */
2956PINLINE ESR_ReturnCode generatePatternFromFrameEOI(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2957    SR_RecognizerResultType* type,
2958    SR_RecognizerResult* result)
2959{
2960  SR_AcousticModelsImpl* modelsImpl;
2961  ESR_ReturnCode rc;
2962
2963  /* Run the search */
2964  modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2965
2966  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2967  {
2968    passert(impl->processed == impl->frames);
2969    *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2970    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2971    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2972    return ESR_SUCCESS;
2973  }
2974
2975  if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2976  {
2977    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2978    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2979    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2980    if (impl->eventLog != NULL)
2981    {
2982      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2983      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2984      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2985      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2986    }
2987    PLogError(L("ESR_INVALID_STATE"));
2988    return ESR_INVALID_STATE;
2989  }
2990  if (!CA_AdvanceUtteranceFrame(impl->utterance))
2991  {
2992    *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2993    *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2994    impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2995    if (impl->eventLog != NULL)
2996    {
2997      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2998      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2999      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
3000      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
3001    }
3002    PLogError(L("ESR_INVALID_STATE"));
3003    return ESR_INVALID_STATE;
3004  }
3005  CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
3006  ++impl->processed;
3007
3008  if (impl->lockFunction)
3009    impl->lockFunction(ESR_LOCK, impl->lockData);
3010
3011  if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3012  {
3013    passert(impl->processed == impl->frames);
3014    *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
3015    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3016    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
3017    if (impl->eventLog != NULL)
3018    {
3019      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_EOS")));
3020      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
3021      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
3022      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
3023    }
3024    if (impl->lockFunction)
3025      impl->lockFunction(ESR_UNLOCK, impl->lockData);
3026    return ESR_SUCCESS;
3027  }
3028  if (impl->lockFunction)
3029    impl->lockFunction(ESR_UNLOCK, impl->lockData);
3030
3031  /* Check for leaked state */
3032  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
3033  return ESR_CONTINUE_PROCESSING;
3034CLEANUP:
3035  if (impl->lockFunction)
3036    impl->lockFunction(ESR_UNLOCK, impl->lockData);
3037  return rc;
3038}
3039
3040
3041/**
3042 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
3043 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
3044 */
3045ESR_ReturnCode detectEndOfSpeech(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
3046                                 SR_RecognizerResultType* type,
3047                                 SR_RecognizerResult* result)
3048{
3049  EOSrc eos; /* eos means end of speech */
3050  int eos_by_level; /* eos means end of speech */
3051  PTimeStamp timestamp;
3052  ESR_ReturnCode rc;
3053  ESR_BOOL enableGetWaveform = ESR_FALSE;
3054
3055  eos_by_level = CA_UtteranceHasEnded(impl->utterance);
3056  if (eos_by_level)
3057  {
3058    eos = SPEECH_ENDED_BY_LEVEL_TIMEOUT;
3059  }
3060  else
3061  {
3062    eos = CA_IsEndOfUtteranceByResults(impl->recognizer);
3063  }
3064
3065  impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
3066
3067  if (eos == VALID_SPEECH_CONTINUING && enableGetWaveform && impl->waveformBuffer->overflow_count > 0)
3068  {
3069    size_t bufferSize;
3070    CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &bufferSize));
3071    PLogMessage("Forcing EOS due to wfbuf overflow (fr=%d,sz=%d,of=%d)", impl->frames, bufferSize, impl->waveformBuffer->overflow_count);
3072    eos = SPEECH_TOO_LONG;
3073  }
3074
3075  if (eos != VALID_SPEECH_CONTINUING)
3076  {
3077    switch (eos)
3078    {
3079      case SPEECH_ENDED:
3080        /* normal */
3081        impl->eos_reason = L("itimeout");
3082        break;
3083
3084      case SPEECH_ENDED_WITH_ERROR:
3085        /* error */
3086        impl->eos_reason = L("err");
3087        break;
3088
3089      case SPEECH_TOO_LONG:
3090        /* timeout*/
3091        impl->eos_reason = L("ctimeout");
3092        break;
3093
3094      case SPEECH_MAYBE_ENDED:
3095        /* normal */
3096        impl->eos_reason = L("itimeout");
3097        break;
3098      case SPEECH_ENDED_BY_LEVEL_TIMEOUT:
3099        /* normal */
3100        impl->eos_reason = L("levelTimeout");
3101        break;
3102
3103      default:
3104        /* error */
3105        impl->eos_reason = L("err");
3106    }
3107
3108#ifdef SREC_ENGINE_VERBOSE_LOGGING
3109    PLogMessage("L: Voicing END (EOS) at %d frames, %d processed (reason: %s)\n", impl->frames, impl->processed, impl->eos_reason);
3110#endif
3111
3112    impl->recogLogTimings.EOSD = impl->frames; /* how many frames have been sent prior to detect EOS */
3113    PTimeStampSet(&timestamp); /* time it took to detect EOS (in millisec) */
3114    impl->recogLogTimings.EOST = PTimeStampDiff(&timestamp, &impl->timestamp);
3115
3116    *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
3117    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3118    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
3119    if (impl->eventLog != NULL)
3120    {
3121      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("detectEndOfSpeech() -> SR_RECOGNIZER_INTERNAL_EOS")));
3122      CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("reason"), impl->eos_reason));
3123      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
3124      CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
3125      CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
3126    }
3127    impl->isRecognizing = ESR_FALSE;
3128    return ESR_SUCCESS;
3129  }
3130
3131  /* Check for leaked state */
3132  passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_INVALID);
3133  return ESR_CONTINUE_PROCESSING;
3134CLEANUP:
3135  return rc;
3136}
3137
3138/**
3139 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION
3140 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOI
3141 */
3142ESR_ReturnCode detectBeginningOfSpeech(SR_RecognizerImpl* impl,
3143                                       SR_RecognizerStatus* status,
3144                                       SR_RecognizerResultType* type,
3145                                       SR_RecognizerResult* result)
3146{
3147  ESR_ReturnCode rc;
3148  ESR_BOOL gatedMode;
3149  size_t num_windback_bytes, num_windback_frames;
3150  waveform_buffering_state_t buffering_state;
3151
3152  CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &gatedMode));
3153
3154  if (gatedMode || (!gatedMode && impl->frames < impl->bgsniff))
3155  {
3156    ESR_BOOL pushable = ESR_FALSE;
3157
3158    rc = canPushAudioIntoRecognizer(impl);
3159    if (rc == ESR_SUCCESS)
3160    {
3161      /* Not enough samples to process one frame */
3162      if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3163      {
3164        *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
3165        *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3166        return ESR_SUCCESS;
3167      }
3168    }
3169    else if (rc != ESR_CONTINUE_PROCESSING)
3170      return rc;
3171    else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3172    {
3173      /* Got end of input before beginning of speech */
3174      *status = SR_RECOGNIZER_EVENT_NO_MATCH;
3175      *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
3176      impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH;
3177      CHKLOG(rc, impl->Interface.stop(&impl->Interface));
3178      return ESR_SUCCESS;
3179    }
3180    else
3181      pushable = ESR_TRUE;
3182    if (pushable)
3183    {
3184      rc = pushAudioIntoRecognizer(impl, status, type, result);
3185      /* OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI */
3186      if (rc != ESR_CONTINUE_PROCESSING)
3187      {
3188        /* Not enough samples to process one frame */
3189        return rc;
3190      }
3191      rc = generateFrameFromAudio(impl, status, type, result);
3192      /* OUTPUT STATES: same */
3193      if (rc != ESR_CONTINUE_PROCESSING)
3194      {
3195        /*
3196         * The internal recognizer needs a minimum amount of audio before
3197         * it begins generating frames.
3198         */
3199        return rc;
3200      }
3201    }
3202    if (!CA_AdvanceUtteranceFrame(impl->utterance))
3203    {
3204      PLogError(L("ESR_INVALID_STATE: Failed Advancing Utt Frame %d"), impl->frames);
3205      return ESR_INVALID_STATE;
3206    }
3207    if (CA_UtteranceHasVoicing(impl->utterance))
3208    {
3209      /* Utterance stats for Lombard if enough frames */
3210      if (impl->frames > impl->bgsniff)
3211      {
3212#ifdef SREC_ENGINE_VERBOSE_LOGGING
3213        PLogMessage("L:  Voicing START at %d frames", impl->frames);
3214#endif
3215        /* OSI log the endpointed data */
3216
3217        CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BTIM"), impl->frames * MSEC_PER_FRAME));
3218        CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BRGN"), 0)); /* Barge-in not supported */
3219        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIendp")));
3220
3221        CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSD"), impl->frames));
3222        CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRbosd")));
3223
3224        if (gatedMode)
3225          CA_CalculateUtteranceStatistics(impl->utterance, (int)(impl->frames * -1), 0);
3226        else
3227          CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->frames);
3228      }
3229
3230      /* OK, we've got voicing or the end of input has occured
3231      ** (or both, I suppose).  If we had voicing then progress
3232      ** the recognizer, otherwise skip to the end.
3233      ** Of course, we could be running outside 'Gated Mode'
3234      ** so we won't have any frames processed at all yet -
3235      ** in this case start the recognizer anyway.
3236      */
3237
3238      /*************************************
3239       ** Run recognition until endOfInput **
3240       *************************************/
3241
3242      /*
3243       * Initialize both recognizers first
3244       * and disable reporting of results
3245       */
3246      if (gatedMode)
3247      {
3248        /*
3249         * We're in Gated Mode -
3250         * Because we'll have had voicing we wind-back
3251         * until the start of voicing (unsure region)
3252         */
3253        num_windback_frames = CA_SeekStartOfUtterance(impl->utterance);
3254        impl->beginningOfSpeechOffset = impl->frames - num_windback_frames;
3255        num_windback_bytes = num_windback_frames * impl->FRAME_SIZE * 2 /* due to skip even frames */;
3256
3257        /* pfprintf(PSTDOUT,L("audio buffer windback %d frames == %d bytes\n"), num_windback_frames, num_windback_bytes); */
3258        CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
3259        if (buffering_state != WAVEFORM_BUFFERING_OFF)
3260          CHKLOG(rc, WaveformBuffer_WindBack(impl->waveformBuffer, num_windback_bytes));
3261
3262        /*
3263         * Only transition to linear if it was previously circular (in other words if
3264         * buffering was active in the first place)
3265         */
3266        if (buffering_state == WAVEFORM_BUFFERING_ON_CIRCULAR)
3267          CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_LINEAR));
3268        impl->frames = CA_GetUnprocessedFramesInUtterance(impl->utterance);
3269      }
3270      else
3271        impl->frames = 0;
3272      /* reset the frames */
3273      impl->processed = 0;
3274      CHKLOG(rc, beginRecognizing(impl));
3275      impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
3276      *status = SR_RECOGNIZER_EVENT_START_OF_VOICING;
3277      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3278      return ESR_SUCCESS;
3279    }
3280    else
3281    {
3282      if (impl->frames > impl->utterance_timeout)
3283      {
3284        /* beginning of speech timeout */
3285        impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT;
3286        *status = SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT;
3287        *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
3288        CHKLOG(rc, impl->Interface.stop(&impl->Interface));
3289        return ESR_SUCCESS;
3290      }
3291    }
3292  }
3293  else if (!gatedMode && impl->frames >= impl->bgsniff)
3294  {
3295    /*
3296    * If not gated mode and I have processed enough frames, then start the recognizer
3297    * right away.
3298    */
3299    impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
3300    *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3301    *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3302
3303    /* reset the frames */
3304    impl->frames = impl->processed = 0;
3305    CHKLOG(rc, beginRecognizing(impl));
3306    return ESR_SUCCESS;
3307  }
3308  *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3309  *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3310  return ESR_SUCCESS;
3311
3312CLEANUP:
3313  return rc;
3314}
3315
3316ESR_ReturnCode SR_RecognizerAdvanceImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
3317                                        SR_RecognizerResultType* type,
3318                                        SR_RecognizerResult** result)
3319{
3320  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3321  ESR_BOOL pushable;
3322  ESR_ReturnCode rc;
3323
3324  if (status == NULL || type == NULL || result == NULL)
3325  {
3326    PLogError(L("ESR_INVALID_ARGUMENT"));
3327    return ESR_INVALID_ARGUMENT;
3328  }
3329
3330  /* create the result holder and save the pointer */
3331  /* creation only happens once (due to the if condition) */
3332  if (impl->result == NULL)
3333    CHKLOG(rc, SR_RecognizerResult_Create(&impl->result, impl));
3334  *result = impl->result;
3335
3336  /*
3337   * The following two lines are used to detect bugs whereby we forget to set
3338   * status or type before returning
3339   */
3340  *status = SR_RECOGNIZER_EVENT_INVALID;
3341  *type = SR_RECOGNIZER_INVALID;
3342
3343MOVE_TO_NEXT_STATE:
3344  switch (impl->internalState)
3345  {
3346    case SR_RECOGNIZER_INTERNAL_BEGIN:
3347      impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_DETECTION;
3348      *status = SR_RECOGNIZER_EVENT_STARTED;
3349      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3350      return ESR_SUCCESS;
3351
3352    case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
3353      rc = detectBeginningOfSpeech(impl, status, type, impl->result);
3354      if (rc != ESR_CONTINUE_PROCESSING)
3355      {
3356        /*
3357         * SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION, or
3358         * SR_RECOGNIZER_INTERNAL_EOI
3359         */
3360        return rc;
3361      }
3362      /* Leaked state */
3363      passert(0);
3364      break;
3365
3366    case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
3367      pushable = ESR_FALSE;
3368      rc = canPushAudioIntoRecognizer(impl);
3369      if (rc == ESR_SUCCESS)
3370      {
3371        /* Not enough samples to process one frame */
3372        if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3373        {
3374          *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
3375          *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3376          return ESR_SUCCESS;
3377        }
3378      }
3379      else if (rc != ESR_CONTINUE_PROCESSING)
3380        return rc;
3381      else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3382        goto MOVE_TO_NEXT_STATE;
3383      else
3384        pushable = ESR_TRUE;
3385      if (pushable)
3386      {
3387        rc = pushAudioIntoRecognizer(impl, status, type, impl->result);
3388        if (rc != ESR_CONTINUE_PROCESSING)
3389        {
3390          /* Not enough samples to process one frame */
3391          return rc;
3392        }
3393        if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3394          goto MOVE_TO_NEXT_STATE;
3395        rc = generateFrameFromAudio(impl, status, type, impl->result);
3396        if (rc != ESR_CONTINUE_PROCESSING)
3397        {
3398          /*
3399           * The internal recognizer needs a minimum amount of audio before
3400           * it begins generating frames.
3401           */
3402          return rc;
3403        }
3404      }
3405      rc = generateFrameStats(impl, status, type, impl->result);
3406      if (rc != ESR_CONTINUE_PROCESSING)
3407      {
3408        /* Not enough frames to calculate stats */
3409        return rc;
3410      }
3411      rc = generatePatternFromFrame(impl, status, type, impl->result);
3412      if (rc != ESR_CONTINUE_PROCESSING)
3413      {
3414        /* End of speech detected */
3415        return rc;
3416      }
3417      if (impl->internalState == SR_RECOGNIZER_INTERNAL_END)
3418        goto MOVE_TO_NEXT_STATE;
3419      rc = detectEndOfSpeech(impl, status, type, impl->result);
3420      if (rc != ESR_CONTINUE_PROCESSING)
3421      {
3422#ifdef SREC_MEASURE_LATENCY
3423        gettimeofday ( &latency_start, NULL );
3424        printf ( "Start Time :  %ld Seconds  %ld Microseconds\n", latency_start.tv_sec, latency_start.tv_usec );
3425#endif
3426
3427        /* End of speech detected */
3428        return rc;
3429      }
3430      *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3431      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3432      return ESR_SUCCESS;
3433
3434    case SR_RECOGNIZER_INTERNAL_EOI:
3435      /*
3436       * On EOI (end of input), we need to process the remaining frames that had not
3437       * been processed when PutAudio set the gotLastFrame flag
3438       */
3439      rc = generatePatternFromFrameEOI(impl, status, type, impl->result);
3440      if (rc != ESR_CONTINUE_PROCESSING)
3441      {
3442        /* End of speech detected */
3443        return rc;
3444      }
3445      rc = detectEndOfSpeech(impl, status, type, impl->result);
3446      if (rc != ESR_CONTINUE_PROCESSING)
3447      {
3448#ifdef SREC_MEASURE_LATENCY
3449        gettimeofday ( &latency_start, NULL );
3450        printf ( "Start Time :  %ld Seconds  %ld Microseconds\n", latency_start.tv_sec, latency_start.tv_usec );
3451#endif
3452        /* End of speech detected */
3453        return rc;
3454      }
3455      *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3456      *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3457      return ESR_SUCCESS;
3458
3459    case SR_RECOGNIZER_INTERNAL_EOS:
3460      /* On EOS (end of speech detected - not due to end of input), create the result */
3461      if (impl->lockFunction)
3462        impl->lockFunction(ESR_LOCK, impl->lockData);
3463      CircularBufferReset(impl->buffer);
3464      if (impl->lockFunction)
3465        impl->lockFunction(ESR_UNLOCK, impl->lockData);
3466      CHKLOG(rc, SR_RecognizerCreateResultImpl((SR_Recognizer*) impl, status, type));
3467      impl->internalState = SR_RECOGNIZER_INTERNAL_END;
3468      return ESR_SUCCESS;
3469
3470    case SR_RECOGNIZER_INTERNAL_END:
3471      return ESR_SUCCESS;
3472    default:
3473      PLogError(L("ESR_INVALID_STATE"));
3474      return ESR_INVALID_STATE;
3475  }
3476CLEANUP:
3477  return rc;
3478}
3479
3480
3481
3482ESR_ReturnCode SR_RecognizerLoadUtteranceImpl(SR_Recognizer* self, const LCHAR* filename)
3483{
3484  /* TODO: complete */
3485  return ESR_SUCCESS;
3486}
3487
3488ESR_ReturnCode SR_RecognizerLoadWaveFileImpl(SR_Recognizer* self, const LCHAR* filename)
3489{
3490  /* TODO: complete */
3491  return ESR_SUCCESS;
3492}
3493
3494ESR_ReturnCode SR_RecognizerLogEventImpl(SR_Recognizer* self, const LCHAR* event)
3495{
3496  ESR_ReturnCode rc;
3497  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3498  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, event));
3499  return ESR_SUCCESS;
3500CLEANUP:
3501  return rc;
3502}
3503
3504ESR_ReturnCode SR_RecognizerLogTokenImpl(SR_Recognizer* self, const LCHAR* token, const LCHAR* value)
3505{
3506  ESR_ReturnCode rc;
3507  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3508  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3509  return ESR_SUCCESS;
3510CLEANUP:
3511  return rc;
3512}
3513
3514ESR_ReturnCode SR_RecognizerLogTokenIntImpl(SR_Recognizer* self, const LCHAR* token, int value)
3515{
3516  ESR_ReturnCode rc;
3517  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3518  CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3519  return ESR_SUCCESS;
3520CLEANUP:
3521  return rc;
3522}
3523
3524ESR_ReturnCode SR_RecognizerLogSessionStartImpl(SR_Recognizer* self, const LCHAR* sessionName)
3525{
3526  ESR_ReturnCode rc;
3527  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3528  /**
3529  * OSI Platform logging.
3530  * In OSR, these events are logged by the platform. We have no platform in ESR, so we
3531   * log them here.
3532  */
3533
3534  /* call (session) start, tokens optional */
3535  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclst")));
3536
3537  /* service start, in this case SRecTest service */
3538  CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SVNM"), sessionName));
3539  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIsvst")));
3540  if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3541    CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3542
3543  return ESR_SUCCESS;
3544CLEANUP:
3545  return rc;
3546}
3547
3548ESR_ReturnCode SR_RecognizerLogSessionEndImpl(SR_Recognizer* self)
3549{
3550  ESR_ReturnCode rc;
3551  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3552
3553  /* OSI log end of call (session) */
3554  CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclnd")));
3555  if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3556    CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3557  return ESR_SUCCESS;
3558CLEANUP:
3559  return rc;
3560}
3561
3562
3563ESR_ReturnCode SR_RecognizerLogWaveformDataImpl(SR_Recognizer* self, const LCHAR* waveformFilename,
3564    const LCHAR* transcription, const double bos,
3565    const double eos, ESR_BOOL isInvocab)
3566{
3567  ESR_ReturnCode rc;
3568  SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3569  LCHAR num[P_PATH_MAX];
3570  int frame;
3571
3572  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("FILE"), waveformFilename));
3573  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("TRANS"), transcription));
3574  sprintf(num, L("%.2f"), bos);
3575  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_SEC"), num));
3576  sprintf(num, L("%.2f"), eos);
3577  CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_SEC"), num));
3578  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("FRAMESIZE"), impl->FRAME_SIZE));
3579  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("SAMPLERATE"), impl->sampleRate));
3580  frame = (int)(bos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3581  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_FR"), frame));
3582  frame = (int)(eos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3583  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_FR"), frame));
3584  CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("INVOCAB"), isInvocab));
3585  CHKLOG(rc, SR_EventLogEvent_AUDIO(impl->eventLog, impl->osi_log_level, L("ESRwfrd")));
3586  return ESR_SUCCESS;
3587CLEANUP:
3588  return rc;
3589}
3590
3591ESR_ReturnCode SR_RecognizerSetLockFunctionImpl(SR_Recognizer* self, SR_RecognizerLockFunction function, void* data)
3592{
3593  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3594
3595  impl->lockFunction = function;
3596  impl->lockData = data;
3597  return ESR_SUCCESS;
3598}
3599
3600static ESR_ReturnCode doSignalQualityInit(SR_RecognizerImpl* impl)
3601{
3602  CA_DoSignalCheck(impl->wavein, &impl->isSignalClipping, &impl->isSignalDCOffset,
3603                   &impl->isSignalNoisy, &impl->isSignalTooQuiet, &impl->isSignalTooFewSamples,
3604                   &impl->isSignalTooManySamples);
3605  impl->isSignalQualityInitialized = ESR_TRUE;
3606  return ESR_SUCCESS;
3607}
3608
3609ESR_ReturnCode SR_RecognizerIsSignalClippingImpl(SR_Recognizer* self, ESR_BOOL* isClipping)
3610{
3611  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3612  ESR_ReturnCode rc;
3613
3614  if (isClipping == NULL)
3615  {
3616    PLogError("SR_RecognizerIsSignalClippingImpl", ESR_INVALID_ARGUMENT);
3617    return ESR_INVALID_ARGUMENT;
3618  }
3619  if (!impl->isSignalQualityInitialized)
3620    CHKLOG(rc, doSignalQualityInit(impl));
3621  *isClipping = impl->isSignalClipping;
3622  return ESR_SUCCESS;
3623CLEANUP:
3624  return rc;
3625}
3626
3627ESR_ReturnCode SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer* self, ESR_BOOL* isDCOffset)
3628{
3629  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3630  ESR_ReturnCode rc;
3631
3632  if (isDCOffset == NULL)
3633  {
3634    PLogError("SR_RecognizerIsSignalDCOffsetImpl", ESR_INVALID_ARGUMENT);
3635    return ESR_INVALID_ARGUMENT;
3636  }
3637  if (!impl->isSignalQualityInitialized)
3638    CHKLOG(rc, doSignalQualityInit(impl));
3639  *isDCOffset = impl->isSignalDCOffset;
3640  return ESR_SUCCESS;
3641CLEANUP:
3642  return rc;
3643}
3644
3645ESR_ReturnCode SR_RecognizerIsSignalNoisyImpl(SR_Recognizer* self, ESR_BOOL* isNoisy)
3646{
3647  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3648  ESR_ReturnCode rc;
3649
3650  if (isNoisy == NULL)
3651  {
3652    PLogError("SR_RecognizerIsSignalNoisyImpl", ESR_INVALID_ARGUMENT);
3653    return ESR_INVALID_ARGUMENT;
3654  }
3655  if (!impl->isSignalQualityInitialized)
3656    CHKLOG(rc, doSignalQualityInit(impl));
3657  *isNoisy = impl->isSignalNoisy;
3658  return ESR_SUCCESS;
3659CLEANUP:
3660  return rc;
3661}
3662
3663ESR_ReturnCode SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer* self, ESR_BOOL* isTooQuiet)
3664{
3665  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3666  ESR_ReturnCode rc;
3667
3668  if (isTooQuiet == NULL)
3669  {
3670    PLogError("SR_RecognizerIsSignalTooQuietImpl", ESR_INVALID_ARGUMENT);
3671    return ESR_INVALID_ARGUMENT;
3672  }
3673  if (!impl->isSignalQualityInitialized)
3674    CHKLOG(rc, doSignalQualityInit(impl));
3675  *isTooQuiet = impl->isSignalTooQuiet;
3676  return ESR_SUCCESS;
3677CLEANUP:
3678  return rc;
3679}
3680
3681ESR_ReturnCode SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooFewSamples)
3682{
3683  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3684  ESR_ReturnCode rc;
3685
3686  if (isTooFewSamples == NULL)
3687  {
3688    PLogError("SR_RecognizerIsSignalTooFewSamplesImpl", ESR_INVALID_ARGUMENT);
3689    return ESR_INVALID_ARGUMENT;
3690  }
3691  if (!impl->isSignalQualityInitialized)
3692    CHKLOG(rc, doSignalQualityInit(impl));
3693  *isTooFewSamples = impl->isSignalTooFewSamples;
3694  return ESR_SUCCESS;
3695CLEANUP:
3696  return rc;
3697}
3698
3699ESR_ReturnCode SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooManySamples)
3700{
3701  SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3702  ESR_ReturnCode rc;
3703
3704  if (isTooManySamples == NULL)
3705  {
3706    PLogError("SR_RecognizerIsSignalTooManySamplesImpl", ESR_INVALID_ARGUMENT);
3707    return ESR_INVALID_ARGUMENT;
3708  }
3709  if (!impl->isSignalQualityInitialized)
3710    CHKLOG(rc, doSignalQualityInit(impl));
3711  *isTooManySamples = impl->isSignalTooManySamples;
3712  return ESR_SUCCESS;
3713CLEANUP:
3714  return rc;
3715}
3716
3717
3718
3719/**************************************/
3720/* Waveform Buffer stuff              */
3721/**************************************/
3722ESR_ReturnCode WaveformBuffer_Create(WaveformBuffer** waveformBuffer, size_t frame_size)
3723{
3724  ESR_ReturnCode rc;
3725  WaveformBuffer *buf;
3726  size_t val_size_t;
3727  int    val_int;
3728  ESR_BOOL   exists;
3729
3730  buf = NEW(WaveformBuffer, L("SR_RecognizerImpl.wvfmbuf"));
3731  if (buf == NULL)
3732  {
3733    rc = ESR_OUT_OF_MEMORY;
3734    PLogError(L("%s: could not create WaveformBuffer"), ESR_rc2str(rc));
3735    goto CLEANUP;
3736  }
3737
3738  ESR_SessionContains(L("SREC.voice_enroll.bufsz_kB"), &exists);
3739  if (exists)
3740    ESR_SessionGetSize_t(L("SREC.voice_enroll.bufsz_kB"), &val_size_t);
3741  else
3742    val_size_t = DEFAULT_WAVEFORM_BUFFER_MAX_SIZE;
3743  val_size_t *= 1024; /* convert to kB*/
3744  CHKLOG(rc, CircularBufferCreate(val_size_t, L("SR_RecognizerImpl.wvfmbuf.cbuffer"), &buf->cbuffer));
3745
3746  ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
3747  if (exists)
3748    ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &val_int);
3749  else
3750    val_int = DEFAULT_WAVEFORM_WINDBACK_FRAMES;
3751  val_int *= frame_size; /* convert frames to bytes */
3752  buf->windback_buffer_sz = (size_t) val_int;
3753  buf->windback_buffer = MALLOC(buf->windback_buffer_sz, L("SR_RecognizerImpl.wvfmbuf.windback"));
3754  if (buf->windback_buffer == NULL)
3755  {
3756    rc = ESR_OUT_OF_MEMORY;
3757    PLogError(L("%s: could not create Waveform windback buffer"), ESR_rc2str(rc));
3758    goto CLEANUP;
3759  }
3760
3761
3762  ESR_SessionContains(L("SREC.voice_enroll.eos_comfort_frames"), &exists);
3763  if (exists)
3764    ESR_SessionGetSize_t(L("SREC.voice_enroll.eos_comfort_frames"), &val_size_t);
3765  else
3766    val_size_t = DEFAULT_EOS_COMFORT_FRAMES;
3767  buf->eos_comfort_frames = val_size_t;
3768
3769  ESR_SessionContains(L("SREC.voice_enroll.bos_comfort_frames"), &exists);
3770  if (exists)
3771    ESR_SessionGetSize_t(L("SREC.voice_enroll.bos_comfort_frames"), &val_size_t);
3772  else
3773    val_size_t = DEFAULT_BOS_COMFORT_FRAMES;
3774  buf->bos_comfort_frames = val_size_t;
3775
3776  /* initially off */
3777  buf->state = WAVEFORM_BUFFERING_OFF;
3778
3779  *waveformBuffer = buf;
3780  return ESR_SUCCESS;
3781CLEANUP:
3782  WaveformBuffer_Destroy(buf);
3783  return rc;
3784}
3785
3786ESR_ReturnCode WaveformBuffer_Write(WaveformBuffer* waveformBuffer, void *data, size_t num_bytes)
3787{
3788  size_t available_bytes;
3789  size_t done_bytes;
3790
3791  /* do nothing if not active */
3792  switch (waveformBuffer->state)
3793  {
3794    case WAVEFORM_BUFFERING_OFF:
3795      return ESR_SUCCESS;
3796
3797    case WAVEFORM_BUFFERING_ON_CIRCULAR:
3798      available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3799      if (available_bytes < num_bytes)
3800      {
3801        done_bytes = CircularBufferSkip(waveformBuffer->cbuffer, num_bytes - available_bytes);
3802        if (done_bytes != num_bytes - available_bytes)
3803        {
3804          PLogError("WaveformBuffer_Write: error when skipping bytes");
3805          return ESR_INVALID_STATE;
3806        }
3807      }
3808      done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3809      if (done_bytes != num_bytes)
3810      {
3811        PLogError("WaveformBuffer_Write: error when writing bytes");
3812        return ESR_INVALID_STATE;
3813      }
3814      return ESR_SUCCESS;
3815
3816    case WAVEFORM_BUFFERING_ON_LINEAR:
3817      available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3818      if (available_bytes < num_bytes)
3819      {
3820        waveformBuffer->overflow_count += num_bytes;
3821        return ESR_BUFFER_OVERFLOW;
3822      }
3823      done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3824      if (done_bytes != num_bytes)
3825      {
3826        PLogError("WaveformBuffer_Write: error when writing bytes");
3827        return ESR_INVALID_STATE;
3828      }
3829      return ESR_SUCCESS;
3830
3831    default:
3832      PLogError("WaveformBuffer_Write: bad control path");
3833      return ESR_INVALID_STATE;
3834  }
3835}
3836
3837ESR_ReturnCode WaveformBuffer_Read(WaveformBuffer* waveformBuffer, void *data, size_t* num_bytes)
3838{
3839  size_t bytes_to_read;
3840  ESR_ReturnCode rc;
3841
3842  if (num_bytes == NULL)
3843  {
3844    rc = ESR_INVALID_ARGUMENT;
3845    PLogError(ESR_rc2str(rc));
3846    goto CLEANUP;
3847  }
3848  if (waveformBuffer->overflow_count > 0)
3849  {
3850    memset(data, 0, *num_bytes);
3851    *num_bytes = 0;
3852    PLogError(L("WaveformBuffer_Read: previous overflow causes read to return NULL"));
3853    return ESR_SUCCESS;
3854  }
3855
3856  if (waveformBuffer->read_size != 0 && *num_bytes > waveformBuffer->read_size)
3857  {
3858    PLogError(L("ESR_OUT_OF_MEMORY: waveform buffer too small for read, increase from %d to %d"), *num_bytes, waveformBuffer->read_size);
3859    return ESR_OUT_OF_MEMORY;
3860  }
3861
3862  if (waveformBuffer->read_size == 0)
3863    bytes_to_read = *num_bytes;
3864  else
3865    bytes_to_read = MIN(waveformBuffer->read_size, *num_bytes);
3866  waveformBuffer->read_size -= bytes_to_read;
3867  *num_bytes = CircularBufferRead(waveformBuffer->cbuffer, data, bytes_to_read);
3868  if (*num_bytes != bytes_to_read)
3869  {
3870    PLogError("WaveformBuffer_Read: error reading buffer");
3871    return ESR_INVALID_STATE;
3872  }
3873  return ESR_SUCCESS;
3874CLEANUP:
3875  return rc;
3876}
3877
3878/* WindBack will save the last num_bytes recorded, reset the buffer, and then load the
3879   saved bytes at the beginning of the buffer */
3880ESR_ReturnCode WaveformBuffer_WindBack(WaveformBuffer* waveformBuffer, const size_t num_bytes)
3881{
3882  ESR_ReturnCode rc;
3883  size_t bufferSize;
3884
3885  if (num_bytes <= 0)
3886  {
3887    CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3888    return ESR_SUCCESS;
3889  }
3890
3891  /* make sure windback buffer is big enough */
3892  if (num_bytes > waveformBuffer->windback_buffer_sz)
3893  {
3894    rc = ESR_OUT_OF_MEMORY;
3895    PLogError(L("%s: windback buffer is too small (needed=%d, had=%d)"), ESR_rc2str(rc), num_bytes, waveformBuffer->windback_buffer_sz);
3896    goto CLEANUP;
3897  }
3898
3899  CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3900  /* skip the first few bytes written */
3901  if (bufferSize < num_bytes)
3902  {
3903    PLogError("bufferSize %d num_bytes %d (ESR_INVALID_STATE)\n", bufferSize, num_bytes);
3904    bufferSize = 0;
3905  }
3906  else
3907  {
3908    bufferSize -= num_bytes;
3909  }
3910  CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, bufferSize));
3911  /* read the last few bytes written */
3912  bufferSize = num_bytes;
3913  CHKLOG(rc, WaveformBuffer_Read(waveformBuffer, waveformBuffer->windback_buffer, &bufferSize));
3914
3915  /* reset buffer */
3916  CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3917
3918  /* rewrite the saved bytes at the beginning */
3919  CHKLOG(rc, WaveformBuffer_Write(waveformBuffer, waveformBuffer->windback_buffer, bufferSize));
3920  return ESR_SUCCESS;
3921CLEANUP:
3922  return rc;
3923}
3924
3925ESR_ReturnCode WaveformBuffer_Destroy(WaveformBuffer* waveformBuffer)
3926{
3927  if (waveformBuffer->cbuffer)
3928    FREE(waveformBuffer->cbuffer);
3929  if (waveformBuffer->windback_buffer)
3930    FREE(waveformBuffer->windback_buffer);
3931  if (waveformBuffer)
3932    FREE(waveformBuffer);
3933  return ESR_SUCCESS;
3934}
3935
3936ESR_ReturnCode WaveformBuffer_SetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t state)
3937{
3938  waveformBuffer->state = state;
3939  return ESR_SUCCESS;
3940}
3941
3942ESR_ReturnCode WaveformBuffer_GetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t* state)
3943{
3944  *state = waveformBuffer->state;
3945  return ESR_SUCCESS;
3946}
3947
3948/**
3949 * @return ESR_BUFFER_OVERFLOW if nametag EOS occured beyond end of buffer
3950 */
3951ESR_ReturnCode WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer* waveformBuffer, const LCHAR* end_pointed_result, const size_t bytes_per_frame)
3952{
3953  const LCHAR *p;
3954  size_t bos_frame, eos_frame, bufferSize, read_start_offset;
3955  ESR_ReturnCode rc;
3956
3957  /* potential end pointed results
3958
3959     -pau-@19 tape@36 scan@64 down@88 -pau2-@104
3960     -pau-@19 tape@34 off@55 -pau2-@78
3961     -pau-@19 tape@47 help@66 -pau2-@80
3962     -pau-@16 tape@36 reverse@71 -pau2-@91
3963     -pau-@21 tape@42 scan@59 down@80 -pau2-@91
3964
3965     what I need to extract is the integer between "-pau-@" and ' '
3966     and the integer between '@' and " -pau2-"
3967  */
3968
3969
3970  p = LSTRSTR( end_pointed_result, PREFIX_WORD);
3971  if(p) p+=PREFIX_WORD_LEN; while(p && *p == '@') p++;
3972  rc = p ? lstrtoui(p, &bos_frame, 10) : ESR_INVALID_ARGUMENT;
3973  if (rc == ESR_INVALID_ARGUMENT)
3974  {
3975    PLogError(L("%s: extracting bos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3976    goto CLEANUP;
3977  }
3978  else if (rc != ESR_SUCCESS)
3979    goto CLEANUP;
3980
3981  p = LSTRSTR( end_pointed_result, SUFFIX_WORD);
3982  while(p && p>end_pointed_result && p[-1]!='@') --p;
3983  rc = p ? lstrtoui(p, &eos_frame, 10) : ESR_INVALID_ARGUMENT;
3984  if (rc == ESR_INVALID_ARGUMENT)
3985  {
3986    PLogError(L("%s: extracting eos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3987    goto CLEANUP;
3988  }
3989  else if (rc != ESR_SUCCESS)
3990    goto CLEANUP;
3991
3992  bos_frame -= (bos_frame > waveformBuffer->bos_comfort_frames ? waveformBuffer->bos_comfort_frames : 0);
3993  eos_frame += waveformBuffer->eos_comfort_frames;
3994
3995  /*
3996   * I know where speech started, so I want to skip frames 0 to bos_frame.
3997   * I also know where speech ended so I want to set the amount of frames(bytes) to read for
3998   * the nametag audio buffer (i.e. the read_size)
3999   */
4000
4001  read_start_offset = bos_frame * bytes_per_frame * 2 /* times 2 because of skip even frames */;
4002  waveformBuffer->read_size = (eos_frame - bos_frame) * bytes_per_frame * 2 /* times 2 because of skip even frames */;
4003
4004  CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
4005  if (read_start_offset + waveformBuffer->read_size > bufferSize)
4006  {
4007    waveformBuffer->overflow_count += read_start_offset + waveformBuffer->read_size - bufferSize;
4008    passert(waveformBuffer->overflow_count > 0);
4009    PLogMessage(L("Warning: Voice Enrollment audio buffer overflow (spoke too much, over by %d bytes)"),
4010                waveformBuffer->overflow_count);
4011    return ESR_BUFFER_OVERFLOW;
4012  }
4013  CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, read_start_offset));
4014#ifdef SREC_ENGINE_VERBOSE_LOGGING
4015  PLogMessage(L("Voice Enrollment: bos@%d, eos@%d, therefore sizeof(waveform) should be %d"), bos_frame, eos_frame, waveformBuffer->read_size);
4016#endif
4017  return ESR_SUCCESS;
4018CLEANUP:
4019  return rc;
4020}
4021
4022
4023ESR_ReturnCode WaveformBuffer_Reset(WaveformBuffer* waveformBuffer)
4024{
4025  CircularBufferReset(waveformBuffer->cbuffer);
4026  waveformBuffer->overflow_count = 0;
4027  waveformBuffer->read_size = 0;
4028  return ESR_SUCCESS;
4029}
4030
4031ESR_ReturnCode WaveformBuffer_GetSize(WaveformBuffer* waveformBuffer, size_t* size)
4032{
4033  *size = CircularBufferGetSize(waveformBuffer->cbuffer);
4034  return ESR_SUCCESS;
4035}
4036
4037ESR_ReturnCode WaveformBuffer_Skip(WaveformBuffer* waveformBuffer, const size_t bytes)
4038{
4039  if (CircularBufferSkip(waveformBuffer->cbuffer, bytes) != (int) bytes)
4040    return ESR_INVALID_STATE;
4041  return ESR_SUCCESS;
4042}
4043
4044
4045
4046static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl )
4047    {
4048    ESR_ReturnCode  reset_status;
4049
4050    FREE ( impl->audioBuffer );
4051    impl->audioBuffer = NULL;
4052    impl->audioBuffer = MALLOC ( impl->FRAME_SIZE, MTAG );
4053
4054    if ( impl->audioBuffer != NULL )
4055        {
4056        WaveformBuffer_Destroy ( impl->waveformBuffer );
4057        impl->waveformBuffer = NULL;
4058        reset_status = WaveformBuffer_Create ( &impl->waveformBuffer, impl->FRAME_SIZE );
4059        }
4060    else
4061        {
4062        reset_status = ESR_OUT_OF_MEMORY;
4063        }
4064    return ( reset_status );
4065    }
4066
4067
4068
4069static ESR_ReturnCode SR_Recognizer_Validate_Sample_Rate ( size_t sample_rate )
4070    {
4071    ESR_ReturnCode  validate_status;
4072
4073    switch ( sample_rate )
4074        {
4075        case 8000:
4076        case 11025:
4077        case 16000:
4078        case 22050:
4079            validate_status = ESR_SUCCESS;
4080            break;
4081
4082        default:
4083            validate_status = ESR_INVALID_ARGUMENT;
4084            break;
4085        }
4086    return ( validate_status );
4087    }
4088
4089
4090
4091static ESR_ReturnCode SR_Recognizer_Sample_Rate_Needs_Change ( size_t new_sample_rate, ESR_BOOL *needs_changing )
4092    {
4093    ESR_ReturnCode  validate_status;
4094    size_t          current_sample_rate;
4095
4096    validate_status = ESR_SessionGetSize_t ( "CREC.Frontend.samplerate", &current_sample_rate );
4097
4098    if ( validate_status == ESR_SUCCESS )
4099        {
4100        if ( new_sample_rate != current_sample_rate )
4101            *needs_changing = ESR_TRUE;
4102        else
4103            *needs_changing = ESR_TRUE;
4104        }
4105    return ( validate_status );
4106    }
4107
4108
4109
4110static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( void )
4111    {
4112    ESR_ReturnCode  change_status;
4113    LCHAR           model_filenames [P_PATH_MAX];
4114    LCHAR           lda_filename [P_PATH_MAX];
4115    size_t          filename_length;
4116
4117    filename_length = P_PATH_MAX;
4118    change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles8"), model_filenames, &filename_length );
4119
4120    if ( change_status == ESR_SUCCESS )
4121        {
4122        filename_length = P_PATH_MAX;
4123        change_status = ESR_SessionGetLCHAR ( L("cmdline.lda8"), lda_filename, &filename_length );
4124
4125/* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4126        if ( change_status == ESR_SUCCESS )
4127            {
4128            change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", 8000 );
4129            if ( change_status == ESR_SUCCESS )
4130                {
4131                change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 4000 );
4132
4133                if ( change_status == ESR_SUCCESS )
4134                    {
4135                    change_status =  ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4136
4137                    if ( change_status == ESR_SUCCESS )
4138                        change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4139                    }
4140                }
4141            }
4142        else
4143            {
4144            PLogError (L("\nMissing Parameter lda8\n"));
4145            }
4146        }
4147    else
4148        {
4149        PLogError (L("\nMissing Parameter models8\n"));
4150        }
4151    return ( change_status );
4152    }
4153
4154
4155
4156static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( size_t sample_rate )
4157    {
4158    ESR_ReturnCode  change_status;
4159    LCHAR           model_filenames [P_PATH_MAX];
4160    LCHAR           lda_filename [P_PATH_MAX];
4161    size_t          filename_length;
4162
4163    filename_length = P_PATH_MAX;
4164    change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles11"), model_filenames, &filename_length );
4165
4166    if ( change_status == ESR_SUCCESS )
4167        {
4168        filename_length = P_PATH_MAX;
4169        change_status = ESR_SessionGetLCHAR ( L("cmdline.lda11"), lda_filename, &filename_length );
4170
4171/* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4172        if ( change_status == ESR_SUCCESS )
4173            {
4174            change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", sample_rate );
4175
4176            if ( change_status == ESR_SUCCESS )
4177                {
4178                change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 5500 );
4179
4180                if ( change_status == ESR_SUCCESS )
4181                    {
4182                    change_status =  ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4183
4184                    if ( change_status == ESR_SUCCESS )
4185                        change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4186                    }
4187                }
4188            }
4189        else
4190            {
4191            PLogError (L("\nMissing Parameter lda11\n"));
4192            }
4193        }
4194    else
4195        {
4196        PLogError (L("\nMissing Parameter models11\n"));
4197        }
4198    return ( change_status );
4199    }
4200
4201
4202
4203static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params ( size_t new_sample_rate )
4204    {
4205    ESR_ReturnCode  change_status;
4206
4207    if ( new_sample_rate == 8000 )
4208        change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( );
4209    else
4210        change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( new_sample_rate );
4211
4212    return ( change_status );
4213    }
4214
4215
4216
4217ESR_ReturnCode SR_Recognizer_Change_Sample_RateImpl ( SR_Recognizer *recognizer, size_t new_sample_rate )
4218    {
4219    ESR_ReturnCode          change_status;
4220    ESR_BOOL                rate_needs_changing;
4221    SR_RecognizerImpl       *impl;
4222    CA_FrontendInputParams  *frontendParams;
4223
4224    change_status = SR_Recognizer_Validate_Sample_Rate ( new_sample_rate );
4225
4226    if ( change_status == ESR_SUCCESS )
4227        {
4228        change_status = SR_Recognizer_Sample_Rate_Needs_Change ( new_sample_rate, &rate_needs_changing );
4229
4230        if ( change_status == ESR_SUCCESS )
4231            {
4232            if ( rate_needs_changing == ESR_TRUE )
4233                {
4234                change_status = SR_Recognizer_Change_Sample_Rate_Session_Params ( new_sample_rate );
4235
4236                if ( change_status == ESR_SUCCESS )
4237                    { // SR_RecognizerCreateFrontendImpl
4238                    impl = (SR_RecognizerImpl *)recognizer;
4239                    change_status = SR_RecognizerUnsetupImpl( recognizer );
4240
4241                    if ( change_status == ESR_SUCCESS )
4242                        {
4243                        CA_UnconfigureFrontend ( impl->frontend );
4244                        frontendParams = CA_AllocateFrontendParameters ( );
4245
4246                        if ( frontendParams != NULL )
4247                            {
4248                            change_status = SR_RecognizerGetFrontendLegacyParametersImpl ( frontendParams );
4249
4250                            if ( change_status == ESR_SUCCESS )
4251                                {
4252                                CA_ConfigureFrontend ( impl->frontend, frontendParams );
4253                                CA_UnconfigureWave ( impl->wavein );
4254                                CA_ConfigureWave ( impl->wavein, impl->frontend );
4255                                impl->sampleRate = new_sample_rate;
4256                                impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
4257                                change_status = SR_Recognizer_Reset_Buffers ( impl );
4258
4259                                if ( change_status == ESR_SUCCESS )
4260                                    {
4261                                    change_status = SR_RecognizerSetupImpl( recognizer );
4262
4263                                    if ( change_status == ESR_SUCCESS )
4264                                        change_status = SR_AcousticStateReset ( recognizer );
4265                                    }
4266                                else
4267                                    {
4268                                    SR_RecognizerSetupImpl( recognizer );   /* Otherwise recognizer is in bad state */
4269                                    }
4270                                }
4271                            CA_FreeFrontendParameters ( frontendParams );
4272                            }
4273                        else
4274                            {
4275                            SR_RecognizerSetupImpl( recognizer );   /* Otherwise recognizer is in bad state */
4276                            change_status = ESR_OUT_OF_MEMORY;
4277                            }
4278                        }
4279                    }
4280                }
4281            }
4282        }
4283    return ( change_status );
4284    }
4285
4286
4287