/*---------------------------------------------------------------------------*
 *  SR_Recognizer.h                                                          *
 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *      http://www.apache.org/licenses/LICENSE-2.0                           *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/

20#ifndef __SR_RECOGNIZER_H
21#define __SR_RECOGNIZER_H
22
23
24
25#include "ESR_ReturnCode.h"
26#include "SR_RecognizerPrefix.h"
27#include "SR_AcousticModels.h"
28#include "SR_Grammar.h"
29#include "SR_RecognizerResult.h"
30#include "SR_Nametags.h"
31#include "pstdio.h"
32#include "ptypes.h"
33
34/* forward decl needed because of SR_Recognizer.h <-> SR_Grammar.h include loop */
35struct SR_Grammar_t;
36
/**
 * Recognizer status.
 *
 * Enumerators carry no explicit initializers, so they are numbered
 * consecutively starting at 0 in declaration order.
 */
typedef enum SR_RecognizerStatus_t
{
  /**
   * Reserved value.
   */
  SR_RECOGNIZER_EVENT_INVALID,
  /**
   * Recognizer could not find a match for the utterance.
   */
  SR_RECOGNIZER_EVENT_NO_MATCH,
  /**
   * Recognizer processed one frame of audio.
   */
  SR_RECOGNIZER_EVENT_INCOMPLETE,
  /**
   * Recognizer has just been started.
   */
  SR_RECOGNIZER_EVENT_STARTED,
  /**
   * Recognizer is stopped.
   */
  SR_RECOGNIZER_EVENT_STOPPED,
  /**
   * Beginning of speech detected.
   */
  SR_RECOGNIZER_EVENT_START_OF_VOICING,
  /**
   * End of speech detected.
   */
  SR_RECOGNIZER_EVENT_END_OF_VOICING,
  /**
   * Beginning of utterance occurred too soon.
   */
  SR_RECOGNIZER_EVENT_SPOKE_TOO_SOON,
  /**
   * Recognition match detected.
   */
  SR_RECOGNIZER_EVENT_RECOGNITION_RESULT,
  /**
   * Timeout occurred before beginning of utterance.
   */
  SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT,
  /**
   * Timeout occurred before speech recognition could complete.
   */
  SR_RECOGNIZER_EVENT_RECOGNITION_TIMEOUT,
  /**
   * Not enough samples to process one frame.
   */
  SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO,
  /**
   * More audio encountered than is allowed by 'swirec_max_speech_duration'.
   */
  SR_RECOGNIZER_EVENT_MAX_SPEECH
} SR_RecognizerStatus;

/**
 * Type of RecognizerResult returned by SR_RecognizerAdvance().
 *
 * Enumerators carry no explicit initializers, so they are numbered
 * consecutively starting at 0 in declaration order.
 */
typedef enum SR_RecognizerResultType_t
{
  /**
   * Reserved value.
   */
  SR_RECOGNIZER_RESULT_TYPE_INVALID,
  /**
   * The result is complete from a full recognition of audio.
   */
  SR_RECOGNIZER_RESULT_TYPE_COMPLETE,
  /**
   * No results at this time.
   */
  SR_RECOGNIZER_RESULT_TYPE_NONE
} SR_RecognizerResultType;

/**
 * SR_Utterance stubbed out: declared as an opaque untyped pointer.
 */
typedef void* SR_Utterance;

/**
 * Operation requested from a SR_RecognizerLockFunction.
 *
 * NOTE(review): presumably ESR_LOCK asks the callback to acquire the lock and
 * ESR_UNLOCK asks it to release the lock -- confirm against the implementation.
 */
typedef enum
{
  ESR_LOCK,
  ESR_UNLOCK
} ESR_LOCKMODE;

126/**
127 * Function which will be invoked before accessing internal variables.
128 */
129typedef ESR_ReturnCode(*SR_RecognizerLockFunction)(ESR_LOCKMODE mode, void* data);
130
/**
 * @addtogroup SR_RecognizerModule SR_Recognizer API functions
 * Synchronous speech recognizer.
 *
 * @{
 */

138/**
139 * Synchronous speech recognizer.
140 */
141typedef struct SR_Recognizer_t
142{
143  /**
144   * Starts recognition.
145   *
146   * @param self SR_Recognizer handle
147  * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer,
148  * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason
149   */
150  ESR_ReturnCode(*start)(struct SR_Recognizer_t* self);
151  /**
152   * Stops the recognizer and invalidates the recognition result object.
153   * Calling this function before the recognizer receives the last frame causes the recognition
154   * to abort.
155   *
156   * @param self SR_Recognizer handle
157   * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
158   */
159  ESR_ReturnCode(*stop)(struct SR_Recognizer_t* self);
160  /**
161   * Destroy a recognizer.
162   *
163   * @param self SR_Recognizer handle
164  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
165   */
166  ESR_ReturnCode(*destroy)(struct SR_Recognizer_t* self);
167  /**
168   * Associates a set of models with the recognizer.
169   *
170   * @param self SR_Recognizer handle
171  * @return ESR_INVALID_ARGUMENT if self is null
172   */
173  ESR_ReturnCode(*setup)(struct SR_Recognizer_t* self);
174  /**
175   * Unconfigures recognizer.
176   *
177   * @param self SR_Recognizer handle
178  * @return ESR_INVALID_ARGUMENT if self is null
179   */
180  ESR_ReturnCode(*unsetup)(struct SR_Recognizer_t* self);
181  /**
182   * Indicates whether recognizer is configured for use.
183   *
184   * @param self SR_Recognizer handle
185   * @param isSetup True if recognizer is configured
186  * @return ESR_INVALID_ARGUMENT if self is null
187   */
188  ESR_ReturnCode(*isSetup)(struct SR_Recognizer_t* self, ESR_BOOL* isSetup);
189
190  /**
191   * Returns copy of LCHAR recognition parameter.
192   *
193   * @param self SR_Recognizer handle
194   * @param key Parameter name
195   * @param value [out] Used to hold the parameter value
196   * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW,
197   *            the required length is returned in this variable.
198  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
199  * type LCHAR*
200   */
201  ESR_ReturnCode(*getParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value, size_t* len);
202  /**
203   * Return copy of size_t recognition parameter.
204   *
205   * @param self SR_Recognizer handle
206   * @param key Parameter name
207   * @param value [out] Used to hold the parameter value
208  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
209  * type size_t
210   */
211  ESR_ReturnCode(*getSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t* value);
212  /**
213   * Return copy of BOOL recognition parameter.
214   *
215   * @param self SR_Recognizer handle
216   * @param key Parameter name
217   * @param value [out] Used to hold the parameter value
218  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
219  * type bool
220   */
221  ESR_ReturnCode(*getBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL* value);
222  /**
223   * Sets recognition parameters.
224   *
225   * Key:             Description of associated value
226   *
227   * VoiceEnrollment       If "true", the next recognition will produce data required
228   *                              for Nametag support (i.e. Aurora bitstream).
229   *
230   * @param self SR_Recognizer handle
231   * @param key Parameter name
232   * @param value Parameter value
233  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
234   */
235  ESR_ReturnCode(*setParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value);
236  /**
237   * Sets recognition parameters.
238   *
239   * @param self SR_Recognizer handle
240   * @param key Parameter name
241   * @param value Parameter value
242  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
243   */
244  ESR_ReturnCode(*setSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t value);
245  /**
246   * Sets recognition parameters.
247   *
248   * @param self SR_Recognizer handle
249   * @param key Parameter name
250   * @param value Parameter value
251  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
252   */
253  ESR_ReturnCode(*setBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL value);
254
255  /**
256   * Recognizer may be set up with multiple Grammars and multiple rules. All grammars
257   * must be unsetup before the recognizer can be destroy.
258   * A pre-compiled Grammar should have undergone a model consistency check with the
259   * recognizer prior to this call.
260   *
261   * @param self SR_Recognizer handle
262   * @param grammar Grammar containing rule
263   * @param ruleName Name of rule to associate with recognizer
264   * @see SR_GrammarCheckModelConsistency
265   * @return ESR_INVALID_ARGUMENT if self is null
266   */
267  ESR_ReturnCode (*setupRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, const LCHAR* ruleName);
268  /**
269   * Indicates if Recognizer is configured with any rules within the specified Grammar.
270   *
271   * @param self SR_Recognizer handle
272   * @param hasSetupRules True if the Recognizer is configured for the Grammar
273  * @return ESR_INVALID_ARGUMENT if self is null
274   */
275  ESR_ReturnCode(*hasSetupRules)(struct SR_Recognizer_t* self, ESR_BOOL* hasSetupRules);
276  /**
277   * Activates rule in recognizer.
278   *
279   * @param self SR_Recognizer handle
280   * @param grammar Grammar containing rule
281   * @param ruleName Name of rule
282   * @param weight Relative weight to assign to self grammar vs. other activated grammars.
283   *               Values: Integers 0-2^31.
284  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer,
285  * or if the rule could not be setup, or if the acoustic models could not be setup;
286  * ESR_BUFFER_OVERFLOW if ruleName is too long
287   */
288  ESR_ReturnCode (*activateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
289                                const LCHAR* ruleName, unsigned int weight);
290  /**
291   * Deactivates rule in recognizer.
292   *
293   * @param self SR_Recognizer handle
294   * @param grammar Grammar containing rule
295   * @param ruleName Name of root rule
296   * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated
297   */
298  ESR_ReturnCode (*deactivateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
299                                  const LCHAR* ruleName);
300
301  /**
302   * Deactivates all grammar rules in recognizer.
303   *
304   * @param self SR_Recognizer handle
305  * @return ESR_INVALID_ARGUMENT if self is null
306   */
307  ESR_ReturnCode(*deactivateAllRules)(struct SR_Recognizer_t* self);
308
309  /**
310   * Indicates if rule is active in recognizer.
311   *
312   * @param self SR_Recognizer handle
313   * @param grammar Grammar containing rule
314   * @param ruleName Name of rule
315   * @param isActiveRule True if rule is active
316  * @return ESR_INVALID_ARGUMENT if self is null
317   */
318  ESR_ReturnCode (*isActiveRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
319                                const LCHAR* ruleName, ESR_BOOL* isActiveRule);
320   /**
321   * Configures the grammar for maximum amount of word addition
322   *
323   * @param self SR_Recognizer handle
324   * @param grammar Grammar whose ceiling to be set
325   * @return ESR_INVALID_ARGUMENT if self or grammar are null
326   */
327  ESR_ReturnCode (*setWordAdditionCeiling)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar );
328  /**
329   * Ensure the model usage in a pre-compiled grammar is consistent with the models
330   * that are associated with the Recognizer. You must first have called Recognizer_Setup().
331   *
332   * @param self SR_Recognizer handle
333   * @param grammar Grammar to check against
334   * @param isConsistent True if rule is consistent
335  * @return ESR_INVALID_ARGUMENT if self is null
336   */
337  ESR_ReturnCode (*checkGrammarConsistency)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
338      ESR_BOOL* isConsistent);
339
340 /**
341   * Ensure the model usage in a pre-compiled grammar is consistent with the models
342   * that are associated with the Recognizer. You must first have called Recognizer_Setup().
343   *
344   * @param self SR_Recognizer handle
345   * @param grammar Grammar to check against
346   * @param isConsistent True if rule is consistent
347  * @return ESR_INVALID_ARGUMENT if self is null
348   */
349  ESR_ReturnCode (*getModels)(struct SR_Recognizer_t* self, SR_AcousticModels** pmodels);
350
351  /**
352   * Get audio into the recognizer.
353   *
354   * We decouple the Audio and frontend processing from the Recognizer processing via an
355   * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least
356   * as fast as real time so that voicing events are not unduly delayed. The audio buffer size
357   * must be at least one frame buffer's worth and some reasonable maximum size for synchronous
358   * behaviour. This function may be called independently of Recognizer_Advance.
359   *
360   * @param self SR_Recognizer handle
361   * @param buffer Buffer containing audio data
362   * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow,
363   *                            ESR_BUFFER_OVERFLOW is returned and this value holds the actual
364   *                            amount of samples that were pushed.
365   * @param isLast Indicates if the audio frame is the last one in this recognition
366  * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't
367  * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is
368  * full
369   */
370  ESR_ReturnCode (*putAudio)(struct SR_Recognizer_t* self, asr_int16_t* buffer, size_t* bufferSize,
371                            ESR_BOOL isLast);
372  /**
373   * Advance the recognizer by at least one utterance frame. The number of frames advanced
374   * depends on the underlying definition. We anticipate that the recognizer will keep up with
375   * the supplied audio buffers when waiting for voicing. After this point, the number of frames
376   * may be one (for our default frame-advance mode) or it may be more if the synchronous nature
377   * of this operation is not considered a problem. The recognizer may be advanced independently
378   * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data.
379   * A stop condition could be an appropriate consequence.
380   *
381   * @param self Recognizer handle
382   * @param status Resulting recognizer status
383   * @param type Resulting recognition result type
384   * @param result Resulting recognizer result
385  * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs
386   */
387  ESR_ReturnCode(*advance)(struct SR_Recognizer_t* self, SR_RecognizerStatus* status,
388                           SR_RecognizerResultType* type, SR_RecognizerResult** result);
389
390
391  /**
392   * Loads utterance from file.
393   *
394   * @param self SR_Recognizer handle
395   * @param filename File to read from
396  * @return ESR_INVALID_ARGUMENT if self is null
397   */
398  ESR_ReturnCode(*loadUtterance)(struct SR_Recognizer_t* self, const LCHAR* filename);
399  /**
400   * Loads utterance from WAVE file.
401   *
402   * @param self SR_Recognizer handle
403   * @param filename WAVE file to read from
404  * @return ESR_INVALID_ARGUMENT if self is null
405   */
406  ESR_ReturnCode(*loadWaveFile)(struct SR_Recognizer_t* self, const LCHAR* filename);
407
408  /**
409   * Log recognizer-related event token.
410   *
411   * @param self SR_Recognizer handle
412   * @param event Token name
413   * @param value Value to be logged
414   * @return ESR_INVALID_ARGUMENT if self is null
415   */
416  ESR_ReturnCode(*logToken)(struct SR_Recognizer_t* self, const LCHAR* token, const LCHAR* value);
417
418  /**
419   * Log recognizer-related event token integer.
420   *
421   * @param self SR_Recognizer handle
422   * @param event Token name
423   * @param value Value to be logged
424   * @return ESR_INVALID_ARGUMENT if self is null
425   */
426  ESR_ReturnCode(*logTokenInt)(struct SR_Recognizer_t* self, const LCHAR* token, int value);
427
428  /**
429   * Log recognizer-related event and dump all previously accumulated tokens since last event to
430   * log.
431   *
432   * @param self SR_Recognizer handle
433   * @param event Event name
434   * @return ESR_INVALID_ARGUMENT if self is null
435   */
436  ESR_ReturnCode(*logEvent)(struct SR_Recognizer_t* self, const LCHAR* event);
437
438  /**
439   * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions)
440   * and it is up to the application to decided when the session ends and a new one begins (e.g.
441   * timeout, number of recognitions, etc.)
442   *
443   * @param self SR_Recognizer handle
444   * @param sessionName Session name
445   * @return ESR_INVALID_ARGUMENT if self is null
446   */
447  ESR_ReturnCode(*logSessionStart)(struct SR_Recognizer_t* self, const LCHAR* sessionName);
448
449  /**
450   * Log the end of a log session.
451   *
452   * @param self SR_Recognizer handle
453   * @return ESR_INVALID_ARGUMENT if self is null
454   */
455  ESR_ReturnCode(*logSessionEnd)(struct SR_Recognizer_t* self);
456
457  /**
458   * Log data about a waveform obtained from a TCP file. This function is not called
459   * when doing live recognition.
460   *
461   * @param self SR_Recognizer handle
462   * @param waveformFilename Session name
463   * @param transcription Transcription for the utterance
464   * @param bos Beginning of speech (seconds)
465   * @param eos End of speech (seconds)
466   * @param isInvocab True if the transcription is accepted by the grammar, False otherwise
467   * @return ESR_INVALID_ARGUMENT if self is null
468   */
469  ESR_ReturnCode(*logWaveformData)(struct SR_Recognizer_t* self,
470                                   const LCHAR* waveformFilename,
471                                   const LCHAR* transcription,
472                                   const double bos,
473                                   const double eos,
474                                   ESR_BOOL isInvocab);
475
476  /**
477   * Associates a locking function with the recognizer. This function is used to
478   * protect internal data from multithreaded access.
479   *
480   * @param self SR_Recognizer handle
481   * @param function Locking function
482   * @param data Function data
483   * @return ESR_INVALID_ARGUMENT if self is null
484   */
485  ESR_ReturnCode(*setLockFunction)(struct SR_Recognizer_t *self, SR_RecognizerLockFunction function, void* data);
486  /**
487   * Indicates if signal is getting clipped.
488   *
489   * @param self SR_Recognizer handle
490   * @param isClipping [out] Result value
491   * @return ESR_INVALID_ARGUMENT if self is null
492   */
493  ESR_ReturnCode(*isSignalClipping)(struct SR_Recognizer_t* self, ESR_BOOL* isClipping);
494  /**
495   * Indicates if signal has a DC-offset component.
496   *
497   * @param self SR_Recognizer handle
498   * @param isDCOffset [out] Result value
499   * @return ESR_INVALID_ARGUMENT if self is null
500   */
501  ESR_ReturnCode(*isSignalDCOffset)(struct SR_Recognizer_t* self, ESR_BOOL* isDCOffset);
502  /**
503   * Indicates if signal is noisy.
504   *
505   * @param self SR_Recognizer handle
506   * @param isNoisy [out] Result value
507   * @return ESR_INVALID_ARGUMENT if self is null
508   */
509  ESR_ReturnCode(*isSignalNoisy)(struct SR_Recognizer_t* self, ESR_BOOL* isNoisy);
510  /**
511   * Indicates if speech contained within the signal is too quiet.
512   *
513   * @param self SR_Recognizer handle
514   * @param isTooQuiet [out] Result value
515   * @return ESR_INVALID_ARGUMENT if self is null
516   */
517  ESR_ReturnCode(*isSignalTooQuiet)(struct SR_Recognizer_t* self, ESR_BOOL* isTooQuiet);
518  /**
519   * Indicates if there are too few samples in the signal for a proper recognition.
520   *
521   * @param self SR_Recognizer handle
522   * @param isTooFewSamples [out] Result value
523   * @return ESR_INVALID_ARGUMENT if self is null
524   */
525  ESR_ReturnCode(*isSignalTooFewSamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooFewSamples);
526  /**
527   * Indicates if there are too many samples in the signal for a proper recognition.
528   *
529   * @param self SR_Recognizer handle
530   * @param isTooManySamples [out] Result value
531   * @return ESR_INVALID_ARGUMENT if self is null
532   */
533  ESR_ReturnCode(*isSignalTooManySamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooManySamples);
534}
535SR_Recognizer;
536
537/**
538 * Starts recognition.
539 *
540 * @param self SR_Recognizer handle
541 * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer,
542 * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason
543 */
544SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStart(SR_Recognizer* self);
545/**
546 * Stops the recognizer and invalidates the recognition result object.
547 * Calling this function before the recognizer receives the last frame causes the recognition
548 * to abort.
549 *
550 * @param self SR_Recognizer handle
551 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
552 */
553SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStop(SR_Recognizer* self);
554
/**
 * @name Recognizer Setup operations
 *
 * @{
 */

561/**
562 * Create a new recognizer.
563 *
564 * @param self SR_Recognizer handle
565 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory;
566 * ESR_INVALID_STATE if an internal error occurs
567 */
568SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self);
569/**
570 * Destroy a recognizer.
571 *
572 * @param self SR_Recognizer handle
573 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
574 */
575SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDestroy(SR_Recognizer* self);
576/**
577 * Associates a set of models with the recognizer. All grammars must use models consistently.
578 *
579 * @param self SR_Recognizer handle
580 * @see SR_RecognizerCheckGrammarConsistency
581 * @return ESR_INVALID_ARGUMENT if self is null
582 */
583SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetup(SR_Recognizer* self);
584/**
585 * Unconfigures recognizer.
586 *
587 * @param self SR_Recognizer handle
588 * @return ESR_INVALID_ARGUMENT if self is null
589 */
590SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerUnsetup(SR_Recognizer* self);
591/**
592 * Indicates whether recognizer is configured for use.
593 *
594 * @param self SR_Recognizer handle
595 * @param isSetup True if recognizer is configured
596 * @return ESR_INVALID_ARGUMENT if self is null
597 */
598SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSetup(SR_Recognizer* self, ESR_BOOL* isSetup);
599
/**
 * @}
 *
 * @name Recognizer parameter operations
 *
 * @{
 */

608/**
609 * Returns copy of LCHAR recognition parameter.
610 *
611 * @param self SR_Recognizer handle
612 * @param key Parameter name
613 * @param value [out] Used to hold the parameter value
614 * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW,
615 *            the required length is returned in this variable.
616 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
617 * type LCHAR*
618 */
619SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value, size_t* len);
620/**
621 * Return copy of size_t recognition parameter.
622 *
623 * @param self SR_Recognizer handle
624 * @param key Parameter name
625 * @param value Used to hold the parameter value
626 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
627 * type size_t
628 */
629SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t* value);
630/**
631 * Return copy of BOOL recognition parameter.
632 *
633 * @param self SR_Recognizer handle
634 * @param key Parameter name
635 * @param value Used to hold the parameter value
636 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
637 * type bool
638 */
639SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value);
640/**
641 * Sets LCHAR* recognition parameters.
642 *
643 * Key:             Description of associated value
644 *
645 * VoiceEnrollment       If "true", the next recognition will produce data required
646 *                              for Nametag support (i.e. Aurora bitstream).
647 *
648 * @param self SR_Recognizer handle
649 * @param key Parameter name
650 * @param value Parameter value
651 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
652 */
653SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value);
654/**
655 * Sets size_t recognition parameter.
656 *
657 * @param self SR_Recognizer handle
658 * @param key Parameter name
659 * @param value Parameter value
660 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
661 */
662SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t value);
663/**
664 * Sets BOOL recognition parameter.
665 *
666 * @param self SR_Recognizer handle
667 * @param key Parameter name
668 * @param value Parameter value
669 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
670 */
671SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value);
672
/**
 * @}
 *
 * @name Recognizer rule Setup/Activation operations
 *
 * @{
 */

681/**
682 * Recognizer may be set up with multiple Grammars and multiple rules. All grammars
683 * must be unsetup before the recognizer can be destroyed.
684 * A pre-compiled Grammar should have undergone a model consistency check with the
685 * recognizer prior to this call.
686 *
687 * @param self SR_Recognizer handle
688 * @param grammar Grammar containing rule
689 * @param ruleName Name of rule to associate with recognizer
690 * @see SR_GrammarCheckModelConsistency
691 * @return ESR_INVALID_ARGUMENT if self is null
692 */
693SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetupRule(SR_Recognizer* self,
694                                                          struct SR_Grammar_t* grammar,
695    const LCHAR* ruleName);
696/**
697 * Indicates if Recognizer is configured with any rules within the specified Grammar.
698 *
699 * @param self SR_Recognizer handle
700 * @param hasSetupRules True if the Recognizer is configured for the Grammar
701 * @return ESR_INVALID_ARGUMENT if self is null
702 */
703SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerHasSetupRules(SR_Recognizer* self,
704    ESR_BOOL* hasSetupRules);
705/**
706 * Activates rule in recognizer.
707 *
708 * @param self SR_Recognizer handle
709 * @param grammar Grammar containing rule
710 * @param ruleName Name of rule
711 * @param weight Relative weight to assign to self grammar vs. other activated grammars.
712 *               Values: Integers 0-2^31.
713 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer,
714 * or if the rule could not be setup, or if the acoustic models could not be setup;
715 * ESR_BUFFER_OVERFLOW if ruleName is too long
716 */
717SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerActivateRule(SR_Recognizer* self,
718                                                             struct SR_Grammar_t* grammar,
719    const LCHAR* ruleName,
720    unsigned int weight);
721/**
722 * Deactivates rule in recognizer.
723 *
724 * @param self SR_Recognizer handle
725 * @param grammar Grammar containing rule
726 * @param ruleName Name of rule
727 * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated
728 */
729SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateRule(SR_Recognizer* self,
730                                                               struct SR_Grammar_t* grammar,
731    const LCHAR* ruleName);
732
733/**
734 * Deactivates all grammar rule in recognizer.
735 *
736 * @param self SR_Recognizer handle
737 * @return ESR_INVALID_ARGUMENT if self is null
738 */
739SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateAllRules(SR_Recognizer* self);
740
741/**
742 * Indicates if rule is active in recognizer.
743 *
744 * @param self SR_Recognizer handle
745 * @param grammar Grammar containing rule
746 * @param ruleName Name of rule
747 * @param isActiveRule True if rule is active
748 * @return ESR_INVALID_ARGUMENT if self is null
749 */
750SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsActiveRule(SR_Recognizer* self,
751                                                             struct SR_Grammar_t* grammar,
752    const LCHAR* ruleName,
753    ESR_BOOL* isActiveRule);
754/**
755 * Ensure the model usage in a pre-compiled grammar is consistent with the models
756 * that are associated with the Recognizer. You must first have called Recognizer_Setup().
757 *
758 * @param self SR_Recognizer handle
759 * @param grammar Grammar to check against
760 * @param isConsistent True if rule is consistent
761 * @return ESR_INVALID_ARGUMENT if self is null
762 */
763SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCheckGrammarConsistency(SR_Recognizer* self,
764                                                                        struct SR_Grammar_t* grammar,
765    ESR_BOOL* isConsistent);
/**
 * @}
 *
 * @name Recognizer Advance operations
 *
 * @{
 */

774/**
775 * Get audio into the recognizer.
776 *
777 * We decouple the Audio and frontend processing from the Recognizer processing via an
778 * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least
779 * as fast as real time so that voicing events are not unduly delayed. The audio buffer size
780 * must be at least one frame buffer's worth and some reasonable maximum size for synchronous
781 * behaviour. This function may be called independently of Recognizer_Advance.
782 *
783 * @param self SR_Recognizer handle
784 * @param buffer Buffer containing audio data
785 * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow,
786 *                            ESR_BUFFER_OVERFLOW is returned and this value holds the actual
787 *                            amount of samples that were pushed.
788 * @param isLast Indicates if the audio frame is the last one in this recognition
789 * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't
790 * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is
791 * full
792 */
793SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerPutAudio(SR_Recognizer* self, asr_int16_t* buffer,
794    size_t* bufferSize, ESR_BOOL isLast);
795/**
796 * Advance the recognizer by at least one utterance frame. The number of frames advanced
797 * depends on the underlying definition. We anticipate that the recognizer will keep up with
798 * the supplied audio buffers when waiting for voicing. After this point, the number of frames
799 * may be one (for our default frame-advance mode) or it may be more if the synchronous nature
800 * of this operation is not considered a problem. The recognizer may be advanced independently
801 * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data.
802 * A stop condition could be an appropriate consequence.
803 *
804 * @param self Recognizer handle
805 * @param status Resulting recognizer status
806 * @param type Resulting recognition result type
807 * @param result Resulting recognizer result
808 * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs
809 */
810SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerAdvance(SR_Recognizer* self,
811    SR_RecognizerStatus* status,
812    SR_RecognizerResultType* type,
813    SR_RecognizerResult** result);
/**
 * @}
 */
817
818/**
819 * Log recognizer-related event token.
820 *
821 * @param self SR_Recognizer handle
822 * @param token Token name
823 * @param value Value to be logged
824 * @return ESR_INVALID_ARGUMENT if self is null
825 */
826SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogToken(SR_Recognizer* self, const LCHAR* token, const LCHAR* value);
827
828/**
829 * Log recognizer-related event token integer.
830 *
831 * @param self SR_Recognizer handle
832 * @param token Token name
833 * @param value Value to be logged
834 * @return ESR_INVALID_ARGUMENT if self is null
835 */
836SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogTokenInt(SR_Recognizer* self, const LCHAR* token, int value);
837
838/**
839 * Log recognizer-related event and dump all previously accumulated tokens since last event to
840 * log.
841 *
842 * @param self SR_Recognizer handle
843 * @param event Event name
844 * @return ESR_INVALID_ARGUMENT if self is null
845 */
846SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogEvent(SR_Recognizer* self, const LCHAR* event);
847
848/**
849 * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions)
850 * and it is up to the application to decided when the session ends and a new one begins (e.g.
851 * timeout, number of recognitions, etc.)
852 *
853 * @param self SR_Recognizer handle
854 * @param sessionName Session name
855 * @return ESR_INVALID_ARGUMENT if self is null
856 */
857SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionStart(SR_Recognizer* self, const LCHAR* sessionName);
858
859/**
860 * Log the end of a log session.
861 *
862 * @param self SR_Recognizer handle
863 * @return ESR_INVALID_ARGUMENT if self is null
864 */
865SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionEnd(SR_Recognizer* self);
866
867/**
868 * Log data about a waveform obtained from a TCP file. This function is not called
869 * when doing live recognition.
870 *
871 * @param self SR_Recognizer handle
872 * @param waveformFilename Session name
873 * @param transcription Transcription for the utterance
874 * @param bos Beginning of speech (seconds)
875 * @param eos End of speech (seconds)
876 * @param isInvocab True if the transcription is accepted by the grammar, False otherwise
877 * @return ESR_INVALID_ARGUMENT if self is null
878 */
879SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogWaveformData(SR_Recognizer* self,
880    const LCHAR* waveformFilename,
881    const LCHAR* transcription,
882    const double bos,
883    const double eos,
884    ESR_BOOL isInvocab);
885
886
887/**
888 * Loads utterance from file.
889 *
890 * @param self SR_Recognizer handle
891 * @param filename File to read from
892 * @return ESR_INVALID_ARGUMENT if self is null
893 */
894SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadUtterance(SR_Recognizer* self, const LCHAR* filename);
895/**
896 * Loads utterance from WAVE file.
897 *
898 * @param self SR_Recognizer handle
899 * @param filename WAVE file to read from
900 * @return ESR_INVALID_ARGUMENT if self is null
901 */
902SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadWaveFile(SR_Recognizer* self, const LCHAR* filename);
903
904/**
905 * Associates a locking function with the recognizer. This function is used to
906 * protect internal data from multithreaded access.
907 *
908 * @param self SR_Recognizer handle
909 * @param function Locking function
910 * @param data Function data
911 * @return ESR_INVALID_ARGUMENT if self is null
912 */
913SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetLockFunction(SR_Recognizer* self,
914    SR_RecognizerLockFunction function,
915    void* data);
916
/**
 *
 * @name Signal quality metrics
 *
 * @{
 */
923
924/**
925 * Indicates if signal is getting clipped.
926 *
927 * @param self SR_Recognizer handle
928 * @param isClipping [out] Result value
929 * @return ESR_INVALID_ARGUMENT if self is null
930 */
931SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalClipping(SR_Recognizer* self, ESR_BOOL* isClipping);
932/**
933 * Indicates if signal has a DC-offset component.
934 *
935 * @param self SR_Recognizer handle
936 * @param isDCOffset [out] Result value
937 * @return ESR_INVALID_ARGUMENT if self is null
938 */
939SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalDCOffset(SR_Recognizer* self, ESR_BOOL* isDCOffset);
940/**
941 * Indicates if signal is noisy.
942 *
943 * @param self SR_Recognizer handle
944 * @param isNoisy [out] Result value
945 * @return ESR_INVALID_ARGUMENT if self is null
946 */
947SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalNoisy(SR_Recognizer* self, ESR_BOOL* isNoisy);
948/**
949 * Indicates if speech contained within the signal is too quiet.
950 *
951 * @param self SR_Recognizer handle
952 * @param isTooQuiet [out] Result value
953 * @return ESR_INVALID_ARGUMENT if self is null
954 */
955SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooQuiet(SR_Recognizer* self, ESR_BOOL* isTooQuiet);
956/**
957 * Indicates if there are too few samples in the signal for a proper recognition.
958 *
959 * @param self SR_Recognizer handle
960 * @param isTooFewSamples [out] Result value
961 * @return ESR_INVALID_ARGUMENT if self is null
962 */
963SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooFewSamples(SR_Recognizer* self, ESR_BOOL* isTooFewSamples);
964/**
965 * Indicates if there are too many samples in the signal for a proper recognition.
966 *
967 * @param self SR_Recognizer handle
968 * @param isTooManySamples [out] Result value
969 * @return ESR_INVALID_ARGUMENT if self is null
970 */
971SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooManySamples(SR_Recognizer* self, ESR_BOOL* isTooManySamples);
972
973/**
974 * Changes the sample rate of audio.
975 *
976 * @param self SR_Recognizer handle
977 * @param new_sample_rate [in] New Sample Rate
978 * @return ESR_ReturnCode if self is null
979 */
980SREC_RECOGNIZER_API ESR_ReturnCode SR_Recognizer_Change_Sample_Rate ( SR_Recognizer *self, size_t new_sample_rate );
981
/**
 * @}
 */
985
/**
 * @}
 */
989
990
991#endif /* __SR_RECOGNIZER_H */
992