1/*---------------------------------------------------------------------------*
2 *  srec_eosd.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include"portable.h"
21#include"passert.h"
22#include"srec.h"
23#include"srec_eosd.h"
24#include"srec_context.h"
25#include"word_lattice.h"
26
27void srec_eosd_allocate(srec_eos_detector_parms** peosd,
28                        int eos_costdelta,
29                        int opt_eos_costdelta,
30                        int terminal_timeout,
31                        int optional_terminal_timeout,
32                        int non_terminal_timeout,
33                        int max_speech_duration)
34{
35  srec_eos_detector_parms* eosd;
36  eosd = (srec_eos_detector_parms*)CALLOC(1, sizeof(srec_eos_detector_parms), "search.endpointer");
37  eosd->eos_costdelta        = (frameID)eos_costdelta;
38  eosd->opt_eos_costdelta    = (frameID)opt_eos_costdelta;
39  eosd->endnode_timeout      = (frameID)terminal_timeout;
40  eosd->optendnode_timeout   = (frameID)optional_terminal_timeout;
41  eosd->internalnode_timeout = (frameID)non_terminal_timeout;
42  eosd->inspeech_timeout     = (frameID)max_speech_duration;
43  *peosd = eosd;
44}
45
46void srec_eosd_destroy(srec_eos_detector_parms* eosd)
47{
48  FREE(eosd);
49}
50
/* The current algorithm does not make use of most of the frmcnt counters;
   rather, we look at the eos frame from the final end node search state
   and compare it with the current frame.  The new method is less sensitive
   to background noise.

   The 1.9 method had a blatant bug in that we were resetting the
   optendnode_frmcnt when there were no live alternative tokens, i.e.
   xftoken == NULL was causing a reset!
*/
60
61void srec_eosd_state_reset(srec_eos_detector_state* eosd_state)
62{
63  eosd_state->endnode_frmcnt = 0;
64  eosd_state->optendnode_frmcnt = 0;
65  eosd_state->internalnode_frmcnt = 0;
66  eosd_state->inspeech_frmcnt = 0;
67  eosd_state->internalnode_node_index = MAXnodeID;
68}
69
70EOSrc srec_check_end_of_speech_end(srec* rec)
71{
72  EOSrc rc = SPEECH_MAYBE_ENDED;
73  return rc;
74}
75
76EOSrc srec_check_end_of_speech(srec_eos_detector_parms* eosd_parms, srec* rec)
77{
78  nodeID end_node;
79  EOSrc rc = VALID_SPEECH_CONTINUING;
80  bigcostdata eos_cost_margin;
81  bigcostdata opteos_cost_margin;
82  word_token* last_wtoken;
83  int nframes_since_eos;
84
85  fsmnode_token *ftoken, *eftoken, *oeftoken, *xftoken;
86  ftokenID ftoken_index, eftoken_index, oeftoken_index, xftoken_index;
87  costdata wrapup_cost = rec->context->wrapup_cost;
88  srec_eos_detector_state* eosd_state = &rec->eosd_state;
89
90  if (rec->current_search_frame == 1)
91    srec_eosd_state_reset(eosd_state);
92
93  end_node = rec->context->end_node;
94  eftoken_index = rec->best_token_for_node[ end_node];
95  if (eftoken_index != MAXftokenID)
96    eftoken = &rec->fsmnode_token_array[ eftoken_index];
97  else
98    eftoken = NULL;
99
100  xftoken_index  = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
101  if (xftoken_index != MAXftokenID)
102    xftoken = &rec->fsmnode_token_array[ xftoken_index];
103  else
104    xftoken = NULL;
105
106  oeftoken_index = rec->current_best_ftoken_index[NODE_INFO_OPTENDN];
107  if (oeftoken_index != MAXftokenID)
108    oeftoken = &rec->fsmnode_token_array[ oeftoken_index];
109  else
110    oeftoken = NULL;
111
112
113  if (rec->srec_ended)
114    rc = SPEECH_MAYBE_ENDED;
115  else if (rec->current_search_frame >= rec->word_lattice->max_frames - 1
116           || rec->current_search_frame >= eosd_parms->inspeech_timeout)
117  {
118    /* here we will need to differentiate max_frames from
119       num_frames_allocated */
120    if (eftoken_index != MAXftokenID)
121      rc = SPEECH_ENDED;
122    else
123      rc = SPEECH_TOO_LONG;
124  }
125  else
126  {
127
128    /* reset the internal counter? */
129    ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
130    if (ftoken_index != MAXftokenID)
131    {
132      ftoken = &rec->fsmnode_token_array[ ftoken_index];
133      if (eosd_state->internalnode_node_index != ftoken->FSMnode_index)
134      {
135        eosd_state->internalnode_node_index = ftoken->FSMnode_index;
136        eosd_state->internalnode_frmcnt = 1;
137      }
138      else
139      {
140        if (ftoken->word != rec->context->beg_silence_word)
141          eosd_state->internalnode_frmcnt++;
142      }
143    }
144    else
145    {
146      eosd_state->internalnode_frmcnt = 1;
147      eosd_state->internalnode_node_index = MAXnodeID;
148    }
149
150    /* nframes since eos */
151    if (eftoken)
152    {
153      last_wtoken = NULL;
154      if (eftoken->word_backtrace != MAXwtokenID)
155      {
156        last_wtoken = &rec->word_token_array[eftoken->word_backtrace];
157        nframes_since_eos = rec->current_search_frame - last_wtoken->end_time;
158      }
159      else
160        nframes_since_eos = 0;
161    }
162    else
163      nframes_since_eos = 0;
164
165    /* eos cost margin */
166    if (!eftoken)
167    {
168      eos_cost_margin = 0;
169    }
170    else if (!oeftoken && !xftoken)
171    {
172      eos_cost_margin = MAXcostdata;
173    }
174    else if (!oeftoken)
175    {
176      eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
177    }
178    else if (!xftoken)
179    {
180      eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
181    }
182    else if (oeftoken->cost > eftoken->cost)
183    {
184      eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
185    }
186    else
187    { /* if(oeftoken->cost < eftoken->cost) */
188      eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
189    }
190
191    /* opteos cost margin */
192    if (!eftoken)
193    {
194      opteos_cost_margin = 0;
195    }
196    else if (!oeftoken)
197    {
198      opteos_cost_margin = 0;
199    }
200    else if (!xftoken)
201    {
202      opteos_cost_margin = MAXcostdata;
203    }
204    else
205    {
206      opteos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
207    }
208
209    if (eftoken)
210    {
211      if (oeftoken && nframes_since_eos > eosd_parms->optendnode_timeout
212          && opteos_cost_margin > eosd_parms->eos_costdelta)
213      {
214        rc = SPEECH_ENDED;
215
216      }
217      else if (!oeftoken && nframes_since_eos > eosd_parms->endnode_timeout
218               && eos_cost_margin > eosd_parms->eos_costdelta)
219      {
220        rc = SPEECH_ENDED;
221
222      }
223      else if (nframes_since_eos > eosd_parms->optendnode_timeout
224               && eos_cost_margin > eosd_parms->eos_costdelta)
225      {
226        rc = SPEECH_ENDED;
227
228      }
229      else
230      {
231        rc = VALID_SPEECH_CONTINUING;
232      }
233    }
234
235    /* reached internal timeout, ie at same node for so long? */
236    if (eosd_state->internalnode_frmcnt >= eosd_parms->internalnode_timeout)
237    {
238      /* PLogMessage("eosd_state->internalnode_frmcnt %d eosd_parms->internalnode_timeout %d\n", eosd_state->internalnode_frmcnt, eosd_parms->internalnode_timeout); */
239      ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
240      ftoken = &rec->fsmnode_token_array [ ftoken_index];
241      /* sprintf(buf, "eos rec%d@%d,%d i%d> ", rec->id,
242      rec->current_search_frame, ftoken->FSMnode_index,
243      eosd_state->internalnode_frmcnt);
244      PLogMessage(buf);
245      sprint_word_token_backtrace(buf,sizeof(buf),rec,ftoken->word_backtrace);
246      PLogMessage(" %s\n", buf); */
247      rc = SPEECH_ENDED;
248    }
249  }
250
251  /* the endnode will never win against an optend node because
252     the cost at endnode is the same or worse (even wrapup_cost adjustment) */
253
254
255
256  /* so we need to check for optend nodes separately here
257     but we really need to remember best_optendnode_index, best_endnode_index
258     best_nonendnode_index */
259  return rc;
260}
261