/*---------------------------------------------------------------------------*
 *  srec_initialize.c                                                        *
 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *      http://www.apache.org/licenses/LICENSE-2.0                           *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/

#ifndef _RTT
#include "pstdio.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "passert.h"

#include "portable.h"

#include "hmm_desc.h"
#include "utteranc.h"
#include "hmmlib.h"

#include "srec_sizes.h"
#include "srec.h"
#include "word_lattice.h"
#include "swimodel.h"

#include "c42mul.h"

/* This file contains the code that initializes the srec data structures. */

/* Allocates an srec.

   The input args come from the config and are:

    int viterbi_prune_thresh;    score-based pruning threshold - only keep paths
                                 within this delta of the best cost

    int max_hmm_tokens;          controls the maximum number of HMMs alive in any
                                 frame.  If the number is exceeded, pruning gets
                                 tightened.  So, this threshold can be used to
                                 trade off accuracy for computation and memory.

    int max_fsmnode_tokens;      controls the maximum number of FSM nodes alive
                                 in any frame.  If the number is exceeded, pruning
                                 gets tightened.  So, this threshold can be used
                                 to trade off accuracy for computation and memory.

    int max_word_tokens;         controls the maximum number of word tokens kept
                                 in the word lattice.  If the number is exceeded,
                                 the word lattice is pruned more tightly (fewer
                                 word ends per frame).

    int max_altword_tokens;      controls the maximum number of alternative paths
                                 to propagate for proper nbest.

    int num_wordends_per_frame;  controls the size of the word lattice - the
                                 number of word ends to keep at each time frame.

    int max_fsm_nodes;           allocation size of a few arrays in the search -
                                 needs to be big enough to handle any grammar the
                                 search needs to run.  Initialization fails if the
                                 number is exceeded.

    int max_fsm_arcs;            allocation size of a few arrays in the search -
                                 needs to be big enough to handle any grammar the
                                 search needs to run.  Initialization fails if the
                                 number is exceeded.

   An illustrative call is sketched below.
*/
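/* A minimal usage sketch, disabled from compilation: example_allocate() is a
   hypothetical helper and the parameter values below are illustrative
   placeholders only (not tuned or recommended settings); real values come
   from the recognizer configuration.  allocate_recognition() returns 0 on
   success and 1 if a parameter is out of range. */
#if 0
static int example_allocate(multi_srec* recm)
{
  return allocate_recognition(recm,
                              /* viterbi_prune_thresh   */  5000,
                              /* max_hmm_tokens         */  1500,
                              /* max_fsmnode_tokens     */  1000,
                              /* max_word_tokens        */   300,
                              /* max_altword_tokens     */   100,
                              /* num_wordends_per_frame */    10,
                              /* max_fsm_nodes          */  3000,
                              /* max_fsm_arcs           */ 10000,
                              /* max_frames             */  1000,
                              /* max_model_states       */  1000,
                              /* max_searches           */     2);
}
#endif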

static void allocate_recognition1(srec *rec,
                                  int viterbi_prune_thresh,  /* score-based pruning threshold - only keep paths within this delta of best cost */
                                  int max_hmm_tokens,
                                  int max_fsmnode_tokens,
                                  int max_word_tokens,
                                  int max_altword_tokens,
                                  int num_wordends_per_frame,
                                  int max_frames,
                                  int max_model_states)
{
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("allocating recognition arrays2 prune %d max_hmm_tokens %d max_fsmnode_tokens %d max_word_tokens %d max_altword_tokens %d max_wordends_per_frame %d\n",
              viterbi_prune_thresh,
              max_hmm_tokens,
              max_fsmnode_tokens,
              max_word_tokens,
              max_altword_tokens,
              num_wordends_per_frame);
#endif
  rec->current_model_scores = (costdata*) CALLOC_CLR(max_model_states, sizeof(costdata), "search.srec.current_model_scores"); /* FIX - either get NUM_MODELS from acoustic models, or check this someplace to make sure we have enough room */
  rec->num_model_slots_allocated = (modelID)max_model_states;

  rec->fsmarc_token_array_size = (stokenID)max_hmm_tokens;

  rec->fsmarc_token_array = (fsmarc_token*) CALLOC_CLR(rec->fsmarc_token_array_size, sizeof(fsmarc_token), "search.srec.fsmarc_token_array");
  rec->max_new_states = (stokenID)max_hmm_tokens;

  rec->word_token_array = (word_token*) CALLOC_CLR(max_word_tokens, sizeof(word_token), "search.srec.word_token_array");
  rec->word_token_array_size = (wtokenID)max_word_tokens;
  /* todo: change this to a bit array later */
  rec->word_token_array_flags = (asr_int16_t*) CALLOC_CLR(max_word_tokens, sizeof(asr_int16_t), "search.srec.word_token_array_flags");

  rec->fsmnode_token_array = (fsmnode_token*) CALLOC_CLR(max_fsmnode_tokens, sizeof(fsmnode_token), "search.srec.fsmnode_token_array");
  rec->fsmnode_token_array_size = (ftokenID)max_fsmnode_tokens;

  rec->altword_token_array = (altword_token*) CALLOC_CLR(max_altword_tokens, sizeof(altword_token), "search.srec.altword_token_array");
  rec->altword_token_array_size = (wtokenID)max_altword_tokens;

  rec->prune_delta = (costdata)viterbi_prune_thresh;

  rec->max_frames   = (frameID)max_frames;
  rec->best_model_cost_for_frame = (costdata*)CALLOC_CLR(max_frames, sizeof(costdata), "search.srec.best_model_cost_for_frame");
  rec->word_lattice = allocate_word_lattice((frameID)max_frames);

  rec->word_priority_q = allocate_priority_q(num_wordends_per_frame);
  rec->best_fsmarc_token = MAXstokenID;

#define ASTAR_NBEST_LEN 10
  rec->astar_stack = astar_stack_make(rec, ASTAR_NBEST_LEN);
  rec->context = NULL;
}

static int check_parameter_range(int parval, int parmin, int parmax, const char* parname)
{
  if (parval < parmin || parval > parmax)
  {
    log_report("Error: %s value %d is out-of-range [%d,%d]\n", parname,
               parval, parmin, parmax);
    return 1;
  }
  else
  {
    return 0;
  }
}

int allocate_recognition(multi_srec *rec,
                         int viterbi_prune_thresh,  /* score-based pruning threshold - only keep paths within this delta of best cost */
                         int max_hmm_tokens,
                         int max_fsmnode_tokens,
                         int max_word_tokens,
                         int max_altword_tokens,
                         int num_wordends_per_frame,
                         int max_fsm_nodes,
                         int max_fsm_arcs,
                         int max_frames,
                         int max_model_states,
                         int max_searches)
{
  int i;

  if (check_parameter_range(max_fsm_nodes, 1, MAXnodeID, "max_fsm_nodes"))
    return 1;
  if (check_parameter_range(max_fsm_arcs, 1, MAXarcID, "max_fsm_arcs"))
    return 1;
  if (check_parameter_range(max_frames, 1, MAXframeID, "max_frames"))
    return 1;
  if (check_parameter_range(max_model_states, 1, MAXmodelID, "max_model_states"))
    return 1;
  if (check_parameter_range(max_hmm_tokens, 1, MAXstokenID, "max_hmm_tokens"))
    return 1;
  if (check_parameter_range(max_fsmnode_tokens, 1, MAXftokenID, "max_fsmnode_tokens"))
    return 1;
  if (check_parameter_range(viterbi_prune_thresh, 1, MAXcostdata, "viterbi_prune_thresh"))
    return 1;
  if (check_parameter_range(max_altword_tokens, 0, MAXftokenID, "max_altword_tokens"))
    return 1;
  if (check_parameter_range(max_searches, 1, 2, "max_searches"))
    return 1;

  rec->rec = (srec*)CALLOC_CLR(max_searches, sizeof(srec), "search.srec.base");
  rec->num_allocated_recs = max_searches;
  rec->num_swimodels      = 0;

  /* best_token_for_arc and best_token_for_node are shared across
     multiple searches */
  rec->best_token_for_arc = (stokenID*)CALLOC_CLR(max_fsm_arcs, sizeof(stokenID), "search.srec.best_token_for_arc");
  rec->max_fsm_arcs = (arcID)max_fsm_arcs;

  rec->best_token_for_node = (ftokenID*)CALLOC_CLR(max_fsm_nodes, sizeof(ftokenID), "search.srec.best_token_for_node");
  rec->max_fsm_nodes = (nodeID)max_fsm_nodes;

  /* cost offsets and accumulated cost offsets are pooled across the
     different searches; this saves memory and lets each search know
     its total scores */
  rec->cost_offset_for_frame = (costdata*)CALLOC_CLR(max_frames, sizeof(costdata), "search.srec.current_best_costs");
  rec->accumulated_cost_offset = (bigcostdata*)CALLOC_CLR(max_frames, sizeof(bigcostdata), "search.srec.accumulated_cost_offset");
  rec->max_frames = (frameID)max_frames;
  for (i = 0; i < max_frames; i++)
    rec->accumulated_cost_offset[i] = 0;

  /* now copy the shared data down to individual recogs */
  for (i = 0; i < rec->num_allocated_recs; i++)
  {
    allocate_recognition1(&rec->rec[i], viterbi_prune_thresh, max_hmm_tokens, max_fsmnode_tokens, max_word_tokens, max_altword_tokens, num_wordends_per_frame, max_frames, max_model_states);
    rec->rec[i].best_token_for_node     = rec->best_token_for_node;
    rec->rec[i].max_fsm_nodes           = rec->max_fsm_nodes;
    rec->rec[i].best_token_for_arc      = rec->best_token_for_arc;
    rec->rec[i].max_fsm_arcs            = rec->max_fsm_arcs;
    rec->rec[i].max_frames              = rec->max_frames;
    rec->rec[i].cost_offset_for_frame   = rec->cost_offset_for_frame;
    rec->rec[i].accumulated_cost_offset = rec->accumulated_cost_offset;
    rec->rec[i].id = (asr_int16_t)i;
  }
  rec->eos_status = VALID_SPEECH_NOT_YET_DETECTED;
  return 0;
}


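/* Frees the per-search arrays owned by a single srec.  The arrays shared
   across searches (best_token_for_arc, best_token_for_node,
   cost_offset_for_frame and accumulated_cost_offset) are owned by the
   multi_srec and are released once, in free_recognition() below. */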
static void free_recognition1(srec *rec)
{
  FREE(rec->current_model_scores);
  FREE(rec->fsmarc_token_array);
  FREE(rec->word_token_array);
  FREE(rec->word_token_array_flags);
  FREE(rec->fsmnode_token_array);
  FREE(rec->altword_token_array);
  FREE(rec->best_model_cost_for_frame);
  destroy_word_lattice(rec->word_lattice);
  free_priority_q(rec->word_priority_q);
  astar_stack_destroy(rec);
}

void free_recognition(multi_srec *rec)
{
  int i;
  for (i = 0; i < rec->num_allocated_recs; i++)
    free_recognition1(&rec->rec[i]);
  FREE(rec->accumulated_cost_offset);
  FREE(rec->cost_offset_for_frame);
  FREE(rec->best_token_for_node);
  FREE(rec->best_token_for_arc);
  FREE(rec->rec);
}