priority_q.c revision 8fc5a7f51e62cb4ae44a27bdf4176d04adc80ede
1/*---------------------------------------------------------------------------*
2 *  priority_q.c  *
3 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include <stdlib.h>
21#include <string.h>
22#include <math.h>
23#include "passert.h"
24
25#include "portable.h"
26
27#include "hmm_desc.h"
28#include "utteranc.h"
29#include "hmmlib.h"
30
31#include "srec_sizes.h"
32#include "search_network.h"
33#include "srec.h"
34#include "word_lattice.h"
35
36#define PRINT_SEARCH_DETAILS 0
37
/* NOTE: this priority queue is currently implemented as a cost-sorted
   linked list -- FIX this (e.g. use a heap) if queue capacity grows! */
39
/* allocates a priority_q able to handle max_n entries */
41priority_q* allocate_priority_q(int max_n)
42{
43  priority_q *pq;
44
45  pq = (priority_q*) CALLOC(1, sizeof(priority_q), "search.srec.priority_q");
46  pq->max_cost_in_q = MAXcostdata;
47  pq->word_token_list = MAXwordID;
48  pq->max_in_q = (miscdata)max_n;
49  pq->num_in_q = 0;
50  return pq;
51}
52
53void free_priority_q(priority_q* pq)
54{
55  FREE(pq);
56}
57
58/*empties out priority_q*/
59
60void clear_priority_q(priority_q *pq)
61{
62  pq->max_cost_in_q = MAXcostdata;
63  pq->word_token_list = MAXwordID;
64  pq->num_in_q = 0;
65  /* Jean: what about the list of free tokens? */
66}
67/* returns the head of a linked list of all words in the priority_q.
68   Return MAXwtokenID if list is empty */
69
70wtokenID get_word_token_list(priority_q *pq, word_token *word_token_array)
71{
72  return pq->word_token_list;
73}
74
75void remove_non_end_word_from_q(srec *rec, priority_q *pq, word_token *word_token_array, nodeID end_node)
76{
77  word_token *token;
78  wtokenID *ptoken_index;
79  wtokenID old_token_index;
80
81  pq->max_cost_in_q = MAXcostdata;
82  pq->num_in_q = 0;
83  ptoken_index = &(pq->word_token_list);
84
85  while (*ptoken_index != MAXwtokenID)
86  {
87    token = &(word_token_array[*ptoken_index]);
88    if (token->end_node != end_node)
89    {
90      old_token_index = *ptoken_index;
91      *ptoken_index = token->next_token_index;
92      free_word_token(rec, old_token_index);
93      pq->max_cost_in_q = MAXcostdata; /* fix: sep9 */
94    }
95    else
96    {
97      pq->num_in_q++;
98      if ((pq->max_cost_in_q == MAXcostdata) || (token->cost > pq->max_cost_in_q))
99      {
100        pq->max_cost_in_q = token->cost;
101      }
102      ptoken_index = &(token->next_token_index);
103    }
104  }
105}
106
107int compare_histories(word_token* token1, word_token* token2,
108                      word_token* word_token_array)
109{
110  int history_for_token1 = 0;
111  int history_for_token2 = 0;
112
113  /* compare_histories() was an attempt to be smart about the priority_q,
114     in that we don't need to store two word_tokens when the two tokens
115     are the same word (obviously ending at the same frame), and with the
116     same word history.  This happens for a digit that has multiple end nodes
117     due to context-dependency.  When "history_for_token" ignores the end_node,
118     then we're all clear to save just 1 word_token, but continue propagating
119     all paths from the end nodes.  That bit of "continue propagating" is not
120     done. THE OTHER PROBLEM is that the two nodes may NOT be
121     simply different CD end models, they may be different from digit shifting!
122     We're screwed if we drop the path, unless we compare all the way back to
123     the start of utterance. */
124
125  if (token1->word != token2->word)
126    return 1;
127  if (token1->end_node != token2->end_node)
128    return 1;
129
130  if (token1->backtrace != MAXwordID)
131  {
132    history_for_token1 += token1->end_node * 1000000;
133    history_for_token1 += word_token_array[token1->backtrace].word * 10000;
134    history_for_token1 += word_token_array[token1->backtrace].end_time;
135  }
136
137  if (token2->backtrace != MAXwordID)
138  {
139    history_for_token2 += token2->end_node * 1000000;
140    history_for_token2 += word_token_array[token2->backtrace].word * 10000;
141    history_for_token2 += word_token_array[token2->backtrace].end_time;
142  }
143
144#if PRINT_SEARCH_DETAILS
145  printf("comparing history_for_token1 %d history_for_token2 %d\n",
146         history_for_token1, history_for_token2);
147#endif
148
149  if (history_for_token1 == history_for_token2)
150  {
151    return 0;
152  }
153  else
154  {
155    return 1;
156  }
157}
158
#if PRINT_SEARCH_DETAILS
/* Debug-only consistency check (compiled only when PRINT_SEARCH_DETAILS
   is nonzero): walks the queue's linked list and asserts that the cached
   count matches the actual list length, and that when the queue is full
   the cached admission threshold equals the head token's cost (the head
   is the worst token, since the list is kept sorted by decreasing cost
   by add_word_token_to_priority_q). */
void sanity_check_priority_q(priority_q* pq, word_token *word_token_array)
{
  int n = 0;
  wtokenID token_index;
  word_token* token;
  n = 0;
  /* count the tokens actually linked on the list */
  token_index = pq->word_token_list;
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    token_index = token->next_token_index;
    n++;
  }
  ASSERT(n == pq->num_in_q);
  /* when the queue is full, max_cost_in_q must mirror the head's cost */
  if (pq->num_in_q == pq->max_in_q)
  {
    token = &(word_token_array[pq->word_token_list]);
    ASSERT(pq->max_cost_in_q == token->cost);
  }
}
#endif
181
/* Adds a word token to the priority queue and returns the index of the
   token the caller must now discard:
     - MAXwtokenID        when the token was admitted and nothing evicted;
     - token_index_to_add when the token was rejected (queue full with the
       candidate no better than the worst, or a better-scoring duplicate
       with the same word/history is already queued);
     - some other index   when admitting this token displaced another
       (a worse duplicate, or the list head on overflow).
   Invariant: the list is singly linked in order of DECREASING cost, so
   the head is always the worst token; max_cost_in_q caches the head's
   cost whenever the queue is full, else MAXcostdata (no threshold). */

wtokenID add_word_token_to_priority_q(priority_q *pq, wtokenID token_index_to_add, word_token *word_token_array)
{
  word_token *token;
  word_token *token_to_add;
  wtokenID token_index, return_token_index;
  wordID word_to_add;
  costdata cost_to_add;
  wtokenID *ptoken_index;     /* walker: slot naming the current token */
  wtokenID *pplace_to_add;    /* slot where the new token gets linked in */
  wtokenID *pdelete_index;    /* slot naming a worse duplicate, if found */
  word_token *token_to_delete;

  token_to_add = &(word_token_array[token_index_to_add]);
  cost_to_add = token_to_add->cost;

#if PRINT_SEARCH_DETAILS
  printf("WORDADD PQ tokenid %d cost %d\n", token_index_to_add, cost_to_add);
  token_index = pq->word_token_list;
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    printf("WORDADD PQ token %d word %d cost %d\n", token_index, token->word, token->cost);
    token_index = token->next_token_index;
  }
#endif

  /* fast reject: queue already full and candidate no better than the
     worst queued token (the cached threshold) */
  if (cost_to_add >= pq->max_cost_in_q && pq->num_in_q >= pq->max_in_q)
  {
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ - rejecting because cost too high cost_to_add(%d) max_cost_in_q(%d) num_in_q(%d)\n",
           cost_to_add, pq->max_cost_in_q, pq->num_in_q);
#endif
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (D) returning %d\n", token_index_to_add);
    sanity_check_priority_q(pq, word_token_array);
#endif
    return token_index_to_add;
  }

  word_to_add = token_to_add->word;
  /* search for duplicate words first */
  ptoken_index = &(pq->word_token_list);
  pplace_to_add = NULL;
  pdelete_index = NULL;
  while ((*ptoken_index) != MAXwordID)
  {
    token = &word_token_array[(*ptoken_index)];

    /* duplicate = same word AND same immediate history */
    if (token->word == token_to_add->word
        && !compare_histories(token, token_to_add, word_token_array))
    {
      if (token->cost < cost_to_add)
      {
        /* don't bother adding, there's another like it on the list!
           with a better score! */
#if PRINT_SEARCH_DETAILS
        printf("WORDADD PQ - rejecting because another like it is on the list\n");
#endif
        /* TODO: when returning back on the basis that something else is better,
           we should let the caller know what to use instead, ie, make the
           distinction between no-space and something-else-better */
        /* NOTE(review): the rejected token's next pointer is aimed at the
           better duplicate before returning it -- presumably so the caller
           can follow it; confirm against call sites before changing. */
        token = &word_token_array[ token_index_to_add];
        token->next_token_index = (*ptoken_index);
        return token_index_to_add;
      }
      else
      {
        /* ok, replace the one on the list with this better scoring one! */
        pdelete_index = ptoken_index;
      }
    }
    /* remember the first cheaper token: inserting before it keeps the
       list sorted by decreasing cost */
    if (token->cost < cost_to_add && pplace_to_add == NULL)
    {
      pplace_to_add = ptoken_index;
      /* do not break, 'cuz we're still searching for a possible duplicates */
    }
    ptoken_index = &(token->next_token_index);
  }
  /* no cheaper token found: append at the tail */
  if (!pplace_to_add)
    pplace_to_add = ptoken_index;

  /* add the token by inserting in the linked list */
  token_index = *pplace_to_add;
  *pplace_to_add = token_index_to_add;
  token_to_add->next_token_index = token_index;
  pq->num_in_q++;
  /* new head of a (now) full queue: it is the worst, so it sets the threshold */
  if (pplace_to_add == &pq->word_token_list && pq->num_in_q >= pq->max_in_q)
    pq->max_cost_in_q = cost_to_add;

  /* now delete any duplicate that was found */
  if (pdelete_index)
  {
    token_index = *pdelete_index;
    token_to_delete = &word_token_array[  token_index];
    *pdelete_index = token_to_delete->next_token_index;
    pq->num_in_q--;
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (B) returning %d\n", token_index);
#endif
    return_token_index = token_index;
  }

  /* now check for max length in the queue */
  if (pq->num_in_q > pq->max_in_q)
  { /* really expecting just 1 over */
    /* evict the head: it is the worst-scoring token on the list */
    token_index = pq->word_token_list;
    token = &(word_token_array[ token_index]);
    pq->num_in_q--;
    pq->word_token_list = token->next_token_index;
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (C) returning %d\n", token_index);
#endif
    return_token_index = token_index;
  }
  else
  {
    return_token_index = MAXwtokenID;
  }
  /* refresh the cached admission threshold from the (possibly new) head */
  if (pq->num_in_q >= pq->max_in_q)
  {
    token_index = pq->word_token_list;
    token = &(word_token_array[token_index]);
    pq->max_cost_in_q = token->cost;
  }
  else
  { /* pq->num_in_q < pq->max_in_q, fixed sep9 */
    pq->max_cost_in_q = MAXcostdata;
  }
#if PRINT_SEARCH_DETAILS
  printf("WORDADD PQ (A) returning %d\n", token_index);
  sanity_check_priority_q(pq, word_token_array);
#endif
  return return_token_index;
}
321
322
323/*returns the cost threshold for the end of the priority queue.
324  If words have greater cost than this, no need to try to put them on the
325  queue*/
326
327costdata get_priority_q_threshold(priority_q *pq, word_token *word_token_array)
328{
329  return pq->max_cost_in_q;
330}
331
332
333
334
335
336