1/*---------------------------------------------------------------------------*
2 *  word_lattice.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#ifndef _RTT
21#include "pstdio.h"
22#endif
23#include <stdlib.h>
24#include <string.h>
25#include <math.h>
26#include "passert.h"
27#include "portable.h"
28
29
30#include "portable.h"
31
32#include "hmm_desc.h"
33#include "utteranc.h"
34#include "hmmlib.h"
35
36#include "srec_sizes.h"
37#include "search_network.h"
38#include "srec.h"
39#include "word_lattice.h"
40#include "astar.h"
41#include "srec_stats.h"
42#include "srec_results.h"
43
44#define PRINT_WORD_LATTICE        0
45#define PRINT_SEARCH_DETAILS      0
46
47#define TRUE_KILL_WTOKEN(WT) { WT.cost = MAXcostdata; \
48    WT.word = MAXwordID;  \
49    WT.end_time = MAXframeID; \
50    WT.end_node = MAXnodeID; \
51    WT.backtrace = MAXwtokenID; \
52  }
53
54srec_word_lattice *allocate_word_lattice(frameID max_frames)
55{
56  srec_word_lattice *wl;
57
58  wl = (srec_word_lattice*) CALLOC_CLR(1, sizeof(srec_word_lattice), "search.word_lattice.base");
59  wl->max_frames = max_frames;
60  wl->words_for_frame = (wtokenID*) CALLOC_CLR(max_frames, sizeof(wtokenID), "search.word_lattice.words");
61
62  wl->whether_sorted = (asr_int16_t*)CALLOC_CLR(max_frames,  sizeof(asr_int16_t), "search.word_lattice.sflag");
63
64  return wl;
65}
66
67void destroy_word_lattice(srec_word_lattice* wl)
68{
69  FREE(wl->words_for_frame);
70  FREE(wl->whether_sorted);
71  FREE(wl);
72}
73
74void initialize_word_lattice(srec_word_lattice* wl)
75{
76  frameID ifr;
77  for (ifr = 0; ifr < wl->max_frames; ifr++)
78  {
79    wl->words_for_frame[ifr] = MAXwtokenID;
80    wl->whether_sorted[ifr] = 0;
81  }
82}
83
84costdata lattice_best_cost_to_frame(srec_word_lattice *wl, word_token* word_token_array, frameID ifr)
85{
86  int sanity_counter = 0;
87  costdata best_cost = MAXcostdata;
88  wtokenID wtoken_index = wl->words_for_frame[ ifr];
89  while (wtoken_index != MAXwtokenID)
90  {
91    word_token* wtoken = &word_token_array[wtoken_index];
92    if (sanity_counter++ > 200) return MAXcostdata;
93    wtoken_index = wtoken->next_token_index;
94    if (best_cost > wtoken->cost)
95      best_cost = wtoken->cost;
96  }
97  return best_cost;
98}
99
100void lattice_add_word_tokens(srec_word_lattice *wl, frameID frame,
101                             wtokenID word_token_list_head)
102{
103  if (frame >= wl->max_frames)
104  {
105    log_report("lattice_add_word_tokens: max_frame not big enough\n");
106    ASSERT(0);
107  }
108  wl->words_for_frame[frame] = word_token_list_head;
109}
110
111void print_word_token_backtrace(srec* rec, wtokenID wtoken_index, char* tail)
112{
113  char* null = "NULL", *p;
114  char iwttime[8] = { 0, 0, 0, 0, 0, 0, 0, 0};
115  bigcostdata cost;
116  bigcostdata cost_for_word;
117  word_token *wtoken, *last_wtoken;
118
119  last_wtoken = NULL;
120  while (wtoken_index != MAXwtokenID)
121  {
122    wtoken = &rec->word_token_array[wtoken_index];
123    if (wtoken->word < rec->context->olabels->num_words)
124      p = rec->context->olabels->words[wtoken->word];
125    else
126      p = null;
127    ASSERT(!last_wtoken || last_wtoken->end_time > wtoken->end_time);
128    ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
129    cost = wtoken->cost + rec->accumulated_cost_offset[ wtoken->end_time];
130
131    if (wtoken->backtrace != MAXwtokenID)
132    {
133      word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
134      cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[ next_wtoken->end_time];
135    }
136    else
137    {
138      cost_for_word = cost;
139    }
140    sprintf(iwttime, "/%d", WORD_TOKEN_GET_WD_ETIME(wtoken) );
141    PLogMessage (" (%d W%d %s cost=%d/%d/%d time=%d%s node=%d)", wtoken_index, wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, iwttime, wtoken->end_node);
142    fflush(stdout);
143    ASSERT(wtoken->backtrace != wtoken_index);
144    wtoken_index = wtoken->backtrace;
145    last_wtoken = wtoken;
146  }
147  PLogMessage (tail);
148}
149
150int sprint_bword_token_backtrace(char* buf, int buflen, srec* rec, wtokenID wtoken_index)
151{
152  char* null = "NULL", *p;
153  char *pbuf = buf;
154  *pbuf = 0;
155
156  while (wtoken_index != MAXwtokenID)
157  {
158    word_token* wtoken = &rec->word_token_array[wtoken_index];
159    p = null;
160    if (wtoken->word < rec->context->olabels->num_words)
161      p = rec->context->olabels->words[wtoken->word];
162    ASSERT(pbuf + strlen(p) + 1 < buf + buflen);
163    pbuf += sprintf(pbuf, "%s ", p);
164    ASSERT(wtoken->backtrace != wtoken_index);
165
166    wtoken_index = wtoken->backtrace;
167  }
168  if (pbuf > buf && *(pbuf - 1) == ' ') *(pbuf - 1) = 0;
169  return 0;
170}
171
172#define ERROR_TRANSCRIPTION_TOO_LONG -1
173
174ESR_ReturnCode sprint_word_token_backtraceByWordID(wordID* wordIDs, size_t* len, srec* rec, wtokenID wtoken_index)
175{
176  size_t i, currentLen = 0;
177  ESR_ReturnCode rc;
178  word_token* wtoken;
179
180#if PRINT_SEARCH_DETAILS
181  printf("in get backtrace wtoken %d\n", wtoken_index);
182#endif
183
184  while (wtoken_index != MAXwtokenID)
185  {
186    if (*len <= currentLen)
187    {
188      rc = ESR_BUFFER_OVERFLOW;
189      PLogError(ESR_rc2str(rc));
190      *len = currentLen + 1;
191      goto CLEANUP;
192    }
193    wtoken = &rec->word_token_array[wtoken_index];
194    wordIDs[currentLen] = wtoken->word;
195    ++currentLen;
196
197    if (wtoken_index == wtoken->backtrace)
198    {
199      *len = 0;
200      PLogError("Result is loopy, rejecting");
201      return ESR_INVALID_STATE;
202    }
203    wtoken_index = wtoken->backtrace;
204  }
205
206  /* reverse the order */
207  for (i = 0; i < currentLen / 2; i++)
208  {
209    wordID tmp = wordIDs[i];
210    wordIDs[i] = wordIDs[(currentLen-1-i)];
211    wordIDs[(currentLen-1-i)] = tmp;
212  }
213  /* strip the pau/pau2 markers */
214  if (currentLen >= 1 && wordIDs[0] == rec->context->beg_silence_word)
215  {
216    for (i = 0; i < currentLen - 1; i++)
217      wordIDs[i] = wordIDs[i+1];
218    currentLen--;
219  }
220  if (currentLen >= 1 && wordIDs[currentLen-1] == rec->context->end_silence_word)
221    currentLen--;
222  wordIDs[currentLen] = MAXwordID;
223  *len = currentLen;
224
225  return ESR_SUCCESS;
226CLEANUP:
227  return rc;
228}
229
230int sprint_word_token_backtrace(char *transcription, int len, srec* rec, wtokenID wtoken_index)
231{
232  char *w;
233  char *from_p;
234  char *to_p;
235  char *end;
236  char *tr_end = transcription;
237  int wlen;
238
239#define SHOW_END_TIMES 1
240#if SHOW_END_TIMES
241  char buf[256/*64*/];
242#endif
243
244  *transcription = 0;
245
246#if PRINT_SEARCH_DETAILS
247  printf("in get backtrace wtoken %d\n", wtoken_index);
248#endif
249
250  while (wtoken_index != MAXwtokenID)
251  {
252    word_token* wtoken = &rec->word_token_array[wtoken_index];
253#if PRINT_SEARCH_DETAILS
254    printf("got token %d word %d\n", wtoken_index, wtoken->word);
255#endif
256
257    w = "NULL";
258    if (wtoken->word < rec->context->olabels->num_words)
259      w = rec->context->olabels->words[wtoken->word];
260#if SHOW_END_TIMES
261    /* should be defined outside because it is used outside by w */
262    /* sprintf(buf,"%s@%d.%d",w, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_time); */
263    if (strlen(w) + 12 > sizeof(buf))
264    {
265      *transcription = 0;
266      return ERROR_TRANSCRIPTION_TOO_LONG;
267    }
268    else
269    {
270      sprintf(buf, "%s@%d", w, wtoken->end_time);
271      w = &buf[0];
272    }
273#endif
274    wlen = strlen(w);
275    if (wlen + tr_end - transcription + 1 >= len)
276    {
277      *transcription = 0;
278      return ERROR_TRANSCRIPTION_TOO_LONG;
279    }
280    /*need to tack onto beginning, so move string over*/
281    from_p = tr_end;
282    to_p = tr_end + wlen + 1;
283    tr_end = to_p;
284    while (from_p >= transcription) *(to_p--) = *(from_p--);
285
286    /* add a space*/
287    *to_p = ' ';
288
289    /*add the new word*/
290    to_p = transcription;
291    end = to_p + wlen;
292
293    while (to_p < end) *(to_p++) = *(w++);
294
295    if (wtoken_index == wtoken->backtrace)
296    {
297      *transcription = 0;
298#if BUILD&BUILD_DEBUG
299      printf("Error: result is loopy, rejecting\n");
300#endif
301      return ERROR_RESULT_IS_LOOPY;
302    }
303    wtoken_index = wtoken->backtrace;
304  }
305  return 0;
306}
307
308void print_word_token(srec* rec, wtokenID wtoken_index, char* msg)
309{
310  bigcostdata cost, cost_for_word;
311  char *p = "NULL";
312  word_token* wtoken = &rec->word_token_array[wtoken_index];
313
314  PLogMessage ( msg );
315  if (wtoken->word < rec->context->olabels->num_words)
316    p = rec->context->olabels->words[wtoken->word];
317  ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
318  cost = wtoken->cost + rec->accumulated_cost_offset[wtoken->end_time];
319  if (wtoken->backtrace != MAXwtokenID)
320  {
321    word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
322    cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[next_wtoken->end_time];
323  }
324  else
325  {
326    cost_for_word = cost;
327  }
328  printf("wtoken %d W%i %s cost=%d/%d/%d time=%d/%d node=%d", wtoken_index,
329         wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_node);
330  pfflush(PSTDOUT);
331  print_word_token_backtrace(rec, wtoken->backtrace, "\n");
332}
333
334
335void print_word_token_list(srec* rec, wtokenID wtoken_index, char* msg)
336{
337#ifndef NDEBUG
338  int sanity_counter = 0;
339#endif
340  PLogMessage ( msg );
341  while (wtoken_index != MAXwtokenID)
342  {
343    word_token* wtoken = &rec->word_token_array[wtoken_index];
344    print_word_token(rec, wtoken_index, "");
345    ASSERT(sanity_counter++ < 200);
346    ASSERT(wtoken_index != wtoken->next_token_index);
347    wtoken_index = wtoken->next_token_index;
348  }
349}
350
351#define MAX_LEN 256
352void srec_get_result(srec *rec)
353{
354  srec_word_lattice *wl;
355  frameID i;
356  wtokenID token_index;
357  word_token *wtoken;
358
359#if PRINT_SEARCH_DETAILS
360  printf("in srec_get_result\n");
361#endif
362
363  wl = rec->word_lattice;
364#if PRINT_WORD_LATTICE
365  for (i = 0; i <= rec->current_search_frame; i++)
366  {
367#else
368  for (i = rec->current_search_frame; i <= rec->current_search_frame; i++)
369  {
370#endif
371
372    /* put the best choice at the top */
373    sort_word_lattice_at_frame(rec, i);
374    token_index = wl->words_for_frame[i];
375
376#if PRINT_WORD_LATTICE
377    printf("----- List of words for frame %d\n", i);
378    print_word_token_list(rec, token_index, "");
379#endif
380
381    if (i == rec->current_search_frame && token_index != MAXwtokenID)
382    {
383      wtoken =  &(rec->word_token_array[token_index]);
384      print_word_token(rec, token_index, "Final Top Choice: ");
385    }
386  }
387}
388
389static srec* WHICH_RECOG(multi_srec* rec)
390{
391  srec* return_rec = NULL;
392  costdata current_best_cost = MAXcostdata;
393  int i = 0;
394#if DO_ALLOW_MULTIPLE_MODELS
395  for (i = 0; i < rec->num_activated_recs; i++)
396  {
397#endif
398    if (current_best_cost > rec->rec[i].current_best_cost)
399    {
400      current_best_cost = rec->rec[i].current_best_cost;
401      return_rec = &rec->rec[i];
402    }
403#if DO_ALLOW_MULTIPLE_MODELS
404  }
405#endif
406  return return_rec;
407}
408
409ESR_ReturnCode srec_get_top_choice_wordIDs(multi_srec* recm, wordID* wordIDs, size_t* len)
410{
411  srec* rec = WHICH_RECOG(recm);
412  frameID end_frame;
413  srec_word_lattice* wl;
414  wtokenID token_index;
415  ESR_ReturnCode rc;
416
417  if (!rec)
418  {
419    rc = ESR_INVALID_STATE;
420    PLogError(ESR_rc2str(rc));
421    goto CLEANUP;
422  }
423
424  end_frame = rec->current_search_frame;
425  wl = rec->word_lattice;
426  token_index = wl->words_for_frame[end_frame];
427
428  if (token_index == MAXwtokenID)
429  {
430    PLogError(L("ESR_INVALID_STATE"));
431    return ESR_INVALID_STATE;
432  }
433#if PRINT_WORD_LATTICE
434  print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
435#endif
436  /* the head of the list on the last frame is always best */
437  CHKLOG(rc, sprint_word_token_backtraceByWordID(wordIDs, len, rec, token_index));
438
439  return ESR_SUCCESS;
440CLEANUP:
441  return rc;
442}
443
444int srec_get_top_choice_transcription(multi_srec* recm, char *transcription, int len, int whether_strip_slot_markers)
445{
446  int rc;
447  srec* rec = WHICH_RECOG(recm);
448  frameID end_frame;
449  srec_word_lattice* wl;
450  wtokenID token_index;
451
452  if (!rec)
453  {
454    *transcription = 0;
455    return 1;
456  }
457  if( recm->eos_status == VALID_SPEECH_NOT_YET_DETECTED)
458  {
459      *transcription = 0;
460      return 1;
461  }
462
463  end_frame = rec->current_search_frame;
464  wl = rec->word_lattice;
465  sort_word_lattice_at_frame(rec, end_frame);
466  token_index = wl->words_for_frame[end_frame];
467
468  if (token_index != MAXwtokenID)
469  {
470#if PRINT_WORD_LATTICE
471    print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
472#endif
473    /* the head of the list on the last frame is always best */
474    rc = sprint_word_token_backtrace(transcription, len, rec, token_index);
475  }
476  else
477  {
478    strcpy(transcription, "");
479    rc = 1;
480  }
481  if (whether_strip_slot_markers)
482    srec_result_strip_slot_markers(transcription);
483  return rc;
484}
485
486int srec_get_top_choice_score(multi_srec* recm, bigcostdata *cost, int do_incsil)
487{
488  srec* rec = WHICH_RECOG(recm);
489  frameID end_frame;
490  srec_word_lattice* wl;
491  wtokenID token_index;
492  word_token* wtoken;
493
494  if (!rec)
495  {
496    *cost = MAXcostdata;
497    return 1;
498  }
499
500  end_frame = rec->current_search_frame;
501  wl = rec->word_lattice;
502  token_index = wl->words_for_frame[end_frame];
503
504  if (end_frame < MAXframeID && token_index != MAXwtokenID)
505  {
506    wtoken = &rec->word_token_array[token_index];
507    *cost = wtoken->cost;
508    *cost += rec->accumulated_cost_offset[ wtoken->end_time];
509    return 0;
510  }
511  else
512  {
513    *cost = MAXcostdata;
514    return 1;
515  }
516}
517
518int srec_print_results(multi_srec *recm, int max_choices)
519{
520  char transcription[MAX_LEN];
521  bigcostdata cost;
522
523  srec_get_top_choice_transcription(recm, transcription, MAX_LEN, 1);
524  srec_get_top_choice_score(recm, &cost, SCOREMODE_INCLUDE_SILENCE);
525
526  log_report("R: %8ld %8ld %s\t%.1f\n", 0, 0, transcription, cost);
527
528  return 0;
529}
530
531/* sort the word lattice at this frame, todo: remove rec argument */
532
533#define MAX_WTOKENS_AT_FRAME 64 /* +1 for the MAXwtokenID! */
534void sort_word_lattice_at_frame(srec* rec, frameID frame)
535{
536  srec_word_lattice* wl = rec->word_lattice;
537  word_token *wtoken, *wtoken2;
538  wtokenID pwi[MAX_WTOKENS_AT_FRAME], token_index;
539  word_token* word_token_array = rec->word_token_array;
540  int i, j, npwi = 0;
541  ASSERT(rec->word_priority_q->max_in_q < MAX_WTOKENS_AT_FRAME);
542
543  ASSERT(frame < wl->max_frames);
544  if (wl->whether_sorted[frame])
545    return;
546
547  wl->whether_sorted[frame] = 1;
548
549  /* make an array of word token index addresses */
550  for (pwi[npwi] = wl->words_for_frame[frame]; pwi[npwi] != MAXwtokenID;)
551  {
552    ASSERT(npwi < MAX_WTOKENS_AT_FRAME);
553    token_index = pwi[npwi];
554    wtoken = &word_token_array[ token_index];
555    npwi++;
556    pwi[npwi] = wtoken->next_token_index;
557  }
558
559  /* sort the word token indices, bubble sort is fine */
560  for (i = 0; i < npwi; i++)
561  {
562    for (j = 0; j < (npwi - 1); j++)
563    {
564      wtoken = &word_token_array[ pwi[j]];
565      wtoken2 = &word_token_array[ pwi[j+1]];
566      if (wtoken->cost > wtoken2->cost)
567      {
568        token_index = pwi[j];
569        pwi[j] = pwi[j+1];
570        pwi[j+1] = token_index;
571      }
572    }
573  }
574
575  /*print_word_token_list(rec,wl->words_for_frame[frame],"## BEFORE SORT\n");*/
576  wl->words_for_frame[ frame] = pwi[0];
577  for (i = 0; i < npwi; i++)
578  {
579    wtoken = &word_token_array[ pwi[i]];
580    wtoken->next_token_index = pwi[i+1]; /* last points nowhwere */
581  }
582  /*print_word_token_list(rec,wl->words_for_frame[frame],"## AFTER  SORT\n");*/
583}
584
585
586/* this frees a word token, it may still have references in the lattice though */
587
588void free_word_token(srec *rec, wtokenID old_token_index)
589{
590  word_token* wtoken;
591  wtoken = &rec->word_token_array[old_token_index];
592  wtoken->next_token_index = rec->word_token_freelist;
593  rec->word_token_freelist = old_token_index;
594  TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
595}
596
597/* this frees some earlier allocated word_tokens from previous frames,
598   this makes sure we can always have some to spare for future frames */
599
600void free_word_token_from_lattice(srec *rec, wtokenID old_token_index)
601{
602  word_token* wtoken;
603  wtokenID *rtoken_index;
604  word_token* rtoken;
605
606#define CHECK_FREE_WORD_TOKEN 1
607#if CHECK_FREE_WORD_TOKEN
608  stokenID stoken_index, i;
609  ftokenID ftoken_index;
610  fsmarc_token* stoken;
611  fsmnode_token* ftoken;
612  int nerrs = 0;
613
614  stoken_index = rec->active_fsmarc_tokens;
615  for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
616  {
617    stoken = &rec->fsmarc_token_array[stoken_index];
618    for (i = 0; i < stoken->num_hmm_states; i++)
619    {
620      if (stoken->word_backtrace[i] == old_token_index)
621      {
622        printf("Error: can't delete wtoken %d cuz stoken%d.%d cost %d\n",
623               old_token_index, stoken_index, i, stoken->cost[i]);
624        nerrs++;
625      }
626    }
627  }
628
629  ftoken_index = rec->active_fsmnode_tokens;
630  for (; ftoken_index != MAXftokenID; ftoken_index = ftoken->next_token_index)
631  {
632    ftoken = &rec->fsmnode_token_array[ftoken_index];
633    if (ftoken->word_backtrace == old_token_index)
634    {
635      printf("Error: can't delete wtoken %d cuz ftoken %d cost %d\n",
636             old_token_index, ftoken_index, ftoken->cost);
637      nerrs++;
638    }
639  }
640
641  /*  wtoken = &rec->word_token_array[old_token_index];
642      for(ifr=wtoken->end_time+1; ifr>=0; ifr--) {
643      wtoken_index = rec->word_lattice->words_for_frame[ifr];
644      for( ; wtoken_index!= MAXwtokenID; wtoken_index=wtoken->next_token_index) {
645      wtoken = &rec->word_token_array[wtoken_index];
646      if(wtoken->backtrace == old_token_index) {
647      printf("Error: can't delete wtoken %d cuz wtoken %d at frame %d backtraces cost %d\n",
648      old_token_index, wtoken_index, ifr, wtoken->cost);
649      nerrs++;
650      }
651      }
652      }
653  */
654  ASSERT(nerrs == 0);
655  if (nerrs > 0)
656  {
657    print_word_token(rec, old_token_index, "Error: while deleting ");
658    return;
659  }
660#endif
661
662  wtoken = &rec->word_token_array[old_token_index];
663  /* remove from word lattice */
664  rtoken_index = &rec->word_lattice->words_for_frame[ wtoken->end_time+1];
665  for (; (*rtoken_index) != MAXwtokenID; rtoken_index = &rtoken->next_token_index)
666  {
667    rtoken = &rec->word_token_array[(*rtoken_index)];
668    if (*rtoken_index == old_token_index)
669    {
670      *rtoken_index = wtoken->next_token_index;
671      break;
672    }
673  }
674  wtoken->next_token_index = rec->word_token_freelist;
675  rec->word_token_freelist = old_token_index;
676  TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
677}
678
679int reprune_word_tokens(srec* rec, costdata current_best_cost)
680{
681  int i, keep_astar_prune;
682  arc_token* keep_arc_token_list;
683
684  stokenID stoken_index;
685  fsmarc_token* stoken;
686  wtokenID btindex;
687  word_token* bttoken;
688  wtokenID wtoken_index;
689  word_token* wtoken;
690  altword_token* awtoken;
691
692  /* remember things about the astar before changing it for local purposes */
693  keep_astar_prune = rec->astar_stack->prune_delta;
694  /* rec->astar_stack->prune_delta = 400; */
695  /* ignore the grammar constraints for this quick astar backward pass */
696  keep_arc_token_list = rec->context->arc_token_list;
697  rec->context->arc_token_list = 0;
698
699  /* we will flag all wtokens to be kept */
700
701  /* initialize the flags to keep all */
702  memset(rec->word_token_array_flags, 0, sizeof(rec->word_token_array_flags[0])*rec->word_token_array_size);
703
704  /* flag all those tokens not active, ie already free */
705  wtoken_index = rec->word_token_freelist;
706  for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
707  {
708    wtoken = &rec->word_token_array[wtoken_index];
709    rec->word_token_array_flags[wtoken_index]--;  /* already deleted */
710  }
711
712  /* flag along the best active state paths */
713  stoken_index = rec->active_fsmarc_tokens;
714  for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
715  {
716    stoken = &rec->fsmarc_token_array[ stoken_index];
717    for (i = 0; i < stoken->num_hmm_states; i++)
718    {
719      btindex = stoken->word_backtrace[i];
720      for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
721      {
722        bttoken = &rec->word_token_array[ btindex];
723        ASSERT(rec->word_token_array_flags[ btindex] >= 0);
724        rec->word_token_array_flags[ btindex] = 1;
725      }
726      for (awtoken = stoken->aword_backtrace[i]; awtoken;
727           awtoken = awtoken->next_token)
728      {
729        btindex = awtoken->word_backtrace;
730        for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
731        {
732          bttoken = &rec->word_token_array[ btindex];
733          rec->word_token_array_flags[ btindex] = 1;
734        }
735      }
736    }
737  }
738
739  /* run the astar and flag a little more */
740  astar_stack_prepare_from_active_search(rec->astar_stack, 100, rec);
741  astar_stack_do_backwards_search(rec, 100);
742  astar_stack_flag_word_tokens_used(rec->astar_stack, rec);
743  astar_stack_clear(rec->astar_stack);
744
745  /* kill_word_tokens */
746  for (i = 0; i < rec->word_token_array_size; i++)
747  {
748    if (rec->word_token_array_flags[i] == 0) /* < 0 are already free! */
749      free_word_token_from_lattice(rec, (frameID)i);
750  }
751
752  /* set this back to a regular astar from remembered values */
753  rec->context->arc_token_list = keep_arc_token_list;
754  rec->astar_stack->prune_delta = (costdata) keep_astar_prune;
755
756  SREC_STATS_INC_WTOKEN_REPRUNES(1);
757  return 0;
758}
759
760