1/*---------------------------------------------------------------------------*
2 *  srec_context.c                                                           *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21#include "stdlib.h"
22#include "string.h"
23#include "buildopt.h"
24#include "setting.h"
25#include "passert.h"
26#include "pendian.h"
27#include "portable.h"
28#include "pstdio.h"
29#include "ptypes.h"
30#include "srec_context.h"
31#include "srec_sizes.h"
32#include "search_network.h"
33#include "srec_arb.h"
34#include "hmm_desc.h"
35#if USE_COMP_STATS
36#include "comp_stats.h"
37#endif
38
39#ifdef SET_RCSID
40static const char *rcsid = 0 ? (const char *) &rcsid :
41"$Id: srec_context.c,v 1.84.4.54 2008/05/15 20:06:39 dahan Exp $";
42#endif
43
44
45/* these are constants for now, we need to figure out how to make
46   this more flexible, possible ideas are:
47   (1) to specify these via API functions such as CR_LoadSyntax(),
48   (2) use defaults that would have been set the main parfile,
49   (3) put values for these fields in an actual grammar
50*/
51
52#define AVG_ARCS_PER_WORD    10
53#define AVG_NODES_PER_WORD    7
54#define AVG_CHARS_PER_WORD   18
55#define MAX_PRON_LEN         1024
56#define MAX_WORD_LEN 128
57#define DO_WEIGHTED_ADDWORD   1
58#define SLOTLOOP_OFFSET 64
59#define SLOTNAME_INDICATOR "__"
60#define RULE_INDICATOR '@'
61#define MAX_LINE_LENGTH 512
62#define DEFAULT_WB_COST 40
63#define DISABLE_ARC_COST 999
64#define DO_ARCS_IO_IN_ARC_ORDER 1
65
66#define IS_SILENCE_ILABEL(ilabel,context) (ilabel >= context->hmm_ilabel_offset+EPSILON_OFFSET && ilabel<context->hmm_ilabel_offset+EPSILON_OFFSET+NUM_SILENCE_HMMS)
67// looking to match filename.grxml.Names@__Names__
68// see also grxmlcompile.cpp where these names are constructed
69#define IS_SLOT_OLABEL(wd) (strchr(wd,IMPORTED_RULES_DELIM)<strstr(wd,SLOTNAME_INDICATOR) && strlen(wd)>4 && !strcmp(wd+strlen(wd)-2,SLOTNAME_INDICATOR) )
70
71/*
72SYNTAX : DATA_ALIGN(pointer, data type, bytes_filled)
73EXAMPLE: We need to cast a memory address to a (wordmap*)
74         so we call DATA_ALIGN(memptr, wordmap, filled),
75         where FILLED contains the number of bytes that were used to align the pointer
76*/
77#if (CPU & CPU_ARM)||(CPU & CPU_STRONGARM)
78/* under IPAQ it appears we need to align the structures
79   to certain boundaries.  More attention needed here for other
80   platforms ! */
81#define DATA_ALIGN(x, y, z) if ((int) (x) % sizeof(y)) { \
82    (z)=sizeof(y) - ((int) (x) % sizeof(y)); \
83    (x) += (z); /* force correct data alignment */ \
84  } else z=0;
85#else
86#define DATA_ALIGN(x, y, z)
87#endif
88
89static const int do_minimize = 1;
90#define PTR_TO_IDX(ptr, base) ((asr_uint32_t) (ptr == NULL ? 0xFFFFFFFFu : (asr_uint32_t)(ptr - base)))
91#define IDX_TO_PTR(idx, base) (idx == 0xFFFFFFFFu ? NULL : base + idx)
92
93/* prototypes */
94
95/*----------------------------------------------------------------------*
96 *                                                                      *
97 * internal prototypes                                                  *
98 *                                                                      *
99 *----------------------------------------------------------------------*/
100
101/* general use functions */
102char split_line(char* line, char** av);
103asr_int32_t atoi_with_check(const char* buf, asr_int32_t max);
104int sprintf_arc(char* buf, srec_context* fst, FSMarc* arc);
105int printf_arc1(srec_context* fst, char* msg, FSMarc* arc);
106int printf_node1(srec_context* fst, FSMnode* node);
107
108/* fst_* functions are internal fst functions */
109int fst_add_arcs(srec_context* fst, nodeID start_node, nodeID end_node,
110                 wordID olabel, costdata cost,
111                 modelID* model_sequence, int num_models);
112int fst_push_arc_olabel(srec_context* fst, FSMarc* arc);
113int fst_push_arc_cost(srec_context* fst, FSMarc* arc);
114int fst_pull_arc_olabel(srec_context* fst, FSMarc* arc);
115int fst_free_arc(srec_context* fst, FSMarc* arc);
116int fst_free_node(srec_context* fst, FSMnode* node);
117int fst_free_arc_net(srec_context* fst, FSMarc* arc);
118arcID fst_get_free_arc(srec_context* fst);
119nodeID fst_get_free_node(srec_context* fst);
120int fst_pack_arc_usage(srec_context* fst);
121
122void append_arc_arriving_node(srec_context* fst, FSMnode* to_node, FSMarc_ptr atok);
123void append_arc_leaving_node(srec_context* fst, FSMnode* fr_node, FSMarc_ptr atok);
124int num_arcs_leaving(srec_context* fst, FSMnode* node);
125
126int num_arcs_arriving(srec_context* fst, FSMnode* node);
127int num_arcs_arriving_gt_1(srec_context* fst, FSMnode* node);
128int fst_fill_node_info(srec_context* context);
129int fst_alloc_transit_points(srec_context* context);
130
131/* higher-level functions */
132int FST_LoadReverseWordGraph(srec_context* context, int num_words_to_add,
133                             PFile* fp);
134int FST_LoadParams(srec_context* context, PFile* fp);
135int FST_AssumeParams(srec_context* context);
136int FST_UnloadReverseWordGraph(srec_context* context);
137int FST_AttachArbdata(srec_context* fst, srec_arbdata* allophone_tree);
138
139static ESR_ReturnCode wordmap_clean ( wordmap *word_map );
140static ESR_ReturnCode wordmap_populate ( wordmap *word_map, wordID num_words );
141
142/*--------------------------------------------------------------------------*
143 *                                                                          *
144 *                                                                          *
145 *                                                                          *
146 *--------------------------------------------------------------------------*/
147
148int FST_IsVoiceEnrollment(srec_context* context)
149{
150  if (context->olabels == NULL) return 0;
151  if (context->olabels->num_words < 2) return 0;
152  if (strstr(context->olabels->words[1], "enroll")) return 1;
153  return 0;
154}
155
156int FST_LoadContext(const char* synbase, srec_context** pcontext,
157                    int num_words_to_add)
158{
159  int rc;
160  PFile* fp;
161  char buffer[MAX_LINE_LENGTH];
162  srec_context* context;
163
164  context = (srec_context*)CALLOC_CLR(1, sizeof(srec_context), "srec.graph.base");
165  if(!context)
166	  return FST_FAILED_ON_MEMORY;
167  memset(context, 0, sizeof(srec_context));
168
169  context->addWordCaching_lastslot_name = 0;
170  context->addWordCaching_lastslot_num = MAXwordID;
171  context->addWordCaching_lastslot_needs_post_silence = ESR_FALSE;
172
173  sprintf(buffer, "%s.map", synbase);
174  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
175  if (!fp) return FST_FAILED_ON_INVALID_ARGS;
176  rc = FST_LoadWordMap(&context->olabels, num_words_to_add, fp);
177  pfclose(fp);
178  if (rc) return rc;
179
180  sprintf(buffer, "%s.PCLG.txt", synbase);
181  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
182  if (!fp) return FST_FAILED_ON_INVALID_ARGS;
183  rc = FST_LoadGraph(context, context->ilabels, context->olabels, num_words_to_add, fp);
184  pfclose(fp);
185  if (rc != FST_SUCCESS) return rc;
186
187  sprintf(buffer, "%s.Grev2.det.txt", synbase);
188  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
189  if (!fp) return FST_FAILED_ON_INVALID_ARGS;
190  rc = FST_LoadReverseWordGraph(context, num_words_to_add, fp);
191  pfclose(fp);
192  if (rc != FST_SUCCESS) return rc;
193
194  sprintf(buffer, "%s.params", synbase);
195  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
196  if (fp) {
197    rc = FST_LoadParams(context, fp);
198    pfclose(fp);
199    if (rc != FST_SUCCESS) return rc;
200  } else {
201    rc = FST_AssumeParams(context);
202  }
203
204  context->grmtyp = (arcID)FST_GetGrammarType(context);
205
206  *pcontext = context;
207  rc = fst_fill_node_info(context);
208  return rc ? FST_FAILED_ON_INVALID_ARGS : FST_SUCCESS;
209}
210
211void FST_UnloadContext(srec_context* context)
212{
213  if (!context) return;
214
215  FST_UnloadWordMap(&context->ilabels);
216  FST_UnloadWordMap(&context->olabels);
217  FST_UnloadGraph(context);
218  FST_UnloadReverseWordGraph(context);
219  FREE(context);
220}
221
222int FST_PrepareContext(srec_context* context)
223{
224  nodeID i;
225  int rc = FST_SUCCESS;
226  /* after all word additions, we need to "prepare" the context */
227
228  /* first check for any changes to optional endnodes etc .. */
229  for (i = 0; i < context->num_nodes; i++)
230    if (context->FSMnode_info_list[i] == NODE_INFO_UNKNOWN)
231      break;
232  if (i != context->num_nodes)
233    rc = fst_fill_node_info(context);
234
235  context->whether_prepared = 1;
236  return rc ? FST_FAILED_ON_INVALID_ARGS : FST_SUCCESS;
237}
238
239void fst_set_wb_costs( srec_context* context, costdata wbcost)
240{
241  unsigned int i;
242  for(i=0; i<(unsigned int)context->FSMarc_list_len; i++) {
243    if(context->FSMarc_list[i].ilabel == WORD_BOUNDARY)
244      context->FSMarc_list[i].cost = wbcost;
245  }
246}
247
248int FST_LoadParams(srec_context* context, PFile* fp)
249{
250  char line[MAX_LINE_LENGTH];
251  costdata wbcost = MAXcostdata;
252  while (pfgets(line, MAX_LINE_LENGTH, fp))
253  {
254    char* key = strtok(line," \n\r\t");
255    char* val = key ? strtok(NULL," \n\r\t") : NULL;
256    if(val && !strcmp(val,"="))
257      val = key ? strtok(NULL," \n\r\t") : NULL;
258    if(!key || !key[0])
259      continue;
260    else if(val && val[0]) {
261      if(!strcmp(key,"word_penalty")) {
262        wbcost = (costdata)atoi_with_check(val, MAXcostdata);
263	if(wbcost == MAXcostdata) {
264	  return FST_FAILED_ON_INVALID_ARGS;
265	}
266      } else {
267	PLogError(L("error: unknown parameter %s in .params file"), key);
268	return FST_FAILED_ON_INVALID_ARGS;
269      }
270    }
271  }
272  if(wbcost != MAXcostdata)
273    fst_set_wb_costs( context, wbcost);
274  return FST_SUCCESS;
275}
276
277int FST_AssumeParams( srec_context* context)
278{
279  fst_set_wb_costs( context, (costdata)DEFAULT_WB_COST);
280  return FST_SUCCESS;
281}
282
283
284/*--------------------------------------------------------------------------*
285 *                                                                          *
286 *                                                                          *
287 *                                                                          *
288 *--------------------------------------------------------------------------*/
289
290modelID hmm_number(const char* hmm_Name, modelID hmm_ilabel_offset)
291{
292  if (!strcmp(hmm_Name, "eps")) return EPSILON_LABEL;
293  if (!strcmp(hmm_Name, ".wb")) return WORD_BOUNDARY;
294  if (!strcmp(hmm_Name, ".ph")) return PHONE_BOUNDARY;
295  ASSERT(hmm_Name[0] == 'h' && hmm_Name[1] == 'm' && hmm_Name[2] == 'm' && hmm_Name[3]);
296  return (modelID)(hmm_ilabel_offset + (modelID)atoi_with_check(hmm_Name + 3, MAXmodelID));
297}
298
299char* hmm_name(modelID ilabel, modelID hmm_ilabel_offset, char* buf)
300{
301  if (ilabel == EPSILON_LABEL)
302    sprintf(buf, "eps");
303  else if (ilabel == WORD_BOUNDARY)
304    sprintf(buf, ".wb");
305  else if (ilabel == PHONE_BOUNDARY)
306    sprintf(buf, ".ph");
307  else
308    sprintf(buf, "hmm%03d", ilabel - hmm_ilabel_offset);
309  return buf;
310}
311
312/*------------------------------------------------------------------*
313 *                                                                  *
314 * words                                                            *
315 *                                                                  *
316 *------------------------------------------------------------------*/
317int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
318{
319	return LSTRCMP(key1,key2);
320}
321unsigned int HashGetCode(const void *key)
322{
323	const LCHAR* k = (const LCHAR*)key;
324	unsigned int i, len, h = 0;
325	len = LSTRLEN( k);
326	for ( i = 0; i < len; i++)
327		h = 31*h + (unsigned int)k[i];
328	return h;
329}
330
331int wordmap_create(wordmap** pwmap, int num_chars, int num_words, int num_words_to_add)
332{
333  wordmap* Interface;
334  PHashTableArgs hashArgs;
335  ESR_ReturnCode rc;
336
337  Interface = (wordmap*)CALLOC_CLR(1, sizeof(wordmap), "srec.graph.wordmap.base");
338  Interface->max_words = (wordID)(num_words + num_words_to_add);
339  Interface->num_words = (wordID)0;
340  /* *pwmap->words = (ptr32*) CALLOC_CLR(wmap->max_words, sizeof(ptr32), "graph.wordmap.words"); */
341  Interface->words = (char**) CALLOC_CLR(Interface->max_words, sizeof(char*), "srec.graph.wordmap.words");
342  Interface->max_chars = num_chars + num_words_to_add * AVG_CHARS_PER_WORD;
343  Interface->chars = (char*) CALLOC_CLR(Interface->max_chars, sizeof(char), "srec.graph.wordmap.chars");
344  Interface->next_chars = Interface->chars;
345  Interface->wordIDForWord = NULL;
346  *pwmap = Interface;
347
348  /* use a hashtable to save mapping between wdID and array index */
349  if (num_words_to_add >= 0)
350  {
351    hashArgs.capacity = num_words + num_words_to_add + 10;
352	if(hashArgs.capacity%2==0) hashArgs.capacity += 1;
353    hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
354    hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
355    hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
356    CHKLOG(rc, PHashTableCreate(&hashArgs, L("srec.graph.wordmap.wordIDForWord.wordmap_create()"), &Interface->wordIDForWord));
357  }
358  else
359  {
360    Interface->wordIDForWord = NULL;
361  }
362
363  return FST_SUCCESS;
364
365CLEANUP:
366  wordmap_destroy(pwmap);
367  return rc;
368}
369wordID wordmap_add_word(wordmap* wmap, const char* word);
370
371int FST_LoadWordMap(wordmap** pwmap, int num_words_to_add, PFile* fp)
372{
373  wordID my_wID, wm_wID;
374  char *word, *wID_string;
375  int num_words;
376  asr_int32_t num_chars;
377  char buf[MAX_LINE_LENGTH];
378  long fpos;
379  wordmap* wmap;
380  /* figure out the number of words */
381  fpos = pftell(fp);
382  for (num_words = 0, num_chars = 0; pfgets(buf, MAX_LINE_LENGTH, fp); num_words++)
383  {
384    word = strtok(buf, " \n\r\t");
385    num_chars += strlen(word);
386  }
387  num_chars += num_words * 2; /* for null terminators */
388  pfseek(fp, fpos, SEEK_SET);
389
390  /* Alex added this to reuse this functionality elsewhere
391    destroy function for this create function is actually taken care of by the
392    FST_UnloadWordMap function
393  */
394  wordmap_create(&wmap, num_chars, num_words, num_words_to_add);
395
396  /* later replace this with wordblocks, linked list of 8 character blocks! */
397  while (pfgets(buf, MAX_LINE_LENGTH, fp))
398  {
399    word = strtok(buf, " \n\r\t");
400    wID_string = strtok(NULL, " \n\r\t");
401    my_wID = (wordID)atoi_with_check(wID_string, MAXwordID);
402
403    // if(strstr(word,SLOTNAME_INDICATOR))
404    //    word = strstr(word,SLOTNAME_INDICATOR);
405    wm_wID = wordmap_add_word(wmap, word);
406    ASSERT(my_wID == wm_wID);
407  }
408  ASSERT(wmap->num_words == num_words);
409
410  for (my_wID = 1; my_wID < num_words; my_wID++)
411  {
412    if (!IS_SLOT_OLABEL(wmap->words[my_wID]))
413      break;
414  }
415  wmap->num_slots = my_wID;
416  wordmap_setbase(wmap);
417  *pwmap = wmap;
418  if(wmap->num_slots > MAX_NUM_SLOTS)
419    return FST_FAILED_ON_INVALID_ARGS;
420  else
421    return FST_SUCCESS;
422}
423
424int wordmap_destroy(wordmap** wmap)
425{
426  if (wmap && *wmap)
427  {
428    wordmap_clean ( *wmap );
429    if (((*wmap)->wordIDForWord)) PHashTableDestroy((*wmap)->wordIDForWord);
430    if (((*wmap)->chars)) FREE((*wmap)->chars);
431    if (((*wmap)->words)) FREE((*wmap)->words);
432    if ((*wmap)) FREE((*wmap));
433    *wmap = 0;
434  }
435  return FST_SUCCESS;
436}
437
438int FST_UnloadWordMap(wordmap** wmap)
439{
440  return wordmap_destroy(wmap);
441}
442
443
444int FST_DumpWordMap(PFile* fp, wordmap* wmap)
445{
446  wordID my_wID;
447  for (my_wID = 0; my_wID < wmap->num_words; my_wID++)
448  {
449    pfprintf(fp, "%s %hu\n", wmap->words[my_wID], my_wID);
450  }
451  return FST_SUCCESS;
452}
453
454wordID wordmap_find_index(wordmap* wmap, const char* word)
455{
456  void *wdID_void;
457  ESR_ReturnCode rc;
458  size_t i;
459
460  if (!word)
461    return MAXwordID;
462
463  if (wmap->num_words == 0)
464    return MAXwordID;
465
466  if (wmap->wordIDForWord)
467  {
468    rc = PHashTableGetValue(wmap->wordIDForWord, word, (void**)&wdID_void);
469
470    if (rc == ESR_SUCCESS)
471      return (wordID)(int)wdID_void;
472  }
473  else
474  {
475    for (i = 0; i < wmap->num_words; i++)
476    {
477      if (!strcmp(wmap->words[i], word))
478      {
479        return (wordID)i;
480      }
481    }
482  }
483  return MAXwordID;
484}
485
486wordID wordmap_find_rule_index(wordmap* wmap, const char* rule)
487{
488  int i;
489  int strlen_rule = strlen(rule);
490  for (i = wmap->num_slots; --i > 0;)
491  { /* > (not >=) because eps is always 0 */
492    char* p = strstr(wmap->words[i], SLOTNAME_INDICATOR);
493    if(p != NULL){
494      if (!strcmp(p, rule)) break;
495      else if(!strncmp(p,rule,strlen_rule) && !strcmp(p+strlen_rule,SLOTNAME_INDICATOR)) break;
496    }
497  }
498  if (i == 0) return MAXwordID;
499  else return(wordID)i;
500}
501
502wordID wordmap_find_index_in_rule(wordmap* wmap, const char* word, wordID rule)
503{
504  int len = strlen(word);
505  int rule0 = rule + '0';
506  void *wdID_void;
507  LCHAR word_dot_rule[256];
508  ESR_ReturnCode rc;
509
510  if (!word)
511    return MAXwordID;
512  LSTRCPY(word_dot_rule, word);
513  word_dot_rule[len++] = IMPORTED_RULES_DELIM;
514  word_dot_rule[len++] = (char)rule0;
515  word_dot_rule[len++] = 0;
516  rc = PHashTableGetValue(wmap->wordIDForWord, word_dot_rule, (void**)&wdID_void);
517  if (rc == ESR_SUCCESS)
518    return (wordID)(int)(wdID_void);
519  return MAXwordID;
520}
521
522int strlen_with_null(const char* word)
523{
524#if 1 /* NT  */
525  int len = strlen(word) + 1;
526  if (len % 2 == 1) len++;
527  return len;
528#else /* C54 */
529#endif
530}
531
532int wordmap_whether_in_rule(wordmap* wmap, wordID word, wordID rule)
533{
534  char* word_chars;
535  int len;
536  if (word > wmap->num_words) return 0;
537  word_chars = wmap->words[word];
538  len = strlen(word_chars);
539  if (word_chars[len-1] == rule + '0' && word_chars[len-2] == IMPORTED_RULES_DELIM)
540    return 1;
541  else
542    return 0;
543}
544
545void wordmap_setbase(wordmap* wmap)
546{
547  wmap->num_base_words = wmap->num_words;
548  wmap->next_base_chars = wmap->next_chars;
549}
550
551void wordmap_ceiling(wordmap* wmap)
552{
553  /* this is irreversible */
554  wmap->max_words = wmap->num_words;
555  wmap->max_chars = (wmap->next_chars - wmap->chars);
556}
557
558
559static ESR_ReturnCode wordmap_clean ( wordmap *word_map )
560    {
561  ESR_ReturnCode  clean_status;
562  PHashTableEntry *entry;
563  PHashTableEntry *oldEntry;
564  wordID          *value;
565
566  clean_status = ESR_SUCCESS;
567
568  if ( word_map->wordIDForWord == NULL )
569    return clean_status;
570
571  clean_status = PHashTableEntryGetFirst ( word_map->wordIDForWord, &entry);
572
573  while ( ( entry != NULL ) && ( clean_status == ESR_SUCCESS ) )
574    {
575      clean_status = PHashTableEntryGetKeyValue ( entry, NULL, (void **)&value );
576
577      if ( clean_status == ESR_SUCCESS )
578	{
579	  oldEntry = entry;
580	  clean_status = PHashTableEntryAdvance ( &entry );
581
582	  if ( clean_status == ESR_SUCCESS )
583	    clean_status = PHashTableEntryRemove ( oldEntry );
584	}
585    }
586    return ( clean_status );
587    }
588
589
590static ESR_ReturnCode wordmap_populate ( wordmap *word_map, wordID num_words )
591{
592  ESR_ReturnCode  populate_status;
593  wordID          wdID;
594
595  populate_status = ESR_SUCCESS;
596  wdID = 0;
597
598  if ( word_map->wordIDForWord != NULL )
599    {
600      while ( ( wdID < num_words ) && ( populate_status == ESR_SUCCESS ) )
601	{
602	  populate_status = PHashTablePutValue ( word_map->wordIDForWord, word_map->words[wdID],
603						 (const void *)(int)wdID, NULL );
604	  if ( populate_status == ESR_SUCCESS )
605	    wdID++;
606	  else {
607	    return (populate_status);
608	  }
609	}
610    }
611  return ( populate_status );
612}
613
614void wordmap_reset(wordmap* wmap)
615{
616  char** tmp_words;
617  int i;
618  ESR_ReturnCode    reset_status;
619
620  if (wmap->num_base_words < wmap->num_words)
621  {
622    /*wordID i = (wordID)(wmap->num_base_words);*/
623    char* old_wmap_chars = wmap->chars;
624    int offset = wmap->next_base_chars - wmap->chars;
625    char* tmp_chars = NEW_ARRAY(char, offset, L("srec.g2g.graph.wordmap.chars"));
626    if(!tmp_chars) {
627      passert ( 0 && L("failed to reset the memory for wordmap.chars ") );
628    }
629    memcpy(tmp_chars,wmap->chars, offset * sizeof(char));
630    FREE(wmap->chars);
631
632    wmap->chars = tmp_chars;
633    wmap->next_base_chars = wmap->chars + (wmap->next_base_chars - old_wmap_chars);
634    wmap->max_chars = (wordID) offset;
635    wmap->next_chars = wmap->next_base_chars;
636
637    tmp_words = (char**) CALLOC_CLR(wmap->num_base_words, sizeof(char*), "srec.graph.wordmap.words");
638    if(!tmp_words) {
639      passert ( 0 && L("failed to reset the memory for wordmap.words ") );
640    }
641    memcpy( tmp_words, wmap->words, wmap->num_base_words * sizeof(char*));
642    FREE( wmap->words);
643
644    wmap->words = tmp_words;
645    wmap->max_words = wmap->num_base_words;
646    wmap->num_words  = wmap->num_base_words;
647
648    for(i=0; i<wmap->num_words; i++)
649      wmap->words[i] = wmap->chars + (wmap->words[i] - old_wmap_chars);
650  }
651
652  reset_status = wordmap_clean ( wmap );
653
654  if ( reset_status == ESR_SUCCESS )
655    {
656      reset_status = wordmap_populate ( wmap, wmap->num_base_words );
657
658      if ( reset_status != ESR_SUCCESS )
659	{
660	  wordmap_clean ( wmap );
661	  passert ( 0 && L("wordmap_reset failed") );
662	}
663    }
664  else
665    {
666      passert ( 0 && L("wordmap_reset failed") );
667    }
668}
669
670#define FST_GROW_FACTOR 12/10
671#define FST_GROW_MINCHARS 256
672#define FST_GROW_MINWORDS  32
673wordID wordmap_add_word(wordmap* wmap, const char* word)
674{
675  int len = strlen_with_null(word);
676  wordID wdID =0;
677
678  if (wmap->next_chars + len >= wmap->chars + wmap->max_chars)
679  {
680#if defined(FST_GROW_FACTOR)
681     int i,tmp_max_chars= wmap->max_chars * FST_GROW_FACTOR;
682	 char* old_wmap__chars = wmap->chars;
683      if(tmp_max_chars - wmap->max_chars < FST_GROW_MINCHARS)
684	tmp_max_chars +=FST_GROW_MINCHARS;
685
686      char* tmp_chars = NEW_ARRAY(char, tmp_max_chars, L("srec.g2g.graph.wordmap.chars"));
687      if(!tmp_chars) {
688          PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap.chars"));
689          return MAXwordID;
690      }
691      memcpy(tmp_chars,wmap->chars,wmap->max_chars * sizeof(char));
692      FREE(wmap->chars);
693
694      wmap->chars = tmp_chars;
695      wmap->next_chars = wmap->chars + (wmap->next_chars - old_wmap__chars);
696      wmap->next_base_chars = wmap->chars + (wmap->next_base_chars - old_wmap__chars);
697      wmap->max_chars = (wordID)tmp_max_chars;
698      //Remove old keys --- Add WORD
699      wordmap_clean (wmap );
700
701      // adjust word pointers
702      for(i=0; i<wmap->num_words; i++)
703	{
704	  wmap->words[i] = wmap->chars + (wmap->words[i] - old_wmap__chars);
705	  // adjust hashtable ---
706	  if (wmap->wordIDForWord)
707	    {
708	      ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[i],
709						       (const void *)i, NULL );
710	      if ( rc != ESR_SUCCESS ) {
711		goto CLEANUP;
712	      }
713	    }
714	}
715#else // so not defined(FST_GROW_FACTOR)
716      PLogError("error: char overflow in wmap %d max %d\n", (int)(wmap->next_chars - wmap->chars), wmap->max_chars);
717      return MAXwordID;
718#endif // defined(FST_GROW_FACTOR)
719  }
720  //Add word
721  if (wmap->num_words == wmap->max_words)
722    {
723#if defined(FST_GROW_FACTOR)
724      char** tmp_words;
725      wordID tmp_max_words;
726      int itmp_max_words =  wmap->max_words * FST_GROW_FACTOR;
727
728      if(itmp_max_words - wmap->max_words < FST_GROW_MINWORDS)
729	    itmp_max_words += FST_GROW_MINWORDS;
730
731      if( itmp_max_words >= MAXwordID) {
732	    PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
733	    return MAXwordID;
734      }
735      tmp_max_words = (wordID)itmp_max_words;
736	  tmp_words = (char**) CALLOC_CLR(tmp_max_words, sizeof(char*), "srec.graph.wordmap.words");
737      if(!tmp_words) {
738	PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap.words"));
739        return MAXwordID;
740      }
741      memcpy( tmp_words, wmap->words, wmap->num_words * sizeof(char*));
742      FREE( wmap->words);
743      wmap->words = tmp_words;
744      wmap->max_words = tmp_max_words;
745#else // so not defined(FST_GROW_FACTOR)
746      PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
747      return MAXwordID;
748#endif // defined(FST_GROW_FACTOR)
749    }
750  if(1)
751    {
752    strcpy(wmap->next_chars, word);
753    wmap->words[ wmap->num_words++] = wmap->next_chars;
754    wmap->next_chars += len;
755    wdID = (wordID)(wmap->num_words - (wordID)1);
756    if (wmap->wordIDForWord)
757    {
758       ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[wdID],
759                      (const void *)(int)wdID, NULL );
760    if ( rc != ESR_SUCCESS )
761      goto CLEANUP;
762    }
763    return wdID;
764  }
765CLEANUP:
766  PLogError("error: could not add word and wordID in wmap hash (%s -> %d)\n", word, wdID);
767  return MAXwordID;
768}
769
770wordID wordmap_add_word_in_rule(wordmap* wmap, const char* word, wordID rule)
771{
772  int len = strlen_with_null(word) + 2;
773  wordID wdID = 0;
774  wordID i;
775//wordmap_add_word_in_rule
776  if (wmap->next_chars + len >= wmap->chars + wmap->max_chars)
777  {
778#if defined(FST_GROW_FACTOR)
779      int tmp_max_chars = wmap->max_chars * FST_GROW_FACTOR;
780      char* tmp_chars;
781	  char* old_next_chars = wmap->next_chars;
782	  char* old_chars = wmap->chars;
783
784      if(tmp_max_chars-wmap->max_chars < FST_GROW_MINCHARS )
785	    tmp_max_chars += FST_GROW_MINCHARS;
786      if( wmap->chars + tmp_max_chars <= wmap->next_chars + len)
787	    tmp_max_chars += len;
788
789      tmp_chars = NEW_ARRAY(char, tmp_max_chars, L("srec.g2g.graph.wordmap.chars"));
790    if(!tmp_chars) {
791          PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap_add_in_rule.chars"));
792          return MAXwordID;
793      }
794      memcpy(tmp_chars, wmap->chars, wmap->max_chars * sizeof(char));
795      FREE(wmap->chars);
796
797      wmap->chars = tmp_chars;
798      wmap->next_chars = wmap->chars + (old_next_chars - old_chars) ;
799      wmap->next_base_chars = wmap->chars + (wmap->next_base_chars - old_chars);
800      wmap->max_chars = (wordID)tmp_max_chars;
801
802      //Remove old keys
803      wordmap_clean ( wmap );
804
805      // adjust word pointers wordmap_add_word_in_rule
806      for(i=0; i<wmap->num_words; i++)
807	{
808 	  wmap->words[i] = wmap->chars +(wmap->words[i] - old_chars) ;
809	  // adjust hashtable ----- add in rulewordmap_add_word_in_rule ----
810	  if (wmap->wordIDForWord) {
811	    ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[i],
812						     (void*)(int)(i), NULL );
813	    if ( rc != ESR_SUCCESS )
814	      goto CLEANUP;
815	  }
816	}
817#else
818      PLogError("error: char overflow in wmap %d max %d\n", (int)(wmap->next_chars - wmap->chars), wmap->max_chars);
819      return MAXwordID;
820#endif
821  }//wordmap_add_word_in_rule
822  if (wmap->num_words == wmap->max_words)
823    {
824#if defined(FST_GROW_FACTOR)
825      char** tmp_words;
826      wordID tmp_max_words;
827      int itmp_max_words =  wmap->max_words * FST_GROW_FACTOR;
828      if(itmp_max_words - wmap->max_words < FST_GROW_MINWORDS)
829	itmp_max_words += FST_GROW_MINWORDS;
830
831      if( itmp_max_words >= MAXwordID) {
832	PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
833	return MAXwordID;
834      }
835
836      tmp_max_words = (wordID)itmp_max_words;
837      tmp_words = (char**) CALLOC_CLR(tmp_max_words, sizeof(char*), "srec.graph.wordmap.words");
838      if(!tmp_words) {
839        PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap_add_rule.words"));
840        return MAXwordID;
841      }
842      memcpy( tmp_words, wmap->words, wmap->num_words * sizeof(char*));
843      FREE( wmap->words);
844      wmap->words = tmp_words;
845      wmap->max_words = tmp_max_words;
846#else
847      PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
848      return MAXwordID;
849#endif
850    }
851  if(1)
852  {
853    char *p;
854    const char *q;
855    /* word.1 */
856    for (p = wmap->next_chars, q = word; (*p = *q) != '\0'; p++, q++) ; /* basic strcpy() */
857    *p++ = IMPORTED_RULES_DELIM;
858    *p++ = (char)(rule + ((wordID)'0'));
859    *p   = '\0';
860    wmap->words[ wmap->num_words++] = wmap->next_chars;
861    wmap->next_chars += len;
862    wdID = (wordID)(wmap->num_words - (wordID)1);
863    if (wmap->wordIDForWord)
864      {
865        ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[wdID],
866						 (void*)(int)(wdID), NULL );
867	if ( rc != ESR_SUCCESS )
868	  goto CLEANUP;
869      }
870    return wdID;
871  }
872CLEANUP:
873  PLogError("error: could not add word and wordID in wmap hash (%s -> %d)\n", word, wdID);
874  return MAXwordID;
875}
876
877/*----------------------------------------------------------------------*
878 *                                                                      *
879 * API                                                                  *
880 *                                                                      *
881 *----------------------------------------------------------------------*/
882
883int FST_AttachArbdata(srec_context* fst, srec_arbdata* allophone_tree)
884{
885  int rc = 0;
886  unsigned int allotree__modelid;
887
888  fst->allotree = allophone_tree;
889  if( !allophone_tree)
890      return 1;
891  fst->hmm_info_for_ilabel = allophone_tree->hmm_infos - fst->hmm_ilabel_offset;
892  allotree__modelid = version_arbdata_models(allophone_tree);
893  if (allotree__modelid != 0 && fst->modelid != 0)
894  {
895    if (allotree__modelid != fst->modelid)
896    {
897      PLogError("Error: modelids disagree, sgcbaseline(%u) arbdata(%u)", fst->modelid, allotree__modelid);
898      rc = FST_FAILED_ON_INVALID_ARGS;
899    }
900  }
901
902  return rc;
903}
904
905int FST_LoadGraph(srec_context* pfst, wordmap* imap, wordmap* omap,
906                  int num_words_to_add, PFile* fp)
907{
908  int i, rc = 0;
909  char line[MAX_LINE_LENGTH], *args[32], nargs;
910  arcID max_num_FSMarcs;
911  arcID max_num_FSMnodes;
912  nodeID from_node, last_from_node, into_node, num_nodes, max_node_number = 0;
913  FSMarc_ptr atok =  (FSMarc_ptr)0;
914  FSMarc *atoken = NULL;
915  FSMnode *fr_node, *to_node;
916  costdata cost = FREEcostdata;
917  arcID num_arcs;
918
919  srec_context* fst = pfst;
920  char *ilabel_str = 0, *olabel_str = 0;
921  long fpos;
922  arcID new_arc_id;
923  asr_int32_t temp;
924
925
926  /* determine number of arcs and nodes to allocate, add 50% for dynamic */
927  fpos = pftell(fp);
928  max_num_FSMnodes = 0;
929  pfst->modelid = 0;
930  for (max_num_FSMarcs = 0; pfgets(line, sizeof(line) / sizeof(char), fp); max_num_FSMarcs++)
931  {
932    if (strstr(line, "modelid:") == line)
933    {
934      char *p;
935      pfst->modelid = strtoul(line + strlen("modelid:"), &p, 10);
936    }
937    from_node = (nodeID)atoi_with_check(line, MAXnodeID);
938    if (from_node > max_num_FSMnodes)
939      max_num_FSMnodes = from_node;
940  }
941  pfseek(fp, fpos, SEEK_SET);
942  temp = max_num_FSMnodes + 1 /*why+1?*/ + num_words_to_add * AVG_NODES_PER_WORD;
943  if (temp >= MAXnodeID)
944  {
945    max_num_FSMnodes = MAXnodeID - 1;
946    PLogMessage("Warning: using max nodes instead\n");
947  }
948  else
949  {
950    max_num_FSMnodes = (nodeID)temp;
951  }
952  temp = max_num_FSMarcs + num_words_to_add * AVG_ARCS_PER_WORD;
953  if (temp >= MAXarcID)
954  {
955    max_num_FSMarcs = MAXarcID - 1;
956    PLogMessage("Warning: using max arcs instead\n");
957  }
958  else
959  {
960    max_num_FSMarcs = (arcID)temp;
961  }
962  fst->olabels = omap;
963  if (imap)
964  {
965    /* generally no imap is specified */
966    fst->ilabels = imap;
967    fst->hmm_ilabel_offset = wordmap_find_index(fst->ilabels, "hmm0");
968    ASSERT(fst->hmm_ilabel_offset >= 0);
969  }
970  else
971  {
972    fst->ilabels = (wordmap*)CALLOC_CLR(1, sizeof(wordmap), "srec.graph.imap");
973    fst->ilabels->num_words = fst->ilabels->max_words = 0;
974    fst->ilabels->words = 0;
975    /* this is bad hard code, we can get this from the swiarb file, it is
976       equal to the number of phonemes (53 for enu, 39 for jpn) plus the special
977       models (eps,.wb,.ph,h#,#h,iwt,not) minus 1 ('cuz base number is 0) */
978    fst->hmm_ilabel_offset = 128; /* should match MAX_PHONEMES */
979  }
980  fst->FSMarc_list = (FSMarc*)CALLOC_CLR(max_num_FSMarcs, sizeof(FSMarc), "srec.graph.arcs");
981  fst->FSMnode_list = (FSMnode*)CALLOC_CLR(max_num_FSMnodes, sizeof(FSMnode), "srec.graph.nodes");
982  fst->FSMnode_info_list = (FSMnode_info*)CALLOC_CLR(max_num_FSMnodes, sizeof(char), "srec.graph.nodeinfos");
983
984  /* setup the arc freelist */
985  fst->FSMarc_freelist = 0;
986  fst->num_arcs = 0;
987  for (i = 0; i < max_num_FSMarcs - 1; i++)
988  {
989    fst->FSMarc_list[i].linkl_next_arc = ARC_ItoX(i + 1);
990    fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
991  }
992  fst->FSMarc_list[i].linkl_next_arc = FSMARC_NULL;
993  fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
994
995  /* initialize the nodes, 'cuz reading is random order */
996  fst->num_nodes = 0;
997  for (i = 0; i < max_num_FSMnodes; i++)
998  {
999    fr_node = &fst->FSMnode_list[i];
1000    fr_node->un_ptr.first_next_arc = fr_node->first_prev_arc = FSMARC_NULL;
1001  }
1002
1003  /* 1. first load up all the information */
1004  IF_DEBUG_WDADD(printf("load graph ... 1\n"));
1005  num_arcs = 0;
1006  last_from_node = MAXnodeID;
1007  while (pfgets(line, sizeof(line) / sizeof(char), fp))
1008  {
1009    if (strstr(line, "modelid:") == line) continue;
1010    IF_DEBUG_WDADD(printf("read arc %s", line));
1011    nargs = split_line(line, args);
1012    if (nargs >= 4)
1013    {
1014      from_node = (nodeID)atoi_with_check(args[0], MAXnodeID);
1015      into_node = (nodeID)atoi_with_check(args[1], MAXnodeID);
1016      ilabel_str = args[2];
1017      olabel_str = args[3];
1018      cost = FREEcostdata;
1019      if (nargs == 5)
1020        PLogError(L("Warning: too many arguments on line %s"), line);
1021    }
1022    else if (nargs == 1)
1023    {
1024      from_node = (nodeID)atoi_with_check(args[0], MAXnodeID);
1025      into_node = MAXnodeID;
1026      ilabel_str = 0;
1027      olabel_str = 0;
1028      cost = FREEcostdata;
1029    }
1030    else
1031    {
1032      from_node = into_node = 0;
1033      PLogError("can't parse line %s\n", line);
1034      ASSERT(0);
1035    }
1036
1037    if (into_node == MAXnodeID)
1038    {
1039      fst->end_node = from_node;
1040    }
1041    else
1042    {
1043      new_arc_id = fst_get_free_arc(fst);
1044      if (new_arc_id == MAXarcID)
1045        return FST_FAILED_ON_MEMORY;
1046      atok = ARC_ItoX(new_arc_id);
1047      atoken = ARC_XtoP(atok);
1048      num_arcs++;
1049      fr_node = &fst->FSMnode_list[from_node];
1050      to_node = &fst->FSMnode_list[into_node];
1051      if (fst->ilabels->num_words == 0)
1052      {
1053        atoken->ilabel = hmm_number(ilabel_str, fst->hmm_ilabel_offset);
1054	/* Xufang: if olabel_str is a slotname, change the input label to a no-silence and no-eps HMM. It is true that the weight of slot arcs is 999 and hmm will not take effect in the runtime, but it was found that when the slot is a loop, eps and silence will cause the recursive function fst_node_has_speech_to_come into a no-step-out status.*/
1055	if(strstr(olabel_str, SLOTNAME_INDICATOR)!=NULL)
1056	  atoken->ilabel = fst->hmm_ilabel_offset + SLOTLOOP_OFFSET;
1057      }
1058      else
1059      {
1060        atoken->ilabel = wordmap_find_index(fst->ilabels, ilabel_str);
1061      }
1062      atoken->olabel = wordmap_find_index(fst->olabels, olabel_str);
1063      /* if(g_fst_options.wtw_cost_override>0) {
1064      if(atoken->ilabel == WORD_BOUNDARY)
1065      atoken->cost = g_fst_options.wtw_cost_override;
1066      else
1067      atoken->cost = g_fst_options.arc_cost_override;
1068      } else  */
1069      atoken->cost = cost;
1070#if DEBUG_WDADD
1071      atoken->ilabel_str = (atoken->ilabel < fst->ilabels->num_words ? fst->ilabels->words[atoken->ilabel] : 0);
1072      atoken->olabel_str = (atoken->olabel < fst->olabels->num_words ? fst->olabels->words[atoken->olabel] : 0);
1073#endif
1074      append_arc_leaving_node(fst, fr_node, atok);
1075      if (into_node > max_node_number)
1076        max_node_number = into_node;
1077      append_arc_arriving_node(fst, to_node, atok);
1078      atoken->fr_node = NODE_ItoX(from_node);
1079      atoken->to_node = NODE_ItoX(into_node);
1080    }
1081    last_from_node = from_node;
1082  }
1083  ASSERT(fst->num_arcs == num_arcs);
1084
1085  /* setup the node freelist */
1086  IF_DEBUG_WDADD(printf("load graph ... 6\n"));
1087  num_nodes = (nodeID)(max_node_number + 1);
1088  if( max_num_FSMnodes > num_nodes) {
1089  fst->FSMnode_freelist = num_nodes;
1090  for (i = num_nodes; i < (max_num_FSMnodes - 1); i++)
1091  {
1092    fst->FSMnode_list[i].un_ptr.next_node = NODE_ItoX(i + 1);
1093    fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
1094  }
1095	if (i == (max_num_FSMnodes - 1)) {
1096     fst->FSMnode_list[i].un_ptr.next_node = FSMNODE_NULL;
1097     fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
1098    }
1099  } else
1100	fst->FSMnode_freelist  = FSMNODE_NULL;
1101
1102  /* some book-keeping stuff */
1103  IF_DEBUG_WDADD(printf("load graph ... 7\n"));
1104  fst->num_base_arcs = fst->num_arcs = num_arcs;
1105  fst->FSMarc_list_len = max_num_FSMarcs;
1106  fst->FSMnode_list_len = max_num_FSMnodes;
1107  fst->num_base_nodes = fst->num_nodes = num_nodes;
1108  fst->start_node = 0;
1109  /* fst->end_node = 0; this is set up above */
1110
1111  /* beg and end word labels */
1112  fst->beg_silence_word = wordmap_find_index(fst->olabels, "-pau-");
1113  fst->end_silence_word = wordmap_find_index(fst->olabels, "-pau2-");
1114  fst->hack_silence_word = wordmap_find_index(fst->olabels, "silence");
1115
1116  /* set the search limitations, real ones will be registered later */
1117  fst->max_searchable_nodes = 0;
1118  fst->max_searchable_arcs = 0;
1119
1120  rc = fst_alloc_transit_points(fst);
1121  return (rc ? FST_FAILED_ON_INVALID_ARGS : FST_SUCCESS);
1122}
1123
1124int FST_UnloadGraph(srec_context* pfst)
1125{
1126  if (pfst->ilabels)
1127    FREE(pfst->ilabels);
1128  FREE(pfst->FSMarc_list);
1129  FREE(pfst->FSMnode_list);
1130  FREE(pfst->FSMnode_info_list);
1131  pfst->FSMarc_list = 0;
1132  pfst->FSMnode_list = 0;
1133  pfst->FSMnode_info_list = 0;
1134  return FST_SUCCESS;
1135}
1136
1137int FST_DumpGraph(srec_context* fst, PFile* fp)
1138{
1139  int rc = 0;
1140  nodeID i;
1141  FSMarc_ptr atok;
1142  FSMarc* atoken;
1143  nodeID from_node, into_node;
1144  FSMnode* ntoken;
1145  char *ilabel, *olabel;
1146
1147  for (i = 0; i < fst->num_nodes; i++)
1148  {
1149    from_node = i;
1150    ntoken = &fst->FSMnode_list[i];
1151    if (ntoken->first_prev_arc == FSMARC_FREE)
1152      continue;
1153    if (ntoken->un_ptr.first_next_arc != FSMARC_NULL)
1154    {
1155      for (atok = ntoken->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
1156      {
1157        char buf[32];
1158        atoken = ARC_XtoP(atok);
1159        into_node = NODE_XtoI(atoken->to_node);
1160        ilabel = fst->ilabels->num_words == 0 ?
1161                 hmm_name(atoken->ilabel, fst->hmm_ilabel_offset, buf) :
1162                 fst->ilabels->words[atoken->ilabel] ;
1163        olabel = fst->olabels->words[atoken->olabel];
1164
1165        if (atoken->cost != FREEcostdata)
1166        {
1167          /* regular arc */
1168          pfprintf(fp, "%hu\t%hu\t%s\t%s\t%hu\n",
1169                  from_node, into_node, ilabel, olabel, atoken->cost);
1170        }
1171        else
1172        {
1173          /* regular zero cost arc */
1174          pfprintf(fp, "%hu\t%hu\t%s\t%s\n",
1175                  from_node, into_node, ilabel, olabel);
1176        }
1177      }
1178    }
1179    else
1180    {
1181      pfprintf(fp, "%hu\n", from_node);
1182    }
1183  }
1184  return rc;
1185}
1186
1187int FST_AddWordToGrammar(srec_context* fst, const char* _slot,
1188                         const char* word, const char* pron,
1189                         const int cost)
1190{
1191  int num_pron_ampersands = 0, num_prons_added = 0, num_words_added = 0;
1192  int i, pron_len, model_sequence_len;
1193  modelID model_sequence[MAX_PRON_LEN];
1194  char phoneme_sequence[MAX_PRON_LEN];
1195  wordID olabel = MAXwordID;
1196  int irc, rc = FST_SUCCESS;
1197  nodeID start_node = MAXnodeID, end_node = MAXnodeID;
1198  char veslot[MAX_WORD_LEN];
1199#if USE_HMM_BASED_ENROLLMENT
1200  const char* Tpron;
1201#endif
1202
1203  /* The addword from voice enroll still use @, and to test voice enroll, @ sign was removed and __ was added at the begining and ending of a word. Xufang */
1204  if(_slot[0] == '@') {
1205    strcpy(veslot,SLOTNAME_INDICATOR);
1206    strcat(veslot,_slot+1);
1207    strcat(veslot,SLOTNAME_INDICATOR);
1208  } else
1209    strcpy(veslot, _slot);
1210
1211#if USE_COMP_STATS
1212  if (!comp_stats)
1213    comp_stats = init_comp_stats1();
1214  start_cs_clock1(&comp_stats->word_addition);
1215#endif
1216
1217  /* we expect developers to call AddWord with the same slot many times,
1218  so we cache the slot start and end nodes and re-use on subseq calls */
1219  if( fst->addWordCaching_lastslot_num == MAXwordID )
1220    fst->addWordCaching_lastslot_name = NULL;
1221  else {
1222    fst->addWordCaching_lastslot_name = fst->olabels->words[fst->addWordCaching_lastslot_num];
1223    fst->addWordCaching_lastslot_name = strstr(fst->addWordCaching_lastslot_name, SLOTNAME_INDICATOR);
1224    ASSERT( fst->addWordCaching_lastslot_name);
1225  }
1226  if( fst->addWordCaching_lastslot_name==NULL || strcmp( fst->addWordCaching_lastslot_name, veslot)) {
1227
1228    fst->addWordCaching_lastslot_num = wordmap_find_rule_index(fst->olabels, veslot);
1229    /* olabel not found is a user error */
1230    if (fst->addWordCaching_lastslot_num == MAXwordID)
1231    {
1232      size_t i;
1233
1234      pfprintf(PSTDOUT, L("error: slot '%s' not found among ["), veslot);
1235      for (i = 1; i < (size_t) fst->olabels->num_slots; ++i)
1236        pfprintf(PSTDOUT, "%s, ", fst->olabels->words[i]);
1237      pfprintf(PSTDOUT, L("] possible\n"));
1238      rc = FST_FAILED_ON_INVALID_ARGS;
1239      goto RETRC;
1240    }
1241
1242    fst->addWordCaching_lastslot_name = fst->olabels->words[fst->addWordCaching_lastslot_num];
1243    fst->addWordCaching_lastslot_name = strstr(fst->addWordCaching_lastslot_name, SLOTNAME_INDICATOR);
1244    ASSERT(fst->addWordCaching_lastslot_name);
1245
1246    /* now find where in the graph this slot is referenced, note that
1247       there might be more than one place, but here we ignore multiples */
1248    for (i = fst->num_fsm_exit_points; --i >= 0;)
1249      {
1250	arcID arcid = fst->fsm_exit_points[i].arc_index;
1251	if (fst->FSMarc_list[arcid].olabel == fst->addWordCaching_lastslot_num)
1252	  {
1253	    FSMarc* arc;
1254	    FSMnode* node;
1255	    start_node = fst->fsm_exit_points[i].from_node_index;
1256	    end_node = fst->fsm_exit_points[i].wbto_node_index;
1257	    fst->addWordCaching_lastslot_needs_post_silence = ESR_TRUE;
1258	    fst->addWordCaching_lastslot_ifsm_exit_point = i;
1259	    node = &fst->FSMnode_list[ end_node];
1260	    arc = &fst->FSMarc_list[node->un_ptr.first_next_arc];
1261	    if (arc->olabel == fst->end_silence_word && arc->linkl_next_arc == MAXarcID)
1262	      fst->addWordCaching_lastslot_needs_post_silence = ESR_FALSE;
1263	    break;
1264	  }
1265      }
1266
1267    /* not found in the graph is an internal error, coding error */
1268    if (i < 0 || start_node>=fst->num_nodes || end_node>=fst->num_nodes)
1269      {
1270	PLogError("error: (internal) finding olabel %d %d %d\n", fst->addWordCaching_lastslot_num,
1271		  start_node, end_node);
1272	goto RETRC;
1273      }
1274  } /* cached or not */
1275
1276  i = fst->addWordCaching_lastslot_ifsm_exit_point;
1277  start_node = fst->fsm_exit_points[i].from_node_index;
1278  end_node = fst->fsm_exit_points[i].wbto_node_index;
1279
1280  /* now start_node and end_node are known */
1281  if (!word || !*word || !pron || !*pron)
1282    {
1283      rc = FST_FAILED_ON_INVALID_ARGS;
1284      PLogError("error: null word/pron on input to FST_AddWordToGrammar()\n");
1285      goto RETRC;
1286    }
1287
1288  /* from here */
1289  IF_DEBUG_WDADD(printf("Adding %s %s\n", word, (const char*)pron));
1290
1291  /* loop over all prons, we break when we hit the double-null (hence the +1) */
1292  for( ; (*pron)!='\0'; pron+=(pron_len+1)) {
1293    pron_len = strlen((char*)pron);
1294    if (pron_len >= MAX_PRON_LEN)
1295      {
1296	PLogError("error: wordadd failed on word %s due to pron [%s] too long\n", word, (char*)pron);
1297	rc = FST_FAILED_ON_INVALID_ARGS;
1298	goto RETRC;
1299      }
1300
1301    /* check for searchability after adding, estimating at most pron_len
1302       arcs and nodes will be added */
1303    if (fst->num_arcs + pron_len > fst->max_searchable_arcs)
1304      {
1305	PLogError("error: wordadd failed on word %s due to %d arc search limit\n",
1306		  word, fst->max_searchable_arcs);
1307	rc = FST_FAILED_ON_MEMORY;
1308	goto RETRC;
1309      }
1310
1311    if (fst->num_nodes + pron_len > fst->max_searchable_nodes)
1312      {
1313	PLogError("error: wordadd failed on word %s due to %d node search limit\n",
1314		  word, fst->max_searchable_nodes);
1315	rc = FST_FAILED_ON_MEMORY;
1316	goto RETRC;
1317      }
1318
1319    /* add the word if necessary, !FST_SUCCESS is allowed if we're just adding
1320       an alternate pronunciation */
1321    olabel = wordmap_find_index_in_rule(fst->olabels, word, fst->addWordCaching_lastslot_num);
1322    if (olabel == MAXwordID)
1323      {
1324	olabel = wordmap_add_word_in_rule(fst->olabels, word, fst->addWordCaching_lastslot_num);
1325	if (olabel != MAXwordID)
1326	  num_words_added++;
1327      }
1328    if (olabel == MAXwordID)
1329      {
1330	PLogError("error: wordmap_add_word failed\n");
1331	rc = FST_FAILED_ON_MEMORY;
1332	goto RETRC;
1333      }
1334
1335#if USE_HMM_BASED_ENROLLMENT
1336    // pron should be converted to model_sequence, model_sequence_len
1337#define VETRANS_PREFIX "wd_hmm"
1338#define VETRANS_PREFIX_LEN 6
1339    if( LSTRSTR(pron, VETRANS_PREFIX)!=NULL) {
1340      // printf("Debug-pron: %d, %s\n",pron_len, pron);
1341      model_sequence_len=0;
1342      for(Tpron=pron; (Tpron=strstr(Tpron, VETRANS_PREFIX))!=NULL; ){
1343	// Tpron = strstr(Tpron, "wd_hmm");
1344	Tpron += VETRANS_PREFIX_LEN; // skip over "wd_hmm"
1345	// Copy hmm number (56,132,...)
1346	model_sequence[ model_sequence_len] = atoi_with_check(Tpron, MAXmodelID);
1347	model_sequence_len++;
1348      }
1349
1350
1351      /* we need to append silence at the end since we might be dealing with a slot
1352	 that has ensuing speech, formerly we only dealt with ROOT which defacto
1353	 was followed by the pau2 silence, so ROOT did not need its own */
1354      if (fst->addWordCaching_lastslot_needs_post_silence)
1355	model_sequence[model_sequence_len++] = 3; // <<< hmm3_sil !!! ugly hard-code here
1356
1357      /* append the word boundary */
1358      model_sequence[model_sequence_len++] = WORD_BOUNDARY;
1359      /* translate to input label ids */
1360      for (i = 0; i < model_sequence_len; i++)
1361	{
1362	  if (model_sequence[i] >= EPSILON_OFFSET)
1363	    model_sequence[i] = (modelID)(model_sequence[i] + fst->hmm_ilabel_offset);
1364	}
1365    }
1366    else
1367#endif
1368      {
1369	pron_len = strlen((char*)pron);
1370	if (pron_len >= MAX_PRON_LEN)
1371	  {
1372	    PLogError("error: wordadd failed on word %s due to pron [%s] too long\n", word, (char*)pron);
1373	    rc = FST_FAILED_ON_INVALID_ARGS;
1374	    goto RETRC;
1375	  }
1376
1377	for (i = 0; i < pron_len; i++)
1378	  {
1379	    if (pron[i] == OPTSILENCE_CODE)
1380	      {
1381		phoneme_sequence[i] = SILENCE_CODE;
1382		num_pron_ampersands++;
1383	      }
1384	    else
1385	      phoneme_sequence[i] = pron[i];
1386	  }
1387	/* we need to append silence at the end since we might be dealing with a slot
1388	   that has ensuing speech, formerly we only dealt with ROOT which defacto
1389	   was followed by the pau2 silence, so ROOT did not need its own */
1390	if (fst->addWordCaching_lastslot_needs_post_silence)
1391	  phoneme_sequence[i++] = SILENCE_CODE;
1392
1393	model_sequence_len = i;
1394	irc = get_modelids_for_pron(fst->allotree, phoneme_sequence, model_sequence_len, model_sequence);
1395	/* check for bad sequence of phonemes */
1396	if (irc)
1397	  {
1398	    PLogError("error: get_modelids_for_pron(%s) returned %d\n", pron, irc);
1399	    rc = FST_FAILED_ON_INVALID_ARGS;
1400	    goto RETRC;
1401	  }
1402	IF_DEBUG_WDADD(printf("model_sequence ...\n"));
1403
1404	/* append the word boundary */
1405	model_sequence[model_sequence_len++] = WORD_BOUNDARY;
1406	/* translate to input label ids */
1407	for (i = 0; i < model_sequence_len; i++)
1408	  {
1409	    if (model_sequence[i] >= EPSILON_OFFSET)
1410	      model_sequence[i] = (modelID)(model_sequence[i] + fst->hmm_ilabel_offset);
1411	  }
1412      } /*  end of ph_t ph_r .. type decoding of the model  sequence */
1413
1414    /* now do the actual addition */
1415    rc = fst_add_arcs(fst, start_node, end_node, olabel, (costdata)cost, model_sequence, model_sequence_len);
1416    if (rc == FST_SUCCESS)
1417      {
1418	num_prons_added++;
1419      }
1420    else if (rc == FST_FAILED_ON_HOMONYM)
1421      {
1422	/* maybe the another pron will work? */
1423      }
1424    else
1425      {
1426	PLogMessage("error: fst_add_arcs() failed adding word %s pron %s ('&' as 'iwt')\n", word, (char*)pron);
1427	goto RETRC;
1428      }
1429
1430    /* second add the pron with no silences,
1431       this only applies to true prons, not wd_hmm333 wd_hmm222 type prons */
1432
1433    if (num_pron_ampersands > 0)
1434      {
1435	for (i = 0, model_sequence_len = 0; i < pron_len; i++)
1436	  {
1437	    if (pron[i] != OPTSILENCE_CODE)
1438	      phoneme_sequence[model_sequence_len++] = pron[i];
1439	  }
1440
1441	irc = get_modelids_for_pron(fst->allotree, phoneme_sequence, model_sequence_len, model_sequence);
1442	/* check for bad sequence of phonemes */
1443	if (irc)
1444	  {
1445	    PLogError("error: get_modelids_for_pron(%s) returned %d\n", pron, rc);
1446	    rc = FST_FAILED_ON_INVALID_ARGS;
1447	    goto RETRC;
1448	  }
1449	else
1450	  {
1451	    IF_DEBUG_WDADD(printf("model_sequence ...\n"));
1452
1453	    /* append the word boundary */
1454	    model_sequence[model_sequence_len++] = WORD_BOUNDARY;
1455	    /* translate to input label ids */
1456	    for (i = 0; i < model_sequence_len; i++)
1457	      {
1458		if (model_sequence[i] >= EPSILON_OFFSET)
1459		  model_sequence[i] = (modelID)(model_sequence[i] + fst->hmm_ilabel_offset);
1460	      }
1461	    /* now do the actual addition */
1462	    rc = fst_add_arcs(fst, start_node, end_node,
1463			      olabel, (costdata)cost, model_sequence, model_sequence_len);
1464
1465	    if (rc == FST_SUCCESS)
1466	      {
1467		num_prons_added++;
1468	      }
1469	    else if (rc == FST_FAILED_ON_HOMONYM)
1470	      {
1471		/* maybe another pron worked? */
1472	      }
1473	    else
1474	      {
1475		PLogMessage("Warning: fst_add_arcs() failed while adding "
1476			    "word %s pron %s (skip '&')\n", word, (char*)pron);
1477		goto RETRC;
1478	      }
1479	  }
1480      }
1481  }
1482
1483
1484 RETRC:
1485  /* set this to make sure that FST_Prepare gets called after add word */
1486  fst->whether_prepared = 0;
1487#if USE_COMP_STATS
1488  end_cs_clock1(&comp_stats->word_addition, 1);
1489#endif
1490
1491  if (rc < 0 && rc != FST_FAILED_ON_HOMONYM)
1492    return rc;
1493
1494  if (num_prons_added == 0 && num_words_added > 0)
1495    {
1496      return FST_FAILED_ON_HOMONYM;
1497    }
1498  else if (num_prons_added == 0 && num_words_added == 0)
1499    {
1500      return FST_FAILED_ON_HOMOGRAPH;
1501    }
1502  else if (num_prons_added > 0 && num_words_added == 0)
1503    {
1504      return FST_SUCCESS_ON_OLD_WORD;
1505    }
1506  else
1507    {
1508      /* if(num_prons_added>0 && num_words_added>0) */
1509      return FST_SUCCESS;
1510    }
1511}
1512
1513/* remove arcs leaving a node, arcs added via word add, are just discarded
1514to be cleaned up for re-use by other functions */
1515void remove_added_arcs_leaving(srec_context* fst, nodeID ni)
1516{
1517  FSMnode* node = &fst->FSMnode_list[ ni];
1518  FSMarc *arc = NULL, *arc2;
1519  arcID *pai, ai, ai2;
1520  for (pai = &node->un_ptr.first_next_arc, ai = (*pai); ai != MAXarcID; pai = &arc->linkl_next_arc, ai = (*pai))
1521  {
1522    if (ai < fst->num_base_arcs)
1523    {
1524      arc = &fst->FSMarc_list[ai];
1525    }
1526    else
1527    {
1528      arc2 = &fst->FSMarc_list[ai];
1529      for (ai2 = arc2->linkl_next_arc; ai2 >= fst->num_base_arcs && ai2 != MAXarcID;
1530           ai2 = arc2->linkl_next_arc)
1531      {
1532        arc2 = &fst->FSMarc_list[ai2];
1533      }
1534      *pai = ai2;
1535    }
1536  }
1537}
1538
1539/* remove arcs arriving at a node, arcs added via word add, are just discarded
1540   to be cleaned up for re-use by other functions */
1541void remove_added_arcs_arriving(srec_context* fst, nodeID ni)
1542{
1543  FSMnode* node = &fst->FSMnode_list[ni];
1544  FSMarc *arc = NULL, *arc2;
1545  arcID *pai, ai, ai2;
1546  for (pai = &node->first_prev_arc, ai = (*pai); ai != MAXarcID;
1547       pai = &arc->linkl_prev_arc, ai = (*pai))
1548  {
1549    if (ai < fst->num_base_arcs)
1550    {
1551      arc = &fst->FSMarc_list[ai];
1552    }
1553    else
1554    {
1555      arc2 = &fst->FSMarc_list[ai];
1556      for (ai2 = arc2->linkl_prev_arc; ai2 >= fst->num_base_arcs && ai2 != MAXarcID;
1557           ai2 = arc2->linkl_prev_arc)
1558      {
1559        arc2 = &fst->FSMarc_list[ai2];
1560      }
1561      *pai = ai2;
1562    }
1563  }
1564}
1565
1566/* reset greammar, but resetting all the arcs, nodes and words added
1567   via the addword functions */
1568
1569int FST_ResetGrammar(srec_context* fst)
1570{
1571  int i, rc = 0;
1572  nodeID fst_slot_start_node, fst_slot_end_node;
1573  wordID fst_slot_slotnum;
1574  arcID ai;
1575  nodeID ni2, ni3, ni4, ni3a;
1576  FSMnode_t *node, *node2, *node3;
1577  FSMarc_t *arc, *arc2, *arc3;
1578
1579  /*fst_slot_slotnum = wordmap_find_rule_index(fst->olabels, slot);*/
1580  for (fst_slot_slotnum = 1; fst_slot_slotnum < fst->olabels->num_slots;
1581       fst_slot_slotnum++)
1582  {
1583
1584    if (fst_slot_slotnum == MAXwordID)
1585    {
1586      char *slot = "";
1587      PLogError("error: slot '%s' not found among [%d,%d] possible\n", slot, 1, fst->olabels->num_slots - 1);
1588      return (rc = FST_FAILED_ON_INVALID_ARGS);
1589    }
1590
1591    /* now find where in the graph this slot is referenced, note that
1592       there might be more than one place, but here we ignore multiples */
1593    fst_slot_start_node = MAXnodeID;
1594    fst_slot_end_node = MAXnodeID;
1595    for (i = fst->num_fsm_exit_points; --i >= 0;)
1596    {
1597      ai = fst->fsm_exit_points[i].arc_index;
1598      if (fst->FSMarc_list[ai].olabel == fst_slot_slotnum)
1599      {
1600        fst_slot_start_node = fst->fsm_exit_points[i].from_node_index;
1601        fst_slot_end_node = fst->fsm_exit_points[i].wbto_node_index;
1602      }
1603    }
1604
1605    /* this 'slot' could be the root rule, which can't be removed */
1606    if (fst_slot_start_node == MAXnodeID || fst_slot_end_node == MAXnodeID)
1607      continue;
1608
1609    /*
1610      start                     end
1611      node   node2    node3   node4
1612       o--@N-->o--sil-->o--wb-->o
1613         arc     arc2   | arc3  ^
1614                 |       |wb
1615                 \__sil->o
1616    arc4   ni3a
1617    */
1618
1619    remove_added_arcs_leaving(fst, fst_slot_start_node);
1620    node = &fst->FSMnode_list[ fst_slot_start_node];
1621    for (ai = node->un_ptr.first_next_arc; ai != MAXarcID; ai = arc->linkl_next_arc)
1622    {
1623      arc = &fst->FSMarc_list[ai];
1624      if (arc->olabel != fst_slot_slotnum)
1625        continue;
1626
1627      ni2 = arc->to_node;
1628      remove_added_arcs_arriving(fst, ni2);
1629      if (ni2 == fst_slot_end_node)
1630        continue;
1631      node2 = &fst->FSMnode_list[ni2];
1632      arc2 = &fst->FSMarc_list[ node2->un_ptr.first_next_arc];
1633
1634
1635      ni3 = arc2->to_node;
1636      remove_added_arcs_arriving(fst, ni3);
1637      if (ni3 == fst_slot_end_node)
1638        continue;
1639      node3 = &fst->FSMnode_list[ni3];
1640      arc3 = &fst->FSMarc_list[ node3->un_ptr.first_next_arc];
1641      while (arc3->linkl_next_arc != MAXarcID)
1642      {
1643        arc3 = &fst->FSMarc_list[arc3->linkl_next_arc];
1644        ni3a = arc3->to_node;
1645        remove_added_arcs_arriving(fst, ni3a);
1646      }
1647      arc3 = &fst->FSMarc_list[ node3->un_ptr.first_next_arc];
1648
1649      ni4 = arc3->to_node;
1650      remove_added_arcs_arriving(fst, ni4);
1651      ASSERT(ni4 == fst_slot_end_node);
1652      if (ni4 == fst_slot_end_node)
1653        continue;
1654    }
1655  }
1656
1657  /* reset the freelist for nodes */
1658  if( fst->num_nodes == fst->num_base_nodes )
1659    {}
1660  else{
1661    FSMnode *ntoken;
1662    FSMnode* tmp_FSMnode_list;
1663    FSMnode_info* tmp_FSMnode_info_list;
1664
1665	fst->FSMnode_freelist = MAXnodeID;
1666    fst->num_nodes = fst->FSMnode_list_len = fst->num_base_nodes;
1667
1668	tmp_FSMnode_list = (FSMnode*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode), "srec.graph.nodes");
1669    if(!tmp_FSMnode_list){
1670     PLogError("ERROR: Could NOT reset the memory for srec.graph.nodes");
1671     return FST_FAILED_ON_MEMORY;
1672     }
1673    memcpy( tmp_FSMnode_list, fst->FSMnode_list, fst->FSMnode_list_len*sizeof(FSMnode));
1674
1675    /* scan to the end of the free list (to be re-used) */
1676    nodeID* last_free_node = (&fst->FSMnode_freelist);
1677	ntoken = (*last_free_node==MAXnodeID) ? NULL : &fst->FSMnode_list[*last_free_node] ;
1678
1679	for( ; *last_free_node!=MAXnodeID; last_free_node = &ntoken->un_ptr.next_node)
1680		 ntoken = &tmp_FSMnode_list[ *last_free_node];
1681
1682	FREE( fst->FSMnode_list);
1683    tmp_FSMnode_info_list = (FSMnode_info*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode_info), "srec.graph.nodeinfos");
1684    if(!tmp_FSMnode_info_list){
1685     PLogError("ERROR: Could NOT reset the memory for srec.graph.nodeinfos");
1686     return FST_FAILED_ON_MEMORY;
1687     }
1688	// copy in node info
1689	memcpy( tmp_FSMnode_info_list, fst->FSMnode_info_list, fst->FSMnode_list_len*sizeof(FSMnode_info));
1690
1691
1692	FREE( fst->FSMnode_info_list);
1693	fst->FSMnode_info_list = tmp_FSMnode_info_list;
1694    fst->FSMnode_list = tmp_FSMnode_list;
1695  }
1696
1697  /*ni = fst->FSMnode_freelist = fst->num_base_nodes;
1698    node = &fst->FSMnode_list[ni];
1699    for (; ni < fst->FSMnode_list_len - 1; ni++, node++)
1700       node->un_ptr.next_node = (nodeID)(ni + 1);
1701    node->un_ptr.next_node = MAXnodeID;
1702    fst->num_nodes = fst->num_base_nodes;*/
1703
1704  /* reset the freelist for arcs */
1705  if( fst->num_arcs == fst->num_base_arcs )
1706    {}
1707  else {
1708    FSMarc* atoken = NULL;
1709    FSMarc* tmp_FSMarc_list;
1710
1711    fst->num_arcs = fst->num_base_arcs;
1712    fst->FSMarc_list_len = fst->num_base_arcs;
1713    fst->FSMarc_freelist = MAXarcID;
1714    tmp_FSMarc_list = (FSMarc*)CALLOC_CLR(fst->FSMarc_list_len, sizeof(FSMarc), "srec.graph.arcs");
1715    if(!tmp_FSMarc_list){
1716      PLogError("ERROR: Could NOT reset the memory for srec.graph.arcs");
1717     return FST_FAILED_ON_MEMORY;
1718    }
1719    memcpy( tmp_FSMarc_list, fst->FSMarc_list, fst->FSMarc_list_len*sizeof(FSMarc));
1720
1721    /* scan to the end of the free list (to be re-used) */
1722    arcID* last_free_arc = &fst->FSMarc_freelist;
1723    atoken = (*last_free_arc==MAXarcID) ? NULL : &fst->FSMarc_list[*last_free_arc] ;
1724
1725    for( ; *last_free_arc!=MAXarcID; last_free_arc=&atoken->linkl_next_arc)
1726      atoken = &tmp_FSMarc_list[ *last_free_arc];
1727
1728    FREE( fst->FSMarc_list);
1729    fst->FSMarc_list = tmp_FSMarc_list;
1730  }
1731
1732  /*ai = fst->FSMarc_freelist = fst->num_base_arcs;
1733    arc = &fst->FSMarc_list[ai];
1734    for (; ai < fst->FSMarc_list_len - 1; ai++, arc++)
1735    arc->linkl_next_arc = (arcID)(ai + 1);
1736    arc->linkl_next_arc = MAXarcID;
1737    fst->num_arcs = fst->num_base_arcs;
1738    fst->FSMarc_list_len = fst->num_base_arcs;*/
1739
1740  /* now remove all the added words */
1741  wordmap_reset(fst->olabels);
1742  return FST_SUCCESS;
1743}
1744
1745/* See the description of arc_tokens in the header file. These
1746   arcs are nodeless, which make loading from a file that
1747   references nodes a little harder.  We need to do it in stages.
1748   Later, when we load from binary image it should be much simpler.
1749*/
1750
1751arc_token_lnk get_first_arc_leaving_node(arc_token* arc_token_list,
1752    arcID num_arcs,
1753    nodeID node)
1754{
1755  arcID i;
1756  for (i = 0; i < num_arcs; i++)
1757  {
1758    if ((nodeID)(int)arc_token_list[i].next_token_index == node)
1759      return ARC_TOKEN_LNK(arc_token_list, i);
1760  }
1761  return ARC_TOKEN_NULL;
1762}
1763
1764int FST_LoadReverseWordGraph(srec_context* context, int num_words_to_add, PFile* fp)
1765{
1766  arcID i;
1767  char line[MAX_LINE_LENGTH];
1768  char word_label_as_str[128];
1769  arcID num_alloc_arc_tokens;
1770  nodeID from_node, into_node;
1771  labelID word_label = MAXwordID;
1772  arc_token *atoken, *last_atoken;
1773  costdata cost;
1774  arcID num_arcs;
1775  arc_token *arc_token_list, *tmp;
1776  long fpos;
1777
1778  /* determine number of word arcs to allocate */
1779  fpos = pftell(fp);
1780  for (num_arcs = 0; pfgets(line, MAX_LINE_LENGTH, fp);  num_arcs++) ;
1781  num_alloc_arc_tokens = num_arcs;
1782  num_alloc_arc_tokens = (arcID)(num_arcs + num_words_to_add);
1783  pfseek(fp, fpos, SEEK_SET);
1784
1785  context->arc_token_list = (arc_token*)CALLOC_CLR(num_alloc_arc_tokens, sizeof(arc_token), "srec.graph.wordgraph");
1786  arc_token_list = context->arc_token_list;
1787
1788  /* 1. first load up all the information */
1789  i = 0;
1790  while (pfgets(line, MAX_LINE_LENGTH, fp))
1791  {
1792    if (sscanf(line, "%hu\t%hu\t%s", &from_node, &into_node, word_label_as_str) == 3)
1793    {
1794      word_label = wordmap_find_index(context->olabels, word_label_as_str);
1795      // ASSERT(word_label >= 0);
1796      cost = FREEcostdata;
1797    }
1798    else if (sscanf(line, "%hu", &from_node) == 1)
1799    {
1800      into_node = MAXnodeID;
1801      word_label = MAXwordID;
1802      cost = FREEcostdata;
1803    }
1804    else
1805    {
1806      PLogError("FST_LoadReverseWordGraph() .. can't parse line %s\n", line);
1807      ASSERT(0);
1808    }
1809    atoken = &arc_token_list[i];
1810    i++;
1811    atoken->ilabel = word_label;
1812	/* atoken->olabel = WORD_EPSILON_LABEL; */
1813    /*atoken->cost = cost; cost is not used for now */
1814#if DEBUG_ASTAR
1815    atoken->label = (word_label == MAXwordID ? 0 : wmap->words[word_label]);
1816#endif
1817    atoken->first_next_arc = (arc_token_lnk)into_node;
1818    atoken->next_token_index = (arc_token_lnk)from_node;
1819  }
1820  num_arcs = i;
1821
1822  /* 2. now do the internal cross references */
1823  for (i = 0; i < num_arcs; i++)
1824  {
1825    atoken = &arc_token_list[i];
1826    into_node = (nodeID)atoken->first_next_arc;
1827    if (into_node == MAXnodeID)
1828      atoken->first_next_arc = ARC_TOKEN_NULL;
1829    else
1830      atoken->first_next_arc = get_first_arc_leaving_node(arc_token_list, num_arcs, (nodeID)atoken->first_next_arc);
1831  }
1832
1833  /* 3. now do more internal cross refs */
1834  last_atoken = &arc_token_list[0];
1835  for (i = 1; i < num_arcs; i++)
1836  {
1837    atoken = &arc_token_list[i];
1838    if (atoken->next_token_index != last_atoken->next_token_index)
1839    {
1840      last_atoken->next_token_index = ARC_TOKEN_NULL;
1841    }
1842    else
1843    {
1844      last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i);
1845    }
1846    last_atoken = atoken;
1847  }
1848  last_atoken->next_token_index = ARC_TOKEN_NULL;
1849
1850#if DEBUG_ASTAR
1851  /* under debug, it's nice to be able to see the words leaving the
1852     destination node, they are stored sequentially in the debug ary */
1853  for (i = 0; i < num_arcs; i++)
1854  {
1855    char * p;
1856    atoken = &arc_token_list[i];
1857    atoken->debug[0] = 0;
1858    for (tmp = ARC_TOKEN_LNK(arc_token_list, atoken->first_next_arc); tmp != NULL;
1859         tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index))
1860    {
1861      if (tmp->first_next_arc == ARC_TOKEN_NULL)
1862      {
1863        p = "END";
1864      }
1865      else if (!tmp->label)
1866      {
1867        p = "NULL";
1868      }
1869      else
1870      {
1871        p = tmp->label;
1872      }
1873      if (strlen(atoken->debug) + strlen(p) + 6 < 64)
1874      {
1875        strcat(atoken->debug, p);
1876        strcat(atoken->debug, " ");
1877      }
1878      else
1879      {
1880        strcat(atoken->debug, "...");
1881        break;
1882      }
1883    }
1884  }
1885#endif
1886  context->arc_token_list_len = num_alloc_arc_tokens;
1887  if (num_alloc_arc_tokens > num_arcs)
1888  {
1889    atoken = &context->arc_token_list[num_arcs];
1890    for (i = num_arcs; i < num_alloc_arc_tokens; i++, atoken++)
1891    {
1892      atoken->first_next_arc = ARC_TOKEN_NULL;
1893      atoken->ilabel         = MAXwordID;
1894      atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i + 1);/*atoken+1;*/
1895    }
1896    atoken--;
1897    atoken->next_token_index = ARC_TOKEN_NULL;
1898    context->arc_token_freelist = &context->arc_token_list[num_arcs];
1899  }
1900  else
1901  {
1902    context->arc_token_freelist = NULL;
1903  }
1904  /* the insert start is the arc that goes to the end */
1905  atoken = context->arc_token_list;
1906  atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
1907  for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->next_token_index))
1908  {
1909    if (atoken->first_next_arc == ARC_TOKEN_NULL)
1910      break;
1911    tmp = ARC_TOKEN_PTR(context->arc_token_list, atoken->first_next_arc);
1912    if (tmp->ilabel == MAXwordID && tmp->first_next_arc == ARC_TOKEN_NULL)
1913      break;
1914  }
1915  context->arc_token_insert_start = atoken;
1916
1917  return FST_SUCCESS;
1918}
1919
1920int FST_UnloadReverseWordGraph(srec_context* context)
1921{
1922  FREE(context->arc_token_list);
1923  context->arc_token_list = 0;
1924  return FST_SUCCESS;
1925}
1926
1927/*----------------------------------------------------------------------*
1928 *                                                                      *
1929 * general                                                              *
1930 *                                                                      *
1931 *----------------------------------------------------------------------*/
1932
1933char split_line(char* line, char** av)
1934{
1935  int i = 0;
1936  av[i] = strtok(line, "\n\r\t ");
1937  for (; av[i];)
1938  {
1939    av[++i] = strtok(NULL, "\n\r\t ");
1940  }
1941  return i;
1942}
1943
1944asr_int32_t atoi_with_check(const char* buf, asr_int32_t mymax)
1945{
1946  asr_int32_t ans = atol(buf);
1947  ASSERT(ans < mymax);
1948  return ans;
1949}
1950
1951/*----------------------------------------------------------------------*
1952 *                                                                      *
1953 * fst                                                                  *
1954 *                                                                      *
1955 *----------------------------------------------------------------------*/
1956
1957arcID fst_get_free_arc(srec_context* fst)
1958{
1959  FSMarc* atoken;
1960  arcID atokid = fst->FSMarc_freelist;
1961  if (atokid == MAXarcID)
1962  {
1963    PLogError("error: ran out of arcs\n");
1964    return atokid;
1965  }
1966  atoken = &fst->FSMarc_list[atokid];
1967  fst->FSMarc_freelist = ARC_XtoI(atoken->linkl_next_arc);
1968  memset(atoken, 0, sizeof(FSMarc));
1969  atoken->to_node = FSMNODE_NULL;
1970  atoken->fr_node = FSMNODE_NULL;
1971  atoken->linkl_next_arc = FSMARC_NULL;
1972  atoken->linkl_prev_arc = FSMARC_NULL;
1973  atoken->ilabel = 0;
1974  atoken->olabel = 0;
1975  atoken->cost   = 0;
1976  fst->num_arcs++;
1977  return atokid;
1978}
1979nodeID fst_get_free_node(srec_context* fst)
1980{
1981  FSMnode* ntoken;
1982  nodeID ntokid = fst->FSMnode_freelist;
1983  if (ntokid == MAXnodeID)
1984  {
1985    PLogError("error: ran out of nodes\n");
1986    return ntokid;
1987  }
1988  ntoken = &fst->FSMnode_list[ntokid];
1989  fst->FSMnode_freelist = NODE_XtoI(ntoken->un_ptr.next_node);
1990  ntoken->un_ptr.first_next_arc = FSMARC_NULL;
1991  ntoken->first_prev_arc = FSMARC_NULL;
1992  /* ASSERT( (int)(ntok - fst->FSMnode_list) == fst->num_nodes); */
1993  fst->num_nodes++;
1994  return ntokid;
1995}
1996
1997int fst_free_node(srec_context* fst, FSMnode* node)
1998{
1999  IF_DEBUG_WDADD(printf_node(fst, "freeing: ", node));
2000  node->first_prev_arc = FSMARC_NULL;
2001  node->un_ptr.next_node = fst->FSMnode_freelist;
2002  node->first_prev_arc = FSMARC_FREE;
2003  fst->FSMnode_freelist = (nodeID)(node - fst->FSMnode_list);
2004  /* fst->num_nodes--; not allowed unless we compress! */
2005  return 0;
2006}
2007
2008int fst_free_arc(srec_context* fst, FSMarc* arc)
2009{
2010  IF_DEBUG_WDADD(printf_arc(fst, "freeing: ", arc));
2011  arc->linkl_prev_arc = FSMARC_FREE;
2012  arc->linkl_next_arc = ARC_ItoX(fst->FSMarc_freelist);
2013  arc->cost = 2;
2014  arc->ilabel = 0;
2015  arc->olabel = 0;
2016  IF_DEBUG_WDADD(arc->ilabel_str = 0);
2017  IF_DEBUG_WDADD(arc->olabel_str = 0);
2018  fst->FSMarc_freelist = (arcID)(arc - fst->FSMarc_list);
2019  fst->num_arcs--;
2020  return 0;
2021}
2022
2023int fst_free_arc_net(srec_context* fst, FSMarc* arc)
2024{
2025  if (!arc)
2026    return 0;
2027  /* need to traverse the arc, get the 'from' nodes,
2028     then traverse the from nodes and kill arcs */
2029  /* we'll need this after an error condition */
2030  return 0;
2031}
2032
2033int fst_push_arc_olabel(srec_context* fst, FSMarc* arc)
2034{
2035  FSMarc_ptr atok;
2036  FSMarc* atoken;
2037  FSMnode_ptr ntok = arc->to_node;
2038
2039  /* look for all arcs leaving this arc, and mark them
2040     normally there is only one, except for multipron words */
2041  for (atok = FIRST_NEXT(ntok); atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2042  {
2043    atoken = ARC_XtoP(atok);
2044    /* cannot have multiple olabels before a boundary */
2045    if (atoken->olabel != WORD_EPSILON_LABEL)
2046      return FST_FAILED_INTERNAL;
2047    atoken->olabel = arc->olabel;
2048#if DO_WEIGHTED_ADDWORD
2049    atoken->cost = (costdata)(atoken->cost + arc->cost);
2050#endif
2051    IF_DEBUG_WDADD(atoken->olabel_str = arc->olabel_str);
2052  }
2053  /* unlabel this arc */
2054  arc->olabel = WORD_EPSILON_LABEL;
2055#if DO_WEIGHTED_ADDWORD
2056  arc->cost = FREEcostdata;
2057#endif
2058  IF_DEBUG_WDADD(arc->olabel_str = fst->olabels->words[ arc->olabel]);
2059  return FST_SUCCESS;
2060}
2061
2062#if DO_WEIGHTED_ADDWORD
2063int fst_push_arc_cost(srec_context* fst, FSMarc* arc)
2064{
2065  FSMarc_ptr atok;
2066  FSMarc* atoken;
2067  FSMnode_ptr ntok = arc->to_node;
2068  /* look for all arcs leaving this arc, and push the cost thereto */
2069  for (atok = FIRST_NEXT(ntok); atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2070  {
2071    atoken = ARC_XtoP(atok);
2072    atoken->cost = (costdata)(atoken->cost + arc->cost);
2073  }
2074  arc->cost = FREEcostdata;
2075  return FST_SUCCESS;
2076}
2077#endif
2078
2079
2080int fst_pull_arc_olabel(srec_context* fst, FSMarc* arc)
2081{
2082  FSMarc_ptr atok;
2083  FSMarc* atoken;
2084  FSMnode_ptr fr_node = arc->fr_node;
2085  FSMarc_ptr tmp_atok;
2086  FSMarc *tmp_atoken;
2087  FSMnode *anode;
2088
2089  if (arc->olabel == WORD_EPSILON_LABEL)
2090    return FST_SUCCESS;
2091
2092  /* can the olabel be pulled earlier? */
2093  for (atok = FIRST_PREV(fr_node); atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2094  {
2095    atoken = ARC_XtoP(atok);
2096    anode = NODE_XtoP(atoken->to_node);
2097
2098    tmp_atok = anode->un_ptr.first_next_arc;
2099    if (tmp_atok != FSMARC_NULL)
2100    {
2101      tmp_atoken = ARC_XtoP(tmp_atok);
2102      if (tmp_atoken->linkl_next_arc != FSMARC_NULL)
2103      {
2104        return FST_CONTINUE;
2105      }
2106    }
2107  }
2108
2109  /* look for all arcs arriving at this arc, and mark them */
2110  for (atok = FIRST_PREV(fr_node);atok != FSMARC_NULL;atok = atoken->linkl_prev_arc)
2111  {
2112    atoken = ARC_XtoP(atok);
2113    /* cannot have multiple olabels! serious internal error!? */
2114    if (atoken->olabel != WORD_EPSILON_LABEL)
2115    {
2116      PLogError("error: internal error, in fst_pull_arc_olabel()\n");
2117      return FST_FAILED_INTERNAL;
2118    }
2119    atoken->olabel = arc->olabel;
2120#if DO_WEIGHTED_ADDWORD
2121    atoken->cost = arc->cost;
2122#endif
2123    IF_DEBUG_WDADD(atoken->olabel_str = arc->olabel_str);
2124  }
2125  /* unlabel this arc */
2126  arc->olabel = WORD_EPSILON_LABEL;
2127#if DO_WEIGHTED_ADDWORD
2128  arc->cost   = FREEcostdata;
2129#endif
2130  IF_DEBUG_WDADD(arc->olabel_str = fst->olabels->words[ arc->olabel]);
2131  return FST_SUCCESS;
2132}
2133
2134static FSMarc* find_next_arc_with_ilabel(srec_context* fst, FSMnode* node, labelID ilabel, FSMarc** last)
2135{
2136  FSMarc_ptr atok;
2137  FSMarc* atoken = NULL;
2138  for (atok = node->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2139  {
2140    atoken = ARC_XtoP(atok);
2141    if (atoken->ilabel == ilabel)
2142      return atoken;
2143  }
2144  *last = atoken; /* to this we may want to append a new arc */
2145  return NULL;
2146}
2147
2148FSMarc* find_prev_arc_with_iolabels(srec_context* fst, FSMnode* node, labelID ilabel, labelID olabel, FSMarc** last)
2149{
2150  FSMarc_ptr atok;
2151  FSMarc* atoken = NULL;
2152  FSMarc_ptr tmp_atok;
2153  FSMarc* tmp_atoken;
2154
2155  for (atok = node->first_prev_arc; atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2156  {
2157    atoken = ARC_XtoP(atok);
2158    if (atoken->olabel == olabel && atoken->ilabel == ilabel)
2159    {
2160      tmp_atok = NODE_XtoP(atoken->fr_node)->un_ptr.first_next_arc;
2161      if (tmp_atok != FSMARC_NULL)
2162      {
2163        tmp_atoken = ARC_XtoP(tmp_atok);
2164
2165        if (tmp_atoken->linkl_next_arc == FSMARC_NULL)
2166          return atoken;
2167      }
2168      else
2169        return atoken;
2170    }
2171
2172  }
2173  if (last) *last = atoken;
2174  return NULL;
2175}
2176
2177int num_arcs_leaving(srec_context* fst, FSMnode* node)
2178{
2179  int num_arcs = 0;
2180  FSMarc_ptr atok;
2181  FSMarc* atoken;
2182  for (atok = node->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2183  {
2184    atoken = ARC_XtoP(atok);
2185    num_arcs++;
2186  }
2187  return num_arcs;
2188}
2189
2190int num_arcs_arriving(srec_context* fst, FSMnode* node)
2191{
2192  int num_arcs = 0;
2193  FSMarc_ptr atok;
2194  FSMarc* atoken;
2195  for (atok = node->first_prev_arc; atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2196  {
2197    atoken = ARC_XtoP(atok);
2198    num_arcs++;
2199  }
2200  return num_arcs;
2201}
2202
2203PINLINE int num_arcs_arriving_gt_1(srec_context* fst, FSMnode* node)
2204{
2205  FSMarc_ptr atok = node->first_prev_arc;
2206  FSMarc* atoken;
2207  if (atok == FSMARC_NULL)
2208    return 0;
2209  atoken = ARC_XtoP(atok);
2210  return atoken->linkl_prev_arc == FSMARC_NULL ? 0 : 1;
2211}
2212
2213static void remove_arc_arriving(srec_context* fst, FSMnode* to_node, FSMarc* arc)
2214{
2215  FSMarc* atoken;
2216  FSMarc_ptr* atok;
2217  for (atok = &to_node->first_prev_arc;(*atok) != FSMARC_NULL; atok = &atoken->linkl_prev_arc)
2218  {
2219    atoken = ARC_XtoP(*atok);
2220    if (atoken == arc)
2221    {
2222      (*atok) = arc->linkl_prev_arc;
2223      return;
2224    }
2225  }
2226  ASSERT(0);
2227}
2228
2229int split_node_for_arc(srec_context* fst, FSMarc* arc)
2230{
2231  arcID new_arc_id;
2232  nodeID new_node_id;
2233  FSMnode* new_node;
2234  FSMnode* old_node;
2235  FSMarc_ptr atok, new_next, last_new_next;
2236  FSMarc *atoken, *new_next_p;
2237
2238  IF_DEBUG_WDADD(printf_arc(fst, "spliting ... ", arc));
2239
2240  new_node_id = fst_get_free_node(fst);
2241  if (new_node_id == MAXnodeID)
2242    return FST_FAILED_ON_MEMORY;
2243  new_node = &fst->FSMnode_list[ new_node_id];
2244  old_node = NODE_XtoP(arc->to_node);
2245  arc->to_node = NODE_ItoX(new_node_id);
2246  new_node->first_prev_arc = ARC_PtoX(arc);
2247  remove_arc_arriving(fst, old_node, arc);
2248  arc->linkl_prev_arc = FSMARC_NULL;
2249
2250  last_new_next = FSMARC_NULL;
2251  for (atok = old_node->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2252  {
2253    atoken = ARC_XtoP(atok);
2254    new_arc_id = fst_get_free_arc(fst);
2255    if (new_arc_id == MAXarcID)
2256      return FST_FAILED_ON_MEMORY;
2257    new_next = ARC_ItoX(new_arc_id);
2258    if (last_new_next != FSMARC_NULL)
2259      LINKL_NEXT(last_new_next) = new_next;
2260    else
2261      new_node->un_ptr.first_next_arc = new_next;
2262    new_next_p = ARC_XtoP(new_next);
2263    new_next_p->ilabel = atoken->ilabel;
2264    new_next_p->to_node = atoken->to_node;
2265    new_next_p->fr_node = arc->to_node;
2266    new_next_p->olabel = atoken->olabel;
2267    IF_DEBUG_WDADD(new_next_p->ilabel_str = atoken->ilabel_str);
2268    IF_DEBUG_WDADD(new_next_p->olabel_str = atoken->olabel_str);
2269    new_next_p->linkl_next_arc = FSMARC_NULL; /* set at next loop */
2270    append_arc_arriving_node(fst, NODE_XtoP(atoken->to_node), new_next);
2271    last_new_next = new_next;
2272  }
2273  return FST_SUCCESS;
2274}
2275
2276int fst_add_arcs(srec_context* fst, nodeID start_node, nodeID end_node,
2277                 wordID add_tree_olabel, costdata add_tree_cost,
2278                 modelID* add_tree_start, int add_tree_len)
2279{
2280  int rc = FST_SUCCESS;
2281  FSMarc_ptr atok = (FSMarc_ptr)0, last_atok = (FSMarc_ptr)0;
2282  FSMarc* atoken = NULL;
2283  FSMarc *next_atoken, *prev_atoken;
2284  FSMarc *append_to;
2285  FSMnode *late_start_node, *early_end_node;
2286  FSMnode *ntoken, *last_ntoken;
2287  FSMnode_ptr ntok, last_ntok;
2288  modelID *add_tree_end = add_tree_start + add_tree_len - 1;
2289  modelID *add_tree_late_start, *add_tree_early_end;
2290  modelID *add_tree;
2291  arcID new_arc_id, atokid;
2292  nodeID new_node_id, atokid_node;
2293  wordID add_tree_olabel_use = add_tree_olabel;
2294  wordID add_tree_cost_use   = add_tree_cost;
2295
2296  append_to = NULL;
2297  add_tree = add_tree_start;
2298#define FST_GROW_MINARCS  100
2299#define FST_GROW_MINNODES 100
2300#if defined(FST_GROW_FACTOR)
2301
2302  /* make sure we have enough arcs available */
2303  if(fst->num_arcs + add_tree_len >= fst->FSMarc_list_len)
2304   {
2305	FSMarc* tmp_FSMarc_list;
2306	arcID tmp_FSMarc_list_len;
2307	int itmp_FSMarc_list_len = fst->FSMarc_list_len*FST_GROW_FACTOR ;
2308
2309	if( itmp_FSMarc_list_len - fst->FSMarc_list_len < FST_GROW_MINARCS)
2310		itmp_FSMarc_list_len += FST_GROW_MINARCS;
2311
2312	if( itmp_FSMarc_list_len - fst->FSMarc_list_len < add_tree_len)
2313		itmp_FSMarc_list_len += add_tree_len;
2314
2315    if(itmp_FSMarc_list_len >= (int)MAXarcID)
2316	     return FST_FAILED_ON_MEMORY;
2317
2318	tmp_FSMarc_list_len = (arcID)itmp_FSMarc_list_len;
2319
2320    tmp_FSMarc_list = (FSMarc*)CALLOC_CLR(tmp_FSMarc_list_len, sizeof(FSMarc), "srec.graph.arcs");
2321      if(!tmp_FSMarc_list) {
2322	PLogError("error: Failed to extend the memory for arcs\n");
2323        return FST_FAILED_INTERNAL;
2324      }
2325    memcpy( tmp_FSMarc_list, fst->FSMarc_list, fst->FSMarc_list_len*sizeof(FSMarc));
2326
2327	/* initialize the new free list.
2328    head of this new free list is tmp_FSMarc_list[ fst->FSMarc_list_len] */
2329	for(atokid=fst->FSMarc_list_len; atokid<tmp_FSMarc_list_len-1; atokid++) {
2330	   tmp_FSMarc_list[atokid].linkl_next_arc = ARC_ItoX(atokid+1);
2331       tmp_FSMarc_list[atokid].linkl_prev_arc = FSMARC_FREE;
2332    }
2333    tmp_FSMarc_list[atokid].linkl_next_arc = FSMARC_NULL;
2334    tmp_FSMarc_list[atokid].linkl_prev_arc = FSMARC_FREE;
2335
2336    /* scan to the end of the old free list (to be re-used) */
2337    arcID* last_free_arc = &fst->FSMarc_freelist;
2338    atoken = (*last_free_arc==MAXarcID) ? NULL : &fst->FSMarc_list[*last_free_arc] ;
2339
2340    for( ; *last_free_arc!=MAXarcID; last_free_arc=&atoken->linkl_next_arc)
2341	    atoken = &tmp_FSMarc_list[ *last_free_arc];
2342
2343	/* append the new free list to the current free list */
2344	*last_free_arc = fst->FSMarc_list_len;
2345
2346	FREE( fst->FSMarc_list);
2347    fst->FSMarc_list = tmp_FSMarc_list;
2348    fst->FSMarc_list_len = tmp_FSMarc_list_len;
2349   }
2350
2351   /* make sure we have enough nodes available */
2352   if(fst->num_nodes + add_tree_len >= fst->FSMnode_list_len)
2353   {
2354     FSMnode* tmp_FSMnode_list;
2355	 nodeID tmp_FSMnode_list_len;
2356	 FSMnode_info* tmp_FSMnode_info_list;
2357	 int itmp_FSMnode_list_len = fst->FSMnode_list_len * FST_GROW_FACTOR ;
2358
2359	 if( itmp_FSMnode_list_len - fst->FSMnode_list_len < FST_GROW_MINNODES)
2360		itmp_FSMnode_list_len += FST_GROW_MINNODES;
2361
2362	 if( itmp_FSMnode_list_len - fst->FSMnode_list_len < add_tree_len)
2363		itmp_FSMnode_list_len += add_tree_len;
2364
2365	 if(itmp_FSMnode_list_len >= (int)MAXnodeID)
2366	     return FST_FAILED_ON_MEMORY;
2367
2368	 tmp_FSMnode_list_len = (nodeID)itmp_FSMnode_list_len;
2369
2370     tmp_FSMnode_list = (FSMnode*)CALLOC_CLR(tmp_FSMnode_list_len, sizeof(FSMnode), "srec.graph.nodes");
2371     if(!tmp_FSMnode_list) {
2372       PLogError("ERROR: Failed to extend the memory for nodes\n");
2373       return FST_FAILED_INTERNAL;
2374     }
2375     memcpy( tmp_FSMnode_list, fst->FSMnode_list, fst->FSMnode_list_len*sizeof(FSMnode));
2376
2377	/* initialize the new free node list.
2378    head of this new free list is tmp_FSMnode_list[ fst->FSMnode_list_len] */
2379    for(atokid_node=fst->FSMnode_list_len; atokid_node<tmp_FSMnode_list_len-1; atokid_node++)
2380	{
2381	   tmp_FSMnode_list[atokid_node].un_ptr.next_node = NODE_ItoX(atokid_node + 1);
2382       tmp_FSMnode_list[atokid_node].first_prev_arc = FSMARC_FREE;
2383    }
2384       tmp_FSMnode_list[atokid_node].un_ptr.next_node = FSMNODE_NULL;
2385       tmp_FSMnode_list[atokid_node].first_prev_arc = FSMARC_FREE;
2386
2387    /* scan to the end of the old free list (to be re-used) */
2388    nodeID* last_free_node = (&fst->FSMnode_freelist);
2389	ntoken = (*last_free_node==MAXnodeID) ? NULL : &fst->FSMnode_list[*last_free_node] ;
2390
2391	for( ; *last_free_node!=MAXnodeID; last_free_node = &ntoken->un_ptr.next_node)
2392		 ntoken = &tmp_FSMnode_list[ *last_free_node];
2393
2394	/* append the new free list to the current free list */
2395	*last_free_node = fst->FSMnode_list_len;
2396
2397	FREE( fst->FSMnode_list);
2398
2399	tmp_FSMnode_info_list = (FSMnode_info*)CALLOC_CLR( tmp_FSMnode_list_len, sizeof(FSMnode_info), "srec.graph.nodeinfos");
2400     if(!tmp_FSMnode_info_list) {
2401       PLogError("ERROR: Failed to extend the memory for node infos\n");
2402       return FST_FAILED_INTERNAL;
2403     }
2404	// copy in old node info
2405	memcpy( tmp_FSMnode_info_list, fst->FSMnode_info_list, fst->FSMnode_list_len*sizeof(FSMnode_info));
2406
2407	// initialize the new node info
2408	for (atokid_node=fst->FSMnode_list_len; atokid_node < tmp_FSMnode_list_len; atokid_node++)
2409		tmp_FSMnode_info_list[atokid_node] = NODE_INFO_UNKNOWN;
2410
2411	FREE( fst->FSMnode_info_list);
2412	fst->FSMnode_info_list = tmp_FSMnode_info_list;
2413    fst->FSMnode_list = tmp_FSMnode_list;
2414    fst->FSMnode_list_len = tmp_FSMnode_list_len;
2415}
2416#endif
2417   late_start_node = &fst->FSMnode_list[start_node];
2418
2419  while (1)
2420  {
2421
2422    next_atoken = find_next_arc_with_ilabel(fst, late_start_node,
2423                                            *add_tree /*->ilabel*/,
2424                                            &append_to);
2425    if (next_atoken != NULL)
2426    {
2427      /* so next_atok->ilabel == add_tree->ilabel */
2428
2429      if (next_atoken->ilabel == WORD_BOUNDARY && *add_tree/*->ilabel*/ == WORD_BOUNDARY)
2430      {
2431	 next_atoken = NULL;
2432	 break; /* break as if nothing more can be shared! */
2433	// return FST_FAILED_ON_HOMONYM;
2434      }
2435
2436      if (num_arcs_arriving_gt_1(fst, NODE_XtoP(next_atoken->to_node)))
2437      {
2438        split_node_for_arc(fst, next_atoken);
2439        /* unfortunate side effect here, that if we later find out this
2440           was a homonym addition, then this expansion was useless and
2441           for now we don't undo it! */
2442      }
2443
2444      /* we shouldn't really push the olabel if it's the same for both! */
2445      if (next_atoken->olabel != add_tree_olabel_use)
2446      {
2447        if (next_atoken->olabel != WORD_EPSILON_LABEL)
2448        {
2449          if (fst_push_arc_olabel(fst, next_atoken) != FST_SUCCESS)
2450          {
2451            PLogError("error: internal error fst_push_arc_olabel()\n");
2452            return FST_FAILED_INTERNAL;
2453          }
2454        }
2455#if DO_WEIGHTED_ADDWORD
2456        else
2457          fst_push_arc_cost(fst, next_atoken);
2458#endif
2459      }
2460      else if (add_tree_olabel_use != WORD_EPSILON_LABEL)
2461      {
2462        /* the graph already has this word, so we just re-use the olabel
2463           and disable the setting of olabel down below */
2464        add_tree_olabel_use = WORD_EPSILON_LABEL;
2465#if DO_WEIGHTED_ADDWORD
2466        add_tree_cost_use   = FREEcostdata;
2467#endif
2468      }
2469      add_tree++;
2470      late_start_node = NODE_XtoP(next_atoken->to_node);
2471    }
2472    else
2473    {
2474      break;
2475    }
2476  }
2477
2478  add_tree_late_start = add_tree;
2479  early_end_node = &fst->FSMnode_list[end_node];
2480
2481  if (!do_minimize)
2482  {
2483    last_ntoken = NULL;
2484    last_ntok = NODE_PtoX(late_start_node);
2485    for (; add_tree <= add_tree_end; add_tree++)
2486    {
2487      new_arc_id = fst_get_free_arc(fst);
2488      if (new_arc_id == MAXarcID)
2489      {
2490        PLogError("fst_get_free_arc() failed\n");
2491        return FST_FAILED_ON_MEMORY;
2492      }
2493      atok = ARC_ItoX(new_arc_id);
2494      atoken = ARC_XtoP(atok);
2495      if (add_tree != add_tree_end)
2496      {
2497        new_node_id = fst_get_free_node(fst);
2498        if (new_node_id == MAXnodeID)
2499        {
2500          PLogError("fst_get_free_node() failed\n");
2501          return FST_FAILED_ON_MEMORY;
2502        }
2503        ntok = NODE_ItoX(new_node_id);
2504        ntoken = NODE_XtoP(ntok);
2505        ntoken->first_prev_arc = atok;
2506      }
2507      else
2508      {
2509        ntok = FSMNODE_NULL;
2510        ntoken = NULL;
2511      }
2512      atoken->fr_node = last_ntok; /* was NODE_PtoX(late_start_node) */
2513      atoken->to_node = ntok;
2514      atoken->ilabel = *add_tree;
2515      atoken->linkl_next_arc = FSMARC_NULL;
2516      atoken->linkl_prev_arc = FSMARC_NULL;
2517      IF_DEBUG_WDADD(atoken->ilabel_str = fst->ilabels->words[ atoken->ilabel]);
2518      if (!last_ntoken)
2519      {
2520        append_to->linkl_next_arc = atok;
2521        atoken->olabel = add_tree_olabel_use;
2522#if DO_WEIGHTED_ADDWORD
2523        atoken->cost  =  add_tree_cost_use;
2524#endif
2525        IF_DEBUG_WDADD(atok->olabel_str = fst->olabels->words[ atoken->olabel]);
2526      }
2527      else
2528      {
2529        last_ntoken->un_ptr.first_next_arc = atok;
2530      }
2531      last_ntoken = ntoken;
2532      last_ntok   = ntok;
2533    }
2534    /*  add_tree_end->to_node = early_end_node;
2535    add_tree_end->linkl_next_arc = FSMARC_NULL;
2536    atok = last_arc_arriving( early_end_node);
2537    atok->linkl_prev_arc = add_tree_end;
2538    add_tree_end->linkl_prev_arc = FSMARC_NULL; */
2539    atoken->to_node = NODE_ItoX(end_node);
2540    append_arc_arriving_node(fst, &fst->FSMnode_list[end_node], atok);
2541  }
2542
2543  else /* ie. do_minimize from the rear */
2544  {
2545
2546    add_tree = add_tree_end;
2547    new_arc_id = fst_get_free_arc(fst);
2548    if (new_arc_id == MAXarcID)
2549      return FST_FAILED_ON_MEMORY;
2550    atok = ARC_ItoX(new_arc_id);
2551    atoken = ARC_XtoP(atok);
2552    atoken->olabel = add_tree_olabel_use;
2553#if DO_WEIGHTED_ADDWORD
2554    atoken->cost   = add_tree_cost_use;
2555#endif
2556    atoken->ilabel = *add_tree_late_start;
2557    atoken->fr_node = NODE_PtoX(late_start_node);
2558    if (atoken->ilabel == WORD_BOUNDARY)
2559    {
2560      atoken->cost = (costdata)(atoken->cost + fst->wtw_average);
2561    }
2562    else
2563    {
2564      atoken->cost   = add_tree_cost_use + fst->wtw_average;
2565      add_tree_cost_use = FREEcostdata;
2566    }
2567    IF_DEBUG_WDADD(atoken->olabel_str = fst->olabels->words[ atoken->olabel]);
2568    IF_DEBUG_WDADD(atoken->ilabel_str = fst->ilabels->words[ atoken->ilabel]);
2569    append_arc_leaving_node(fst, late_start_node, atok);
2570
2571    last_atok = atok;
2572    while (1)
2573    {
2574
2575      if (add_tree == add_tree_late_start)
2576        break;
2577      /* if there are other arcs leaving this node then joining
2578      earlier than here will result in over-generation, so don't
2579      if( atok!=end_arc && atok->linkl_next_arc != FSMARC_NULL)
2580      break;
2581      */
2582      /*
2583      also need boundary conditions checker here
2584      */
2585
2586      /* */
2587      IF_DEBUG_WDADD(printf_node(fst, early_end_node));
2588
2589      /* if( num_arcs_leaving( add_tree_end->fr_node->first_prev_arc->to_node) >1) {
2590      printf("merge stopped due to num_arcs leaving ..\n");
2591      printf_arc(fst, add_tree_end->fr_node->first_prev_arc->to_node->first_next_arc);
2592      break;
2593      } */
2594
2595      for (atok = early_end_node->first_prev_arc; atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2596      {
2597        atoken = ARC_XtoP(atok);
2598	/* never pull before the slot marker */
2599	if(atoken->cost == DISABLE_ARC_COST) break;
2600	if(atoken->olabel>0 && atoken->olabel<fst->olabels->num_slots) break;
2601        fst_pull_arc_olabel(fst, atoken); /* fails are ok */
2602      }
2603
2604      prev_atoken = find_prev_arc_with_iolabels(fst, early_end_node,
2605                    *add_tree /*->ilabel*/,
2606                    WORD_EPSILON_LABEL,
2607                    NULL /*&append_to*/);
2608      if (!prev_atoken)
2609        break;
2610      else
2611      {
2612        /* this check is now inside find_prev_arc_with_iolabels */
2613        /* if( num_arcs_leaving(prev_atok->fr_node->first_prev_arc->to_node->first_next_arc)>1)*/
2614
2615        /* now look to merge earlier still */
2616        early_end_node = NODE_XtoP(prev_atoken->fr_node);
2617        add_tree--;
2618        if (add_tree == add_tree_late_start)
2619          break;
2620      }
2621    }
2622    add_tree_early_end = add_tree;
2623    for (add_tree = add_tree_late_start + 1; add_tree <= add_tree_early_end; add_tree++)
2624    {
2625      new_node_id = fst_get_free_node(fst);
2626      if (new_node_id == MAXnodeID)
2627        return FST_FAILED_ON_MEMORY;
2628      ntok = NODE_ItoX(new_node_id);
2629      ntoken = NODE_XtoP(ntok);
2630      new_arc_id = fst_get_free_arc(fst);
2631      if (new_arc_id == MAXarcID)
2632        return FST_FAILED_ON_MEMORY;
2633      atok = ARC_ItoX(new_arc_id);
2634      atoken = ARC_XtoP(atok);
2635      atoken->ilabel = *add_tree;
2636#if DO_WEIGHTED_ADDWORD
2637      atoken->cost   = FREEcostdata; /* idea: distribute add_tree_cost here;*/
2638#endif
2639      atoken->olabel = WORD_EPSILON_LABEL;
2640      atoken->fr_node = ntok;
2641      atoken->to_node = FSMNODE_NULL; /* filled in next loop */
2642      IF_DEBUG_WDADD(atoken->olabel_str = fst->olabels->words[atoken->olabel]);
2643      IF_DEBUG_WDADD(atoken->ilabel_str = fst->ilabels->words[atoken->ilabel]);
2644
2645      ntoken->un_ptr.first_next_arc = atok;
2646      ntoken->first_prev_arc = last_atok;
2647      TO_NODE(last_atok) = ntok;
2648      last_atok = atok;
2649    }
2650    TO_NODE(last_atok) = NODE_PtoX(early_end_node);
2651    append_arc_arriving_node(fst, early_end_node, last_atok);
2652  }
2653  return rc;
2654}
2655
2656
2657void append_arc_leaving_node(srec_context* fst, FSMnode* fr_node, FSMarc_ptr arc)
2658{
2659  FSMarc_ptr* atok = &fr_node->un_ptr.first_next_arc;
2660  FSMarc* atoken = ARC_XtoP(*atok);
2661  for (; (*atok) != FSMARC_NULL; atok = &atoken->linkl_next_arc)
2662  {
2663    atoken = ARC_XtoP(*atok);
2664  }
2665  *atok = arc;
2666  LINKL_NEXT(arc) = FSMARC_NULL;
2667}
2668void append_arc_arriving_node(srec_context* fst, FSMnode* to_node, FSMarc_ptr arc)
2669{
2670  FSMarc_ptr* atok = &to_node->first_prev_arc;
2671  FSMarc* atoken = ARC_XtoP(*atok);
2672  for (; (*atok) != FSMARC_NULL; atok = &atoken->linkl_prev_arc)
2673  {
2674    atoken = ARC_XtoP(*atok);
2675  }
2676  *atok = arc;
2677  LINKL_PREV(arc) = FSMARC_NULL;
2678}
2679
2680
2681int printf_node1(srec_context* fst, FSMnode* node)
2682{
2683  return 0;
2684}
2685int printf_arc1(srec_context* fst, char* msg, FSMarc* arc)
2686{
2687  char buffer[MAX_LINE_LENGTH];
2688  sprintf_arc(buffer, fst, arc);
2689  printf("%s%s\n", msg, buffer);
2690  return 0;
2691}
2692int sprintf_arc(char* buf, srec_context* fst, FSMarc* arc)
2693{
2694  int rc;
2695  FSMnode* to_node = NODE_XtoP(arc->to_node);
2696  arcID arc_index = (arcID)(arc - fst->FSMarc_list);
2697  if (to_node->un_ptr.first_next_arc == FSMARC_NULL)
2698    rc = sprintf(buf, "arc%hu\n", arc_index);
2699  else
2700  {
2701    rc = sprintf(buf, "arc%hu\t%hu,%hu\t%s\t%s\t%hu\n",
2702                 arc_index,
2703                 ARC_XtoI(to_node->un_ptr.first_next_arc),
2704                 arc->linkl_next_arc != FSMARC_NULL ? ARC_XtoI(arc->linkl_next_arc) : -1,
2705                 fst->ilabels->words[arc->ilabel],
2706                 fst->olabels->words[arc->olabel],
2707                 arc->cost);
2708  }
2709  return rc;
2710}
2711
2712/* dumps the recognition context as a binary file,
2713   it will also store any empty space for growing */
2714
2715#define ENCODE(SIGN,TYP) { SIGN+=sizeof(TYP); SIGN=SIGN<<3; ASSERT((shifted+=3)<32); }
2716
2717asr_int32_t FST_sizes_signature()
2718{
2719#ifndef NDEBUG
2720  int shifted = 0;
2721#endif
2722  asr_int32_t signature = 0;
2723  ENCODE(signature, arcID);
2724  ENCODE(signature, nodeID);
2725  ENCODE(signature, wordID);
2726  ENCODE(signature, labelID);
2727  ENCODE(signature, costdata);
2728  return signature;
2729}
2730
2731int FST_ContextImageSize(srec_context* context)
2732{
2733  int size = 0;
2734  size += sizeof(srec_context);
2735  size += sizeof(wordmap);
2736  size += sizeof(char*) * context->olabels->max_words;
2737  size += sizeof(char) * context->olabels->max_chars;
2738  size += sizeof(wordmap);
2739  if (context->ilabels->words != NULL)
2740    size += sizeof(char*) * context->olabels->max_words;
2741  if (context->ilabels->chars != NULL)
2742    size += sizeof(char) * context->olabels->max_chars;
2743  size += sizeof(FSMarc) * context->FSMarc_list_len;
2744  size += sizeof(FSMnode) * context->FSMnode_list_len;
2745  size += sizeof(FSMnode_info) * context->FSMnode_list_len;
2746  size += sizeof(arc_token) * context->arc_token_list_len;
2747  return size;
2748}
2749
2750ESR_ReturnCode serializeWordMapV2(wordmap *wordmap, PFile* fp)
2751{
2752  unsigned int i = 0;
2753  unsigned int nfields;
2754  unsigned int tmp2[32];
2755
2756  i = 0;
2757  tmp2[i++] = wordmap->num_words;
2758  tmp2[i++] = wordmap->num_slots;
2759  tmp2[i++] = wordmap->max_words;
2760  tmp2[i++] = wordmap->num_base_words;
2761  tmp2[i++] = wordmap->max_chars;
2762  tmp2[i++] = (wordmap->next_chars - wordmap->chars);
2763  tmp2[i++] = (wordmap->next_base_chars - wordmap->chars);
2764  nfields = i;
2765  if (pfwrite(tmp2, sizeof(tmp2[0]), nfields, fp) != nfields)
2766    return ESR_WRITE_ERROR;
2767
2768  if (pfwrite(wordmap->chars, sizeof(char), wordmap->max_chars, fp) != (size_t)wordmap->max_chars)
2769    return ESR_WRITE_ERROR;
2770
2771  return ESR_SUCCESS;
2772}
2773
2774ESR_ReturnCode deserializeWordMapV2(wordmap **pwordmap, PFile* fp)
2775{
2776  unsigned int i = 0;
2777  unsigned int nfields;
2778  unsigned int tmp2[32];
2779  wordmap *awordmap;
2780  char *p;
2781  ESR_ReturnCode rc = ESR_SUCCESS;
2782  unsigned int next_chars_idx, next_base_chars_idx;
2783
2784  awordmap = NEW(wordmap, L("srec.g2g.graph.wordmap.base"));
2785  if (awordmap == NULL)
2786  {
2787    PLogError("NEW failed on srec.g2g.graph.wordmap.base\n");
2788    rc = ESR_OUT_OF_MEMORY;
2789    goto CLEANUP;
2790  }
2791  awordmap->wordIDForWord = NULL; // early break to cleanup needs this
2792
2793  nfields = 7;
2794  if (pfread(tmp2, sizeof(tmp2[0]), nfields, fp) != nfields)
2795  {
2796    PLogError("pfread failed when reading nfields\n");
2797    rc = ESR_READ_ERROR;
2798    goto CLEANUP;
2799  }
2800
2801  i = 0;
2802  awordmap->num_words      = (wordID)tmp2[i++];
2803  awordmap->num_slots      = (wordID)tmp2[i++];
2804  awordmap->max_words      = (wordID)tmp2[i++];
2805  awordmap->num_base_words = (wordID)tmp2[i++];
2806  awordmap->max_chars      = tmp2[i++];
2807  next_chars_idx           = tmp2[i++];
2808  next_base_chars_idx      = tmp2[i++];
2809  ASSERT(nfields == i);
2810
2811  awordmap->words = NEW_ARRAY(char*, awordmap->max_words, L("srec.g2g.graph.wordmap.words"));
2812  if (awordmap->words == NULL)
2813  {
2814    PLogError("NEW_ARRAY failed for srec.g2g.graph.wordmap.words %d\n", awordmap->max_words);
2815    rc = ESR_OUT_OF_MEMORY;
2816    goto CLEANUP;
2817  }
2818  awordmap->chars = NEW_ARRAY(char, awordmap->max_chars, L("srec.g2g.graph.wordmap.chars"));
2819  if (awordmap->chars == NULL)
2820  {
2821    PLogError("NEW_ARRAY failed for srec.g2g.graph.wordmap.chars %d\n", awordmap->max_chars);
2822    rc = ESR_OUT_OF_MEMORY;
2823    goto CLEANUP;
2824  }
2825  awordmap->next_chars = awordmap->chars + next_chars_idx;
2826  awordmap->next_base_chars = awordmap->chars + next_base_chars_idx;
2827
2828  if ((i=pfread(awordmap->chars, sizeof(char), awordmap->max_chars, fp)) != (size_t)awordmap->max_chars)
2829  {
2830    PLogError("pfread failed while reading %d chars\n", awordmap->max_chars);
2831    rc = ESR_READ_ERROR;
2832    goto CLEANUP;
2833  }
2834
2835
2836  p = awordmap->chars;
2837  ASSERT((int)p % 2 == 0);
2838  nfields = 0;
2839  if (nfields < awordmap->num_words)
2840    awordmap->words[nfields++] = p;
2841  for (; p < awordmap->next_chars; p++) // was next_base_chars
2842  {
2843    if ( ( *p ) == '\0' )
2844    {
2845      if (nfields == awordmap->num_words) // was num_base_words
2846        break;
2847      if (((int)p) % 2 == 0) p++; /* so that words begin on even byte bound */
2848      awordmap->words[nfields++] = p + 1;
2849    }
2850  }
2851  ASSERT(nfields == awordmap->num_words); // was num_base_words
2852
2853  if (awordmap->max_words >= awordmap->num_base_words)
2854    {
2855      PHashTableArgs hashArgs;
2856      hashArgs.capacity = awordmap->max_words;
2857      if(hashArgs.capacity%2==0) hashArgs.capacity += 1;
2858      hashArgs.compFunction = HashCmpWord; //PHASH_TABLE_DEFAULT_COMP_FUNCTION;
2859      hashArgs.hashFunction = HashGetCode; //PHASH_TABLE_DEFAULT_HASH_FUNCTION;
2860      hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
2861      CHKLOG(rc, PHashTableCreate(&hashArgs, L("srec.graph.wordmap.wordIDForWord.deserializeWordMap()"), &awordmap->wordIDForWord));
2862
2863      rc = wordmap_populate ( awordmap, awordmap->num_words );
2864
2865      if (rc != ESR_SUCCESS)
2866	{
2867        wordmap_clean ( awordmap );
2868        goto CLEANUP;
2869      }
2870    }
2871  else
2872    {
2873      awordmap->wordIDForWord = NULL;
2874    }
2875
2876  /* success */
2877  *pwordmap = awordmap;
2878  return ESR_SUCCESS;
2879
2880 CLEANUP:
2881  if (awordmap != NULL)
2882  {
2883    if (awordmap->wordIDForWord != NULL)
2884      PHashTableDestroy(awordmap->wordIDForWord);
2885    if (awordmap->words != NULL) FREE(awordmap->words);
2886    if (awordmap->chars != NULL) FREE(awordmap->chars);
2887    FREE(awordmap);
2888  }
2889  return rc;
2890}
2891
2892static ESR_ReturnCode serializeArcToken(arc_token *token_base,
2893                                        int i,
2894                                        PFile* fp)
2895{
2896  arc_token *token = token_base + i;
2897  asr_uint32_t idx;
2898
2899  if (pfwrite(&token->ilabel, 2, 1, fp) != 1)
2900    return ESR_WRITE_ERROR;
2901
2902  if (pfwrite(&token->olabel, 2, 1, fp) != 1)
2903    return ESR_WRITE_ERROR;
2904
2905  /* if (pfwrite(&token->cost, 2, 1, fp) != 1)
2906     return ESR_WRITE_ERROR; */
2907  idx = PTR_TO_IDX(ARC_TOKEN_PTR(token_base, token->first_next_arc), token_base);
2908
2909  if (pfwrite(&idx, 4, 1, fp) != 1)
2910    return ESR_WRITE_ERROR;
2911
2912  idx = PTR_TO_IDX(ARC_TOKEN_PTR(token_base, token->next_token_index), token_base);
2913
2914  if (pfwrite(&idx, 4, 1, fp) != 1)
2915    return ESR_WRITE_ERROR;
2916
2917  return ESR_SUCCESS;
2918}
2919
2920static ESR_ReturnCode serializeArcTokenInfo(srec_context *context,
2921    PFile* fp)
2922{
2923  int i;
2924  asr_uint32_t idx;
2925  ESR_ReturnCode rc;
2926
2927  if (pfwrite(&context->arc_token_list_len, 2, 1, fp) != 1)
2928    return ESR_WRITE_ERROR;
2929
2930  idx = PTR_TO_IDX(context->arc_token_freelist, context->arc_token_list);
2931
2932  if (pfwrite(&idx, 4, 1, fp) != 1)
2933    return ESR_WRITE_ERROR;
2934
2935  idx = PTR_TO_IDX(context->arc_token_insert_start, context->arc_token_list);
2936
2937  if (pfwrite(&idx, 4, 1, fp) != 1)
2938    return ESR_WRITE_ERROR;
2939
2940  for (i = 0; i < context->arc_token_list_len; ++i)
2941  {
2942    rc = serializeArcToken(context->arc_token_list, i, fp);
2943    if (rc != ESR_SUCCESS) return rc;
2944  }
2945  return ESR_SUCCESS;
2946}
2947
2948static ESR_ReturnCode deserializeArcToken(arc_token *token_base,
2949    int i,
2950    PFile* fp)
2951{
2952  arc_token *token = token_base + i;
2953  asr_uint32_t idx[2];
2954  asr_uint16_t labels[2];
2955
2956  if (pfread(labels, 2, 2, fp) != 2)
2957    return ESR_READ_ERROR;
2958
2959  token->ilabel = labels[0];
2960  token->olabel = labels[1];
2961
2962  /* if (pfread(&token->cost, 2, 1, fp) != 1)
2963     return ESR_READ_ERROR; */
2964
2965  if (pfread(idx, 4, 2, fp) != 2)
2966    return ESR_READ_ERROR;
2967
2968  token->first_next_arc = ARC_TOKEN_PTR2LNK(token_base, IDX_TO_PTR(idx[0], token_base));
2969  token->next_token_index = ARC_TOKEN_PTR2LNK(token_base, IDX_TO_PTR(idx[1], token_base));
2970
2971  return ESR_SUCCESS;
2972}
2973
2974static ESR_ReturnCode deserializeArcTokenInfo(srec_context *context,
2975    PFile* fp)
2976{
2977  ESR_ReturnCode rc;
2978  int i;
2979  asr_uint32_t idx;
2980
2981  if (pfread(&context->arc_token_list_len, 2, 1, fp) != 1) {
2982    PLogError("pfread failed in deserializeArcTokenInfo()\n");
2983    return ESR_READ_ERROR;
2984  }
2985
2986  context->arc_token_list = NEW_ARRAY(arc_token, context->arc_token_list_len,
2987                                      L("srec.graph.wordgraph"));
2988
2989  if (context->arc_token_list == NULL)
2990    return ESR_OUT_OF_MEMORY;
2991
2992  if (pfread(&idx, 4, 1, fp) != 1)
2993  {
2994    rc = ESR_READ_ERROR;
2995    goto CLEANUP;
2996  }
2997
2998  context->arc_token_freelist =
2999    IDX_TO_PTR(idx, context->arc_token_list);
3000
3001  if (pfread(&idx, 4, 1, fp) != 1)
3002  {
3003    rc = ESR_READ_ERROR;
3004    goto CLEANUP;
3005  }
3006
3007  context->arc_token_insert_start =
3008    IDX_TO_PTR(idx, context->arc_token_list);
3009
3010  for (i = 0; i < context->arc_token_list_len; ++i)
3011  {
3012    rc = deserializeArcToken(context->arc_token_list, i, fp);
3013    if (rc != ESR_SUCCESS) goto CLEANUP;
3014  }
3015  return ESR_SUCCESS;
3016
3017CLEANUP:
3018  FREE(context->arc_token_list);
3019  context->arc_token_list =
3020    context->arc_token_freelist =
3021      context->arc_token_insert_start = NULL;
3022  return rc;
3023}
3024
3025int FST_GetGrammarType(srec_context* context)
3026{
3027  arc_token *t, *u;
3028  wordID expected_wdid;
3029  /* 0 1 0
3030     1 2 4
3031     ...
3032     1 2 1316
3033     2
3034  */
3035  t = context->arc_token_list;
3036  if (t->ilabel != WORD_EPSILON_LABEL)
3037    return GrammarTypeBNF;
3038  if (t->next_token_index)
3039    return GrammarTypeBNF;
3040  t = ARC_TOKEN_PTR(context->arc_token_list, t->first_next_arc);
3041  expected_wdid = NUM_ITEMLIST_HDRWDS;
3042  for (; t; t = ARC_TOKEN_PTR(context->arc_token_list, t->next_token_index))
3043  {
3044    if (t->ilabel != expected_wdid)
3045      return GrammarTypeBNF;
3046    u = ARC_TOKEN_PTR(context->arc_token_list, t->first_next_arc);
3047    if (u != NULL && u->ilabel != MAXwordID)
3048      return GrammarTypeBNF;
3049    expected_wdid++;
3050  }
3051  if (expected_wdid != context->olabels->num_words)
3052    return GrammarTypeBNF;
3053  return GrammarTypeItemList;
3054}
3055
3056int FST_DumpContextAsImageV2(srec_context* context, PFile* fp)
3057{
3058  asr_uint32_t header[4];
3059  arcID tmp[32], i, j, nfields;
3060  FSMarc* arc;
3061  ESR_ReturnCode rc;
3062#if !defined(DO_ARCS_IO_IN_ARC_ORDER)
3063  FSMnode* node;
3064  arcID arcid, num_arcs, num_arcs_written;
3065#endif
3066
3067  /* Write header information. */
3068  header[0] = 0;
3069  header[1] = IMAGE_FORMAT_V2;
3070  header[2] = context->modelid;
3071  header[3] = context->grmtyp;
3072#ifdef SREC_ENGINE_VERBOSE_LOGGING
3073  PLogMessage("FST_DumpContextAsImageV2() fstgrmtyp %d", context->grmtyp);
3074#endif
3075
3076  if (pfwrite(header, sizeof(header[0]), 4, fp) != 4)
3077  {
3078    PLogError("FST_DumpContextAsImage: Could not write header.\n");
3079    return FST_FAILED_INTERNAL;
3080  }
3081
3082  /* arcs and nodes */
3083  i = 0;
3084  tmp[i++] = context->num_arcs;
3085  tmp[i++] = context->FSMarc_list_len;
3086  tmp[i++] = context->num_base_arcs;
3087  tmp[i++] = context->FSMarc_freelist;
3088  tmp[i++] = context->max_searchable_arcs;
3089
3090  tmp[i++] = context->num_nodes;
3091  tmp[i++] = context->FSMnode_list_len;
3092  tmp[i++] = context->num_base_nodes;
3093  tmp[i++] = context->FSMnode_freelist;
3094  tmp[i++] = context->start_node;
3095  tmp[i++] = context->end_node;
3096  tmp[i++] = context->max_searchable_nodes;
3097
3098  tmp[i++] = context->beg_silence_word;
3099  tmp[i++] = context->end_silence_word;
3100  tmp[i++] = context->hack_silence_word;
3101  tmp[i++] = context->hmm_ilabel_offset;
3102
3103  nfields = i;
3104
3105  if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3106    return ESR_WRITE_ERROR;
3107#ifdef SREC_ENGINE_VERBOSE_LOGGING
3108  PLogMessage("G2G done WR hdrs %d", pftell(fp));
3109#endif
3110
3111#if defined(DO_ARCS_IO_IN_ARC_ORDER)
3112  if( 1) {
3113	 i = 0;
3114     tmp[i++] = context->end_node;
3115     tmp[i++] = MAXnodeID;
3116     tmp[i++] = MAXmodelID;
3117     tmp[i++] = MAXwordID;
3118     tmp[i++] = FREEcostdata;
3119     nfields = i;
3120     if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3121        return ESR_WRITE_ERROR;
3122  }
3123  for(j=0; j<context->num_arcs; j++) {
3124        arc = &context->FSMarc_list[j];
3125        i = 0;
3126        tmp[i++] = arc->fr_node;
3127        tmp[i++] = arc->to_node;
3128	ASSERT(arc->to_node == context->end_node || arc->to_node != MAXnodeID);
3129        tmp[i++] = arc->ilabel;
3130        tmp[i++] = arc->olabel;
3131        tmp[i++] = arc->cost;
3132        nfields = i;
3133        if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3134          return ESR_WRITE_ERROR;
3135  }
3136#else
3137  num_arcs_written = 0;
3138  for (j = 0; j < context->num_nodes; j++) {
3139    node = &context->FSMnode_list[j];
3140    num_arcs = (arcID)num_arcs_leaving(context, node);
3141    arcid = node->un_ptr.first_next_arc;
3142    if (arcid == MAXarcID) {
3143      i = 0;
3144      tmp[i++] = (arcID)j;
3145      tmp[i++] = MAXarcID;
3146      tmp[i++] = MAXarcID;
3147      tmp[i++] = MAXarcID;
3148      tmp[i++] = MAXarcID;
3149      nfields = i;
3150      if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3151        return ESR_WRITE_ERROR;
3152      /* num_arcs_written++; end node don't have a to-arc */
3153    }
3154    else {
3155      for (; arcid != MAXarcID; arcid = arc->linkl_next_arc) {
3156        arc = &context->FSMarc_list[arcid];
3157        i = 0;
3158        tmp[i++] = (arcID)j;
3159        tmp[i++] = arc->to_node;
3160	ASSERT(arc->to_node == context->end_node || arc->to_node != MAXnodeID);
3161        tmp[i++] = arc->ilabel;
3162        tmp[i++] = arc->olabel;
3163        tmp[i++] = arc->cost;
3164        nfields = i;
3165        if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3166          return ESR_WRITE_ERROR;
3167        num_arcs_written++;
3168      }
3169    }
3170  }
3171  ASSERT(num_arcs_written == context->num_arcs);
3172#endif
3173
3174#ifdef SREC_ENGINE_VERBOSE_LOGGING
3175  PLogMessage("G2G done WR arcs %d", pftell(fp));
3176#endif
3177
3178  /* node info   .. will now be calculated on line */
3179  /* wrapup_cost .. will now be calculated on line */
3180
3181  /* exit points for slots */
3182  if (1)
3183  {
3184    srec_fsm_exit_point *p, *q;
3185    if (pfwrite(&context->num_fsm_exit_points, 2, 1, fp) != 1)
3186      return ESR_WRITE_ERROR;
3187    p = context->fsm_exit_points;
3188    q = p + MAX_NUM_SLOTS;
3189    while (p < q)
3190    {
3191      if (pfwrite(&p->from_node_index, 2, 1, fp) != 1)
3192        return ESR_WRITE_ERROR;
3193      if (pfwrite(&p->arc_index, 2, 1, fp) != 1)
3194        return ESR_WRITE_ERROR;
3195      if (pfwrite(&p->wbto_node_index, 2, 1, fp) != 1)
3196        return ESR_WRITE_ERROR;
3197      ++p;
3198    }
3199  }
3200#ifdef SREC_ENGINE_VERBOSE_LOGGING
3201  PLogMessage("G2G done WR exits %d", pftell(fp));
3202#endif
3203  /* no entry points stuff */
3204
3205  /* no saving ilabels */
3206
3207  /* now load the ilabels */
3208  rc = serializeWordMapV2(context->olabels, fp);
3209  if (rc != ESR_SUCCESS)
3210    return rc;
3211#ifdef SREC_ENGINE_VERBOSE_LOGGING
3212  PLogMessage("G2G done WR wmap %d", pftell(fp));
3213#endif
3214
3215  if (context->grmtyp != GrammarTypeItemList)
3216  {
3217    if (serializeArcTokenInfo(context, fp) != ESR_SUCCESS)
3218    {
3219      PLogError("FST_DumpContextAsImage: could not write arc token data.\n");
3220      return FST_FAILED_INTERNAL;
3221    }
3222  }
3223  else
3224  {
3225    /* nothing, at decoding time, we'll just construct the graph
3226       from the word list, ie just this ..
3227       0 1 0
3228       1 2 4
3229       ...
3230       1 2 1316
3231       2
3232       ie
3233       0 1 0
3234       for(i=4;i<nwords;i++) { "1 2 i" }
3235       2
3236       because words 0-3 are eps,-pau-,-pau2-,slot@grammarname.grxml
3237    */
3238  }
3239
3240  /* Write header information. */
3241  header[0] = pftell(fp);
3242  header[1] = IMAGE_FORMAT_V2;
3243
3244  /* goto beginning of file. */
3245  if (pfseek(fp, 0, SEEK_SET))
3246  {
3247    PLogError("FST_DumpContextAsImage: could not reposition for header.\n");
3248    return FST_FAILED_INTERNAL;
3249  }
3250
3251  if (pfwrite(header, 4, 2, fp) != 2)
3252  {
3253    PLogError("FST_DumpContextAsImage: Could not write header.\n");
3254    return FST_FAILED_INTERNAL;
3255  }
3256
3257  /* reset pointer at end of file. */
3258  if (pfseek(fp, 0, SEEK_END))
3259  {
3260    PLogError("FST_DumpContextAsImage: could not reposition file pointer at end.\n");
3261    return FST_FAILED_INTERNAL;
3262  }
3263
3264  return FST_SUCCESS;
3265}
3266
3267
3268int FST_LoadContextFromImageV2(srec_context* fst, PFile* fp)
3269{
3270  asr_int32_t header[6];
3271  arcID tmp[32], num_arcs, new_arc_id;
3272  unsigned int i, nfields;
3273  srec_fsm_exit_point *p, *q;
3274  arcID max_num_FSMarcs;
3275  nodeID max_num_FSMnodes;
3276  FSMarc_ptr atok =  (FSMarc_ptr)0;
3277  FSMarc *atoken = NULL;
3278  FSMnode *fr_node, *to_node;
3279  int rc = FST_SUCCESS;
3280  ESR_ReturnCode esr_rc;
3281  ESR_BOOL seenFinalNode = ESR_FALSE;
3282
3283  /* read header information. */
3284  if (pfread(&header[2], sizeof(header[0]), 2, fp) != 2)
3285  {
3286    PLogError("FST_DumpContextAsImage: Could not read header.\n");
3287    return FST_FAILED_INTERNAL;
3288  }
3289  fst->modelid = header[2];
3290  fst->grmtyp  = header[3];
3291
3292  nfields = 16;
3293  if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3294    return ESR_READ_ERROR;
3295
3296  /* arcs and nodes */
3297  i = 0;
3298  fst->num_arcs             = max_num_FSMarcs = tmp[i++];
3299  fst->FSMarc_list_len      = tmp[i++];
3300  fst->num_base_arcs        = tmp[i++];
3301  fst->FSMarc_freelist      = tmp[i++];
3302  fst->max_searchable_arcs  = tmp[i++] * 0; /*5*/
3303  fst->num_nodes            = max_num_FSMnodes = tmp[i++];
3304  fst->FSMnode_list_len     = tmp[i++];
3305  fst->num_base_nodes       = tmp[i++];
3306  fst->FSMnode_freelist     = tmp[i++];
3307  fst->start_node           = tmp[i++];/*10*/
3308  fst->end_node             = tmp[i++];
3309  fst->max_searchable_nodes = tmp[i++] * 0;
3310  fst->beg_silence_word     = tmp[i++];
3311  fst->end_silence_word     = tmp[i++];
3312  fst->hack_silence_word    = tmp[i++]; /*15*/
3313  fst->hmm_ilabel_offset    = tmp[i++]; /* 16 */
3314
3315  fst->addWordCaching_lastslot_name = 0;
3316  fst->addWordCaching_lastslot_num = MAXwordID;
3317  fst->addWordCaching_lastslot_needs_post_silence = ESR_FALSE;
3318
3319  ASSERT(i == nfields);
3320#ifdef SREC_ENGINE_VERBOSE_LOGGING
3321  PLogMessage("G2G done RD hdrs %d", pftell(fp));
3322#endif
3323
3324  ASSERT(fst->num_arcs >= fst->num_base_arcs); // was ==
3325  ASSERT(fst->num_nodes >= fst->num_base_nodes); // was ==
3326
3327  fst->FSMarc_list = (FSMarc*)CALLOC_CLR(fst->FSMarc_list_len, sizeof(FSMarc), "srec.graph.arcs");
3328  if (!fst->FSMarc_list)
3329  {
3330    rc = FST_FAILED_ON_MEMORY;
3331    PLogError("CALLOC_CLR fst->FSMarc_list \n");
3332    goto CLEANUP;
3333  }
3334  fst->FSMnode_list = (FSMnode*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode), "srec.graph.nodes");
3335  if (!fst->FSMnode_list)
3336  {
3337    rc = FST_FAILED_ON_MEMORY;
3338    PLogError("CALLOC_CLR fst->FSMnode_list  failed\n");
3339    goto CLEANUP;
3340  }
3341  fst->FSMnode_info_list = (FSMnode_info*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode_info), "srec.graph.nodeinfos");
3342
3343  if (!fst->FSMnode_info_list)
3344  {
3345    rc = FST_FAILED_ON_MEMORY;
3346    PLogError("CALLOC_CLR fst->FSMnode_info_list failed\n");
3347    goto CLEANUP;
3348  }
3349
3350  /* setup the arc freelist */
3351  fst->FSMarc_freelist = 0;
3352  fst->num_arcs = 0;
3353  for (i = 0; i < (unsigned)fst->FSMarc_list_len - 1; i++)
3354  {
3355    fst->FSMarc_list[i].linkl_next_arc = ARC_ItoX(i + 1);
3356    fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
3357  }
3358  fst->FSMarc_list[i].linkl_next_arc = FSMARC_NULL;
3359  fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
3360
3361  /* initialize the nodes, 'cuz reading is random order */
3362  for (i = 0; i < max_num_FSMnodes; i++)
3363  {
3364    fr_node = &fst->FSMnode_list[i];
3365    fr_node->un_ptr.first_next_arc = fr_node->first_prev_arc = FSMARC_NULL;
3366  }
3367
3368  /* 1. first load up all the information */
3369  num_arcs = 0;
3370  for (i = 0; i < max_num_FSMarcs || !seenFinalNode; i++)
3371  {
3372    if (i > max_num_FSMarcs && !seenFinalNode)
3373    {
3374      PLogError("Final node never encountered");
3375      rc = ESR_INVALID_STATE;
3376      goto CLEANUP;
3377    }
3378	nfields = 5;
3379    if (pfread(tmp,sizeof(tmp[0]),nfields,fp) != nfields)
3380    {
3381      PLogError("reading arc");
3382      rc = ESR_INVALID_STATE;
3383      goto CLEANUP;
3384    }
3385
3386    if (tmp[1] == MAXnodeID)
3387    {
3388      seenFinalNode = ESR_TRUE;
3389      if(fst->end_node != tmp[0]) {
3390	PLogError("error with arc %d->%d ilabel %d olabel %d cost %d\n", tmp[0], tmp[1], tmp[2], tmp[3], tmp[4]);
3391      ASSERT(fst->end_node == tmp[0]);
3392      }
3393      i--;
3394    }
3395    else
3396    {
3397      new_arc_id = fst_get_free_arc(fst);
3398      if (new_arc_id == MAXarcID)
3399        return FST_FAILED_ON_MEMORY;
3400      atok = ARC_ItoX(new_arc_id);
3401      atoken = ARC_XtoP(atok);
3402      num_arcs++;
3403
3404      ASSERT(tmp[0] < fst->num_nodes);
3405      ASSERT(tmp[1] < fst->num_nodes);
3406      fr_node = &fst->FSMnode_list[ tmp[0]];
3407      to_node = &fst->FSMnode_list[ tmp[1]];
3408      atoken->ilabel = tmp[2];
3409      atoken->olabel = tmp[3];
3410      atoken->cost   = tmp[4];
3411      append_arc_leaving_node(fst, fr_node, atok);
3412      append_arc_arriving_node(fst, to_node, atok);
3413      atoken->fr_node = NODE_ItoX(tmp[0]);
3414      atoken->to_node = NODE_ItoX(tmp[1]);
3415    }
3416  }
3417  ASSERT(fst->num_arcs == num_arcs);
3418#ifdef SREC_ENGINE_VERBOSE_LOGGING
3419  PLogMessage("G2G done RD arcs %d", pftell(fp));
3420#endif
3421
3422  /* setup the node freelist */
3423  if (fst->num_nodes < fst->FSMnode_list_len)
3424  {
3425    fst->FSMnode_freelist = fst->num_nodes;
3426    for (i = fst->num_nodes; i < (unsigned)fst->FSMnode_list_len - 1; i++)
3427    {
3428      fst->FSMnode_list[i].un_ptr.next_node = NODE_ItoX(i + 1);
3429      fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
3430    }
3431    fst->FSMnode_list[i].un_ptr.next_node = FSMNODE_NULL;
3432    fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
3433  }
3434  else
3435  {
3436    fst->FSMnode_freelist = MAXnodeID;
3437  }
3438
3439  /* node info   .. will now be calculated on line */
3440  /* wrapup_cost .. will now be calculated on line */
3441  fst->whether_prepared = 0;
3442  fst_fill_node_info(fst);
3443  for (i = 0; i < fst->num_nodes; i++)
3444    fst->FSMnode_info_list[i] = NODE_INFO_UNKNOWN;
3445
3446  /* exit points for slots */
3447  if (pfread(&fst->num_fsm_exit_points, 2, 1, fp) != 1)
3448    return ESR_READ_ERROR;
3449  p = fst->fsm_exit_points;
3450  q = p + MAX_NUM_SLOTS;
3451  while (p < q)
3452  {
3453    if (pfread(&p->from_node_index, 2, 1, fp) != 1)
3454      return ESR_WRITE_ERROR;
3455    if (pfread(&p->arc_index, 2, 1, fp) != 1)
3456      return ESR_WRITE_ERROR;
3457    if (pfread(&p->wbto_node_index, 2, 1, fp) != 1)
3458      return ESR_WRITE_ERROR;
3459    ++p;
3460  }
3461#ifdef SREC_ENGINE_VERBOSE_LOGGING
3462  PLogMessage("G2G done RD exits %d", pftell(fp));
3463#endif
3464
3465  /* no entry points stuff */
3466
3467  /* ilabels were not saved, create them as empty */
3468  fst->ilabels = (wordmap*)CALLOC_CLR(1, sizeof(wordmap), "srec.graph.imap");
3469  fst->ilabels->num_words = fst->ilabels->max_words = 0;
3470  fst->ilabels->words = 0;
3471
3472  /* now save the olabels */
3473  esr_rc = deserializeWordMapV2(&fst->olabels, fp);
3474  if (esr_rc != ESR_SUCCESS)
3475  {
3476    PLogError("deserializeWordMapV2() failed\n");
3477    rc = FST_FAILED_INTERNAL;
3478    goto CLEANUP;
3479  }
3480#ifdef SREC_ENGINE_VERBOSE_LOGGING
3481  PLogMessage("G2G done RD wmap %d", pftell(fp));
3482#endif
3483
3484  /* arctokeninfo */
3485  if (fst->grmtyp != GrammarTypeItemList)
3486  {
3487    if (deserializeArcTokenInfo(fst, fp) != ESR_SUCCESS)
3488    {
3489      PLogError("FST_DumpContextAsImage: could not write arc token data.\n");
3490      rc = FST_FAILED_INTERNAL;
3491      goto CLEANUP;
3492    }
3493  }
3494  else
3495  {
3496    /* here we just construct an isolated list, but we should scrap it later */
3497    wordID wdid;
3498    arc_token *atl, *final, *atli;
3499    fst->arc_token_list_len = fst->olabels->num_words + 2 - NUM_ITEMLIST_HDRWDS;
3500    atl = NEW_ARRAY(arc_token, fst->arc_token_list_len, L("srec.graph.wordgraph"));
3501    if(!atl) {
3502      PLogError("fst->arc_token_list_len failed\n");
3503      goto CLEANUP;
3504    }
3505    atl->first_next_arc = ARC_TOKEN_LNK(atl, 1);
3506    atl->next_token_index = ARC_TOKEN_NULL;
3507    final = atl + fst->arc_token_list_len - 1;
3508    for (i = 1, wdid = NUM_ITEMLIST_HDRWDS; wdid < fst->olabels->num_words; i++, wdid++)
3509    {
3510      atli = &atl[i];
3511      atli->ilabel = wdid;
3512      atli->olabel = wdid;/*not used*/
3513      atli->next_token_index = ARC_TOKEN_LNK(atl, (i + 1));
3514      atli->first_next_arc = ARC_TOKEN_LNK(atl, (fst->arc_token_list_len - 1)); /*final*/
3515    }
3516    ASSERT(atl + i == final);
3517    final->next_token_index = ARC_TOKEN_NULL;
3518    final->first_next_arc = ARC_TOKEN_NULL;
3519    final->ilabel = final->olabel = 0;
3520    fst->arc_token_list = atl;
3521  }
3522  return FST_SUCCESS;
3523CLEANUP:
3524  return FST_FAILED_INTERNAL;
3525}
3526
3527
3528int FST_LoadContextFromImageV2(srec_context* context, PFile* fp);
3529
3530int FST_LoadContextFromImage(srec_context** pcontext, PFile* fp)
3531{
3532  srec_context* context = NULL;
3533  /* used to force correct data alignment */
3534  asr_int32_t header[2];
3535  int rc = FST_SUCCESS;
3536
3537  /*printf("FST_LoadContexFromImage\n");*/
3538  if (!fp)
3539  {
3540    PLogError("FST_LoadContextImage() failed; bad file pointer\n");
3541    return FST_FAILED_ON_INVALID_ARGS;
3542  }
3543
3544  context = NEW(srec_context, L("srec.graph.binary"));
3545  if (context == NULL)
3546  {
3547    PLogError("FST_LoadContextFromImage: out of memory while allocating context.\n");
3548    return FST_FAILED_ON_MEMORY;
3549  }
3550  memset(context, 0, sizeof(srec_context));
3551
3552  /* header */
3553  if (pfread(header, 4, 2, fp) != 2)
3554  {
3555    PLogError("FST_LoadContextFromImage: failed reading header.\n");
3556    rc = FST_FAILED_ON_INVALID_ARGS;
3557    goto CLEANUP;
3558  }
3559
3560  if (header[1] == IMAGE_FORMAT_V2)
3561  {
3562    rc = FST_LoadContextFromImageV2(context, fp);
3563    if (rc != FST_SUCCESS)
3564      goto CLEANUP;
3565  }
3566  else
3567  {
3568    PLogError("FST_LoadContextFromImage() failed on image_format\n");
3569    goto CLEANUP;
3570  }
3571  *pcontext = context;
3572  return FST_SUCCESS;
3573CLEANUP:
3574  if (context) FREE(context);
3575  *pcontext = 0;
3576  return rc;
3577}
3578
3579
3580int FST_DumpReverseWordGraph(srec_context* context, PFile* fp)
3581{
3582  /* not implemented, use FST_DumpSyntaxAsImage() for now */
3583  return 1;
3584}
3585
3586/* this belongs part of the srec_context */
3587typedef struct
3588{
3589  asr_int32_t image_format;
3590  asr_int32_t image_size;
3591  asr_int32_t sizes_signature;
3592}
3593context_image_header;
3594
3595
3596int FST_LoadContextFromStreamImage(srec_context** pcontext, char* buffer, asr_int32_t buffer_len)
3597{
3598  return FST_FAILED_INTERNAL;
3599}
3600
3601typedef struct nodeID_list_t
3602{
3603  nodeID *nodes, num_nodes, num_alloc;
3604}
3605nodeID_list;
3606
3607int fst_node_has_speech_to_come(srec_context* context, nodeID node_index)
3608{
3609  /* recursive func ... try to keep stack use low! */
3610  /* later we should cache this information:
3611     need 2bits:  endn opte regl dunno */
3612  FSMarc* arc;
3613  arcID arc_index = context->FSMnode_list[ node_index].un_ptr.first_next_arc;
3614  for (; arc_index != MAXarcID; arc_index = arc->linkl_next_arc)
3615  {
3616    arc = &context->FSMarc_list[arc_index];
3617    if (arc->ilabel >= context->hmm_ilabel_offset + EPSILON_OFFSET + NUM_SILENCE_HMMS)
3618    {
3619      return 1;
3620    }
3621    else if (arc->ilabel < EPSILON_OFFSET)
3622    {
3623      if (fst_node_has_speech_to_come(context, arc->to_node))
3624      {
3625        return 1;
3626      }
3627    }
3628    else if (IS_SILENCE_ILABEL(arc->ilabel, context))
3629    {
3630      /* another silence! */
3631      if (fst_node_has_speech_to_come(context, arc->to_node))
3632        return 1;
3633    }
3634  }
3635  return 0;
3636}
3637
3638int fst_fill_node_info(srec_context* context)
3639{
3640  char* infos = context->FSMnode_info_list;
3641  nodeID_list optends_obj;
3642  nodeID_list *optends = &optends_obj;
3643
3644  FSMnode* node;
3645  FSMarc* arc;
3646  nodeID i, j, node_index;
3647  arcID arc_index;
3648  costdata wrapup_cost;
3649  int wrapup_cost_inconsistencies;
3650  /* int num_near_end_nodes; nodes within EPS or silence to the end, these
3651     are called NODE_INFO_ENDNODES, so that they can be excluded from the
3652     comparison with end_node score in the eos detection */
3653
3654  optends->num_nodes    = 1;
3655  optends->num_alloc    = 8192; /* scaled up from 512 to support dynamic grammar add word of 5000 */
3656  optends->nodes        = (nodeID*)CALLOC(optends->num_alloc, sizeof(nodeID), "srec.tmp.optendnodes");
3657  optends->nodes[0]     = context->end_node;
3658  /* num_near_end_nodes = 0; */
3659
3660  for (i = 0; i < optends->num_nodes; i++)
3661  {
3662    node_index = optends->nodes[i];
3663    node = &context->FSMnode_list[ node_index];
3664    for (arc_index = node->first_prev_arc; arc_index != MAXarcID;
3665         arc_index = arc->linkl_prev_arc)
3666    {
3667      arc = &context->FSMarc_list[arc_index];
3668      if (arc->fr_node != node_index)
3669      {
3670
3671        if (IS_SILENCE_ILABEL(arc->ilabel, context) || arc->ilabel < EPSILON_OFFSET)
3672        {
3673          /* ok, fr_node goes to the end via silence, check for dups */
3674          for (j = 0; j < optends->num_nodes; j++)
3675            if (optends->nodes[j] == arc->fr_node) break;
3676          /* append it to the list */
3677          if (j == optends->num_nodes)
3678          {
3679            if (optends->num_nodes < optends->num_alloc)
3680              optends->nodes[ optends->num_nodes++] = arc->fr_node;
3681            else
3682            {
3683              ASSERT(0 && "allocated too few optend nodes");
3684              return 0;
3685            }
3686          }
3687        }
3688      }
3689    }
3690  }
3691
3692  /* now set the info array, default is regular */
3693  for (i = 0; i < context->num_nodes; i++)
3694    infos[i] = NODE_INFO_REGULAR;
3695  /* also set the other nodes, we'll check whether nodes were added
3696     via these flags */
3697  for (; i < context->FSMnode_list_len; i++)
3698    infos[i] = NODE_INFO_UNKNOWN;
3699  infos[ context->end_node] = NODE_INFO_ENDNODE;
3700
3701  /* get rid of direct to end nodes, etc */
3702  for (i = 0, j = 0; i < optends->num_nodes; i++)
3703  {
3704    optends->nodes[j] = optends->nodes[i];
3705    if (fst_node_has_speech_to_come(context, optends->nodes[i]))
3706    {
3707      j++;
3708      infos[ optends->nodes[i]] = NODE_INFO_OPTENDN;
3709    }
3710    else
3711    {
3712      infos[ optends->nodes[i]] = NODE_INFO_ENDNODE;
3713      /* num_near_end_nodes++; */
3714    }
3715  }
3716  optends->num_nodes = j;
3717
3718  /* printf("get_oppend_nodes (%d)", optends->num_nodes);
3719     for(i=0; i<optends->num_nodes; i++) printf(" %d", optends->nodes[i]);
3720     printf("\n");
3721  */
3722
3723  FREE(optends->nodes);
3724
3725  /* find the wrapup cost */
3726  node = &context->FSMnode_list[ context->end_node];
3727  wrapup_cost = MAXcostdata;
3728  wrapup_cost_inconsistencies = 0;
3729  for (arc_index = node->first_prev_arc; arc_index != MAXarcID;
3730       arc_index = arc->linkl_prev_arc)
3731  {
3732    arc = &context->FSMarc_list[ arc_index];
3733    if (IS_SILENCE_ILABEL(arc->ilabel, context) &&
3734        arc->olabel == context->end_silence_word)
3735    {
3736      if (wrapup_cost == MAXcostdata)
3737        wrapup_cost = arc->cost;
3738      else if (context->wrapup_cost != arc->cost)
3739      {
3740        wrapup_cost = arc->cost;
3741        wrapup_cost_inconsistencies++;
3742      }
3743    }
3744  }
3745#define MIN_WRAPUP_COST 100
3746  context->wtw_average = wrapup_cost;
3747  if (context->wtw_average > 200)
3748    context->wtw_average = 200;
3749  if (context->wrapup_cost < MIN_WRAPUP_COST)
3750    context->wrapup_cost = MIN_WRAPUP_COST;
3751  /*log_report("context->wrapup_cost %d (%d)\n", context->wrapup_cost,
3752    wrapup_cost_inconsistencies);*/
3753  return 0;
3754}
3755
3756int fst_alloc_transit_points(srec_context* context)
3757{
3758  arcID i;
3759  wordID num_slots = context->olabels->num_slots;
3760  wordID olabel;
3761  asr_int16_t nfxps = 0;
3762  FSMarc* arc;
3763  FSMnode* node;
3764
3765  context->num_fsm_exit_points = 0;
3766  /* slot 0 is invalid, it is the "eps", so num_slots==1 means no slots! */
3767  if (num_slots == 1)
3768    return 0;
3769
3770  /* scan through to finds arc that carry rule references */
3771  for (i = 0; i < context->num_arcs; i++)
3772  {
3773    olabel =  context->FSMarc_list[i].olabel;
3774    /* (wordID)(olabel-1) < (num_slots-1) ?? might work */
3775    if (olabel > 0 && olabel < num_slots)
3776    {
3777      context->FSMarc_list[i].cost = DISABLE_ARC_COST;
3778      if (nfxps >= MAX_NUM_SLOTS)
3779      {
3780        PLogError("error: too many fsm exit points in fsm, too many public rules referenced from here\n");
3781        return 0;
3782      }
3783      context->fsm_exit_points[nfxps].arc_index = i;
3784      context->fsm_exit_points[nfxps].from_node_index = context->FSMarc_list[i].fr_node;
3785      /* skip over the trailing silence and .wb labels */
3786      for (arc = &context->FSMarc_list[i]; arc->ilabel != WORD_BOUNDARY;)
3787      {
3788        node = &context->FSMnode_list[arc->to_node];
3789        arc =  &context->FSMarc_list[node->un_ptr.first_next_arc];
3790      }
3791      context->fsm_exit_points[nfxps].wbto_node_index = arc->to_node;
3792      nfxps++;
3793
3794    } /* olabel<num_slots */
3795  } /* i<num_arcs */
3796  context->num_fsm_exit_points  = nfxps;
3797  return 0;
3798}
3799
3800
3801/*
3802  cross-word modeling:
3803  for now we'll assume silence context on either side, later we'll create
3804  a graph with the right fan out on the edges of each slot.  At that point we
3805  may have routines like ... if(is_dead_end(arc)) continue;
3806
3807  word removal:
3808  we'll use refcounters on arcs, we can search the whole graph for the
3809  olabel, then remove forward and remove backwards
3810
3811  word addition:
3812  glenville ^ hmm183 hmm222 hmm162 hmm246 hmm346 hmm191 hmm219
3813
3814  set imap=C:/users/dahan/speech2go/fst/enu_d2f_fray_g/model.map
3815  set omap=C:/users/dahan/speech2go/fst/enu_d2f_fray_g/namesnnumsSC.map
3816  wintel/debug/addword.exe -min
3817  $SWISDK/bin/fsmcompileD -t -i $imap -o $omap -F out.PCLG out.PCLG.txt
3818  $SWISDK/bin/fsmdrawD -i $imap -o $omap out.PCLG > out.PCLG.dot
3819  dotty out.PCLG.dot
3820
3821  need
3822  - a sanity check
3823  - remove word
3824  - multiple pronunciations handling
3825  - homonyms handling, other boundary conditions checking
3826  - measure the speed
3827
3828  alan_adams al~#ad}z
3829  alan_adams al~ad}z
3830  paul_adams p{l#ad}z
3831
3832
3833
3834*/
3835