1/*---------------------------------------------------------------------------*
2 *  srec_context.c                                                           *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21#include "stdlib.h"
22#include "string.h"
23#include "buildopt.h"
24#include "setting.h"
25#include "passert.h"
26#include "pendian.h"
27#include "portable.h"
28#include "pstdio.h"
29#include "ptypes.h"
30#include "srec_context.h"
31#include "srec_sizes.h"
32#include "search_network.h"
33#include "srec_arb.h"
34#include "hmm_desc.h"
35#if USE_COMP_STATS
36#include "comp_stats.h"
37#endif
38
39#ifdef SET_RCSID
40static const char *rcsid = 0 ? (const char *) &rcsid :
41"$Id: srec_context.c,v 1.84.4.54 2008/05/15 20:06:39 dahan Exp $";
42#endif
43
44#include <stdint.h>
45
46/* these are constants for now, we need to figure out how to make
47   this more flexible, possible ideas are:
48   (1) to specify these via API functions such as CR_LoadSyntax(),
49   (2) use defaults that would have been set the main parfile,
50   (3) put values for these fields in an actual grammar
51*/
52
53#define AVG_ARCS_PER_WORD    10
54#define AVG_NODES_PER_WORD    7
55#define AVG_CHARS_PER_WORD   18
56#define MAX_PRON_LEN         1024
57#define MAX_WORD_LEN 128
58#define DO_WEIGHTED_ADDWORD   1
59#define SLOTLOOP_OFFSET 64
60#define SLOTNAME_INDICATOR "__"
61#define RULE_INDICATOR '@'
62#define MAX_LINE_LENGTH 512
63#define DEFAULT_WB_COST 40
64#define DISABLE_ARC_COST 999
65#define DO_ARCS_IO_IN_ARC_ORDER 1
66
67#define IS_SILENCE_ILABEL(ilabel,context) (ilabel >= context->hmm_ilabel_offset+EPSILON_OFFSET && ilabel<context->hmm_ilabel_offset+EPSILON_OFFSET+NUM_SILENCE_HMMS)
68// looking to match filename.grxml.Names@__Names__
69// see also grxmlcompile.cpp where these names are constructed
70#define IS_SLOT_OLABEL(wd) (strchr(wd,IMPORTED_RULES_DELIM)<strstr(wd,SLOTNAME_INDICATOR) && strlen(wd)>4 && !strcmp(wd+strlen(wd)-2,SLOTNAME_INDICATOR) )
71
72/*
73SYNTAX : DATA_ALIGN(pointer, data type, bytes_filled)
74EXAMPLE: We need to cast a memory address to a (wordmap*)
75         so we call DATA_ALIGN(memptr, wordmap, filled),
76         where FILLED contains the number of bytes that were used to align the pointer
77*/
78#if (CPU & CPU_ARM)||(CPU & CPU_STRONGARM)
79/* under IPAQ it appears we need to align the structures
80   to certain boundaries.  More attention needed here for other
81   platforms ! */
82#define DATA_ALIGN(x, y, z) if ((int) (x) % sizeof(y)) { \
83    (z)=sizeof(y) - ((int) (x) % sizeof(y)); \
84    (x) += (z); /* force correct data alignment */ \
85  } else z=0;
86#else
87#define DATA_ALIGN(x, y, z)
88#endif
89
90static const int do_minimize = 1;
91#define PTR_TO_IDX(ptr, base) ((asr_uint32_t) (ptr == NULL ? 0xFFFFFFFFu : (asr_uint32_t)(ptr - base)))
92#define IDX_TO_PTR(idx, base) (idx == 0xFFFFFFFFu ? NULL : base + idx)
93
94/* prototypes */
95
96/*----------------------------------------------------------------------*
97 *                                                                      *
98 * internal prototypes                                                  *
99 *                                                                      *
100 *----------------------------------------------------------------------*/
101
102/* general use functions */
103char split_line(char* line, char** av);
104asr_int32_t atoi_with_check(const char* buf, asr_int32_t max);
105int sprintf_arc(char* buf, srec_context* fst, FSMarc* arc);
106int printf_arc1(srec_context* fst, char* msg, FSMarc* arc);
107int printf_node1(srec_context* fst, FSMnode* node);
108
109/* fst_* functions are internal fst functions */
110int fst_add_arcs(srec_context* fst, nodeID start_node, nodeID end_node,
111                 wordID olabel, costdata cost,
112                 modelID* model_sequence, int num_models);
113int fst_push_arc_olabel(srec_context* fst, FSMarc* arc);
114int fst_push_arc_cost(srec_context* fst, FSMarc* arc);
115int fst_pull_arc_olabel(srec_context* fst, FSMarc* arc);
116int fst_free_arc(srec_context* fst, FSMarc* arc);
117int fst_free_node(srec_context* fst, FSMnode* node);
118int fst_free_arc_net(srec_context* fst, FSMarc* arc);
119arcID fst_get_free_arc(srec_context* fst);
120nodeID fst_get_free_node(srec_context* fst);
121int fst_pack_arc_usage(srec_context* fst);
122
123void append_arc_arriving_node(srec_context* fst, FSMnode* to_node, FSMarc_ptr atok);
124void append_arc_leaving_node(srec_context* fst, FSMnode* fr_node, FSMarc_ptr atok);
125int num_arcs_leaving(srec_context* fst, FSMnode* node);
126
127int num_arcs_arriving(srec_context* fst, FSMnode* node);
128int num_arcs_arriving_gt_1(srec_context* fst, FSMnode* node);
129int fst_fill_node_info(srec_context* context);
130int fst_alloc_transit_points(srec_context* context);
131
132/* higher-level functions */
133int FST_LoadReverseWordGraph(srec_context* context, int num_words_to_add,
134                             PFile* fp);
135int FST_LoadParams(srec_context* context, PFile* fp);
136int FST_AssumeParams(srec_context* context);
137int FST_UnloadReverseWordGraph(srec_context* context);
138int FST_AttachArbdata(srec_context* fst, srec_arbdata* allophone_tree);
139
140static ESR_ReturnCode wordmap_clean ( wordmap *word_map );
141static ESR_ReturnCode wordmap_populate ( wordmap *word_map, wordID num_words );
142
143/*--------------------------------------------------------------------------*
144 *                                                                          *
145 *                                                                          *
146 *                                                                          *
147 *--------------------------------------------------------------------------*/
148
149int FST_IsVoiceEnrollment(srec_context* context)
150{
151  if (context->olabels == NULL) return 0;
152  if (context->olabels->num_words < 2) return 0;
153  if (strstr(context->olabels->words[1], "enroll")) return 1;
154  return 0;
155}
156
157int FST_LoadContext(const char* synbase, srec_context** pcontext,
158                    int num_words_to_add)
159{
160  int rc;
161  PFile* fp;
162  char buffer[MAX_LINE_LENGTH];
163  srec_context* context;
164
165  context = (srec_context*)CALLOC_CLR(1, sizeof(srec_context), "srec.graph.base");
166  if(!context)
167	  return FST_FAILED_ON_MEMORY;
168  memset(context, 0, sizeof(srec_context));
169
170  context->addWordCaching_lastslot_name = 0;
171  context->addWordCaching_lastslot_num = MAXwordID;
172  context->addWordCaching_lastslot_needs_post_silence = ESR_FALSE;
173
174  sprintf(buffer, "%s.map", synbase);
175  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
176  if (!fp) return FST_FAILED_ON_INVALID_ARGS;
177  rc = FST_LoadWordMap(&context->olabels, num_words_to_add, fp);
178  pfclose(fp);
179  if (rc) return rc;
180
181  sprintf(buffer, "%s.PCLG.txt", synbase);
182  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
183  if (!fp) return FST_FAILED_ON_INVALID_ARGS;
184  rc = FST_LoadGraph(context, context->ilabels, context->olabels, num_words_to_add, fp);
185  pfclose(fp);
186  if (rc != FST_SUCCESS) return rc;
187
188  sprintf(buffer, "%s.Grev2.det.txt", synbase);
189  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
190  if (!fp) return FST_FAILED_ON_INVALID_ARGS;
191  rc = FST_LoadReverseWordGraph(context, num_words_to_add, fp);
192  pfclose(fp);
193  if (rc != FST_SUCCESS) return rc;
194
195  sprintf(buffer, "%s.params", synbase);
196  fp = file_must_open(NULL, buffer, L("r"), ESR_TRUE);
197  if (fp) {
198    rc = FST_LoadParams(context, fp);
199    pfclose(fp);
200    if (rc != FST_SUCCESS) return rc;
201  } else {
202    rc = FST_AssumeParams(context);
203  }
204
205  context->grmtyp = (arcID)FST_GetGrammarType(context);
206
207  *pcontext = context;
208  rc = fst_fill_node_info(context);
209  return rc ? FST_FAILED_ON_INVALID_ARGS : FST_SUCCESS;
210}
211
212void FST_UnloadContext(srec_context* context)
213{
214  if (!context) return;
215
216  FST_UnloadWordMap(&context->ilabels);
217  FST_UnloadWordMap(&context->olabels);
218  FST_UnloadGraph(context);
219  FST_UnloadReverseWordGraph(context);
220  FREE(context);
221}
222
223int FST_PrepareContext(srec_context* context)
224{
225  nodeID i;
226  int rc = FST_SUCCESS;
227  /* after all word additions, we need to "prepare" the context */
228
229  /* first check for any changes to optional endnodes etc .. */
230  for (i = 0; i < context->num_nodes; i++)
231    if (context->FSMnode_info_list[i] == NODE_INFO_UNKNOWN)
232      break;
233  if (i != context->num_nodes)
234    rc = fst_fill_node_info(context);
235
236  context->whether_prepared = 1;
237  return rc ? FST_FAILED_ON_INVALID_ARGS : FST_SUCCESS;
238}
239
240void fst_set_wb_costs( srec_context* context, costdata wbcost)
241{
242  unsigned int i;
243  for(i=0; i<(unsigned int)context->FSMarc_list_len; i++) {
244    if(context->FSMarc_list[i].ilabel == WORD_BOUNDARY)
245      context->FSMarc_list[i].cost = wbcost;
246  }
247}
248
249int FST_LoadParams(srec_context* context, PFile* fp)
250{
251  char line[MAX_LINE_LENGTH];
252  costdata wbcost = MAXcostdata;
253  while (pfgets(line, MAX_LINE_LENGTH, fp))
254  {
255    char* key = strtok(line," \n\r\t");
256    char* val = key ? strtok(NULL," \n\r\t") : NULL;
257    if(val && !strcmp(val,"="))
258      val = key ? strtok(NULL," \n\r\t") : NULL;
259    if(!key || !key[0])
260      continue;
261    else if(val && val[0]) {
262      if(!strcmp(key,"word_penalty")) {
263        wbcost = (costdata)atoi_with_check(val, MAXcostdata);
264	if(wbcost == MAXcostdata) {
265	  return FST_FAILED_ON_INVALID_ARGS;
266	}
267      } else {
268	PLogError(L("error: unknown parameter %s in .params file"), key);
269	return FST_FAILED_ON_INVALID_ARGS;
270      }
271    }
272  }
273  if(wbcost != MAXcostdata)
274    fst_set_wb_costs( context, wbcost);
275  return FST_SUCCESS;
276}
277
278int FST_AssumeParams( srec_context* context)
279{
280  fst_set_wb_costs( context, (costdata)DEFAULT_WB_COST);
281  return FST_SUCCESS;
282}
283
284
285/*--------------------------------------------------------------------------*
286 *                                                                          *
287 *                                                                          *
288 *                                                                          *
289 *--------------------------------------------------------------------------*/
290
291modelID hmm_number(const char* hmm_Name, modelID hmm_ilabel_offset)
292{
293  if (!strcmp(hmm_Name, "eps")) return EPSILON_LABEL;
294  if (!strcmp(hmm_Name, ".wb")) return WORD_BOUNDARY;
295  if (!strcmp(hmm_Name, ".ph")) return PHONE_BOUNDARY;
296  ASSERT(hmm_Name[0] == 'h' && hmm_Name[1] == 'm' && hmm_Name[2] == 'm' && hmm_Name[3]);
297  return (modelID)(hmm_ilabel_offset + (modelID)atoi_with_check(hmm_Name + 3, MAXmodelID));
298}
299
300char* hmm_name(modelID ilabel, modelID hmm_ilabel_offset, char* buf)
301{
302  if (ilabel == EPSILON_LABEL)
303    sprintf(buf, "eps");
304  else if (ilabel == WORD_BOUNDARY)
305    sprintf(buf, ".wb");
306  else if (ilabel == PHONE_BOUNDARY)
307    sprintf(buf, ".ph");
308  else
309    sprintf(buf, "hmm%03d", ilabel - hmm_ilabel_offset);
310  return buf;
311}
312
313/*------------------------------------------------------------------*
314 *                                                                  *
315 * words                                                            *
316 *                                                                  *
317 *------------------------------------------------------------------*/
318int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
319{
320	return LSTRCMP(key1,key2);
321}
322unsigned int HashGetCode(const void *key)
323{
324	const LCHAR* k = (const LCHAR*)key;
325	unsigned int i, len, h = 0;
326	len = LSTRLEN( k);
327	for ( i = 0; i < len; i++)
328		h = 31*h + (unsigned int)k[i];
329	return h;
330}
331
332int wordmap_create(wordmap** pwmap, int num_chars, int num_words, int num_words_to_add)
333{
334  wordmap* Interface;
335  PHashTableArgs hashArgs;
336  ESR_ReturnCode rc;
337
338  Interface = (wordmap*)CALLOC_CLR(1, sizeof(wordmap), "srec.graph.wordmap.base");
339  Interface->max_words = (wordID)(num_words + num_words_to_add);
340  Interface->num_words = (wordID)0;
341  /* *pwmap->words = (ptr32*) CALLOC_CLR(wmap->max_words, sizeof(ptr32), "graph.wordmap.words"); */
342  Interface->words = (char**) CALLOC_CLR(Interface->max_words, sizeof(char*), "srec.graph.wordmap.words");
343  Interface->max_chars = num_chars + num_words_to_add * AVG_CHARS_PER_WORD;
344  Interface->chars = (char*) CALLOC_CLR(Interface->max_chars, sizeof(char), "srec.graph.wordmap.chars");
345  Interface->next_chars = Interface->chars;
346  Interface->wordIDForWord = NULL;
347  *pwmap = Interface;
348
349  /* use a hashtable to save mapping between wdID and array index */
350  if (num_words_to_add >= 0)
351  {
352    hashArgs.capacity = num_words + num_words_to_add + 10;
353	if(hashArgs.capacity%2==0) hashArgs.capacity += 1;
354    hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
355    hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
356    hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
357    CHKLOG(rc, PHashTableCreate(&hashArgs, L("srec.graph.wordmap.wordIDForWord.wordmap_create()"), &Interface->wordIDForWord));
358  }
359  else
360  {
361    Interface->wordIDForWord = NULL;
362  }
363
364  return FST_SUCCESS;
365
366CLEANUP:
367  wordmap_destroy(pwmap);
368  return rc;
369}
370wordID wordmap_add_word(wordmap* wmap, const char* word);
371
372int FST_LoadWordMap(wordmap** pwmap, int num_words_to_add, PFile* fp)
373{
374  wordID my_wID, wm_wID;
375  char *word, *wID_string;
376  int num_words;
377  asr_int32_t num_chars;
378  char buf[MAX_LINE_LENGTH];
379  long fpos;
380  wordmap* wmap;
381  /* figure out the number of words */
382  fpos = pftell(fp);
383  for (num_words = 0, num_chars = 0; pfgets(buf, MAX_LINE_LENGTH, fp); num_words++)
384  {
385    word = strtok(buf, " \n\r\t");
386    num_chars += strlen(word);
387  }
388  num_chars += num_words * 2; /* for null terminators */
389  pfseek(fp, fpos, SEEK_SET);
390
391  /* Alex added this to reuse this functionality elsewhere
392    destroy function for this create function is actually taken care of by the
393    FST_UnloadWordMap function
394  */
395  wordmap_create(&wmap, num_chars, num_words, num_words_to_add);
396
397  /* later replace this with wordblocks, linked list of 8 character blocks! */
398  while (pfgets(buf, MAX_LINE_LENGTH, fp))
399  {
400    word = strtok(buf, " \n\r\t");
401    wID_string = strtok(NULL, " \n\r\t");
402    my_wID = (wordID)atoi_with_check(wID_string, MAXwordID);
403
404    // if(strstr(word,SLOTNAME_INDICATOR))
405    //    word = strstr(word,SLOTNAME_INDICATOR);
406    wm_wID = wordmap_add_word(wmap, word);
407    ASSERT(my_wID == wm_wID);
408  }
409  ASSERT(wmap->num_words == num_words);
410
411  for (my_wID = 1; my_wID < num_words; my_wID++)
412  {
413    if (!IS_SLOT_OLABEL(wmap->words[my_wID]))
414      break;
415  }
416  wmap->num_slots = my_wID;
417  wordmap_setbase(wmap);
418  *pwmap = wmap;
419  if(wmap->num_slots > MAX_NUM_SLOTS)
420    return FST_FAILED_ON_INVALID_ARGS;
421  else
422    return FST_SUCCESS;
423}
424
425int wordmap_destroy(wordmap** wmap)
426{
427  if (wmap && *wmap)
428  {
429    wordmap_clean ( *wmap );
430    if (((*wmap)->wordIDForWord)) PHashTableDestroy((*wmap)->wordIDForWord);
431    if (((*wmap)->chars)) FREE((*wmap)->chars);
432    if (((*wmap)->words)) FREE((*wmap)->words);
433    if ((*wmap)) FREE((*wmap));
434    *wmap = 0;
435  }
436  return FST_SUCCESS;
437}
438
439int FST_UnloadWordMap(wordmap** wmap)
440{
441  return wordmap_destroy(wmap);
442}
443
444
445int FST_DumpWordMap(PFile* fp, wordmap* wmap)
446{
447  wordID my_wID;
448  for (my_wID = 0; my_wID < wmap->num_words; my_wID++)
449  {
450    pfprintf(fp, "%s %hu\n", wmap->words[my_wID], my_wID);
451  }
452  return FST_SUCCESS;
453}
454
455wordID wordmap_find_index(wordmap* wmap, const char* word)
456{
457  void *wdID_void;
458  ESR_ReturnCode rc;
459  size_t i;
460
461  if (!word)
462    return MAXwordID;
463
464  if (wmap->num_words == 0)
465    return MAXwordID;
466
467  if (wmap->wordIDForWord)
468  {
469    rc = PHashTableGetValue(wmap->wordIDForWord, word, (void**)&wdID_void);
470
471    if (rc == ESR_SUCCESS)
472      return (wordID)(uintptr_t)wdID_void;
473  }
474  else
475  {
476    for (i = 0; i < wmap->num_words; i++)
477    {
478      if (!strcmp(wmap->words[i], word))
479      {
480        return (wordID)i;
481      }
482    }
483  }
484  return MAXwordID;
485}
486
487wordID wordmap_find_rule_index(wordmap* wmap, const char* rule)
488{
489  int i;
490  int strlen_rule = strlen(rule);
491  for (i = wmap->num_slots; --i > 0;)
492  { /* > (not >=) because eps is always 0 */
493    char* p = strstr(wmap->words[i], SLOTNAME_INDICATOR);
494    if(p != NULL){
495      if (!strcmp(p, rule)) break;
496      else if(!strncmp(p,rule,strlen_rule) && !strcmp(p+strlen_rule,SLOTNAME_INDICATOR)) break;
497    }
498  }
499  if (i == 0) return MAXwordID;
500  else return(wordID)i;
501}
502
503wordID wordmap_find_index_in_rule(wordmap* wmap, const char* word, wordID rule)
504{
505  int len = strlen(word);
506  int rule0 = rule + '0';
507  void *wdID_void;
508  LCHAR word_dot_rule[256];
509  ESR_ReturnCode rc;
510
511  if (!word)
512    return MAXwordID;
513  LSTRCPY(word_dot_rule, word);
514  word_dot_rule[len++] = IMPORTED_RULES_DELIM;
515  word_dot_rule[len++] = (char)rule0;
516  word_dot_rule[len++] = 0;
517  rc = PHashTableGetValue(wmap->wordIDForWord, word_dot_rule, (void**)&wdID_void);
518  if (rc == ESR_SUCCESS)
519    return (wordID)(uintptr_t)(wdID_void);
520  return MAXwordID;
521}
522
523int strlen_with_null(const char* word)
524{
525#if 1 /* NT  */
526  int len = strlen(word) + 1;
527  if (len % 2 == 1) len++;
528  return len;
529#else /* C54 */
530#endif
531}
532
533int wordmap_whether_in_rule(wordmap* wmap, wordID word, wordID rule)
534{
535  char* word_chars;
536  int len;
537  if (word > wmap->num_words) return 0;
538  word_chars = wmap->words[word];
539  len = strlen(word_chars);
540  if (word_chars[len-1] == rule + '0' && word_chars[len-2] == IMPORTED_RULES_DELIM)
541    return 1;
542  else
543    return 0;
544}
545
546void wordmap_setbase(wordmap* wmap)
547{
548  wmap->num_base_words = wmap->num_words;
549  wmap->next_base_chars = wmap->next_chars;
550}
551
552void wordmap_ceiling(wordmap* wmap)
553{
554  /* this is irreversible */
555  wmap->max_words = wmap->num_words;
556  wmap->max_chars = (wmap->next_chars - wmap->chars);
557}
558
559
560static ESR_ReturnCode wordmap_clean ( wordmap *word_map )
561    {
562  ESR_ReturnCode  clean_status;
563  PHashTableEntry *entry;
564  PHashTableEntry *oldEntry;
565  wordID          *value;
566
567  clean_status = ESR_SUCCESS;
568
569  if ( word_map->wordIDForWord == NULL )
570    return clean_status;
571
572  clean_status = PHashTableEntryGetFirst ( word_map->wordIDForWord, &entry);
573
574  while ( ( entry != NULL ) && ( clean_status == ESR_SUCCESS ) )
575    {
576      clean_status = PHashTableEntryGetKeyValue ( entry, NULL, (void **)&value );
577
578      if ( clean_status == ESR_SUCCESS )
579	{
580	  oldEntry = entry;
581	  clean_status = PHashTableEntryAdvance ( &entry );
582
583	  if ( clean_status == ESR_SUCCESS )
584	    clean_status = PHashTableEntryRemove ( oldEntry );
585	}
586    }
587    return ( clean_status );
588    }
589
590
591static ESR_ReturnCode wordmap_populate ( wordmap *word_map, wordID num_words )
592{
593  ESR_ReturnCode  populate_status;
594  wordID          wdID;
595
596  populate_status = ESR_SUCCESS;
597  wdID = 0;
598
599  if ( word_map->wordIDForWord != NULL )
600    {
601      while ( ( wdID < num_words ) && ( populate_status == ESR_SUCCESS ) )
602	{
603	  populate_status = PHashTablePutValue ( word_map->wordIDForWord, word_map->words[wdID],
604						 (const void *)(uintptr_t)wdID, NULL );
605	  if ( populate_status == ESR_SUCCESS )
606	    wdID++;
607	  else {
608	    return (populate_status);
609	  }
610	}
611    }
612  return ( populate_status );
613}
614
615void wordmap_reset(wordmap* wmap)
616{
617  char** tmp_words;
618  int i;
619  ESR_ReturnCode    reset_status;
620
621  if (wmap->num_base_words < wmap->num_words)
622  {
623    /*wordID i = (wordID)(wmap->num_base_words);*/
624    char* old_wmap_chars = wmap->chars;
625    int offset = wmap->next_base_chars - wmap->chars;
626    char* tmp_chars = NEW_ARRAY(char, offset, L("srec.g2g.graph.wordmap.chars"));
627    if(!tmp_chars) {
628      passert ( 0 && L("failed to reset the memory for wordmap.chars ") );
629    }
630    memcpy(tmp_chars,wmap->chars, offset * sizeof(char));
631    FREE(wmap->chars);
632
633    wmap->chars = tmp_chars;
634    wmap->next_base_chars = wmap->chars + (wmap->next_base_chars - old_wmap_chars);
635    wmap->max_chars = (wordID) offset;
636    wmap->next_chars = wmap->next_base_chars;
637
638    tmp_words = (char**) CALLOC_CLR(wmap->num_base_words, sizeof(char*), "srec.graph.wordmap.words");
639    if(!tmp_words) {
640      passert ( 0 && L("failed to reset the memory for wordmap.words ") );
641    }
642    memcpy( tmp_words, wmap->words, wmap->num_base_words * sizeof(char*));
643    FREE( wmap->words);
644
645    wmap->words = tmp_words;
646    wmap->max_words = wmap->num_base_words;
647    wmap->num_words  = wmap->num_base_words;
648
649    for(i=0; i<wmap->num_words; i++)
650      wmap->words[i] = wmap->chars + (wmap->words[i] - old_wmap_chars);
651  }
652
653  reset_status = wordmap_clean ( wmap );
654
655  if ( reset_status == ESR_SUCCESS )
656    {
657      reset_status = wordmap_populate ( wmap, wmap->num_base_words );
658
659      if ( reset_status != ESR_SUCCESS )
660	{
661	  wordmap_clean ( wmap );
662	  passert ( 0 && L("wordmap_reset failed") );
663	}
664    }
665  else
666    {
667      passert ( 0 && L("wordmap_reset failed") );
668    }
669}
670
671#define FST_GROW_FACTOR 12/10
672#define FST_GROW_MINCHARS 256
673#define FST_GROW_MINWORDS  32
674wordID wordmap_add_word(wordmap* wmap, const char* word)
675{
676  int len = strlen_with_null(word);
677  wordID wdID =0;
678
679  if (wmap->next_chars + len >= wmap->chars + wmap->max_chars)
680  {
681#if defined(FST_GROW_FACTOR)
682      uintptr_t i,tmp_max_chars= wmap->max_chars * FST_GROW_FACTOR;
683	 char* old_wmap__chars = wmap->chars;
684      if(tmp_max_chars - wmap->max_chars < FST_GROW_MINCHARS)
685	tmp_max_chars +=FST_GROW_MINCHARS;
686
687      char* tmp_chars = NEW_ARRAY(char, tmp_max_chars, L("srec.g2g.graph.wordmap.chars"));
688      if(!tmp_chars) {
689          PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap.chars"));
690          return MAXwordID;
691      }
692      memcpy(tmp_chars,wmap->chars,wmap->max_chars * sizeof(char));
693      FREE(wmap->chars);
694
695      wmap->chars = tmp_chars;
696      wmap->next_chars = wmap->chars + (wmap->next_chars - old_wmap__chars);
697      wmap->next_base_chars = wmap->chars + (wmap->next_base_chars - old_wmap__chars);
698      wmap->max_chars = (wordID)tmp_max_chars;
699      //Remove old keys --- Add WORD
700      wordmap_clean (wmap );
701
702      // adjust word pointers
703      for(i=0; i<wmap->num_words; i++)
704	{
705	  wmap->words[i] = wmap->chars + (wmap->words[i] - old_wmap__chars);
706	  // adjust hashtable ---
707	  if (wmap->wordIDForWord)
708	    {
709	      ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[i],
710						       (const void *)i, NULL );
711	      if ( rc != ESR_SUCCESS ) {
712		goto CLEANUP;
713	      }
714	    }
715	}
716#else // so not defined(FST_GROW_FACTOR)
717      PLogError("error: char overflow in wmap %d max %d\n", (int)(wmap->next_chars - wmap->chars), wmap->max_chars);
718      return MAXwordID;
719#endif // defined(FST_GROW_FACTOR)
720  }
721  //Add word
722  if (wmap->num_words == wmap->max_words)
723    {
724#if defined(FST_GROW_FACTOR)
725      char** tmp_words;
726      wordID tmp_max_words;
727      int itmp_max_words =  wmap->max_words * FST_GROW_FACTOR;
728
729      if(itmp_max_words - wmap->max_words < FST_GROW_MINWORDS)
730	    itmp_max_words += FST_GROW_MINWORDS;
731
732      if( itmp_max_words >= MAXwordID) {
733	    PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
734	    return MAXwordID;
735      }
736      tmp_max_words = (wordID)itmp_max_words;
737	  tmp_words = (char**) CALLOC_CLR(tmp_max_words, sizeof(char*), "srec.graph.wordmap.words");
738      if(!tmp_words) {
739	PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap.words"));
740        return MAXwordID;
741      }
742      memcpy( tmp_words, wmap->words, wmap->num_words * sizeof(char*));
743      FREE( wmap->words);
744      wmap->words = tmp_words;
745      wmap->max_words = tmp_max_words;
746#else // so not defined(FST_GROW_FACTOR)
747      PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
748      return MAXwordID;
749#endif // defined(FST_GROW_FACTOR)
750    }
751  if(1)
752    {
753    strcpy(wmap->next_chars, word);
754    wmap->words[ wmap->num_words++] = wmap->next_chars;
755    wmap->next_chars += len;
756    wdID = (wordID)(wmap->num_words - (wordID)1);
757    if (wmap->wordIDForWord)
758    {
759       ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[wdID],
760                      (const void *)(uintptr_t)wdID, NULL );
761    if ( rc != ESR_SUCCESS )
762      goto CLEANUP;
763    }
764    return wdID;
765  }
766CLEANUP:
767  PLogError("error: could not add word and wordID in wmap hash (%s -> %d)\n", word, wdID);
768  return MAXwordID;
769}
770
771wordID wordmap_add_word_in_rule(wordmap* wmap, const char* word, wordID rule)
772{
773  int len = strlen_with_null(word) + 2;
774  wordID wdID = 0;
775  wordID i;
776//wordmap_add_word_in_rule
777  if (wmap->next_chars + len >= wmap->chars + wmap->max_chars)
778  {
779#if defined(FST_GROW_FACTOR)
780      int tmp_max_chars = wmap->max_chars * FST_GROW_FACTOR;
781      char* tmp_chars;
782	  char* old_next_chars = wmap->next_chars;
783	  char* old_chars = wmap->chars;
784
785      if(tmp_max_chars-wmap->max_chars < FST_GROW_MINCHARS )
786	    tmp_max_chars += FST_GROW_MINCHARS;
787      if( wmap->chars + tmp_max_chars <= wmap->next_chars + len)
788	    tmp_max_chars += len;
789
790      tmp_chars = NEW_ARRAY(char, tmp_max_chars, L("srec.g2g.graph.wordmap.chars"));
791    if(!tmp_chars) {
792          PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap_add_in_rule.chars"));
793          return MAXwordID;
794      }
795      memcpy(tmp_chars, wmap->chars, wmap->max_chars * sizeof(char));
796      FREE(wmap->chars);
797
798      wmap->chars = tmp_chars;
799      wmap->next_chars = wmap->chars + (old_next_chars - old_chars) ;
800      wmap->next_base_chars = wmap->chars + (wmap->next_base_chars - old_chars);
801      wmap->max_chars = (wordID)tmp_max_chars;
802
803      //Remove old keys
804      wordmap_clean ( wmap );
805
806      // adjust word pointers wordmap_add_word_in_rule
807      for(i=0; i<wmap->num_words; i++)
808	{
809 	  wmap->words[i] = wmap->chars +(wmap->words[i] - old_chars) ;
810	  // adjust hashtable ----- add in rulewordmap_add_word_in_rule ----
811	  if (wmap->wordIDForWord) {
812	    ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[i],
813						     (void*)(uintptr_t)(i), NULL );
814	    if ( rc != ESR_SUCCESS )
815	      goto CLEANUP;
816	  }
817	}
818#else
819      PLogError("error: char overflow in wmap %d max %d\n", (int)(wmap->next_chars - wmap->chars), wmap->max_chars);
820      return MAXwordID;
821#endif
822  }//wordmap_add_word_in_rule
823  if (wmap->num_words == wmap->max_words)
824    {
825#if defined(FST_GROW_FACTOR)
826      char** tmp_words;
827      wordID tmp_max_words;
828      int itmp_max_words =  wmap->max_words * FST_GROW_FACTOR;
829      if(itmp_max_words - wmap->max_words < FST_GROW_MINWORDS)
830	itmp_max_words += FST_GROW_MINWORDS;
831
832      if( itmp_max_words >= MAXwordID) {
833	PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
834	return MAXwordID;
835      }
836
837      tmp_max_words = (wordID)itmp_max_words;
838      tmp_words = (char**) CALLOC_CLR(tmp_max_words, sizeof(char*), "srec.graph.wordmap.words");
839      if(!tmp_words) {
840        PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of wordmap_add_rule.words"));
841        return MAXwordID;
842      }
843      memcpy( tmp_words, wmap->words, wmap->num_words * sizeof(char*));
844      FREE( wmap->words);
845      wmap->words = tmp_words;
846      wmap->max_words = tmp_max_words;
847#else
848      PLogError("error: word ptr overflow in wmap %d max %d\n", (int)wmap->num_words, wmap->max_words);
849      return MAXwordID;
850#endif
851    }
852  if(1)
853  {
854    char *p;
855    const char *q;
856    /* word.1 */
857    for (p = wmap->next_chars, q = word; (*p = *q) != '\0'; p++, q++) ; /* basic strcpy() */
858    *p++ = IMPORTED_RULES_DELIM;
859    *p++ = (char)(rule + ((wordID)'0'));
860    *p   = '\0';
861    wmap->words[ wmap->num_words++] = wmap->next_chars;
862    wmap->next_chars += len;
863    wdID = (wordID)(wmap->num_words - (wordID)1);
864    if (wmap->wordIDForWord)
865      {
866        ESR_ReturnCode rc = PHashTablePutValue ( wmap->wordIDForWord, wmap->words[wdID],
867						 (void*)(uintptr_t)(wdID), NULL );
868	if ( rc != ESR_SUCCESS )
869	  goto CLEANUP;
870      }
871    return wdID;
872  }
873CLEANUP:
874  PLogError("error: could not add word and wordID in wmap hash (%s -> %d)\n", word, wdID);
875  return MAXwordID;
876}
877
878/*----------------------------------------------------------------------*
879 *                                                                      *
880 * API                                                                  *
881 *                                                                      *
882 *----------------------------------------------------------------------*/
883
884int FST_AttachArbdata(srec_context* fst, srec_arbdata* allophone_tree)
885{
886  int rc = 0;
887  unsigned int allotree__modelid;
888
889  fst->allotree = allophone_tree;
890  if( !allophone_tree)
891      return 1;
892  fst->hmm_info_for_ilabel = allophone_tree->hmm_infos - fst->hmm_ilabel_offset;
893  allotree__modelid = version_arbdata_models(allophone_tree);
894  if (allotree__modelid != 0 && fst->modelid != 0)
895  {
896    if (allotree__modelid != fst->modelid)
897    {
898      PLogError("Error: modelids disagree, sgcbaseline(%u) arbdata(%u)", fst->modelid, allotree__modelid);
899      rc = FST_FAILED_ON_INVALID_ARGS;
900    }
901  }
902
903  return rc;
904}
905
906int FST_LoadGraph(srec_context* pfst, wordmap* imap, wordmap* omap,
907                  int num_words_to_add, PFile* fp)
908{
909  int i, rc = 0;
910  char line[MAX_LINE_LENGTH], *args[32], nargs;
911  arcID max_num_FSMarcs;
912  arcID max_num_FSMnodes;
913  nodeID from_node, last_from_node, into_node, num_nodes, max_node_number = 0;
914  FSMarc_ptr atok =  (FSMarc_ptr)0;
915  FSMarc *atoken = NULL;
916  FSMnode *fr_node, *to_node;
917  costdata cost = FREEcostdata;
918  arcID num_arcs;
919
920  srec_context* fst = pfst;
921  char *ilabel_str = 0, *olabel_str = 0;
922  long fpos;
923  arcID new_arc_id;
924  asr_int32_t temp;
925
926
927  /* determine number of arcs and nodes to allocate, add 50% for dynamic */
928  fpos = pftell(fp);
929  max_num_FSMnodes = 0;
930  pfst->modelid = 0;
931  for (max_num_FSMarcs = 0; pfgets(line, sizeof(line) / sizeof(char), fp); max_num_FSMarcs++)
932  {
933    if (strstr(line, "modelid:") == line)
934    {
935      char *p;
936      pfst->modelid = strtoul(line + strlen("modelid:"), &p, 10);
937    }
938    from_node = (nodeID)atoi_with_check(line, MAXnodeID);
939    if (from_node > max_num_FSMnodes)
940      max_num_FSMnodes = from_node;
941  }
942  pfseek(fp, fpos, SEEK_SET);
943  temp = max_num_FSMnodes + 1 /*why+1?*/ + num_words_to_add * AVG_NODES_PER_WORD;
944  if (temp >= MAXnodeID)
945  {
946    max_num_FSMnodes = MAXnodeID - 1;
947    PLogMessage("Warning: using max nodes instead\n");
948  }
949  else
950  {
951    max_num_FSMnodes = (nodeID)temp;
952  }
953  temp = max_num_FSMarcs + num_words_to_add * AVG_ARCS_PER_WORD;
954  if (temp >= MAXarcID)
955  {
956    max_num_FSMarcs = MAXarcID - 1;
957    PLogMessage("Warning: using max arcs instead\n");
958  }
959  else
960  {
961    max_num_FSMarcs = (arcID)temp;
962  }
963  fst->olabels = omap;
964  if (imap)
965  {
966    /* generally no imap is specified */
967    fst->ilabels = imap;
968    fst->hmm_ilabel_offset = wordmap_find_index(fst->ilabels, "hmm0");
969    ASSERT(fst->hmm_ilabel_offset >= 0);
970  }
971  else
972  {
973    fst->ilabels = (wordmap*)CALLOC_CLR(1, sizeof(wordmap), "srec.graph.imap");
974    fst->ilabels->num_words = fst->ilabels->max_words = 0;
975    fst->ilabels->words = 0;
976    /* this is bad hard code, we can get this from the swiarb file, it is
977       equal to the number of phonemes (53 for enu, 39 for jpn) plus the special
978       models (eps,.wb,.ph,h#,#h,iwt,not) minus 1 ('cuz base number is 0) */
979    fst->hmm_ilabel_offset = 128; /* should match MAX_PHONEMES */
980  }
981  fst->FSMarc_list = (FSMarc*)CALLOC_CLR(max_num_FSMarcs, sizeof(FSMarc), "srec.graph.arcs");
982  fst->FSMnode_list = (FSMnode*)CALLOC_CLR(max_num_FSMnodes, sizeof(FSMnode), "srec.graph.nodes");
983  fst->FSMnode_info_list = (FSMnode_info*)CALLOC_CLR(max_num_FSMnodes, sizeof(char), "srec.graph.nodeinfos");
984
985  /* setup the arc freelist */
986  fst->FSMarc_freelist = 0;
987  fst->num_arcs = 0;
988  for (i = 0; i < max_num_FSMarcs - 1; i++)
989  {
990    fst->FSMarc_list[i].linkl_next_arc = ARC_ItoX(i + 1);
991    fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
992  }
993  fst->FSMarc_list[i].linkl_next_arc = FSMARC_NULL;
994  fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
995
996  /* initialize the nodes, 'cuz reading is random order */
997  fst->num_nodes = 0;
998  for (i = 0; i < max_num_FSMnodes; i++)
999  {
1000    fr_node = &fst->FSMnode_list[i];
1001    fr_node->un_ptr.first_next_arc = fr_node->first_prev_arc = FSMARC_NULL;
1002  }
1003
1004  /* 1. first load up all the information */
1005  IF_DEBUG_WDADD(printf("load graph ... 1\n"));
1006  num_arcs = 0;
1007  last_from_node = MAXnodeID;
1008  while (pfgets(line, sizeof(line) / sizeof(char), fp))
1009  {
1010    if (strstr(line, "modelid:") == line) continue;
1011    IF_DEBUG_WDADD(printf("read arc %s", line));
1012    nargs = split_line(line, args);
1013    if (nargs >= 4)
1014    {
1015      from_node = (nodeID)atoi_with_check(args[0], MAXnodeID);
1016      into_node = (nodeID)atoi_with_check(args[1], MAXnodeID);
1017      ilabel_str = args[2];
1018      olabel_str = args[3];
1019      cost = FREEcostdata;
1020      if (nargs == 5)
1021        PLogError(L("Warning: too many arguments on line %s"), line);
1022    }
1023    else if (nargs == 1)
1024    {
1025      from_node = (nodeID)atoi_with_check(args[0], MAXnodeID);
1026      into_node = MAXnodeID;
1027      ilabel_str = 0;
1028      olabel_str = 0;
1029      cost = FREEcostdata;
1030    }
1031    else
1032    {
1033      from_node = into_node = 0;
1034      PLogError("can't parse line %s\n", line);
1035      ASSERT(0);
1036    }
1037
1038    if (into_node == MAXnodeID)
1039    {
1040      fst->end_node = from_node;
1041    }
1042    else
1043    {
1044      new_arc_id = fst_get_free_arc(fst);
1045      if (new_arc_id == MAXarcID)
1046        return FST_FAILED_ON_MEMORY;
1047      atok = ARC_ItoX(new_arc_id);
1048      atoken = ARC_XtoP(atok);
1049      num_arcs++;
1050      fr_node = &fst->FSMnode_list[from_node];
1051      to_node = &fst->FSMnode_list[into_node];
1052      if (fst->ilabels->num_words == 0)
1053      {
1054        atoken->ilabel = hmm_number(ilabel_str, fst->hmm_ilabel_offset);
1055	/* Xufang: if olabel_str is a slotname, change the input label to a no-silence and no-eps HMM. It is true that the weight of slot arcs is 999 and hmm will not take effect in the runtime, but it was found that when the slot is a loop, eps and silence will cause the recursive function fst_node_has_speech_to_come into a no-step-out status.*/
1056	if(strstr(olabel_str, SLOTNAME_INDICATOR)!=NULL)
1057	  atoken->ilabel = fst->hmm_ilabel_offset + SLOTLOOP_OFFSET;
1058      }
1059      else
1060      {
1061        atoken->ilabel = wordmap_find_index(fst->ilabels, ilabel_str);
1062      }
1063      atoken->olabel = wordmap_find_index(fst->olabels, olabel_str);
1064      /* if(g_fst_options.wtw_cost_override>0) {
1065      if(atoken->ilabel == WORD_BOUNDARY)
1066      atoken->cost = g_fst_options.wtw_cost_override;
1067      else
1068      atoken->cost = g_fst_options.arc_cost_override;
1069      } else  */
1070      atoken->cost = cost;
1071#if DEBUG_WDADD
1072      atoken->ilabel_str = (atoken->ilabel < fst->ilabels->num_words ? fst->ilabels->words[atoken->ilabel] : 0);
1073      atoken->olabel_str = (atoken->olabel < fst->olabels->num_words ? fst->olabels->words[atoken->olabel] : 0);
1074#endif
1075      append_arc_leaving_node(fst, fr_node, atok);
1076      if (into_node > max_node_number)
1077        max_node_number = into_node;
1078      append_arc_arriving_node(fst, to_node, atok);
1079      atoken->fr_node = NODE_ItoX(from_node);
1080      atoken->to_node = NODE_ItoX(into_node);
1081    }
1082    last_from_node = from_node;
1083  }
1084  ASSERT(fst->num_arcs == num_arcs);
1085
1086  /* setup the node freelist */
1087  IF_DEBUG_WDADD(printf("load graph ... 6\n"));
1088  num_nodes = (nodeID)(max_node_number + 1);
1089  if( max_num_FSMnodes > num_nodes) {
1090  fst->FSMnode_freelist = num_nodes;
1091  for (i = num_nodes; i < (max_num_FSMnodes - 1); i++)
1092  {
1093    fst->FSMnode_list[i].un_ptr.next_node = NODE_ItoX(i + 1);
1094    fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
1095  }
1096	if (i == (max_num_FSMnodes - 1)) {
1097     fst->FSMnode_list[i].un_ptr.next_node = FSMNODE_NULL;
1098     fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
1099    }
1100  } else
1101	fst->FSMnode_freelist  = FSMNODE_NULL;
1102
1103  /* some book-keeping stuff */
1104  IF_DEBUG_WDADD(printf("load graph ... 7\n"));
1105  fst->num_base_arcs = fst->num_arcs = num_arcs;
1106  fst->FSMarc_list_len = max_num_FSMarcs;
1107  fst->FSMnode_list_len = max_num_FSMnodes;
1108  fst->num_base_nodes = fst->num_nodes = num_nodes;
1109  fst->start_node = 0;
1110  /* fst->end_node = 0; this is set up above */
1111
1112  /* beg and end word labels */
1113  fst->beg_silence_word = wordmap_find_index(fst->olabels, "-pau-");
1114  fst->end_silence_word = wordmap_find_index(fst->olabels, "-pau2-");
1115  fst->hack_silence_word = wordmap_find_index(fst->olabels, "silence");
1116
1117  /* set the search limitations, real ones will be registered later */
1118  fst->max_searchable_nodes = 0;
1119  fst->max_searchable_arcs = 0;
1120
1121  rc = fst_alloc_transit_points(fst);
1122  return (rc ? FST_FAILED_ON_INVALID_ARGS : FST_SUCCESS);
1123}
1124
1125int FST_UnloadGraph(srec_context* pfst)
1126{
1127  if (pfst->ilabels)
1128    FREE(pfst->ilabels);
1129  FREE(pfst->FSMarc_list);
1130  FREE(pfst->FSMnode_list);
1131  FREE(pfst->FSMnode_info_list);
1132  pfst->FSMarc_list = 0;
1133  pfst->FSMnode_list = 0;
1134  pfst->FSMnode_info_list = 0;
1135  return FST_SUCCESS;
1136}
1137
1138int FST_DumpGraph(srec_context* fst, PFile* fp)
1139{
1140  int rc = 0;
1141  nodeID i;
1142  FSMarc_ptr atok;
1143  FSMarc* atoken;
1144  nodeID from_node, into_node;
1145  FSMnode* ntoken;
1146  char *ilabel, *olabel;
1147
1148  for (i = 0; i < fst->num_nodes; i++)
1149  {
1150    from_node = i;
1151    ntoken = &fst->FSMnode_list[i];
1152    if (ntoken->first_prev_arc == FSMARC_FREE)
1153      continue;
1154    if (ntoken->un_ptr.first_next_arc != FSMARC_NULL)
1155    {
1156      for (atok = ntoken->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
1157      {
1158        char buf[32];
1159        atoken = ARC_XtoP(atok);
1160        into_node = NODE_XtoI(atoken->to_node);
1161        ilabel = fst->ilabels->num_words == 0 ?
1162                 hmm_name(atoken->ilabel, fst->hmm_ilabel_offset, buf) :
1163                 fst->ilabels->words[atoken->ilabel] ;
1164        olabel = fst->olabels->words[atoken->olabel];
1165
1166        if (atoken->cost != FREEcostdata)
1167        {
1168          /* regular arc */
1169          pfprintf(fp, "%hu\t%hu\t%s\t%s\t%hu\n",
1170                  from_node, into_node, ilabel, olabel, atoken->cost);
1171        }
1172        else
1173        {
1174          /* regular zero cost arc */
1175          pfprintf(fp, "%hu\t%hu\t%s\t%s\n",
1176                  from_node, into_node, ilabel, olabel);
1177        }
1178      }
1179    }
1180    else
1181    {
1182      pfprintf(fp, "%hu\n", from_node);
1183    }
1184  }
1185  return rc;
1186}
1187
1188int FST_AddWordToGrammar(srec_context* fst, const char* _slot,
1189                         const char* word, const char* pron,
1190                         const int cost)
1191{
1192  int num_pron_ampersands = 0, num_prons_added = 0, num_words_added = 0;
1193  int i, pron_len, model_sequence_len;
1194  modelID model_sequence[MAX_PRON_LEN];
1195  char phoneme_sequence[MAX_PRON_LEN];
1196  wordID olabel = MAXwordID;
1197  int irc, rc = FST_SUCCESS;
1198  nodeID start_node = MAXnodeID, end_node = MAXnodeID;
1199  char veslot[MAX_WORD_LEN];
1200#if USE_HMM_BASED_ENROLLMENT
1201  const char* Tpron;
1202#endif
1203
1204  /* The addword from voice enroll still use @, and to test voice enroll, @ sign was removed and __ was added at the begining and ending of a word. Xufang */
1205  if(_slot[0] == '@') {
1206    strcpy(veslot,SLOTNAME_INDICATOR);
1207    strcat(veslot,_slot+1);
1208    strcat(veslot,SLOTNAME_INDICATOR);
1209  } else
1210    strcpy(veslot, _slot);
1211
1212#if USE_COMP_STATS
1213  if (!comp_stats)
1214    comp_stats = init_comp_stats1();
1215  start_cs_clock1(&comp_stats->word_addition);
1216#endif
1217
1218  /* we expect developers to call AddWord with the same slot many times,
1219  so we cache the slot start and end nodes and re-use on subseq calls */
1220  if( fst->addWordCaching_lastslot_num == MAXwordID )
1221    fst->addWordCaching_lastslot_name = NULL;
1222  else {
1223    fst->addWordCaching_lastslot_name = fst->olabels->words[fst->addWordCaching_lastslot_num];
1224    fst->addWordCaching_lastslot_name = strstr(fst->addWordCaching_lastslot_name, SLOTNAME_INDICATOR);
1225    ASSERT( fst->addWordCaching_lastslot_name);
1226  }
1227  if( fst->addWordCaching_lastslot_name==NULL || strcmp( fst->addWordCaching_lastslot_name, veslot)) {
1228
1229    fst->addWordCaching_lastslot_num = wordmap_find_rule_index(fst->olabels, veslot);
1230    /* olabel not found is a user error */
1231    if (fst->addWordCaching_lastslot_num == MAXwordID)
1232    {
1233      size_t i;
1234
1235      pfprintf(PSTDOUT, L("error: slot '%s' not found among ["), veslot);
1236      for (i = 1; i < (size_t) fst->olabels->num_slots; ++i)
1237        pfprintf(PSTDOUT, "%s, ", fst->olabels->words[i]);
1238      pfprintf(PSTDOUT, L("] possible\n"));
1239      rc = FST_FAILED_ON_INVALID_ARGS;
1240      goto RETRC;
1241    }
1242
1243    fst->addWordCaching_lastslot_name = fst->olabels->words[fst->addWordCaching_lastslot_num];
1244    fst->addWordCaching_lastslot_name = strstr(fst->addWordCaching_lastslot_name, SLOTNAME_INDICATOR);
1245    ASSERT(fst->addWordCaching_lastslot_name);
1246
1247    /* now find where in the graph this slot is referenced, note that
1248       there might be more than one place, but here we ignore multiples */
1249    for (i = fst->num_fsm_exit_points; --i >= 0;)
1250      {
1251	arcID arcid = fst->fsm_exit_points[i].arc_index;
1252	if (fst->FSMarc_list[arcid].olabel == fst->addWordCaching_lastslot_num)
1253	  {
1254	    FSMarc* arc;
1255	    FSMnode* node;
1256	    start_node = fst->fsm_exit_points[i].from_node_index;
1257	    end_node = fst->fsm_exit_points[i].wbto_node_index;
1258	    fst->addWordCaching_lastslot_needs_post_silence = ESR_TRUE;
1259	    fst->addWordCaching_lastslot_ifsm_exit_point = i;
1260	    node = &fst->FSMnode_list[ end_node];
1261	    arc = &fst->FSMarc_list[node->un_ptr.first_next_arc];
1262	    if (arc->olabel == fst->end_silence_word && arc->linkl_next_arc == MAXarcID)
1263	      fst->addWordCaching_lastslot_needs_post_silence = ESR_FALSE;
1264	    break;
1265	  }
1266      }
1267
1268    /* not found in the graph is an internal error, coding error */
1269    if (i < 0 || start_node>=fst->num_nodes || end_node>=fst->num_nodes)
1270      {
1271	PLogError("error: (internal) finding olabel %d %d %d\n", fst->addWordCaching_lastslot_num,
1272		  start_node, end_node);
1273	goto RETRC;
1274      }
1275  } /* cached or not */
1276
1277  i = fst->addWordCaching_lastslot_ifsm_exit_point;
1278  start_node = fst->fsm_exit_points[i].from_node_index;
1279  end_node = fst->fsm_exit_points[i].wbto_node_index;
1280
1281  /* now start_node and end_node are known */
1282  if (!word || !*word || !pron || !*pron)
1283    {
1284      rc = FST_FAILED_ON_INVALID_ARGS;
1285      PLogError("error: null word/pron on input to FST_AddWordToGrammar()\n");
1286      goto RETRC;
1287    }
1288
1289  /* from here */
1290  IF_DEBUG_WDADD(printf("Adding %s %s\n", word, (const char*)pron));
1291
1292  /* loop over all prons, we break when we hit the double-null (hence the +1) */
1293  for( ; (*pron)!='\0'; pron+=(pron_len+1)) {
1294    pron_len = strlen((char*)pron);
1295    if (pron_len >= MAX_PRON_LEN)
1296      {
1297	PLogError("error: wordadd failed on word %s due to pron [%s] too long\n", word, (char*)pron);
1298	rc = FST_FAILED_ON_INVALID_ARGS;
1299	goto RETRC;
1300      }
1301
1302    /* check for searchability after adding, estimating at most pron_len
1303       arcs and nodes will be added */
1304    if (fst->num_arcs + pron_len > fst->max_searchable_arcs)
1305      {
1306	PLogError("error: wordadd failed on word %s due to %d arc search limit\n",
1307		  word, fst->max_searchable_arcs);
1308	rc = FST_FAILED_ON_MEMORY;
1309	goto RETRC;
1310      }
1311
1312    if (fst->num_nodes + pron_len > fst->max_searchable_nodes)
1313      {
1314	PLogError("error: wordadd failed on word %s due to %d node search limit\n",
1315		  word, fst->max_searchable_nodes);
1316	rc = FST_FAILED_ON_MEMORY;
1317	goto RETRC;
1318      }
1319
1320    /* add the word if necessary, !FST_SUCCESS is allowed if we're just adding
1321       an alternate pronunciation */
1322    olabel = wordmap_find_index_in_rule(fst->olabels, word, fst->addWordCaching_lastslot_num);
1323    if (olabel == MAXwordID)
1324      {
1325	olabel = wordmap_add_word_in_rule(fst->olabels, word, fst->addWordCaching_lastslot_num);
1326	if (olabel != MAXwordID)
1327	  num_words_added++;
1328      }
1329    if (olabel == MAXwordID)
1330      {
1331	PLogError("error: wordmap_add_word failed\n");
1332	rc = FST_FAILED_ON_MEMORY;
1333	goto RETRC;
1334      }
1335
1336#if USE_HMM_BASED_ENROLLMENT
1337    // pron should be converted to model_sequence, model_sequence_len
1338#define VETRANS_PREFIX "wd_hmm"
1339#define VETRANS_PREFIX_LEN 6
1340    if( LSTRSTR(pron, VETRANS_PREFIX)!=NULL) {
1341      // printf("Debug-pron: %d, %s\n",pron_len, pron);
1342      model_sequence_len=0;
1343      for(Tpron=pron; (Tpron=strstr(Tpron, VETRANS_PREFIX))!=NULL; ){
1344	// Tpron = strstr(Tpron, "wd_hmm");
1345	Tpron += VETRANS_PREFIX_LEN; // skip over "wd_hmm"
1346	// Copy hmm number (56,132,...)
1347	model_sequence[ model_sequence_len] = atoi_with_check(Tpron, MAXmodelID);
1348	model_sequence_len++;
1349      }
1350
1351
1352      /* we need to append silence at the end since we might be dealing with a slot
1353	 that has ensuing speech, formerly we only dealt with ROOT which defacto
1354	 was followed by the pau2 silence, so ROOT did not need its own */
1355      if (fst->addWordCaching_lastslot_needs_post_silence)
1356	model_sequence[model_sequence_len++] = 3; // <<< hmm3_sil !!! ugly hard-code here
1357
1358      /* append the word boundary */
1359      model_sequence[model_sequence_len++] = WORD_BOUNDARY;
1360      /* translate to input label ids */
1361      for (i = 0; i < model_sequence_len; i++)
1362	{
1363	  if (model_sequence[i] >= EPSILON_OFFSET)
1364	    model_sequence[i] = (modelID)(model_sequence[i] + fst->hmm_ilabel_offset);
1365	}
1366    }
1367    else
1368#endif
1369      {
1370	pron_len = strlen((char*)pron);
1371	if (pron_len >= MAX_PRON_LEN)
1372	  {
1373	    PLogError("error: wordadd failed on word %s due to pron [%s] too long\n", word, (char*)pron);
1374	    rc = FST_FAILED_ON_INVALID_ARGS;
1375	    goto RETRC;
1376	  }
1377
1378	for (i = 0; i < pron_len; i++)
1379	  {
1380	    if (pron[i] == OPTSILENCE_CODE)
1381	      {
1382		phoneme_sequence[i] = SILENCE_CODE;
1383		num_pron_ampersands++;
1384	      }
1385	    else
1386	      phoneme_sequence[i] = pron[i];
1387	  }
1388	/* we need to append silence at the end since we might be dealing with a slot
1389	   that has ensuing speech, formerly we only dealt with ROOT which defacto
1390	   was followed by the pau2 silence, so ROOT did not need its own */
1391	if (fst->addWordCaching_lastslot_needs_post_silence)
1392	  phoneme_sequence[i++] = SILENCE_CODE;
1393
1394	model_sequence_len = i;
1395	irc = get_modelids_for_pron(fst->allotree, phoneme_sequence, model_sequence_len, model_sequence);
1396	/* check for bad sequence of phonemes */
1397	if (irc)
1398	  {
1399	    PLogError("error: get_modelids_for_pron(%s) returned %d\n", pron, irc);
1400	    rc = FST_FAILED_ON_INVALID_ARGS;
1401	    goto RETRC;
1402	  }
1403	IF_DEBUG_WDADD(printf("model_sequence ...\n"));
1404
1405	/* append the word boundary */
1406	model_sequence[model_sequence_len++] = WORD_BOUNDARY;
1407	/* translate to input label ids */
1408	for (i = 0; i < model_sequence_len; i++)
1409	  {
1410	    if (model_sequence[i] >= EPSILON_OFFSET)
1411	      model_sequence[i] = (modelID)(model_sequence[i] + fst->hmm_ilabel_offset);
1412	  }
1413      } /*  end of ph_t ph_r .. type decoding of the model  sequence */
1414
1415    /* now do the actual addition */
1416    rc = fst_add_arcs(fst, start_node, end_node, olabel, (costdata)cost, model_sequence, model_sequence_len);
1417    if (rc == FST_SUCCESS)
1418      {
1419	num_prons_added++;
1420      }
1421    else if (rc == FST_FAILED_ON_HOMONYM)
1422      {
1423	/* maybe the another pron will work? */
1424      }
1425    else
1426      {
1427	PLogMessage("error: fst_add_arcs() failed adding word %s pron %s ('&' as 'iwt')\n", word, (char*)pron);
1428	goto RETRC;
1429      }
1430
1431    /* second add the pron with no silences,
1432       this only applies to true prons, not wd_hmm333 wd_hmm222 type prons */
1433
1434    if (num_pron_ampersands > 0)
1435      {
1436	for (i = 0, model_sequence_len = 0; i < pron_len; i++)
1437	  {
1438	    if (pron[i] != OPTSILENCE_CODE)
1439	      phoneme_sequence[model_sequence_len++] = pron[i];
1440	  }
1441
1442	irc = get_modelids_for_pron(fst->allotree, phoneme_sequence, model_sequence_len, model_sequence);
1443	/* check for bad sequence of phonemes */
1444	if (irc)
1445	  {
1446	    PLogError("error: get_modelids_for_pron(%s) returned %d\n", pron, rc);
1447	    rc = FST_FAILED_ON_INVALID_ARGS;
1448	    goto RETRC;
1449	  }
1450	else
1451	  {
1452	    IF_DEBUG_WDADD(printf("model_sequence ...\n"));
1453
1454	    /* append the word boundary */
1455	    model_sequence[model_sequence_len++] = WORD_BOUNDARY;
1456	    /* translate to input label ids */
1457	    for (i = 0; i < model_sequence_len; i++)
1458	      {
1459		if (model_sequence[i] >= EPSILON_OFFSET)
1460		  model_sequence[i] = (modelID)(model_sequence[i] + fst->hmm_ilabel_offset);
1461	      }
1462	    /* now do the actual addition */
1463	    rc = fst_add_arcs(fst, start_node, end_node,
1464			      olabel, (costdata)cost, model_sequence, model_sequence_len);
1465
1466	    if (rc == FST_SUCCESS)
1467	      {
1468		num_prons_added++;
1469	      }
1470	    else if (rc == FST_FAILED_ON_HOMONYM)
1471	      {
1472		/* maybe another pron worked? */
1473	      }
1474	    else
1475	      {
1476		PLogMessage("Warning: fst_add_arcs() failed while adding "
1477			    "word %s pron %s (skip '&')\n", word, (char*)pron);
1478		goto RETRC;
1479	      }
1480	  }
1481      }
1482  }
1483
1484
1485 RETRC:
1486  /* set this to make sure that FST_Prepare gets called after add word */
1487  fst->whether_prepared = 0;
1488#if USE_COMP_STATS
1489  end_cs_clock1(&comp_stats->word_addition, 1);
1490#endif
1491
1492  if (rc < 0 && rc != FST_FAILED_ON_HOMONYM)
1493    return rc;
1494
1495  if (num_prons_added == 0 && num_words_added > 0)
1496    {
1497      return FST_FAILED_ON_HOMONYM;
1498    }
1499  else if (num_prons_added == 0 && num_words_added == 0)
1500    {
1501      return FST_FAILED_ON_HOMOGRAPH;
1502    }
1503  else if (num_prons_added > 0 && num_words_added == 0)
1504    {
1505      return FST_SUCCESS_ON_OLD_WORD;
1506    }
1507  else
1508    {
1509      /* if(num_prons_added>0 && num_words_added>0) */
1510      return FST_SUCCESS;
1511    }
1512}
1513
1514/* remove arcs leaving a node, arcs added via word add, are just discarded
1515to be cleaned up for re-use by other functions */
1516void remove_added_arcs_leaving(srec_context* fst, nodeID ni)
1517{
1518  FSMnode* node = &fst->FSMnode_list[ ni];
1519  FSMarc *arc = NULL, *arc2;
1520  arcID *pai, ai, ai2;
1521  for (pai = &node->un_ptr.first_next_arc, ai = (*pai); ai != MAXarcID; pai = &arc->linkl_next_arc, ai = (*pai))
1522  {
1523    if (ai < fst->num_base_arcs)
1524    {
1525      arc = &fst->FSMarc_list[ai];
1526    }
1527    else
1528    {
1529      arc2 = &fst->FSMarc_list[ai];
1530      for (ai2 = arc2->linkl_next_arc; ai2 >= fst->num_base_arcs && ai2 != MAXarcID;
1531           ai2 = arc2->linkl_next_arc)
1532      {
1533        arc2 = &fst->FSMarc_list[ai2];
1534      }
1535      *pai = ai2;
1536    }
1537  }
1538}
1539
1540/* remove arcs arriving at a node, arcs added via word add, are just discarded
1541   to be cleaned up for re-use by other functions */
1542void remove_added_arcs_arriving(srec_context* fst, nodeID ni)
1543{
1544  FSMnode* node = &fst->FSMnode_list[ni];
1545  FSMarc *arc = NULL, *arc2;
1546  arcID *pai, ai, ai2;
1547  for (pai = &node->first_prev_arc, ai = (*pai); ai != MAXarcID;
1548       pai = &arc->linkl_prev_arc, ai = (*pai))
1549  {
1550    if (ai < fst->num_base_arcs)
1551    {
1552      arc = &fst->FSMarc_list[ai];
1553    }
1554    else
1555    {
1556      arc2 = &fst->FSMarc_list[ai];
1557      for (ai2 = arc2->linkl_prev_arc; ai2 >= fst->num_base_arcs && ai2 != MAXarcID;
1558           ai2 = arc2->linkl_prev_arc)
1559      {
1560        arc2 = &fst->FSMarc_list[ai2];
1561      }
1562      *pai = ai2;
1563    }
1564  }
1565}
1566
1567/* reset greammar, but resetting all the arcs, nodes and words added
1568   via the addword functions */
1569
1570int FST_ResetGrammar(srec_context* fst)
1571{
1572  int i, rc = 0;
1573  nodeID fst_slot_start_node, fst_slot_end_node;
1574  wordID fst_slot_slotnum;
1575  arcID ai;
1576  nodeID ni2, ni3, ni4, ni3a;
1577  FSMnode_t *node, *node2, *node3;
1578  FSMarc_t *arc, *arc2, *arc3;
1579
1580  /*fst_slot_slotnum = wordmap_find_rule_index(fst->olabels, slot);*/
1581  for (fst_slot_slotnum = 1; fst_slot_slotnum < fst->olabels->num_slots;
1582       fst_slot_slotnum++)
1583  {
1584
1585    if (fst_slot_slotnum == MAXwordID)
1586    {
1587      char *slot = "";
1588      PLogError("error: slot '%s' not found among [%d,%d] possible\n", slot, 1, fst->olabels->num_slots - 1);
1589      return (rc = FST_FAILED_ON_INVALID_ARGS);
1590    }
1591
1592    /* now find where in the graph this slot is referenced, note that
1593       there might be more than one place, but here we ignore multiples */
1594    fst_slot_start_node = MAXnodeID;
1595    fst_slot_end_node = MAXnodeID;
1596    for (i = fst->num_fsm_exit_points; --i >= 0;)
1597    {
1598      ai = fst->fsm_exit_points[i].arc_index;
1599      if (fst->FSMarc_list[ai].olabel == fst_slot_slotnum)
1600      {
1601        fst_slot_start_node = fst->fsm_exit_points[i].from_node_index;
1602        fst_slot_end_node = fst->fsm_exit_points[i].wbto_node_index;
1603      }
1604    }
1605
1606    /* this 'slot' could be the root rule, which can't be removed */
1607    if (fst_slot_start_node == MAXnodeID || fst_slot_end_node == MAXnodeID)
1608      continue;
1609
1610    /*
1611      start                     end
1612      node   node2    node3   node4
1613       o--@N-->o--sil-->o--wb-->o
1614         arc     arc2   | arc3  ^
1615                 |       |wb
1616                 \__sil->o
1617    arc4   ni3a
1618    */
1619
1620    remove_added_arcs_leaving(fst, fst_slot_start_node);
1621    node = &fst->FSMnode_list[ fst_slot_start_node];
1622    for (ai = node->un_ptr.first_next_arc; ai != MAXarcID; ai = arc->linkl_next_arc)
1623    {
1624      arc = &fst->FSMarc_list[ai];
1625      if (arc->olabel != fst_slot_slotnum)
1626        continue;
1627
1628      ni2 = arc->to_node;
1629      remove_added_arcs_arriving(fst, ni2);
1630      if (ni2 == fst_slot_end_node)
1631        continue;
1632      node2 = &fst->FSMnode_list[ni2];
1633      arc2 = &fst->FSMarc_list[ node2->un_ptr.first_next_arc];
1634
1635
1636      ni3 = arc2->to_node;
1637      remove_added_arcs_arriving(fst, ni3);
1638      if (ni3 == fst_slot_end_node)
1639        continue;
1640      node3 = &fst->FSMnode_list[ni3];
1641      arc3 = &fst->FSMarc_list[ node3->un_ptr.first_next_arc];
1642      while (arc3->linkl_next_arc != MAXarcID)
1643      {
1644        arc3 = &fst->FSMarc_list[arc3->linkl_next_arc];
1645        ni3a = arc3->to_node;
1646        remove_added_arcs_arriving(fst, ni3a);
1647      }
1648      arc3 = &fst->FSMarc_list[ node3->un_ptr.first_next_arc];
1649
1650      ni4 = arc3->to_node;
1651      remove_added_arcs_arriving(fst, ni4);
1652      ASSERT(ni4 == fst_slot_end_node);
1653      if (ni4 == fst_slot_end_node)
1654        continue;
1655    }
1656  }
1657
1658  /* reset the freelist for nodes */
1659  if( fst->num_nodes == fst->num_base_nodes )
1660    {}
1661  else{
1662    FSMnode *ntoken;
1663    FSMnode* tmp_FSMnode_list;
1664    FSMnode_info* tmp_FSMnode_info_list;
1665
1666	fst->FSMnode_freelist = MAXnodeID;
1667    fst->num_nodes = fst->FSMnode_list_len = fst->num_base_nodes;
1668
1669	tmp_FSMnode_list = (FSMnode*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode), "srec.graph.nodes");
1670    if(!tmp_FSMnode_list){
1671     PLogError("ERROR: Could NOT reset the memory for srec.graph.nodes");
1672     return FST_FAILED_ON_MEMORY;
1673     }
1674    memcpy( tmp_FSMnode_list, fst->FSMnode_list, fst->FSMnode_list_len*sizeof(FSMnode));
1675
1676    /* scan to the end of the free list (to be re-used) */
1677    nodeID* last_free_node = (&fst->FSMnode_freelist);
1678	ntoken = (*last_free_node==MAXnodeID) ? NULL : &fst->FSMnode_list[*last_free_node] ;
1679
1680	for( ; *last_free_node!=MAXnodeID; last_free_node = &ntoken->un_ptr.next_node)
1681		 ntoken = &tmp_FSMnode_list[ *last_free_node];
1682
1683	FREE( fst->FSMnode_list);
1684    tmp_FSMnode_info_list = (FSMnode_info*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode_info), "srec.graph.nodeinfos");
1685    if(!tmp_FSMnode_info_list){
1686     PLogError("ERROR: Could NOT reset the memory for srec.graph.nodeinfos");
1687     return FST_FAILED_ON_MEMORY;
1688     }
1689	// copy in node info
1690	memcpy( tmp_FSMnode_info_list, fst->FSMnode_info_list, fst->FSMnode_list_len*sizeof(FSMnode_info));
1691
1692
1693	FREE( fst->FSMnode_info_list);
1694	fst->FSMnode_info_list = tmp_FSMnode_info_list;
1695    fst->FSMnode_list = tmp_FSMnode_list;
1696  }
1697
1698  /*ni = fst->FSMnode_freelist = fst->num_base_nodes;
1699    node = &fst->FSMnode_list[ni];
1700    for (; ni < fst->FSMnode_list_len - 1; ni++, node++)
1701       node->un_ptr.next_node = (nodeID)(ni + 1);
1702    node->un_ptr.next_node = MAXnodeID;
1703    fst->num_nodes = fst->num_base_nodes;*/
1704
1705  /* reset the freelist for arcs */
1706  if( fst->num_arcs == fst->num_base_arcs )
1707    {}
1708  else {
1709    FSMarc* atoken = NULL;
1710    FSMarc* tmp_FSMarc_list;
1711
1712    fst->num_arcs = fst->num_base_arcs;
1713    fst->FSMarc_list_len = fst->num_base_arcs;
1714    fst->FSMarc_freelist = MAXarcID;
1715    tmp_FSMarc_list = (FSMarc*)CALLOC_CLR(fst->FSMarc_list_len, sizeof(FSMarc), "srec.graph.arcs");
1716    if(!tmp_FSMarc_list){
1717      PLogError("ERROR: Could NOT reset the memory for srec.graph.arcs");
1718     return FST_FAILED_ON_MEMORY;
1719    }
1720    memcpy( tmp_FSMarc_list, fst->FSMarc_list, fst->FSMarc_list_len*sizeof(FSMarc));
1721
1722    /* scan to the end of the free list (to be re-used) */
1723    arcID* last_free_arc = &fst->FSMarc_freelist;
1724    atoken = (*last_free_arc==MAXarcID) ? NULL : &fst->FSMarc_list[*last_free_arc] ;
1725
1726    for( ; *last_free_arc!=MAXarcID; last_free_arc=&atoken->linkl_next_arc)
1727      atoken = &tmp_FSMarc_list[ *last_free_arc];
1728
1729    FREE( fst->FSMarc_list);
1730    fst->FSMarc_list = tmp_FSMarc_list;
1731  }
1732
1733  /*ai = fst->FSMarc_freelist = fst->num_base_arcs;
1734    arc = &fst->FSMarc_list[ai];
1735    for (; ai < fst->FSMarc_list_len - 1; ai++, arc++)
1736    arc->linkl_next_arc = (arcID)(ai + 1);
1737    arc->linkl_next_arc = MAXarcID;
1738    fst->num_arcs = fst->num_base_arcs;
1739    fst->FSMarc_list_len = fst->num_base_arcs;*/
1740
1741  /* now remove all the added words */
1742  wordmap_reset(fst->olabels);
1743  return FST_SUCCESS;
1744}
1745
1746/* See the description of arc_tokens in the header file. These
1747   arcs are nodeless, which make loading from a file that
1748   references nodes a little harder.  We need to do it in stages.
1749   Later, when we load from binary image it should be much simpler.
1750*/
1751
1752arc_token_lnk get_first_arc_leaving_node(arc_token* arc_token_list,
1753    arcID num_arcs,
1754    nodeID node)
1755{
1756  arcID i;
1757  for (i = 0; i < num_arcs; i++)
1758  {
1759    if ((nodeID)(int)arc_token_list[i].next_token_index == node)
1760      return ARC_TOKEN_LNK(arc_token_list, i);
1761  }
1762  return ARC_TOKEN_NULL;
1763}
1764
1765int FST_LoadReverseWordGraph(srec_context* context, int num_words_to_add, PFile* fp)
1766{
1767  arcID i;
1768  char line[MAX_LINE_LENGTH];
1769  char word_label_as_str[128];
1770  arcID num_alloc_arc_tokens;
1771  nodeID from_node, into_node;
1772  labelID word_label = MAXwordID;
1773  arc_token *atoken, *last_atoken;
1774  costdata cost;
1775  arcID num_arcs;
1776  arc_token *arc_token_list, *tmp;
1777  long fpos;
1778
1779  /* determine number of word arcs to allocate */
1780  fpos = pftell(fp);
1781  for (num_arcs = 0; pfgets(line, MAX_LINE_LENGTH, fp);  num_arcs++) ;
1782  num_alloc_arc_tokens = num_arcs;
1783  num_alloc_arc_tokens = (arcID)(num_arcs + num_words_to_add);
1784  pfseek(fp, fpos, SEEK_SET);
1785
1786  context->arc_token_list = (arc_token*)CALLOC_CLR(num_alloc_arc_tokens, sizeof(arc_token), "srec.graph.wordgraph");
1787  arc_token_list = context->arc_token_list;
1788
1789  /* 1. first load up all the information */
1790  i = 0;
1791  while (pfgets(line, MAX_LINE_LENGTH, fp))
1792  {
1793    if (sscanf(line, "%hu\t%hu\t%s", &from_node, &into_node, word_label_as_str) == 3)
1794    {
1795      word_label = wordmap_find_index(context->olabels, word_label_as_str);
1796      // ASSERT(word_label >= 0);
1797      cost = FREEcostdata;
1798    }
1799    else if (sscanf(line, "%hu", &from_node) == 1)
1800    {
1801      into_node = MAXnodeID;
1802      word_label = MAXwordID;
1803      cost = FREEcostdata;
1804    }
1805    else
1806    {
1807      PLogError("FST_LoadReverseWordGraph() .. can't parse line %s\n", line);
1808      ASSERT(0);
1809    }
1810    atoken = &arc_token_list[i];
1811    i++;
1812    atoken->ilabel = word_label;
1813	/* atoken->olabel = WORD_EPSILON_LABEL; */
1814    /*atoken->cost = cost; cost is not used for now */
1815#if DEBUG_ASTAR
1816    atoken->label = (word_label == MAXwordID ? 0 : wmap->words[word_label]);
1817#endif
1818    atoken->first_next_arc = (arc_token_lnk)into_node;
1819    atoken->next_token_index = (arc_token_lnk)from_node;
1820  }
1821  num_arcs = i;
1822
1823  /* 2. now do the internal cross references */
1824  for (i = 0; i < num_arcs; i++)
1825  {
1826    atoken = &arc_token_list[i];
1827    into_node = (nodeID)atoken->first_next_arc;
1828    if (into_node == MAXnodeID)
1829      atoken->first_next_arc = ARC_TOKEN_NULL;
1830    else
1831      atoken->first_next_arc = get_first_arc_leaving_node(arc_token_list, num_arcs, (nodeID)atoken->first_next_arc);
1832  }
1833
1834  /* 3. now do more internal cross refs */
1835  last_atoken = &arc_token_list[0];
1836  for (i = 1; i < num_arcs; i++)
1837  {
1838    atoken = &arc_token_list[i];
1839    if (atoken->next_token_index != last_atoken->next_token_index)
1840    {
1841      last_atoken->next_token_index = ARC_TOKEN_NULL;
1842    }
1843    else
1844    {
1845      last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i);
1846    }
1847    last_atoken = atoken;
1848  }
1849  last_atoken->next_token_index = ARC_TOKEN_NULL;
1850
1851#if DEBUG_ASTAR
1852  /* under debug, it's nice to be able to see the words leaving the
1853     destination node, they are stored sequentially in the debug ary */
1854  for (i = 0; i < num_arcs; i++)
1855  {
1856    char * p;
1857    atoken = &arc_token_list[i];
1858    atoken->debug[0] = 0;
1859    for (tmp = ARC_TOKEN_LNK(arc_token_list, atoken->first_next_arc); tmp != NULL;
1860         tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index))
1861    {
1862      if (tmp->first_next_arc == ARC_TOKEN_NULL)
1863      {
1864        p = "END";
1865      }
1866      else if (!tmp->label)
1867      {
1868        p = "NULL";
1869      }
1870      else
1871      {
1872        p = tmp->label;
1873      }
1874      if (strlen(atoken->debug) + strlen(p) + 6 < 64)
1875      {
1876        strcat(atoken->debug, p);
1877        strcat(atoken->debug, " ");
1878      }
1879      else
1880      {
1881        strcat(atoken->debug, "...");
1882        break;
1883      }
1884    }
1885  }
1886#endif
1887  context->arc_token_list_len = num_alloc_arc_tokens;
1888  if (num_alloc_arc_tokens > num_arcs)
1889  {
1890    atoken = &context->arc_token_list[num_arcs];
1891    for (i = num_arcs; i < num_alloc_arc_tokens; i++, atoken++)
1892    {
1893      atoken->first_next_arc = ARC_TOKEN_NULL;
1894      atoken->ilabel         = MAXwordID;
1895      atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i + 1);/*atoken+1;*/
1896    }
1897    atoken--;
1898    atoken->next_token_index = ARC_TOKEN_NULL;
1899    context->arc_token_freelist = &context->arc_token_list[num_arcs];
1900  }
1901  else
1902  {
1903    context->arc_token_freelist = NULL;
1904  }
1905  /* the insert start is the arc that goes to the end */
1906  atoken = context->arc_token_list;
1907  atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
1908  for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->next_token_index))
1909  {
1910    if (atoken->first_next_arc == ARC_TOKEN_NULL)
1911      break;
1912    tmp = ARC_TOKEN_PTR(context->arc_token_list, atoken->first_next_arc);
1913    if (tmp->ilabel == MAXwordID && tmp->first_next_arc == ARC_TOKEN_NULL)
1914      break;
1915  }
1916  context->arc_token_insert_start = atoken;
1917
1918  return FST_SUCCESS;
1919}
1920
1921int FST_UnloadReverseWordGraph(srec_context* context)
1922{
1923  FREE(context->arc_token_list);
1924  context->arc_token_list = 0;
1925  return FST_SUCCESS;
1926}
1927
1928/*----------------------------------------------------------------------*
1929 *                                                                      *
1930 * general                                                              *
1931 *                                                                      *
1932 *----------------------------------------------------------------------*/
1933
1934char split_line(char* line, char** av)
1935{
1936  int i = 0;
1937  av[i] = strtok(line, "\n\r\t ");
1938  for (; av[i];)
1939  {
1940    av[++i] = strtok(NULL, "\n\r\t ");
1941  }
1942  return i;
1943}
1944
1945asr_int32_t atoi_with_check(const char* buf, asr_int32_t mymax)
1946{
1947  asr_int32_t ans = atol(buf);
1948  ASSERT(ans < mymax);
1949  return ans;
1950}
1951
1952/*----------------------------------------------------------------------*
1953 *                                                                      *
1954 * fst                                                                  *
1955 *                                                                      *
1956 *----------------------------------------------------------------------*/
1957
1958arcID fst_get_free_arc(srec_context* fst)
1959{
1960  FSMarc* atoken;
1961  arcID atokid = fst->FSMarc_freelist;
1962  if (atokid == MAXarcID)
1963  {
1964    PLogError("error: ran out of arcs\n");
1965    return atokid;
1966  }
1967  atoken = &fst->FSMarc_list[atokid];
1968  fst->FSMarc_freelist = ARC_XtoI(atoken->linkl_next_arc);
1969  memset(atoken, 0, sizeof(FSMarc));
1970  atoken->to_node = FSMNODE_NULL;
1971  atoken->fr_node = FSMNODE_NULL;
1972  atoken->linkl_next_arc = FSMARC_NULL;
1973  atoken->linkl_prev_arc = FSMARC_NULL;
1974  atoken->ilabel = 0;
1975  atoken->olabel = 0;
1976  atoken->cost   = 0;
1977  fst->num_arcs++;
1978  return atokid;
1979}
1980nodeID fst_get_free_node(srec_context* fst)
1981{
1982  FSMnode* ntoken;
1983  nodeID ntokid = fst->FSMnode_freelist;
1984  if (ntokid == MAXnodeID)
1985  {
1986    PLogError("error: ran out of nodes\n");
1987    return ntokid;
1988  }
1989  ntoken = &fst->FSMnode_list[ntokid];
1990  fst->FSMnode_freelist = NODE_XtoI(ntoken->un_ptr.next_node);
1991  ntoken->un_ptr.first_next_arc = FSMARC_NULL;
1992  ntoken->first_prev_arc = FSMARC_NULL;
1993  /* ASSERT( (int)(ntok - fst->FSMnode_list) == fst->num_nodes); */
1994  fst->num_nodes++;
1995  return ntokid;
1996}
1997
1998int fst_free_node(srec_context* fst, FSMnode* node)
1999{
2000  IF_DEBUG_WDADD(printf_node(fst, "freeing: ", node));
2001  node->first_prev_arc = FSMARC_NULL;
2002  node->un_ptr.next_node = fst->FSMnode_freelist;
2003  node->first_prev_arc = FSMARC_FREE;
2004  fst->FSMnode_freelist = (nodeID)(node - fst->FSMnode_list);
2005  /* fst->num_nodes--; not allowed unless we compress! */
2006  return 0;
2007}
2008
2009int fst_free_arc(srec_context* fst, FSMarc* arc)
2010{
2011  IF_DEBUG_WDADD(printf_arc(fst, "freeing: ", arc));
2012  arc->linkl_prev_arc = FSMARC_FREE;
2013  arc->linkl_next_arc = ARC_ItoX(fst->FSMarc_freelist);
2014  arc->cost = 2;
2015  arc->ilabel = 0;
2016  arc->olabel = 0;
2017  IF_DEBUG_WDADD(arc->ilabel_str = 0);
2018  IF_DEBUG_WDADD(arc->olabel_str = 0);
2019  fst->FSMarc_freelist = (arcID)(arc - fst->FSMarc_list);
2020  fst->num_arcs--;
2021  return 0;
2022}
2023
2024int fst_free_arc_net(srec_context* fst, FSMarc* arc)
2025{
2026  if (!arc)
2027    return 0;
2028  /* need to traverse the arc, get the 'from' nodes,
2029     then traverse the from nodes and kill arcs */
2030  /* we'll need this after an error condition */
2031  return 0;
2032}
2033
2034int fst_push_arc_olabel(srec_context* fst, FSMarc* arc)
2035{
2036  FSMarc_ptr atok;
2037  FSMarc* atoken;
2038  FSMnode_ptr ntok = arc->to_node;
2039
2040  /* look for all arcs leaving this arc, and mark them
2041     normally there is only one, except for multipron words */
2042  for (atok = FIRST_NEXT(ntok); atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2043  {
2044    atoken = ARC_XtoP(atok);
2045    /* cannot have multiple olabels before a boundary */
2046    if (atoken->olabel != WORD_EPSILON_LABEL)
2047      return FST_FAILED_INTERNAL;
2048    atoken->olabel = arc->olabel;
2049#if DO_WEIGHTED_ADDWORD
2050    atoken->cost = (costdata)(atoken->cost + arc->cost);
2051#endif
2052    IF_DEBUG_WDADD(atoken->olabel_str = arc->olabel_str);
2053  }
2054  /* unlabel this arc */
2055  arc->olabel = WORD_EPSILON_LABEL;
2056#if DO_WEIGHTED_ADDWORD
2057  arc->cost = FREEcostdata;
2058#endif
2059  IF_DEBUG_WDADD(arc->olabel_str = fst->olabels->words[ arc->olabel]);
2060  return FST_SUCCESS;
2061}
2062
2063#if DO_WEIGHTED_ADDWORD
2064int fst_push_arc_cost(srec_context* fst, FSMarc* arc)
2065{
2066  FSMarc_ptr atok;
2067  FSMarc* atoken;
2068  FSMnode_ptr ntok = arc->to_node;
2069  /* look for all arcs leaving this arc, and push the cost thereto */
2070  for (atok = FIRST_NEXT(ntok); atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2071  {
2072    atoken = ARC_XtoP(atok);
2073    atoken->cost = (costdata)(atoken->cost + arc->cost);
2074  }
2075  arc->cost = FREEcostdata;
2076  return FST_SUCCESS;
2077}
2078#endif
2079
2080
2081int fst_pull_arc_olabel(srec_context* fst, FSMarc* arc)
2082{
2083  FSMarc_ptr atok;
2084  FSMarc* atoken;
2085  FSMnode_ptr fr_node = arc->fr_node;
2086  FSMarc_ptr tmp_atok;
2087  FSMarc *tmp_atoken;
2088  FSMnode *anode;
2089
2090  if (arc->olabel == WORD_EPSILON_LABEL)
2091    return FST_SUCCESS;
2092
2093  /* can the olabel be pulled earlier? */
2094  for (atok = FIRST_PREV(fr_node); atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2095  {
2096    atoken = ARC_XtoP(atok);
2097    anode = NODE_XtoP(atoken->to_node);
2098
2099    tmp_atok = anode->un_ptr.first_next_arc;
2100    if (tmp_atok != FSMARC_NULL)
2101    {
2102      tmp_atoken = ARC_XtoP(tmp_atok);
2103      if (tmp_atoken->linkl_next_arc != FSMARC_NULL)
2104      {
2105        return FST_CONTINUE;
2106      }
2107    }
2108  }
2109
2110  /* look for all arcs arriving at this arc, and mark them */
2111  for (atok = FIRST_PREV(fr_node);atok != FSMARC_NULL;atok = atoken->linkl_prev_arc)
2112  {
2113    atoken = ARC_XtoP(atok);
2114    /* cannot have multiple olabels! serious internal error!? */
2115    if (atoken->olabel != WORD_EPSILON_LABEL)
2116    {
2117      PLogError("error: internal error, in fst_pull_arc_olabel()\n");
2118      return FST_FAILED_INTERNAL;
2119    }
2120    atoken->olabel = arc->olabel;
2121#if DO_WEIGHTED_ADDWORD
2122    atoken->cost = arc->cost;
2123#endif
2124    IF_DEBUG_WDADD(atoken->olabel_str = arc->olabel_str);
2125  }
2126  /* unlabel this arc */
2127  arc->olabel = WORD_EPSILON_LABEL;
2128#if DO_WEIGHTED_ADDWORD
2129  arc->cost   = FREEcostdata;
2130#endif
2131  IF_DEBUG_WDADD(arc->olabel_str = fst->olabels->words[ arc->olabel]);
2132  return FST_SUCCESS;
2133}
2134
2135static FSMarc* find_next_arc_with_ilabel(srec_context* fst, FSMnode* node, labelID ilabel, FSMarc** last)
2136{
2137  FSMarc_ptr atok;
2138  FSMarc* atoken = NULL;
2139  for (atok = node->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2140  {
2141    atoken = ARC_XtoP(atok);
2142    if (atoken->ilabel == ilabel)
2143      return atoken;
2144  }
2145  *last = atoken; /* to this we may want to append a new arc */
2146  return NULL;
2147}
2148
2149FSMarc* find_prev_arc_with_iolabels(srec_context* fst, FSMnode* node, labelID ilabel, labelID olabel, FSMarc** last)
2150{
2151  FSMarc_ptr atok;
2152  FSMarc* atoken = NULL;
2153  FSMarc_ptr tmp_atok;
2154  FSMarc* tmp_atoken;
2155
2156  for (atok = node->first_prev_arc; atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2157  {
2158    atoken = ARC_XtoP(atok);
2159    if (atoken->olabel == olabel && atoken->ilabel == ilabel)
2160    {
2161      tmp_atok = NODE_XtoP(atoken->fr_node)->un_ptr.first_next_arc;
2162      if (tmp_atok != FSMARC_NULL)
2163      {
2164        tmp_atoken = ARC_XtoP(tmp_atok);
2165
2166        if (tmp_atoken->linkl_next_arc == FSMARC_NULL)
2167          return atoken;
2168      }
2169      else
2170        return atoken;
2171    }
2172
2173  }
2174  if (last) *last = atoken;
2175  return NULL;
2176}
2177
2178int num_arcs_leaving(srec_context* fst, FSMnode* node)
2179{
2180  int num_arcs = 0;
2181  FSMarc_ptr atok;
2182  FSMarc* atoken;
2183  for (atok = node->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2184  {
2185    atoken = ARC_XtoP(atok);
2186    num_arcs++;
2187  }
2188  return num_arcs;
2189}
2190
2191int num_arcs_arriving(srec_context* fst, FSMnode* node)
2192{
2193  int num_arcs = 0;
2194  FSMarc_ptr atok;
2195  FSMarc* atoken;
2196  for (atok = node->first_prev_arc; atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2197  {
2198    atoken = ARC_XtoP(atok);
2199    num_arcs++;
2200  }
2201  return num_arcs;
2202}
2203
2204PINLINE int num_arcs_arriving_gt_1(srec_context* fst, FSMnode* node)
2205{
2206  FSMarc_ptr atok = node->first_prev_arc;
2207  FSMarc* atoken;
2208  if (atok == FSMARC_NULL)
2209    return 0;
2210  atoken = ARC_XtoP(atok);
2211  return atoken->linkl_prev_arc == FSMARC_NULL ? 0 : 1;
2212}
2213
2214static void remove_arc_arriving(srec_context* fst, FSMnode* to_node, FSMarc* arc)
2215{
2216  FSMarc* atoken;
2217  FSMarc_ptr* atok;
2218  for (atok = &to_node->first_prev_arc;(*atok) != FSMARC_NULL; atok = &atoken->linkl_prev_arc)
2219  {
2220    atoken = ARC_XtoP(*atok);
2221    if (atoken == arc)
2222    {
2223      (*atok) = arc->linkl_prev_arc;
2224      return;
2225    }
2226  }
2227  ASSERT(0);
2228}
2229
2230int split_node_for_arc(srec_context* fst, FSMarc* arc)
2231{
2232  arcID new_arc_id;
2233  nodeID new_node_id;
2234  FSMnode* new_node;
2235  FSMnode* old_node;
2236  FSMarc_ptr atok, new_next, last_new_next;
2237  FSMarc *atoken, *new_next_p;
2238
2239  IF_DEBUG_WDADD(printf_arc(fst, "spliting ... ", arc));
2240
2241  new_node_id = fst_get_free_node(fst);
2242  if (new_node_id == MAXnodeID)
2243    return FST_FAILED_ON_MEMORY;
2244  new_node = &fst->FSMnode_list[ new_node_id];
2245  old_node = NODE_XtoP(arc->to_node);
2246  arc->to_node = NODE_ItoX(new_node_id);
2247  new_node->first_prev_arc = ARC_PtoX(arc);
2248  remove_arc_arriving(fst, old_node, arc);
2249  arc->linkl_prev_arc = FSMARC_NULL;
2250
2251  last_new_next = FSMARC_NULL;
2252  for (atok = old_node->un_ptr.first_next_arc; atok != FSMARC_NULL; atok = atoken->linkl_next_arc)
2253  {
2254    atoken = ARC_XtoP(atok);
2255    new_arc_id = fst_get_free_arc(fst);
2256    if (new_arc_id == MAXarcID)
2257      return FST_FAILED_ON_MEMORY;
2258    new_next = ARC_ItoX(new_arc_id);
2259    if (last_new_next != FSMARC_NULL)
2260      LINKL_NEXT(last_new_next) = new_next;
2261    else
2262      new_node->un_ptr.first_next_arc = new_next;
2263    new_next_p = ARC_XtoP(new_next);
2264    new_next_p->ilabel = atoken->ilabel;
2265    new_next_p->to_node = atoken->to_node;
2266    new_next_p->fr_node = arc->to_node;
2267    new_next_p->olabel = atoken->olabel;
2268    IF_DEBUG_WDADD(new_next_p->ilabel_str = atoken->ilabel_str);
2269    IF_DEBUG_WDADD(new_next_p->olabel_str = atoken->olabel_str);
2270    new_next_p->linkl_next_arc = FSMARC_NULL; /* set at next loop */
2271    append_arc_arriving_node(fst, NODE_XtoP(atoken->to_node), new_next);
2272    last_new_next = new_next;
2273  }
2274  return FST_SUCCESS;
2275}
2276
2277int fst_add_arcs(srec_context* fst, nodeID start_node, nodeID end_node,
2278                 wordID add_tree_olabel, costdata add_tree_cost,
2279                 modelID* add_tree_start, int add_tree_len)
2280{
2281  int rc = FST_SUCCESS;
2282  FSMarc_ptr atok = (FSMarc_ptr)0, last_atok = (FSMarc_ptr)0;
2283  FSMarc* atoken = NULL;
2284  FSMarc *next_atoken, *prev_atoken;
2285  FSMarc *append_to;
2286  FSMnode *late_start_node, *early_end_node;
2287  FSMnode *ntoken, *last_ntoken;
2288  FSMnode_ptr ntok, last_ntok;
2289  modelID *add_tree_end = add_tree_start + add_tree_len - 1;
2290  modelID *add_tree_late_start, *add_tree_early_end;
2291  modelID *add_tree;
2292  arcID new_arc_id, atokid;
2293  nodeID new_node_id, atokid_node;
2294  wordID add_tree_olabel_use = add_tree_olabel;
2295  wordID add_tree_cost_use   = add_tree_cost;
2296
2297  append_to = NULL;
2298  add_tree = add_tree_start;
2299#define FST_GROW_MINARCS  100
2300#define FST_GROW_MINNODES 100
2301#if defined(FST_GROW_FACTOR)
2302
2303  /* make sure we have enough arcs available */
2304  if(fst->num_arcs + add_tree_len >= fst->FSMarc_list_len)
2305   {
2306	FSMarc* tmp_FSMarc_list;
2307	arcID tmp_FSMarc_list_len;
2308	int itmp_FSMarc_list_len = fst->FSMarc_list_len*FST_GROW_FACTOR ;
2309
2310	if( itmp_FSMarc_list_len - fst->FSMarc_list_len < FST_GROW_MINARCS)
2311		itmp_FSMarc_list_len += FST_GROW_MINARCS;
2312
2313	if( itmp_FSMarc_list_len - fst->FSMarc_list_len < add_tree_len)
2314		itmp_FSMarc_list_len += add_tree_len;
2315
2316    if(itmp_FSMarc_list_len >= (int)MAXarcID)
2317	     return FST_FAILED_ON_MEMORY;
2318
2319	tmp_FSMarc_list_len = (arcID)itmp_FSMarc_list_len;
2320
2321    tmp_FSMarc_list = (FSMarc*)CALLOC_CLR(tmp_FSMarc_list_len, sizeof(FSMarc), "srec.graph.arcs");
2322      if(!tmp_FSMarc_list) {
2323	PLogError("error: Failed to extend the memory for arcs\n");
2324        return FST_FAILED_INTERNAL;
2325      }
2326    memcpy( tmp_FSMarc_list, fst->FSMarc_list, fst->FSMarc_list_len*sizeof(FSMarc));
2327
2328	/* initialize the new free list.
2329    head of this new free list is tmp_FSMarc_list[ fst->FSMarc_list_len] */
2330	for(atokid=fst->FSMarc_list_len; atokid<tmp_FSMarc_list_len-1; atokid++) {
2331	   tmp_FSMarc_list[atokid].linkl_next_arc = ARC_ItoX(atokid+1);
2332       tmp_FSMarc_list[atokid].linkl_prev_arc = FSMARC_FREE;
2333    }
2334    tmp_FSMarc_list[atokid].linkl_next_arc = FSMARC_NULL;
2335    tmp_FSMarc_list[atokid].linkl_prev_arc = FSMARC_FREE;
2336
2337    /* scan to the end of the old free list (to be re-used) */
2338    arcID* last_free_arc = &fst->FSMarc_freelist;
2339    atoken = (*last_free_arc==MAXarcID) ? NULL : &fst->FSMarc_list[*last_free_arc] ;
2340
2341    for( ; *last_free_arc!=MAXarcID; last_free_arc=&atoken->linkl_next_arc)
2342	    atoken = &tmp_FSMarc_list[ *last_free_arc];
2343
2344	/* append the new free list to the current free list */
2345	*last_free_arc = fst->FSMarc_list_len;
2346
2347	FREE( fst->FSMarc_list);
2348    fst->FSMarc_list = tmp_FSMarc_list;
2349    fst->FSMarc_list_len = tmp_FSMarc_list_len;
2350   }
2351
2352   /* make sure we have enough nodes available */
2353   if(fst->num_nodes + add_tree_len >= fst->FSMnode_list_len)
2354   {
2355     FSMnode* tmp_FSMnode_list;
2356	 nodeID tmp_FSMnode_list_len;
2357	 FSMnode_info* tmp_FSMnode_info_list;
2358	 int itmp_FSMnode_list_len = fst->FSMnode_list_len * FST_GROW_FACTOR ;
2359
2360	 if( itmp_FSMnode_list_len - fst->FSMnode_list_len < FST_GROW_MINNODES)
2361		itmp_FSMnode_list_len += FST_GROW_MINNODES;
2362
2363	 if( itmp_FSMnode_list_len - fst->FSMnode_list_len < add_tree_len)
2364		itmp_FSMnode_list_len += add_tree_len;
2365
2366	 if(itmp_FSMnode_list_len >= (int)MAXnodeID)
2367	     return FST_FAILED_ON_MEMORY;
2368
2369	 tmp_FSMnode_list_len = (nodeID)itmp_FSMnode_list_len;
2370
2371     tmp_FSMnode_list = (FSMnode*)CALLOC_CLR(tmp_FSMnode_list_len, sizeof(FSMnode), "srec.graph.nodes");
2372     if(!tmp_FSMnode_list) {
2373       PLogError("ERROR: Failed to extend the memory for nodes\n");
2374       return FST_FAILED_INTERNAL;
2375     }
2376     memcpy( tmp_FSMnode_list, fst->FSMnode_list, fst->FSMnode_list_len*sizeof(FSMnode));
2377
2378	/* initialize the new free node list.
2379    head of this new free list is tmp_FSMnode_list[ fst->FSMnode_list_len] */
2380    for(atokid_node=fst->FSMnode_list_len; atokid_node<tmp_FSMnode_list_len-1; atokid_node++)
2381	{
2382	   tmp_FSMnode_list[atokid_node].un_ptr.next_node = NODE_ItoX(atokid_node + 1);
2383       tmp_FSMnode_list[atokid_node].first_prev_arc = FSMARC_FREE;
2384    }
2385       tmp_FSMnode_list[atokid_node].un_ptr.next_node = FSMNODE_NULL;
2386       tmp_FSMnode_list[atokid_node].first_prev_arc = FSMARC_FREE;
2387
2388    /* scan to the end of the old free list (to be re-used) */
2389    nodeID* last_free_node = (&fst->FSMnode_freelist);
2390	ntoken = (*last_free_node==MAXnodeID) ? NULL : &fst->FSMnode_list[*last_free_node] ;
2391
2392	for( ; *last_free_node!=MAXnodeID; last_free_node = &ntoken->un_ptr.next_node)
2393		 ntoken = &tmp_FSMnode_list[ *last_free_node];
2394
2395	/* append the new free list to the current free list */
2396	*last_free_node = fst->FSMnode_list_len;
2397
2398	FREE( fst->FSMnode_list);
2399
2400	tmp_FSMnode_info_list = (FSMnode_info*)CALLOC_CLR( tmp_FSMnode_list_len, sizeof(FSMnode_info), "srec.graph.nodeinfos");
2401     if(!tmp_FSMnode_info_list) {
2402       PLogError("ERROR: Failed to extend the memory for node infos\n");
2403       return FST_FAILED_INTERNAL;
2404     }
2405	// copy in old node info
2406	memcpy( tmp_FSMnode_info_list, fst->FSMnode_info_list, fst->FSMnode_list_len*sizeof(FSMnode_info));
2407
2408	// initialize the new node info
2409	for (atokid_node=fst->FSMnode_list_len; atokid_node < tmp_FSMnode_list_len; atokid_node++)
2410		tmp_FSMnode_info_list[atokid_node] = NODE_INFO_UNKNOWN;
2411
2412	FREE( fst->FSMnode_info_list);
2413	fst->FSMnode_info_list = tmp_FSMnode_info_list;
2414    fst->FSMnode_list = tmp_FSMnode_list;
2415    fst->FSMnode_list_len = tmp_FSMnode_list_len;
2416}
2417#endif
2418   late_start_node = &fst->FSMnode_list[start_node];
2419
2420  while (1)
2421  {
2422
2423    next_atoken = find_next_arc_with_ilabel(fst, late_start_node,
2424                                            *add_tree /*->ilabel*/,
2425                                            &append_to);
2426    if (next_atoken != NULL)
2427    {
2428      /* so next_atok->ilabel == add_tree->ilabel */
2429
2430      if (next_atoken->ilabel == WORD_BOUNDARY && *add_tree/*->ilabel*/ == WORD_BOUNDARY)
2431      {
2432	 next_atoken = NULL;
2433	 break; /* break as if nothing more can be shared! */
2434	// return FST_FAILED_ON_HOMONYM;
2435      }
2436
2437      if (num_arcs_arriving_gt_1(fst, NODE_XtoP(next_atoken->to_node)))
2438      {
2439        split_node_for_arc(fst, next_atoken);
2440        /* unfortunate side effect here, that if we later find out this
2441           was a homonym addition, then this expansion was useless and
2442           for now we don't undo it! */
2443      }
2444
2445      /* we shouldn't really push the olabel if it's the same for both! */
2446      if (next_atoken->olabel != add_tree_olabel_use)
2447      {
2448        if (next_atoken->olabel != WORD_EPSILON_LABEL)
2449        {
2450          if (fst_push_arc_olabel(fst, next_atoken) != FST_SUCCESS)
2451          {
2452            PLogError("error: internal error fst_push_arc_olabel()\n");
2453            return FST_FAILED_INTERNAL;
2454          }
2455        }
2456#if DO_WEIGHTED_ADDWORD
2457        else
2458          fst_push_arc_cost(fst, next_atoken);
2459#endif
2460      }
2461      else if (add_tree_olabel_use != WORD_EPSILON_LABEL)
2462      {
2463        /* the graph already has this word, so we just re-use the olabel
2464           and disable the setting of olabel down below */
2465        add_tree_olabel_use = WORD_EPSILON_LABEL;
2466#if DO_WEIGHTED_ADDWORD
2467        add_tree_cost_use   = FREEcostdata;
2468#endif
2469      }
2470      add_tree++;
2471      late_start_node = NODE_XtoP(next_atoken->to_node);
2472    }
2473    else
2474    {
2475      break;
2476    }
2477  }
2478
2479  add_tree_late_start = add_tree;
2480  early_end_node = &fst->FSMnode_list[end_node];
2481
2482  if (!do_minimize)
2483  {
2484    last_ntoken = NULL;
2485    last_ntok = NODE_PtoX(late_start_node);
2486    for (; add_tree <= add_tree_end; add_tree++)
2487    {
2488      new_arc_id = fst_get_free_arc(fst);
2489      if (new_arc_id == MAXarcID)
2490      {
2491        PLogError("fst_get_free_arc() failed\n");
2492        return FST_FAILED_ON_MEMORY;
2493      }
2494      atok = ARC_ItoX(new_arc_id);
2495      atoken = ARC_XtoP(atok);
2496      if (add_tree != add_tree_end)
2497      {
2498        new_node_id = fst_get_free_node(fst);
2499        if (new_node_id == MAXnodeID)
2500        {
2501          PLogError("fst_get_free_node() failed\n");
2502          return FST_FAILED_ON_MEMORY;
2503        }
2504        ntok = NODE_ItoX(new_node_id);
2505        ntoken = NODE_XtoP(ntok);
2506        ntoken->first_prev_arc = atok;
2507      }
2508      else
2509      {
2510        ntok = FSMNODE_NULL;
2511        ntoken = NULL;
2512      }
2513      atoken->fr_node = last_ntok; /* was NODE_PtoX(late_start_node) */
2514      atoken->to_node = ntok;
2515      atoken->ilabel = *add_tree;
2516      atoken->linkl_next_arc = FSMARC_NULL;
2517      atoken->linkl_prev_arc = FSMARC_NULL;
2518      IF_DEBUG_WDADD(atoken->ilabel_str = fst->ilabels->words[ atoken->ilabel]);
2519      if (!last_ntoken)
2520      {
2521        append_to->linkl_next_arc = atok;
2522        atoken->olabel = add_tree_olabel_use;
2523#if DO_WEIGHTED_ADDWORD
2524        atoken->cost  =  add_tree_cost_use;
2525#endif
2526        IF_DEBUG_WDADD(atok->olabel_str = fst->olabels->words[ atoken->olabel]);
2527      }
2528      else
2529      {
2530        last_ntoken->un_ptr.first_next_arc = atok;
2531      }
2532      last_ntoken = ntoken;
2533      last_ntok   = ntok;
2534    }
2535    /*  add_tree_end->to_node = early_end_node;
2536    add_tree_end->linkl_next_arc = FSMARC_NULL;
2537    atok = last_arc_arriving( early_end_node);
2538    atok->linkl_prev_arc = add_tree_end;
2539    add_tree_end->linkl_prev_arc = FSMARC_NULL; */
2540    atoken->to_node = NODE_ItoX(end_node);
2541    append_arc_arriving_node(fst, &fst->FSMnode_list[end_node], atok);
2542  }
2543
2544  else /* ie. do_minimize from the rear */
2545  {
2546
2547    add_tree = add_tree_end;
2548    new_arc_id = fst_get_free_arc(fst);
2549    if (new_arc_id == MAXarcID)
2550      return FST_FAILED_ON_MEMORY;
2551    atok = ARC_ItoX(new_arc_id);
2552    atoken = ARC_XtoP(atok);
2553    atoken->olabel = add_tree_olabel_use;
2554#if DO_WEIGHTED_ADDWORD
2555    atoken->cost   = add_tree_cost_use;
2556#endif
2557    atoken->ilabel = *add_tree_late_start;
2558    atoken->fr_node = NODE_PtoX(late_start_node);
2559    if (atoken->ilabel == WORD_BOUNDARY)
2560    {
2561      atoken->cost = (costdata)(atoken->cost + fst->wtw_average);
2562    }
2563    else
2564    {
2565      atoken->cost   = add_tree_cost_use + fst->wtw_average;
2566      add_tree_cost_use = FREEcostdata;
2567    }
2568    IF_DEBUG_WDADD(atoken->olabel_str = fst->olabels->words[ atoken->olabel]);
2569    IF_DEBUG_WDADD(atoken->ilabel_str = fst->ilabels->words[ atoken->ilabel]);
2570    append_arc_leaving_node(fst, late_start_node, atok);
2571
2572    last_atok = atok;
2573    while (1)
2574    {
2575
2576      if (add_tree == add_tree_late_start)
2577        break;
2578      /* if there are other arcs leaving this node then joining
2579      earlier than here will result in over-generation, so don't
2580      if( atok!=end_arc && atok->linkl_next_arc != FSMARC_NULL)
2581      break;
2582      */
2583      /*
2584      also need boundary conditions checker here
2585      */
2586
2587      /* */
2588      IF_DEBUG_WDADD(printf_node(fst, early_end_node));
2589
2590      /* if( num_arcs_leaving( add_tree_end->fr_node->first_prev_arc->to_node) >1) {
2591      printf("merge stopped due to num_arcs leaving ..\n");
2592      printf_arc(fst, add_tree_end->fr_node->first_prev_arc->to_node->first_next_arc);
2593      break;
2594      } */
2595
2596      for (atok = early_end_node->first_prev_arc; atok != FSMARC_NULL; atok = atoken->linkl_prev_arc)
2597      {
2598        atoken = ARC_XtoP(atok);
2599	/* never pull before the slot marker */
2600	if(atoken->cost == DISABLE_ARC_COST) break;
2601	if(atoken->olabel>0 && atoken->olabel<fst->olabels->num_slots) break;
2602        fst_pull_arc_olabel(fst, atoken); /* fails are ok */
2603      }
2604
2605      prev_atoken = find_prev_arc_with_iolabels(fst, early_end_node,
2606                    *add_tree /*->ilabel*/,
2607                    WORD_EPSILON_LABEL,
2608                    NULL /*&append_to*/);
2609      if (!prev_atoken)
2610        break;
2611      else
2612      {
2613        /* this check is now inside find_prev_arc_with_iolabels */
2614        /* if( num_arcs_leaving(prev_atok->fr_node->first_prev_arc->to_node->first_next_arc)>1)*/
2615
2616        /* now look to merge earlier still */
2617        early_end_node = NODE_XtoP(prev_atoken->fr_node);
2618        add_tree--;
2619        if (add_tree == add_tree_late_start)
2620          break;
2621      }
2622    }
2623    add_tree_early_end = add_tree;
2624    for (add_tree = add_tree_late_start + 1; add_tree <= add_tree_early_end; add_tree++)
2625    {
2626      new_node_id = fst_get_free_node(fst);
2627      if (new_node_id == MAXnodeID)
2628        return FST_FAILED_ON_MEMORY;
2629      ntok = NODE_ItoX(new_node_id);
2630      ntoken = NODE_XtoP(ntok);
2631      new_arc_id = fst_get_free_arc(fst);
2632      if (new_arc_id == MAXarcID)
2633        return FST_FAILED_ON_MEMORY;
2634      atok = ARC_ItoX(new_arc_id);
2635      atoken = ARC_XtoP(atok);
2636      atoken->ilabel = *add_tree;
2637#if DO_WEIGHTED_ADDWORD
2638      atoken->cost   = FREEcostdata; /* idea: distribute add_tree_cost here;*/
2639#endif
2640      atoken->olabel = WORD_EPSILON_LABEL;
2641      atoken->fr_node = ntok;
2642      atoken->to_node = FSMNODE_NULL; /* filled in next loop */
2643      IF_DEBUG_WDADD(atoken->olabel_str = fst->olabels->words[atoken->olabel]);
2644      IF_DEBUG_WDADD(atoken->ilabel_str = fst->ilabels->words[atoken->ilabel]);
2645
2646      ntoken->un_ptr.first_next_arc = atok;
2647      ntoken->first_prev_arc = last_atok;
2648      TO_NODE(last_atok) = ntok;
2649      last_atok = atok;
2650    }
2651    TO_NODE(last_atok) = NODE_PtoX(early_end_node);
2652    append_arc_arriving_node(fst, early_end_node, last_atok);
2653  }
2654  return rc;
2655}
2656
2657
2658void append_arc_leaving_node(srec_context* fst, FSMnode* fr_node, FSMarc_ptr arc)
2659{
2660  FSMarc_ptr* atok = &fr_node->un_ptr.first_next_arc;
2661  FSMarc* atoken = ARC_XtoP(*atok);
2662  for (; (*atok) != FSMARC_NULL; atok = &atoken->linkl_next_arc)
2663  {
2664    atoken = ARC_XtoP(*atok);
2665  }
2666  *atok = arc;
2667  LINKL_NEXT(arc) = FSMARC_NULL;
2668}
2669void append_arc_arriving_node(srec_context* fst, FSMnode* to_node, FSMarc_ptr arc)
2670{
2671  FSMarc_ptr* atok = &to_node->first_prev_arc;
2672  FSMarc* atoken = ARC_XtoP(*atok);
2673  for (; (*atok) != FSMARC_NULL; atok = &atoken->linkl_prev_arc)
2674  {
2675    atoken = ARC_XtoP(*atok);
2676  }
2677  *atok = arc;
2678  LINKL_PREV(arc) = FSMARC_NULL;
2679}
2680
2681
2682int printf_node1(srec_context* fst, FSMnode* node)
2683{
2684  return 0;
2685}
2686int printf_arc1(srec_context* fst, char* msg, FSMarc* arc)
2687{
2688  char buffer[MAX_LINE_LENGTH];
2689  sprintf_arc(buffer, fst, arc);
2690  printf("%s%s\n", msg, buffer);
2691  return 0;
2692}
2693int sprintf_arc(char* buf, srec_context* fst, FSMarc* arc)
2694{
2695  int rc;
2696  FSMnode* to_node = NODE_XtoP(arc->to_node);
2697  arcID arc_index = (arcID)(arc - fst->FSMarc_list);
2698  if (to_node->un_ptr.first_next_arc == FSMARC_NULL)
2699    rc = sprintf(buf, "arc%hu\n", arc_index);
2700  else
2701  {
2702    rc = sprintf(buf, "arc%hu\t%hu,%hu\t%s\t%s\t%hu\n",
2703                 arc_index,
2704                 ARC_XtoI(to_node->un_ptr.first_next_arc),
2705                 arc->linkl_next_arc != FSMARC_NULL ? ARC_XtoI(arc->linkl_next_arc) : -1,
2706                 fst->ilabels->words[arc->ilabel],
2707                 fst->olabels->words[arc->olabel],
2708                 arc->cost);
2709  }
2710  return rc;
2711}
2712
2713/* dumps the recognition context as a binary file,
2714   it will also store any empty space for growing */
2715
2716#define ENCODE(SIGN,TYP) { SIGN+=sizeof(TYP); SIGN=SIGN<<3; ASSERT((shifted+=3)<32); }
2717
2718asr_int32_t FST_sizes_signature()
2719{
2720#ifndef NDEBUG
2721  int shifted = 0;
2722#endif
2723  asr_int32_t signature = 0;
2724  ENCODE(signature, arcID);
2725  ENCODE(signature, nodeID);
2726  ENCODE(signature, wordID);
2727  ENCODE(signature, labelID);
2728  ENCODE(signature, costdata);
2729  return signature;
2730}
2731
2732int FST_ContextImageSize(srec_context* context)
2733{
2734  int size = 0;
2735  size += sizeof(srec_context);
2736  size += sizeof(wordmap);
2737  size += sizeof(char*) * context->olabels->max_words;
2738  size += sizeof(char) * context->olabels->max_chars;
2739  size += sizeof(wordmap);
2740  if (context->ilabels->words != NULL)
2741    size += sizeof(char*) * context->olabels->max_words;
2742  if (context->ilabels->chars != NULL)
2743    size += sizeof(char) * context->olabels->max_chars;
2744  size += sizeof(FSMarc) * context->FSMarc_list_len;
2745  size += sizeof(FSMnode) * context->FSMnode_list_len;
2746  size += sizeof(FSMnode_info) * context->FSMnode_list_len;
2747  size += sizeof(arc_token) * context->arc_token_list_len;
2748  return size;
2749}
2750
2751ESR_ReturnCode serializeWordMapV2(wordmap *wordmap, PFile* fp)
2752{
2753  unsigned int i = 0;
2754  unsigned int nfields;
2755  unsigned int tmp2[32];
2756
2757  i = 0;
2758  tmp2[i++] = wordmap->num_words;
2759  tmp2[i++] = wordmap->num_slots;
2760  tmp2[i++] = wordmap->max_words;
2761  tmp2[i++] = wordmap->num_base_words;
2762  tmp2[i++] = wordmap->max_chars;
2763  tmp2[i++] = (wordmap->next_chars - wordmap->chars);
2764  tmp2[i++] = (wordmap->next_base_chars - wordmap->chars);
2765  nfields = i;
2766  if (pfwrite(tmp2, sizeof(tmp2[0]), nfields, fp) != nfields)
2767    return ESR_WRITE_ERROR;
2768
2769  if (pfwrite(wordmap->chars, sizeof(char), wordmap->max_chars, fp) != (size_t)wordmap->max_chars)
2770    return ESR_WRITE_ERROR;
2771
2772  return ESR_SUCCESS;
2773}
2774
2775ESR_ReturnCode deserializeWordMapV2(wordmap **pwordmap, PFile* fp)
2776{
2777  unsigned int i = 0;
2778  unsigned int nfields;
2779  unsigned int tmp2[32];
2780  wordmap *awordmap;
2781  char *p;
2782  ESR_ReturnCode rc = ESR_SUCCESS;
2783  unsigned int next_chars_idx, next_base_chars_idx;
2784
2785  awordmap = NEW(wordmap, L("srec.g2g.graph.wordmap.base"));
2786  if (awordmap == NULL)
2787  {
2788    PLogError("NEW failed on srec.g2g.graph.wordmap.base\n");
2789    rc = ESR_OUT_OF_MEMORY;
2790    goto CLEANUP;
2791  }
2792  awordmap->wordIDForWord = NULL; // early break to cleanup needs this
2793
2794  nfields = 7;
2795  if (pfread(tmp2, sizeof(tmp2[0]), nfields, fp) != nfields)
2796  {
2797    PLogError("pfread failed when reading nfields\n");
2798    rc = ESR_READ_ERROR;
2799    goto CLEANUP;
2800  }
2801
2802  i = 0;
2803  awordmap->num_words      = (wordID)tmp2[i++];
2804  awordmap->num_slots      = (wordID)tmp2[i++];
2805  awordmap->max_words      = (wordID)tmp2[i++];
2806  awordmap->num_base_words = (wordID)tmp2[i++];
2807  awordmap->max_chars      = tmp2[i++];
2808  next_chars_idx           = tmp2[i++];
2809  next_base_chars_idx      = tmp2[i++];
2810  ASSERT(nfields == i);
2811
2812  awordmap->words = NEW_ARRAY(char*, awordmap->max_words, L("srec.g2g.graph.wordmap.words"));
2813  if (awordmap->words == NULL)
2814  {
2815    PLogError("NEW_ARRAY failed for srec.g2g.graph.wordmap.words %d\n", awordmap->max_words);
2816    rc = ESR_OUT_OF_MEMORY;
2817    goto CLEANUP;
2818  }
2819  awordmap->chars = NEW_ARRAY(char, awordmap->max_chars, L("srec.g2g.graph.wordmap.chars"));
2820  if (awordmap->chars == NULL)
2821  {
2822    PLogError("NEW_ARRAY failed for srec.g2g.graph.wordmap.chars %d\n", awordmap->max_chars);
2823    rc = ESR_OUT_OF_MEMORY;
2824    goto CLEANUP;
2825  }
2826  awordmap->next_chars = awordmap->chars + next_chars_idx;
2827  awordmap->next_base_chars = awordmap->chars + next_base_chars_idx;
2828
2829  if ((i=pfread(awordmap->chars, sizeof(char), awordmap->max_chars, fp)) != (size_t)awordmap->max_chars)
2830  {
2831    PLogError("pfread failed while reading %d chars\n", awordmap->max_chars);
2832    rc = ESR_READ_ERROR;
2833    goto CLEANUP;
2834  }
2835
2836
2837  p = awordmap->chars;
2838  ASSERT((uintptr_t)p % 2 == 0);
2839  nfields = 0;
2840  if (nfields < awordmap->num_words)
2841    awordmap->words[nfields++] = p;
2842  for (; p < awordmap->next_chars; p++) // was next_base_chars
2843  {
2844    if ( ( *p ) == '\0' )
2845    {
2846      if (nfields == awordmap->num_words) // was num_base_words
2847        break;
2848      if (((uintptr_t)p) % 2 == 0) p++; /* so that words begin on even byte bound */
2849      awordmap->words[nfields++] = p + 1;
2850    }
2851  }
2852  ASSERT(nfields == awordmap->num_words); // was num_base_words
2853
2854  if (awordmap->max_words >= awordmap->num_base_words)
2855    {
2856      PHashTableArgs hashArgs;
2857      hashArgs.capacity = awordmap->max_words;
2858      if(hashArgs.capacity%2==0) hashArgs.capacity += 1;
2859      hashArgs.compFunction = HashCmpWord; //PHASH_TABLE_DEFAULT_COMP_FUNCTION;
2860      hashArgs.hashFunction = HashGetCode; //PHASH_TABLE_DEFAULT_HASH_FUNCTION;
2861      hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
2862      CHKLOG(rc, PHashTableCreate(&hashArgs, L("srec.graph.wordmap.wordIDForWord.deserializeWordMap()"), &awordmap->wordIDForWord));
2863
2864      rc = wordmap_populate ( awordmap, awordmap->num_words );
2865
2866      if (rc != ESR_SUCCESS)
2867	{
2868        wordmap_clean ( awordmap );
2869        goto CLEANUP;
2870      }
2871    }
2872  else
2873    {
2874      awordmap->wordIDForWord = NULL;
2875    }
2876
2877  /* success */
2878  *pwordmap = awordmap;
2879  return ESR_SUCCESS;
2880
2881 CLEANUP:
2882  if (awordmap != NULL)
2883  {
2884    if (awordmap->wordIDForWord != NULL)
2885      PHashTableDestroy(awordmap->wordIDForWord);
2886    if (awordmap->words != NULL) FREE(awordmap->words);
2887    if (awordmap->chars != NULL) FREE(awordmap->chars);
2888    FREE(awordmap);
2889  }
2890  return rc;
2891}
2892
2893static ESR_ReturnCode serializeArcToken(arc_token *token_base,
2894                                        int i,
2895                                        PFile* fp)
2896{
2897  arc_token *token = token_base + i;
2898  asr_uint32_t idx;
2899
2900  if (pfwrite(&token->ilabel, 2, 1, fp) != 1)
2901    return ESR_WRITE_ERROR;
2902
2903  if (pfwrite(&token->olabel, 2, 1, fp) != 1)
2904    return ESR_WRITE_ERROR;
2905
2906  /* if (pfwrite(&token->cost, 2, 1, fp) != 1)
2907     return ESR_WRITE_ERROR; */
2908  idx = PTR_TO_IDX(ARC_TOKEN_PTR(token_base, token->first_next_arc), token_base);
2909
2910  if (pfwrite(&idx, 4, 1, fp) != 1)
2911    return ESR_WRITE_ERROR;
2912
2913  idx = PTR_TO_IDX(ARC_TOKEN_PTR(token_base, token->next_token_index), token_base);
2914
2915  if (pfwrite(&idx, 4, 1, fp) != 1)
2916    return ESR_WRITE_ERROR;
2917
2918  return ESR_SUCCESS;
2919}
2920
2921static ESR_ReturnCode serializeArcTokenInfo(srec_context *context,
2922    PFile* fp)
2923{
2924  int i;
2925  asr_uint32_t idx;
2926  ESR_ReturnCode rc;
2927
2928  if (pfwrite(&context->arc_token_list_len, 2, 1, fp) != 1)
2929    return ESR_WRITE_ERROR;
2930
2931  idx = PTR_TO_IDX(context->arc_token_freelist, context->arc_token_list);
2932
2933  if (pfwrite(&idx, 4, 1, fp) != 1)
2934    return ESR_WRITE_ERROR;
2935
2936  idx = PTR_TO_IDX(context->arc_token_insert_start, context->arc_token_list);
2937
2938  if (pfwrite(&idx, 4, 1, fp) != 1)
2939    return ESR_WRITE_ERROR;
2940
2941  for (i = 0; i < context->arc_token_list_len; ++i)
2942  {
2943    rc = serializeArcToken(context->arc_token_list, i, fp);
2944    if (rc != ESR_SUCCESS) return rc;
2945  }
2946  return ESR_SUCCESS;
2947}
2948
2949static ESR_ReturnCode deserializeArcToken(arc_token *token_base,
2950    int i,
2951    PFile* fp)
2952{
2953  arc_token *token = token_base + i;
2954  asr_uint32_t idx[2];
2955  asr_uint16_t labels[2];
2956
2957  if (pfread(labels, 2, 2, fp) != 2)
2958    return ESR_READ_ERROR;
2959
2960  token->ilabel = labels[0];
2961  token->olabel = labels[1];
2962
2963  /* if (pfread(&token->cost, 2, 1, fp) != 1)
2964     return ESR_READ_ERROR; */
2965
2966  if (pfread(idx, 4, 2, fp) != 2)
2967    return ESR_READ_ERROR;
2968
2969  token->first_next_arc = ARC_TOKEN_PTR2LNK(token_base, IDX_TO_PTR(idx[0], token_base));
2970  token->next_token_index = ARC_TOKEN_PTR2LNK(token_base, IDX_TO_PTR(idx[1], token_base));
2971
2972  return ESR_SUCCESS;
2973}
2974
2975static ESR_ReturnCode deserializeArcTokenInfo(srec_context *context,
2976    PFile* fp)
2977{
2978  ESR_ReturnCode rc;
2979  int i;
2980  asr_uint32_t idx;
2981
2982  if (pfread(&context->arc_token_list_len, 2, 1, fp) != 1) {
2983    PLogError("pfread failed in deserializeArcTokenInfo()\n");
2984    return ESR_READ_ERROR;
2985  }
2986
2987  context->arc_token_list = NEW_ARRAY(arc_token, context->arc_token_list_len,
2988                                      L("srec.graph.wordgraph"));
2989
2990  if (context->arc_token_list == NULL)
2991    return ESR_OUT_OF_MEMORY;
2992
2993  if (pfread(&idx, 4, 1, fp) != 1)
2994  {
2995    rc = ESR_READ_ERROR;
2996    goto CLEANUP;
2997  }
2998
2999  context->arc_token_freelist =
3000    IDX_TO_PTR(idx, context->arc_token_list);
3001
3002  if (pfread(&idx, 4, 1, fp) != 1)
3003  {
3004    rc = ESR_READ_ERROR;
3005    goto CLEANUP;
3006  }
3007
3008  context->arc_token_insert_start =
3009    IDX_TO_PTR(idx, context->arc_token_list);
3010
3011  for (i = 0; i < context->arc_token_list_len; ++i)
3012  {
3013    rc = deserializeArcToken(context->arc_token_list, i, fp);
3014    if (rc != ESR_SUCCESS) goto CLEANUP;
3015  }
3016  return ESR_SUCCESS;
3017
3018CLEANUP:
3019  FREE(context->arc_token_list);
3020  context->arc_token_list =
3021    context->arc_token_freelist =
3022      context->arc_token_insert_start = NULL;
3023  return rc;
3024}
3025
3026int FST_GetGrammarType(srec_context* context)
3027{
3028  arc_token *t, *u;
3029  wordID expected_wdid;
3030  /* 0 1 0
3031     1 2 4
3032     ...
3033     1 2 1316
3034     2
3035  */
3036  t = context->arc_token_list;
3037  if (t->ilabel != WORD_EPSILON_LABEL)
3038    return GrammarTypeBNF;
3039  if (t->next_token_index)
3040    return GrammarTypeBNF;
3041  t = ARC_TOKEN_PTR(context->arc_token_list, t->first_next_arc);
3042  expected_wdid = NUM_ITEMLIST_HDRWDS;
3043  for (; t; t = ARC_TOKEN_PTR(context->arc_token_list, t->next_token_index))
3044  {
3045    if (t->ilabel != expected_wdid)
3046      return GrammarTypeBNF;
3047    u = ARC_TOKEN_PTR(context->arc_token_list, t->first_next_arc);
3048    if (u != NULL && u->ilabel != MAXwordID)
3049      return GrammarTypeBNF;
3050    expected_wdid++;
3051  }
3052  if (expected_wdid != context->olabels->num_words)
3053    return GrammarTypeBNF;
3054  return GrammarTypeItemList;
3055}
3056
3057int FST_DumpContextAsImageV2(srec_context* context, PFile* fp)
3058{
3059  asr_uint32_t header[4];
3060  arcID tmp[32], i, j, nfields;
3061  FSMarc* arc;
3062  ESR_ReturnCode rc;
3063#if !defined(DO_ARCS_IO_IN_ARC_ORDER)
3064  FSMnode* node;
3065  arcID arcid, num_arcs, num_arcs_written;
3066#endif
3067
3068  /* Write header information. */
3069  header[0] = 0;
3070  header[1] = IMAGE_FORMAT_V2;
3071  header[2] = context->modelid;
3072  header[3] = context->grmtyp;
3073#ifdef SREC_ENGINE_VERBOSE_LOGGING
3074  PLogMessage("FST_DumpContextAsImageV2() fstgrmtyp %d", context->grmtyp);
3075#endif
3076
3077  if (pfwrite(header, sizeof(header[0]), 4, fp) != 4)
3078  {
3079    PLogError("FST_DumpContextAsImage: Could not write header.\n");
3080    return FST_FAILED_INTERNAL;
3081  }
3082
3083  /* arcs and nodes */
3084  i = 0;
3085  tmp[i++] = context->num_arcs;
3086  tmp[i++] = context->FSMarc_list_len;
3087  tmp[i++] = context->num_base_arcs;
3088  tmp[i++] = context->FSMarc_freelist;
3089  tmp[i++] = context->max_searchable_arcs;
3090
3091  tmp[i++] = context->num_nodes;
3092  tmp[i++] = context->FSMnode_list_len;
3093  tmp[i++] = context->num_base_nodes;
3094  tmp[i++] = context->FSMnode_freelist;
3095  tmp[i++] = context->start_node;
3096  tmp[i++] = context->end_node;
3097  tmp[i++] = context->max_searchable_nodes;
3098
3099  tmp[i++] = context->beg_silence_word;
3100  tmp[i++] = context->end_silence_word;
3101  tmp[i++] = context->hack_silence_word;
3102  tmp[i++] = context->hmm_ilabel_offset;
3103
3104  nfields = i;
3105
3106  if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3107    return ESR_WRITE_ERROR;
3108#ifdef SREC_ENGINE_VERBOSE_LOGGING
3109  PLogMessage("G2G done WR hdrs %d", pftell(fp));
3110#endif
3111
3112#if defined(DO_ARCS_IO_IN_ARC_ORDER)
3113  if( 1) {
3114	 i = 0;
3115     tmp[i++] = context->end_node;
3116     tmp[i++] = MAXnodeID;
3117     tmp[i++] = MAXmodelID;
3118     tmp[i++] = MAXwordID;
3119     tmp[i++] = FREEcostdata;
3120     nfields = i;
3121     if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3122        return ESR_WRITE_ERROR;
3123  }
3124  for(j=0; j<context->num_arcs; j++) {
3125        arc = &context->FSMarc_list[j];
3126        i = 0;
3127        tmp[i++] = arc->fr_node;
3128        tmp[i++] = arc->to_node;
3129	ASSERT(arc->to_node == context->end_node || arc->to_node != MAXnodeID);
3130        tmp[i++] = arc->ilabel;
3131        tmp[i++] = arc->olabel;
3132        tmp[i++] = arc->cost;
3133        nfields = i;
3134        if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3135          return ESR_WRITE_ERROR;
3136  }
3137#else
3138  num_arcs_written = 0;
3139  for (j = 0; j < context->num_nodes; j++) {
3140    node = &context->FSMnode_list[j];
3141    num_arcs = (arcID)num_arcs_leaving(context, node);
3142    arcid = node->un_ptr.first_next_arc;
3143    if (arcid == MAXarcID) {
3144      i = 0;
3145      tmp[i++] = (arcID)j;
3146      tmp[i++] = MAXarcID;
3147      tmp[i++] = MAXarcID;
3148      tmp[i++] = MAXarcID;
3149      tmp[i++] = MAXarcID;
3150      nfields = i;
3151      if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3152        return ESR_WRITE_ERROR;
3153      /* num_arcs_written++; end node don't have a to-arc */
3154    }
3155    else {
3156      for (; arcid != MAXarcID; arcid = arc->linkl_next_arc) {
3157        arc = &context->FSMarc_list[arcid];
3158        i = 0;
3159        tmp[i++] = (arcID)j;
3160        tmp[i++] = arc->to_node;
3161	ASSERT(arc->to_node == context->end_node || arc->to_node != MAXnodeID);
3162        tmp[i++] = arc->ilabel;
3163        tmp[i++] = arc->olabel;
3164        tmp[i++] = arc->cost;
3165        nfields = i;
3166        if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3167          return ESR_WRITE_ERROR;
3168        num_arcs_written++;
3169      }
3170    }
3171  }
3172  ASSERT(num_arcs_written == context->num_arcs);
3173#endif
3174
3175#ifdef SREC_ENGINE_VERBOSE_LOGGING
3176  PLogMessage("G2G done WR arcs %d", pftell(fp));
3177#endif
3178
3179  /* node info   .. will now be calculated on line */
3180  /* wrapup_cost .. will now be calculated on line */
3181
3182  /* exit points for slots */
3183  if (1)
3184  {
3185    srec_fsm_exit_point *p, *q;
3186    if (pfwrite(&context->num_fsm_exit_points, 2, 1, fp) != 1)
3187      return ESR_WRITE_ERROR;
3188    p = context->fsm_exit_points;
3189    q = p + MAX_NUM_SLOTS;
3190    while (p < q)
3191    {
3192      if (pfwrite(&p->from_node_index, 2, 1, fp) != 1)
3193        return ESR_WRITE_ERROR;
3194      if (pfwrite(&p->arc_index, 2, 1, fp) != 1)
3195        return ESR_WRITE_ERROR;
3196      if (pfwrite(&p->wbto_node_index, 2, 1, fp) != 1)
3197        return ESR_WRITE_ERROR;
3198      ++p;
3199    }
3200  }
3201#ifdef SREC_ENGINE_VERBOSE_LOGGING
3202  PLogMessage("G2G done WR exits %d", pftell(fp));
3203#endif
3204  /* no entry points stuff */
3205
3206  /* no saving ilabels */
3207
3208  /* now load the ilabels */
3209  rc = serializeWordMapV2(context->olabels, fp);
3210  if (rc != ESR_SUCCESS)
3211    return rc;
3212#ifdef SREC_ENGINE_VERBOSE_LOGGING
3213  PLogMessage("G2G done WR wmap %d", pftell(fp));
3214#endif
3215
3216  if (context->grmtyp != GrammarTypeItemList)
3217  {
3218    if (serializeArcTokenInfo(context, fp) != ESR_SUCCESS)
3219    {
3220      PLogError("FST_DumpContextAsImage: could not write arc token data.\n");
3221      return FST_FAILED_INTERNAL;
3222    }
3223  }
3224  else
3225  {
3226    /* nothing, at decoding time, we'll just construct the graph
3227       from the word list, ie just this ..
3228       0 1 0
3229       1 2 4
3230       ...
3231       1 2 1316
3232       2
3233       ie
3234       0 1 0
3235       for(i=4;i<nwords;i++) { "1 2 i" }
3236       2
3237       because words 0-3 are eps,-pau-,-pau2-,slot@grammarname.grxml
3238    */
3239  }
3240
3241  /* Write header information. */
3242  header[0] = pftell(fp);
3243  header[1] = IMAGE_FORMAT_V2;
3244
3245  /* goto beginning of file. */
3246  if (pfseek(fp, 0, SEEK_SET))
3247  {
3248    PLogError("FST_DumpContextAsImage: could not reposition for header.\n");
3249    return FST_FAILED_INTERNAL;
3250  }
3251
3252  if (pfwrite(header, 4, 2, fp) != 2)
3253  {
3254    PLogError("FST_DumpContextAsImage: Could not write header.\n");
3255    return FST_FAILED_INTERNAL;
3256  }
3257
3258  /* reset pointer at end of file. */
3259  if (pfseek(fp, 0, SEEK_END))
3260  {
3261    PLogError("FST_DumpContextAsImage: could not reposition file pointer at end.\n");
3262    return FST_FAILED_INTERNAL;
3263  }
3264
3265  return FST_SUCCESS;
3266}
3267
3268
3269int FST_LoadContextFromImageV2(srec_context* fst, PFile* fp)
3270{
3271  asr_int32_t header[6];
3272  arcID tmp[32], num_arcs, new_arc_id;
3273  unsigned int i, nfields;
3274  srec_fsm_exit_point *p, *q;
3275  arcID max_num_FSMarcs;
3276  nodeID max_num_FSMnodes;
3277  FSMarc_ptr atok =  (FSMarc_ptr)0;
3278  FSMarc *atoken = NULL;
3279  FSMnode *fr_node, *to_node;
3280  int rc = FST_SUCCESS;
3281  ESR_ReturnCode esr_rc;
3282  ESR_BOOL seenFinalNode = ESR_FALSE;
3283
3284  /* read header information. */
3285  if (pfread(&header[2], sizeof(header[0]), 2, fp) != 2)
3286  {
3287    PLogError("FST_DumpContextAsImage: Could not read header.\n");
3288    return FST_FAILED_INTERNAL;
3289  }
3290  fst->modelid = header[2];
3291  fst->grmtyp  = header[3];
3292
3293  nfields = 16;
3294  if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
3295    return ESR_READ_ERROR;
3296
3297  /* arcs and nodes */
3298  i = 0;
3299  fst->num_arcs             = max_num_FSMarcs = tmp[i++];
3300  fst->FSMarc_list_len      = tmp[i++];
3301  fst->num_base_arcs        = tmp[i++];
3302  fst->FSMarc_freelist      = tmp[i++];
3303  fst->max_searchable_arcs  = tmp[i++] * 0; /*5*/
3304  fst->num_nodes            = max_num_FSMnodes = tmp[i++];
3305  fst->FSMnode_list_len     = tmp[i++];
3306  fst->num_base_nodes       = tmp[i++];
3307  fst->FSMnode_freelist     = tmp[i++];
3308  fst->start_node           = tmp[i++];/*10*/
3309  fst->end_node             = tmp[i++];
3310  fst->max_searchable_nodes = tmp[i++] * 0;
3311  fst->beg_silence_word     = tmp[i++];
3312  fst->end_silence_word     = tmp[i++];
3313  fst->hack_silence_word    = tmp[i++]; /*15*/
3314  fst->hmm_ilabel_offset    = tmp[i++]; /* 16 */
3315
3316  fst->addWordCaching_lastslot_name = 0;
3317  fst->addWordCaching_lastslot_num = MAXwordID;
3318  fst->addWordCaching_lastslot_needs_post_silence = ESR_FALSE;
3319
3320  ASSERT(i == nfields);
3321#ifdef SREC_ENGINE_VERBOSE_LOGGING
3322  PLogMessage("G2G done RD hdrs %d", pftell(fp));
3323#endif
3324
3325  ASSERT(fst->num_arcs >= fst->num_base_arcs); // was ==
3326  ASSERT(fst->num_nodes >= fst->num_base_nodes); // was ==
3327
3328  fst->FSMarc_list = (FSMarc*)CALLOC_CLR(fst->FSMarc_list_len, sizeof(FSMarc), "srec.graph.arcs");
3329  if (!fst->FSMarc_list)
3330  {
3331    rc = FST_FAILED_ON_MEMORY;
3332    PLogError("CALLOC_CLR fst->FSMarc_list \n");
3333    goto CLEANUP;
3334  }
3335  fst->FSMnode_list = (FSMnode*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode), "srec.graph.nodes");
3336  if (!fst->FSMnode_list)
3337  {
3338    rc = FST_FAILED_ON_MEMORY;
3339    PLogError("CALLOC_CLR fst->FSMnode_list  failed\n");
3340    goto CLEANUP;
3341  }
3342  fst->FSMnode_info_list = (FSMnode_info*)CALLOC_CLR(fst->FSMnode_list_len, sizeof(FSMnode_info), "srec.graph.nodeinfos");
3343
3344  if (!fst->FSMnode_info_list)
3345  {
3346    rc = FST_FAILED_ON_MEMORY;
3347    PLogError("CALLOC_CLR fst->FSMnode_info_list failed\n");
3348    goto CLEANUP;
3349  }
3350
3351  /* setup the arc freelist */
3352  fst->FSMarc_freelist = 0;
3353  fst->num_arcs = 0;
3354  for (i = 0; i < (unsigned)fst->FSMarc_list_len - 1; i++)
3355  {
3356    fst->FSMarc_list[i].linkl_next_arc = ARC_ItoX(i + 1);
3357    fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
3358  }
3359  fst->FSMarc_list[i].linkl_next_arc = FSMARC_NULL;
3360  fst->FSMarc_list[i].linkl_prev_arc = FSMARC_FREE;
3361
3362  /* initialize the nodes, 'cuz reading is random order */
3363  for (i = 0; i < max_num_FSMnodes; i++)
3364  {
3365    fr_node = &fst->FSMnode_list[i];
3366    fr_node->un_ptr.first_next_arc = fr_node->first_prev_arc = FSMARC_NULL;
3367  }
3368
3369  /* 1. first load up all the information */
3370  num_arcs = 0;
3371  for (i = 0; i < max_num_FSMarcs || !seenFinalNode; i++)
3372  {
3373    if (i > max_num_FSMarcs && !seenFinalNode)
3374    {
3375      PLogError("Final node never encountered");
3376      rc = ESR_INVALID_STATE;
3377      goto CLEANUP;
3378    }
3379	nfields = 5;
3380    if (pfread(tmp,sizeof(tmp[0]),nfields,fp) != nfields)
3381    {
3382      PLogError("reading arc");
3383      rc = ESR_INVALID_STATE;
3384      goto CLEANUP;
3385    }
3386
3387    if (tmp[1] == MAXnodeID)
3388    {
3389      seenFinalNode = ESR_TRUE;
3390      if(fst->end_node != tmp[0]) {
3391	PLogError("error with arc %d->%d ilabel %d olabel %d cost %d\n", tmp[0], tmp[1], tmp[2], tmp[3], tmp[4]);
3392      ASSERT(fst->end_node == tmp[0]);
3393      }
3394      i--;
3395    }
3396    else
3397    {
3398      new_arc_id = fst_get_free_arc(fst);
3399      if (new_arc_id == MAXarcID)
3400        return FST_FAILED_ON_MEMORY;
3401      atok = ARC_ItoX(new_arc_id);
3402      atoken = ARC_XtoP(atok);
3403      num_arcs++;
3404
3405      ASSERT(tmp[0] < fst->num_nodes);
3406      ASSERT(tmp[1] < fst->num_nodes);
3407      fr_node = &fst->FSMnode_list[ tmp[0]];
3408      to_node = &fst->FSMnode_list[ tmp[1]];
3409      atoken->ilabel = tmp[2];
3410      atoken->olabel = tmp[3];
3411      atoken->cost   = tmp[4];
3412      append_arc_leaving_node(fst, fr_node, atok);
3413      append_arc_arriving_node(fst, to_node, atok);
3414      atoken->fr_node = NODE_ItoX(tmp[0]);
3415      atoken->to_node = NODE_ItoX(tmp[1]);
3416    }
3417  }
3418  ASSERT(fst->num_arcs == num_arcs);
3419#ifdef SREC_ENGINE_VERBOSE_LOGGING
3420  PLogMessage("G2G done RD arcs %d", pftell(fp));
3421#endif
3422
3423  /* setup the node freelist */
3424  if (fst->num_nodes < fst->FSMnode_list_len)
3425  {
3426    fst->FSMnode_freelist = fst->num_nodes;
3427    for (i = fst->num_nodes; i < (unsigned)fst->FSMnode_list_len - 1; i++)
3428    {
3429      fst->FSMnode_list[i].un_ptr.next_node = NODE_ItoX(i + 1);
3430      fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
3431    }
3432    fst->FSMnode_list[i].un_ptr.next_node = FSMNODE_NULL;
3433    fst->FSMnode_list[i].first_prev_arc = FSMARC_FREE;
3434  }
3435  else
3436  {
3437    fst->FSMnode_freelist = MAXnodeID;
3438  }
3439
3440  /* node info   .. will now be calculated on line */
3441  /* wrapup_cost .. will now be calculated on line */
3442  fst->whether_prepared = 0;
3443  fst_fill_node_info(fst);
3444  for (i = 0; i < fst->num_nodes; i++)
3445    fst->FSMnode_info_list[i] = NODE_INFO_UNKNOWN;
3446
3447  /* exit points for slots */
3448  if (pfread(&fst->num_fsm_exit_points, 2, 1, fp) != 1)
3449    return ESR_READ_ERROR;
3450  p = fst->fsm_exit_points;
3451  q = p + MAX_NUM_SLOTS;
3452  while (p < q)
3453  {
3454    if (pfread(&p->from_node_index, 2, 1, fp) != 1)
3455      return ESR_WRITE_ERROR;
3456    if (pfread(&p->arc_index, 2, 1, fp) != 1)
3457      return ESR_WRITE_ERROR;
3458    if (pfread(&p->wbto_node_index, 2, 1, fp) != 1)
3459      return ESR_WRITE_ERROR;
3460    ++p;
3461  }
3462#ifdef SREC_ENGINE_VERBOSE_LOGGING
3463  PLogMessage("G2G done RD exits %d", pftell(fp));
3464#endif
3465
3466  /* no entry points stuff */
3467
3468  /* ilabels were not saved, create them as empty */
3469  fst->ilabels = (wordmap*)CALLOC_CLR(1, sizeof(wordmap), "srec.graph.imap");
3470  fst->ilabels->num_words = fst->ilabels->max_words = 0;
3471  fst->ilabels->words = 0;
3472
3473  /* now save the olabels */
3474  esr_rc = deserializeWordMapV2(&fst->olabels, fp);
3475  if (esr_rc != ESR_SUCCESS)
3476  {
3477    PLogError("deserializeWordMapV2() failed\n");
3478    rc = FST_FAILED_INTERNAL;
3479    goto CLEANUP;
3480  }
3481#ifdef SREC_ENGINE_VERBOSE_LOGGING
3482  PLogMessage("G2G done RD wmap %d", pftell(fp));
3483#endif
3484
3485  /* arctokeninfo */
3486  if (fst->grmtyp != GrammarTypeItemList)
3487  {
3488    if (deserializeArcTokenInfo(fst, fp) != ESR_SUCCESS)
3489    {
3490      PLogError("FST_DumpContextAsImage: could not write arc token data.\n");
3491      rc = FST_FAILED_INTERNAL;
3492      goto CLEANUP;
3493    }
3494  }
3495  else
3496  {
3497    /* here we just construct an isolated list, but we should scrap it later */
3498    wordID wdid;
3499    arc_token *atl, *final, *atli;
3500    fst->arc_token_list_len = fst->olabels->num_words + 2 - NUM_ITEMLIST_HDRWDS;
3501    atl = NEW_ARRAY(arc_token, fst->arc_token_list_len, L("srec.graph.wordgraph"));
3502    if(!atl) {
3503      PLogError("fst->arc_token_list_len failed\n");
3504      goto CLEANUP;
3505    }
3506    atl->first_next_arc = ARC_TOKEN_LNK(atl, 1);
3507    atl->next_token_index = ARC_TOKEN_NULL;
3508    final = atl + fst->arc_token_list_len - 1;
3509    for (i = 1, wdid = NUM_ITEMLIST_HDRWDS; wdid < fst->olabels->num_words; i++, wdid++)
3510    {
3511      atli = &atl[i];
3512      atli->ilabel = wdid;
3513      atli->olabel = wdid;/*not used*/
3514      atli->next_token_index = ARC_TOKEN_LNK(atl, (i + 1));
3515      atli->first_next_arc = ARC_TOKEN_LNK(atl, (fst->arc_token_list_len - 1)); /*final*/
3516    }
3517    ASSERT(atl + i == final);
3518    final->next_token_index = ARC_TOKEN_NULL;
3519    final->first_next_arc = ARC_TOKEN_NULL;
3520    final->ilabel = final->olabel = 0;
3521    fst->arc_token_list = atl;
3522  }
3523  return FST_SUCCESS;
3524CLEANUP:
3525  return FST_FAILED_INTERNAL;
3526}
3527
3528
3529int FST_LoadContextFromImageV2(srec_context* context, PFile* fp);
3530
3531int FST_LoadContextFromImage(srec_context** pcontext, PFile* fp)
3532{
3533  srec_context* context = NULL;
3534  /* used to force correct data alignment */
3535  asr_int32_t header[2];
3536  int rc = FST_SUCCESS;
3537
3538  /*printf("FST_LoadContexFromImage\n");*/
3539  if (!fp)
3540  {
3541    PLogError("FST_LoadContextImage() failed; bad file pointer\n");
3542    return FST_FAILED_ON_INVALID_ARGS;
3543  }
3544
3545  context = NEW(srec_context, L("srec.graph.binary"));
3546  if (context == NULL)
3547  {
3548    PLogError("FST_LoadContextFromImage: out of memory while allocating context.\n");
3549    return FST_FAILED_ON_MEMORY;
3550  }
3551  memset(context, 0, sizeof(srec_context));
3552
3553  /* header */
3554  if (pfread(header, 4, 2, fp) != 2)
3555  {
3556    PLogError("FST_LoadContextFromImage: failed reading header.\n");
3557    rc = FST_FAILED_ON_INVALID_ARGS;
3558    goto CLEANUP;
3559  }
3560
3561  if (header[1] == IMAGE_FORMAT_V2)
3562  {
3563    rc = FST_LoadContextFromImageV2(context, fp);
3564    if (rc != FST_SUCCESS)
3565      goto CLEANUP;
3566  }
3567  else
3568  {
3569    PLogError("FST_LoadContextFromImage() failed on image_format\n");
3570    goto CLEANUP;
3571  }
3572  *pcontext = context;
3573  return FST_SUCCESS;
3574CLEANUP:
3575  if (context) FREE(context);
3576  *pcontext = 0;
3577  return rc;
3578}
3579
3580
3581int FST_DumpReverseWordGraph(srec_context* context, PFile* fp)
3582{
3583  /* not implemented, use FST_DumpSyntaxAsImage() for now */
3584  return 1;
3585}
3586
3587/* this belongs part of the srec_context */
3588typedef struct
3589{
3590  asr_int32_t image_format;
3591  asr_int32_t image_size;
3592  asr_int32_t sizes_signature;
3593}
3594context_image_header;
3595
3596
3597int FST_LoadContextFromStreamImage(srec_context** pcontext, char* buffer, asr_int32_t buffer_len)
3598{
3599  return FST_FAILED_INTERNAL;
3600}
3601
3602typedef struct nodeID_list_t
3603{
3604  nodeID *nodes, num_nodes, num_alloc;
3605}
3606nodeID_list;
3607
3608int fst_node_has_speech_to_come(srec_context* context, nodeID node_index)
3609{
3610  /* recursive func ... try to keep stack use low! */
3611  /* later we should cache this information:
3612     need 2bits:  endn opte regl dunno */
3613  FSMarc* arc;
3614  arcID arc_index = context->FSMnode_list[ node_index].un_ptr.first_next_arc;
3615  for (; arc_index != MAXarcID; arc_index = arc->linkl_next_arc)
3616  {
3617    arc = &context->FSMarc_list[arc_index];
3618    if (arc->ilabel >= context->hmm_ilabel_offset + EPSILON_OFFSET + NUM_SILENCE_HMMS)
3619    {
3620      return 1;
3621    }
3622    else if (arc->ilabel < EPSILON_OFFSET)
3623    {
3624      if (fst_node_has_speech_to_come(context, arc->to_node))
3625      {
3626        return 1;
3627      }
3628    }
3629    else if (IS_SILENCE_ILABEL(arc->ilabel, context))
3630    {
3631      /* another silence! */
3632      if (fst_node_has_speech_to_come(context, arc->to_node))
3633        return 1;
3634    }
3635  }
3636  return 0;
3637}
3638
3639int fst_fill_node_info(srec_context* context)
3640{
3641  char* infos = context->FSMnode_info_list;
3642  nodeID_list optends_obj;
3643  nodeID_list *optends = &optends_obj;
3644
3645  FSMnode* node;
3646  FSMarc* arc;
3647  nodeID i, j, node_index;
3648  arcID arc_index;
3649  costdata wrapup_cost;
3650  int wrapup_cost_inconsistencies;
3651  /* int num_near_end_nodes; nodes within EPS or silence to the end, these
3652     are called NODE_INFO_ENDNODES, so that they can be excluded from the
3653     comparison with end_node score in the eos detection */
3654
3655  optends->num_nodes    = 1;
3656  optends->num_alloc    = 8192; /* scaled up from 512 to support dynamic grammar add word of 5000 */
3657  optends->nodes        = (nodeID*)CALLOC(optends->num_alloc, sizeof(nodeID), "srec.tmp.optendnodes");
3658  optends->nodes[0]     = context->end_node;
3659  /* num_near_end_nodes = 0; */
3660
3661  for (i = 0; i < optends->num_nodes; i++)
3662  {
3663    node_index = optends->nodes[i];
3664    node = &context->FSMnode_list[ node_index];
3665    for (arc_index = node->first_prev_arc; arc_index != MAXarcID;
3666         arc_index = arc->linkl_prev_arc)
3667    {
3668      arc = &context->FSMarc_list[arc_index];
3669      if (arc->fr_node != node_index)
3670      {
3671
3672        if (IS_SILENCE_ILABEL(arc->ilabel, context) || arc->ilabel < EPSILON_OFFSET)
3673        {
3674          /* ok, fr_node goes to the end via silence, check for dups */
3675          for (j = 0; j < optends->num_nodes; j++)
3676            if (optends->nodes[j] == arc->fr_node) break;
3677          /* append it to the list */
3678          if (j == optends->num_nodes)
3679          {
3680            if (optends->num_nodes < optends->num_alloc)
3681              optends->nodes[ optends->num_nodes++] = arc->fr_node;
3682            else
3683            {
3684              ASSERT(0 && "allocated too few optend nodes");
3685              return 0;
3686            }
3687          }
3688        }
3689      }
3690    }
3691  }
3692
3693  /* now set the info array, default is regular */
3694  for (i = 0; i < context->num_nodes; i++)
3695    infos[i] = NODE_INFO_REGULAR;
3696  /* also set the other nodes, we'll check whether nodes were added
3697     via these flags */
3698  for (; i < context->FSMnode_list_len; i++)
3699    infos[i] = NODE_INFO_UNKNOWN;
3700  infos[ context->end_node] = NODE_INFO_ENDNODE;
3701
3702  /* get rid of direct to end nodes, etc */
3703  for (i = 0, j = 0; i < optends->num_nodes; i++)
3704  {
3705    optends->nodes[j] = optends->nodes[i];
3706    if (fst_node_has_speech_to_come(context, optends->nodes[i]))
3707    {
3708      j++;
3709      infos[ optends->nodes[i]] = NODE_INFO_OPTENDN;
3710    }
3711    else
3712    {
3713      infos[ optends->nodes[i]] = NODE_INFO_ENDNODE;
3714      /* num_near_end_nodes++; */
3715    }
3716  }
3717  optends->num_nodes = j;
3718
3719  /* printf("get_oppend_nodes (%d)", optends->num_nodes);
3720     for(i=0; i<optends->num_nodes; i++) printf(" %d", optends->nodes[i]);
3721     printf("\n");
3722  */
3723
3724  FREE(optends->nodes);
3725
3726  /* find the wrapup cost */
3727  node = &context->FSMnode_list[ context->end_node];
3728  wrapup_cost = MAXcostdata;
3729  wrapup_cost_inconsistencies = 0;
3730  for (arc_index = node->first_prev_arc; arc_index != MAXarcID;
3731       arc_index = arc->linkl_prev_arc)
3732  {
3733    arc = &context->FSMarc_list[ arc_index];
3734    if (IS_SILENCE_ILABEL(arc->ilabel, context) &&
3735        arc->olabel == context->end_silence_word)
3736    {
3737      if (wrapup_cost == MAXcostdata)
3738        wrapup_cost = arc->cost;
3739      else if (context->wrapup_cost != arc->cost)
3740      {
3741        wrapup_cost = arc->cost;
3742        wrapup_cost_inconsistencies++;
3743      }
3744    }
3745  }
3746#define MIN_WRAPUP_COST 100
3747  context->wtw_average = wrapup_cost;
3748  if (context->wtw_average > 200)
3749    context->wtw_average = 200;
3750  if (context->wrapup_cost < MIN_WRAPUP_COST)
3751    context->wrapup_cost = MIN_WRAPUP_COST;
3752  /*log_report("context->wrapup_cost %d (%d)\n", context->wrapup_cost,
3753    wrapup_cost_inconsistencies);*/
3754  return 0;
3755}
3756
3757int fst_alloc_transit_points(srec_context* context)
3758{
3759  arcID i;
3760  wordID num_slots = context->olabels->num_slots;
3761  wordID olabel;
3762  asr_int16_t nfxps = 0;
3763  FSMarc* arc;
3764  FSMnode* node;
3765
3766  context->num_fsm_exit_points = 0;
3767  /* slot 0 is invalid, it is the "eps", so num_slots==1 means no slots! */
3768  if (num_slots == 1)
3769    return 0;
3770
3771  /* scan through to finds arc that carry rule references */
3772  for (i = 0; i < context->num_arcs; i++)
3773  {
3774    olabel =  context->FSMarc_list[i].olabel;
3775    /* (wordID)(olabel-1) < (num_slots-1) ?? might work */
3776    if (olabel > 0 && olabel < num_slots)
3777    {
3778      context->FSMarc_list[i].cost = DISABLE_ARC_COST;
3779      if (nfxps >= MAX_NUM_SLOTS)
3780      {
3781        PLogError("error: too many fsm exit points in fsm, too many public rules referenced from here\n");
3782        return 0;
3783      }
3784      context->fsm_exit_points[nfxps].arc_index = i;
3785      context->fsm_exit_points[nfxps].from_node_index = context->FSMarc_list[i].fr_node;
3786      /* skip over the trailing silence and .wb labels */
3787      for (arc = &context->FSMarc_list[i]; arc->ilabel != WORD_BOUNDARY;)
3788      {
3789        node = &context->FSMnode_list[arc->to_node];
3790        arc =  &context->FSMarc_list[node->un_ptr.first_next_arc];
3791      }
3792      context->fsm_exit_points[nfxps].wbto_node_index = arc->to_node;
3793      nfxps++;
3794
3795    } /* olabel<num_slots */
3796  } /* i<num_arcs */
3797  context->num_fsm_exit_points  = nfxps;
3798  return 0;
3799}
3800
3801
3802/*
3803  cross-word modeling:
3804  for now we'll assume silence context on either side, later we'll create
3805  a graph with the right fan out on the edges of each slot.  At that point we
3806  may have routines like ... if(is_dead_end(arc)) continue;
3807
3808  word removal:
3809  we'll use refcounters on arcs, we can search the whole graph for the
3810  olabel, then remove forward and remove backwards
3811
3812  word addition:
3813  glenville ^ hmm183 hmm222 hmm162 hmm246 hmm346 hmm191 hmm219
3814
3815  set imap=C:/users/dahan/speech2go/fst/enu_d2f_fray_g/model.map
3816  set omap=C:/users/dahan/speech2go/fst/enu_d2f_fray_g/namesnnumsSC.map
3817  wintel/debug/addword.exe -min
3818  $SWISDK/bin/fsmcompileD -t -i $imap -o $omap -F out.PCLG out.PCLG.txt
3819  $SWISDK/bin/fsmdrawD -i $imap -o $omap out.PCLG > out.PCLG.dot
3820  dotty out.PCLG.dot
3821
3822  need
3823  - a sanity check
3824  - remove word
3825  - multiple pronunciations handling
3826  - homonyms handling, other boundary conditions checking
3827  - measure the speed
3828
3829  alan_adams al~#ad}z
3830  alan_adams al~ad}z
3831  paul_adams p{l#ad}z
3832
3833
3834
3835*/
3836