1/*---------------------------------------------------------------------------*
2 *  SemanticGraphImpl.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include "SR_SemprocPrefix.h"
21#include "SR_SemprocDefinitions.h"
22#include "SR_SemanticGraph.h"
23#include "SR_SemanticGraphImpl.h"
24#include "SR_SemanticProcessorImpl.h"
25#include "ESR_ReturnCode.h"
26#include "passert.h"
27#include "pendian.h"
28#include "plog.h"
29
30static const char* MTAG = __FILE__;
31#define AVG_SCRIPTS_PER_WORD 2.5
32#define SLOTNAME_INDICATOR "__"
33#define SLOTNAME_INDICATOR_LEN 2
34
/* Map a pointer to its element offset from base, as a 32-bit index.
   A NULL pointer maps to the reserved index 0xFFFFFFFF.
   Every argument use is parenthesized so expressions can be passed safely. */
#define PTR_TO_IDX(ptr, base) ((asr_uint32_t) ((ptr) == NULL ? 0xFFFFFFFFu : \
                               (asr_uint32_t)((ptr) - (base))))
/* Inverse of PTR_TO_IDX: the reserved index 0xFFFFFFFF maps back to NULL,
   any other index to base + idx. */
#define IDX_TO_PTR(idx, base) ((idx) == 0xFFFFFFFFu ? NULL : (base) + (idx))
38
39ESR_ReturnCode SR_SemanticGraphCreate(SR_SemanticGraph** self)
40{
41  SR_SemanticGraphImpl* impl;
42
43  if (self == NULL)
44  {
45    PLogError(L("ESR_INVALID_ARGUMENT"));
46    return ESR_INVALID_ARGUMENT;
47  }
48  impl = NEW(SR_SemanticGraphImpl, MTAG);
49  if (impl == NULL)
50  {
51    PLogError(L("ESR_OUT_OF_MEMORY"));
52    return ESR_OUT_OF_MEMORY;
53  }
54  /* do not assume NEW initialize impl as zero, do it here */
55  memset(impl, 0, sizeof(SR_SemanticGraphImpl));
56
57  impl->Interface.destroy = &SR_SemanticGraph_Destroy;
58  impl->Interface.unload = &SR_SemanticGraph_Unload;
59  impl->Interface.load = &SR_SemanticGraph_Load;
60  impl->Interface.save = &SR_SemanticGraph_Save;
61  impl->Interface.addWordToSlot = &SR_SemanticGraph_AddWordToSlot;
62  impl->Interface.reset = &SR_SemanticGraph_Reset;
63  impl->script_olabel_offset = SEMGRAPH_SCRIPT_OFFSET;
64  impl->scopes_olabel_offset = SEMGRAPH_SCOPE_OFFSET;
65
66  *self = (SR_SemanticGraph*) impl;
67  return ESR_SUCCESS;
68}
69
70
71/**
72 * Default implementation.
73 */
74ESR_ReturnCode SR_SemanticGraph_Destroy(SR_SemanticGraph* self)
75{
76  SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
77
78  if (self == NULL)
79  {
80    PLogError(L("ESR_INVALID_ARGUMENT"));
81    return ESR_INVALID_ARGUMENT;
82  }
83
84  FREE(impl);
85  return ESR_SUCCESS;
86}
87
88ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp);
89
90
91/* private function */
92ESR_ReturnCode SR_SemanticGraph_LoadFromImage(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* g2g)
93{
94  SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
95  PFile* fp = NULL;
96  struct
97  {
98    asr_uint32_t rec_context_image_size;
99    /*  image data size of the recognition graph */
100    asr_uint32_t format;
101  }
102  header;
103  ESR_ReturnCode rc = ESR_SUCCESS;
104  ESR_BOOL isLittleEndian;
105  /*
106    #if __BYTE_ORDER==__LITTLE_ENDIAN
107    isLittleEndian = ESR_TRUE;
108    #else
109    isLittleEndian = ESR_FALSE;
110    #endif
111  */
112  isLittleEndian = ESR_TRUE;
113
114  fp = pfopen ( g2g, L("rb"));
115/*  CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
116  CHKLOG(rc, PFileOpen(fp, L("rb")));*/
117
118  if ( fp == NULL )
119    goto CLEANUP;
120
121  /* header */
122  if (pfread(&header, 4, 2, fp) != 2)
123  {
124    rc = ESR_READ_ERROR;
125    PLogError(ESR_rc2str(rc));
126    goto CLEANUP;
127  }
128
129  if (pfseek(fp, header.rec_context_image_size, SEEK_SET))
130  {
131    rc = ESR_READ_ERROR;
132    PLogError(L("ESR_READ_ERROR: could not seek to semgraph data"));
133    goto CLEANUP;
134  }
135
136  if (header.format == IMAGE_FORMAT_V2)
137  {
138    rc = sr_semanticgraph_loadV2(impl, ilabels, fp);
139  }
140  else
141  {
142    rc = ESR_INVALID_STATE;
143    PLogError("PCLG.txt P.txt inconsistency");
144    goto CLEANUP;
145  }
146
147CLEANUP:
148  if (fp)
149    pfclose (fp);
150  if (rc != ESR_SUCCESS)
151  {
152    if (impl->arc_token_list != NULL)
153    {
154      FREE(impl->arc_token_list);
155      impl->arc_token_list = NULL;
156    }
157  }
158  return rc;
159}
160
161static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
162    PFile* fp);
163
164static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
165    PFile* fp);
166
167ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp)
168{
169  unsigned int i, nfields;
170  ESR_ReturnCode rc = ESR_SUCCESS;
171  struct
172  {
173    asr_uint32_t format;
174    asr_uint32_t sgtype;
175  }
176  header;
177  asr_uint32_t tmp[32];
178
179  if (pfread(&header, 4/*sz*/, 2/*ni*/, fp) != 2)
180  {
181    rc = ESR_READ_ERROR;
182    PLogError(L("ESR_READ_ERROR: could not read V2"));
183    goto CLEANUP;
184  }
185
186  if (header.sgtype == GrammarTypeItemList)
187  {
188    /*
189      tmp = new unsigned short[num_words];
190      if( pfread( tmp, sizeof(tmp[0]), num_words, fp) != num_words) {
191      rc = ESR_READ_ERROR;
192      PLogMessage("can't read %d word script assocs\n", num_words);
193      goto CLEANUP;
194      }
195    */
196    /* convert these to an arc_token_list or whatever */
197    PLogError("not supported v2 itemlist type");
198    rc = ESR_INVALID_STATE;
199    goto CLEANUP;
200
201  }
202  else
203  {
204
205    nfields = 2;
206    if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
207    {
208      rc = ESR_WRITE_ERROR;
209      PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
210      goto CLEANUP;
211    }
212    i = 0;
213    impl->script_olabel_offset = (wordID)tmp[i++];
214    impl->scopes_olabel_offset = (wordID)tmp[i++];
215    ASSERT(i == nfields);
216
217    /* word arcs */
218    if ((rc = deserializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
219    {
220      PLogError(ESR_rc2str(rc));
221      goto CLEANUP;
222    }
223
224    /* use the ilabels provided externally (from recog graph ilabels) */
225    impl->ilabels = ilabels;
226
227    /* scopes */
228    if ((rc = deserializeWordMapV2(&impl->scopes_olabels, fp)) != ESR_SUCCESS)
229    {
230      PLogError(ESR_rc2str(rc));
231      goto CLEANUP;
232    }
233
234    /* scripts */
235    if ((rc = deserializeWordMapV2(&impl->scripts, fp)) != ESR_SUCCESS)
236    {
237      PLogError(ESR_rc2str(rc));
238      goto CLEANUP;
239    }
240  }
241CLEANUP:
242  return rc;
243}
244
245
246static arc_token_lnk get_first_arc_leaving_node1(arc_token* arc_token_list,
247    arcID num_arcs,
248    nodeID node)
249{
250  arcID i;
251  for (i = 0; i < num_arcs; i++)
252  {
253    if ((nodeID)(int)arc_token_list[i].next_token_index == node)
254      return ARC_TOKEN_LNK(arc_token_list, i);
255  }
256  return ARC_TOKEN_NULL;
257}
258
/* from srec_context.c */
/* Length of word including its terminator, rounded up to an even number. */
static int strlen_with_null(const char* word)
{
  int padded = strlen(word) + 1;
  return (padded % 2 == 1) ? padded + 1 : padded;
}
265/* private function */
266ESR_ReturnCode SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
267{
268  ESR_ReturnCode rc = ESR_FATAL_ERROR;
269  arcID num_scripts;
270  int isConstString = 0;
271  LCHAR filename[MAX_STRING_LEN];
272  LCHAR line[MAX_SCRIPT_LEN];
273  LCHAR iword[MAX_STRING_LEN];
274  LCHAR oword[MAX_SCRIPT_LEN];
275  LCHAR *p;
276  unsigned int max_num_arc_tokens;
277  nodeID from_node, into_node;
278  wordID ilabel = 0;
279  labelID olabel = 0;
280  arc_token *atoken;
281  arc_token *last_atoken;
282  costdata cost = 0;
283  arcID num_arcs;
284  arc_token* arc_token_list;
285  long fpos;
286  PFile* p_text_file = NULL;
287  PFile* scripts_file;
288  SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
289  size_t lineNo;
290  unsigned int i;
291  wordID num_scope_words;
292  asr_int32_t num_scope_chars;
293  LCHAR* _tMp;    /* used by IS_SCOPE_MARKER() below */
294
295  /* use the ilables that are provided externally (from recog graph ilabels) */
296  semgraph->ilabels = ilabels;
297
298
299
300  /* try to open the .script file */
301  LSTRCPY(filename, basename);
302  LSTRCAT(filename, ".script");
303  scripts_file = pfopen ( filename, L("r") );
304/*  CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &scripts_file));
305  CHKLOG(rc, PFileOpen(scripts_file, L("r")));*/
306
307  if ( scripts_file == NULL )
308  {
309    rc = ESR_OPEN_ERROR;
310    goto CLEANUP;
311  }
312
313  /* Load the scripts file
314    assumptions:
315
316  - the scripts file has each line ordered starting from 0 as such
317  <integer><space><script>
318
319  - the integer MUST become the index of the script in the wordmap
320
321  - output labels referenced in the semgraph are the integers (wordmap index) prepending with '_'
322
323  - output labels stored in the semgraph are actually integers which are equal to
324    script_olabel_offset + <integer>
325  */
326
327  /* determine number of words/chars to allocate */
328  fpos = pftell(scripts_file);
329  for (i = num_scripts = 0; pfgets(line, MAX_SCRIPT_LEN, scripts_file); num_scripts++)
330  {
331    size_t len = LSTRLEN(line) + 1;
332    if (len % 2) len++;
333    i = i + len; /* count the chars */
334  }
335  pfseek(scripts_file, fpos, SEEK_SET);
336
337  /* on each line I will have 1 big word */
338  /* figure that each script for dynamically added words will be a simple assignment
339     like myVar='someVal' ... which looks like almost 2.5 words, hence *2.5 */
340  wordmap_create(&semgraph->scripts, i, num_scripts, (int)AVG_SCRIPTS_PER_WORD*num_words_to_add);
341
342  /* load up all the information */
343  lineNo = 0;
344  while (pfgets(line, MAX_SCRIPT_LEN, scripts_file))
345  {
346    ASSERT( sizeof( iword[0]) == sizeof(char)); // else more code to write!
347    if (sscanf(line, "%s ", iword) == 1)
348    {
349      LSTRCPY(oword, line + LSTRLEN(iword) + 1);
350      /* may actually have spaces in it and this is messing me up ... here is the fix */
351      /* copy the line starting after the iword */
352      for (i = 0, p = line + LSTRLEN(iword) + 1; *p; p++)
353      {
354        if (*p == '\\')
355        {
356          if (isConstString)
357            oword[i++] = *p;
358          ++p;
359        }
360        else if (*p == '\'')
361          isConstString = (isConstString ? 0 : 1) ; /* toggle */
362        if (isConstString || !isspace(*p))
363          oword[i++] = *p;
364      }
365      oword[i] = '\0';
366
367      /* make sure that the index in the wordmap matches the line number */
368      if (wordmap_add_word(semgraph->scripts, oword) != lineNo)
369      {
370        PLogError(L("ESR_READ_ERROR: internal error adding script (%d)"), num_words_to_add);
371        return ESR_NO_MATCH_ERROR;
372      }
373      lineNo++;
374    }
375    else
376    {
377      PLogMessage(L("can't parse line %s"), line);
378      passert(0);
379    }
380  }
381  pfclose (scripts_file);
382
383  /* try to open the P.txt file */
384  LSTRCPY(filename, basename);
385  LSTRCAT(filename, ".P.txt");
386  p_text_file = pfopen ( filename, L("r"));
387/*  CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &p_text_file));
388  CHKLOG(rc, PFileOpen(p_text_file, L("r")));*/
389
390  if ( p_text_file == NULL )
391    goto CLEANUP;
392
393  /* determine number of word arcs to allocate */
394  fpos = pftell(p_text_file);
395  num_scope_words = 0;
396  num_scope_chars = 0;
397  for (num_arcs = 0; pfgets(line, MAX_STRING_LEN, p_text_file); ++num_arcs)
398  {
399    if (num_arcs == MAXarcID)
400      break; /* error */
401	if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
402    {
403		if (IS_SCOPE_MARKER(oword)) {
404			num_scope_words++;
405			num_scope_chars += strlen_with_null( oword);
406			if(num_scope_chars) num_scope_chars++ ;
407  }
408	}
409  }
410  max_num_arc_tokens = num_arcs + (arcID)num_words_to_add;
411  MEMCHK(rc, max_num_arc_tokens, MAXarcID);
412  pfseek(p_text_file, fpos, SEEK_SET);
413
414  semgraph->arc_token_list = NEW_ARRAY(arc_token,max_num_arc_tokens, L("semgraph.wordgraph"));
415  arc_token_list = semgraph->arc_token_list;
416  /* need to initialize my wordmap */
417  wordmap_create(&semgraph->scopes_olabels, num_scope_chars, num_scope_words,0); // max_num_arc_tokens);
418
419  /* 1. first load up all the information */
420  i = 0;
421  while (pfgets(line, MAX_STRING_LEN, p_text_file))
422  {
423    if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
424    {
425      /* the cost is 0 by default */
426      cost = 0;
427      /* since I am reading strings, and I want to store integers, I need to get
428      the index of the string by looking up in the ilabels wordmap */
429      ilabel = wordmap_find_index(ilabels, iword);
430
431      /* now for the olabels, depending on the type of the label, I either use the index directly
432      or save the index in a wordmap which will eventually give me the right index.
433      Remember that the index must be offset by a certain value depending on which wordmap I'm using */
434
435      if (IS_SCRIPT_MARKER(oword)) /* olabel type: script */
436      {
437        olabel = (labelID) atoi(&oword[1]);
438        olabel = (wordID)(olabel + semgraph->script_olabel_offset); /* the offset */
439      }
440      else if (IS_SCOPE_MARKER(oword)) /* olabel type: scope marker */
441      {
442        /* check if the label is already in the wordmap, and reuse index */
443        olabel = wordmap_find_index(semgraph->scopes_olabels, oword);
444
445        if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
446          olabel = wordmap_add_word(semgraph->scopes_olabels, oword);
447        olabel = (wordID)(olabel + semgraph->scopes_olabel_offset); /* the offset */
448      }
449      else /* olabel type: input symbols hopefully !!! */
450      {
451	/* if oword does not have a \t in the end, add a \t*/
452
453        /* check if the label is already in the wordmap, and reuse index */
454        olabel = wordmap_find_index(ilabels, oword);
455
456        if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
457          PLogMessage(L("output label not found: %s"), oword);
458      }
459
460    }
461    else if (sscanf(line, "%hu", &from_node) == 1)
462    {
463      into_node = MAXnodeID;
464      ilabel = MAXwordID;
465      olabel = MAXwordID;
466      cost = 0;
467    }
468    else
469    {
470      PLogMessage(L("can't parse line %s"), line);
471      passert(0);
472    }
473
474    /* okay, now that I have the data for the current arc, save it to the arc_token data structure*/
475    atoken = &arc_token_list[i];
476    ++i;
477
478    atoken->ilabel = ilabel;
479    atoken->olabel = olabel;
480    /* atoken->cost = cost; not used for now */
481
482    /* initially this stores INTEGERS !!! , I need to cross-reference the integers with the
483    appropriate arc_token pointers (in the next steps for the algorithm) */
484    atoken->first_next_arc = (arc_token_lnk)into_node;
485    atoken->next_token_index = (arc_token_lnk)from_node;
486  }
487  num_arcs = (arcID) i;
488
489  pfclose(p_text_file);
490  p_text_file = NULL;
491
492  wordmap_setbase(semgraph->scopes_olabels);
493  wordmap_ceiling(semgraph->scopes_olabels); /* we won't be adding scopes! */
494  wordmap_setbase(semgraph->scripts);
495
496  /* 2. now do the internal cross references */
497  /* in this pass we build the 1-to-1 links, and n-to-1 links in a graph */
498  /* in other words... first_next_arc points to the first arc leaving the node */
499  for (i = 0; i < num_arcs; ++i)
500  {
501    atoken = &arc_token_list[i];
502    into_node = (nodeID)(int)atoken->first_next_arc; /* get the integer */
503    atoken->first_next_arc = /* converts the integer id to a arc_token pointer */
504      get_first_arc_leaving_node1(arc_token_list, num_arcs, (nodeID)(int)atoken->first_next_arc);
505  }
506
507  /* 3. now do more internal cross refs */
508  /* in this pass we build the 1-to-n links */
509  /* in other words ... setup the linked list of all arc leaving from the same node */
510  last_atoken = &arc_token_list[0];
511  for (i = 1; i < num_arcs; ++i)
512  {
513    atoken = &arc_token_list[i];
514    /* if this arc and the last one do NOT leave the same node (i.e. from_node, see above),
515    then the next_token_index is not used */
516    if (atoken->next_token_index != last_atoken->next_token_index)
517      last_atoken->next_token_index = ARC_TOKEN_NULL;
518    else
519      last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i);
520    last_atoken = atoken;
521  }
522  last_atoken->next_token_index = ARC_TOKEN_NULL;
523
524#if DEBUG_ASTAR
525  /* under debug, it's nice to be able to see the words leaving the
526     destination node, they are stored sequentially in the debug ary */
527  for (i = 0; i < num_arcs; i++)
528  {
529    LCHAR * p;
530    arc_token* tmp;
531    atoken = &arc_token_list[i];
532    atoken->debug[0] = 0;
533    tmp = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
534    for (; tmp; tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index))
535    {
536      if (tmp->first_next_arc == ARC_TOKEN_NULL)
537        p = "END";
538      else if (!tmp->label)
539        p = "NULL";
540      else
541        p = tmp->label;
542      if (strlen(atoken->debug) + strlen(p) + 6 < 64)
543      {
544        strcat(atoken->debug, p);
545        strcat(atoken->debug, " ");
546      }
547      else
548      {
549        strcat(atoken->debug, "...");
550        break;
551      }
552    }
553  }
554#endif
555  semgraph->arc_token_list_len = (arcID)max_num_arc_tokens;
556  /* initialize the freelist */
557  if (num_arcs < max_num_arc_tokens)
558  {
559    semgraph->arc_token_freelist = &semgraph->arc_token_list[num_arcs];
560    for (i = num_arcs; i < max_num_arc_tokens - 1; i++)
561    {
562      semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
563      semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(semgraph->arc_token_list, (i + 1));
564    }
565    semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
566    semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
567  }
568  else
569    semgraph->arc_token_freelist = NULL;
570
571  /* for dynamic addition */
572  for (i = 0; i < MAX_NUM_SLOTS; i++)
573    semgraph->arcs_for_slot[i] = NULL;
574
575	semgraph->arc_token_insert_start = semgraph->arc_token_list + num_arcs;
576    semgraph->arc_token_insert_end = NULL;
577  return ESR_SUCCESS;
578CLEANUP:
579  if (p_text_file)
580    pfclose (p_text_file);
581  return rc;
582}
583
584ESR_ReturnCode SR_SemanticGraph_Load(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
585{
586  ESR_ReturnCode rc;
587
588  if (LSTRSTR(basename, L(".g2g")))
589  {
590    rc = SR_SemanticGraph_LoadFromImage(self, ilabels, basename);
591  }
592  else
593  {
594    rc = SR_SemanticGraph_LoadFromTextFiles(self, ilabels, basename, num_words_to_add);
595  }
596  return rc;
597}
598
599/**
600 * Unload Sem graph
601 */
602ESR_ReturnCode SR_SemanticGraph_Unload(SR_SemanticGraph* self)
603{
604  SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
605
606  /* see the wordmap_create in the Load function */
607  wordmap_destroy(&semgraph->scopes_olabels);
608  wordmap_destroy(&semgraph->scripts);
609
610  FREE(semgraph->arc_token_list);
611  semgraph->arc_token_list = 0;
612  return ESR_SUCCESS;
613}
614
615ESR_ReturnCode sr_semanticgraph_saveV1(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
616ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
617
618ESR_ReturnCode SR_SemanticGraph_Save(SR_SemanticGraph* self, const LCHAR* g2g, int version_number)
619{
620  SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
621  ESR_ReturnCode rc = ESR_SUCCESS;
622
623  if (version_number == 2)
624  {
625    rc = sr_semanticgraph_saveV2(impl,  g2g);
626  }
627  else
628  {
629    PLogError("invalid version_number %d\n", version_number);
630    rc = ESR_INVALID_ARGUMENT;
631  }
632  return rc;
633}
634
635
636int sr_semanticgraph_get_type(SR_SemanticGraphImpl* impl)
637{
638  arc_token *atoken, *arc_token_list = impl->arc_token_list;
639  arc_token_lnk mergept;
640  int expected_ilabel;
641  atoken = impl->arc_token_list;
642
643  /* 0 1 eps {
644     1 2 13e_avenue myRoot}
645     ...
646     1 2 13e_avenue myRoot}
647     2 */
648  if (atoken->ilabel != WORD_EPSILON_LABEL)
649    return GrammarTypeBNF;
650  atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
651  if (!atoken)
652    return GrammarTypeBNF;
653  mergept = atoken->first_next_arc;
654  expected_ilabel = NUM_ITEMLIST_HDRWDS;
655  for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->next_token_index))
656  {
657    if (atoken->first_next_arc != mergept)
658      return GrammarTypeBNF;
659    if (atoken->ilabel != expected_ilabel)
660      return GrammarTypeBNF;
661    expected_ilabel++;
662  }
663  if (expected_ilabel != impl->ilabels->num_words)
664    return GrammarTypeBNF;
665  atoken = ARC_TOKEN_PTR(arc_token_list, mergept);
666  for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc))
667  {
668    if (atoken->next_token_index != ARC_TOKEN_NULL)
669      return GrammarTypeBNF;
670    if (atoken->ilabel != WORD_EPSILON_LABEL &&
671        !(atoken->ilabel == MAXwordID && atoken->olabel == MAXwordID))
672      return GrammarTypeBNF;
673  }
674  return GrammarTypeItemList;
675}
676
677#define SEMGR_OUTPUT_FORMAT_V2 478932784
678
679ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g)
680{
681  ESR_ReturnCode rc;
682  PFile* fp;
683  asr_uint32_t tmp[32];
684  struct
685  {
686    asr_uint32_t format;
687    asr_uint32_t sgtype;
688  }
689  header;
690  unsigned int i, nfields;
691
692  fp = pfopen ( g2g, L("r+b"));
693/*  CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
694  CHKLOG(rc, PFileOpen(fp, L("r+b")));*/
695
696  if ( fp == NULL )
697  {
698  	rc = ESR_OPEN_ERROR;
699    goto CLEANUP;
700  }
701
702  pfseek(fp, 0, SEEK_END);
703
704  header.format = IMAGE_FORMAT_V2;
705  header.sgtype = sr_semanticgraph_get_type(impl);
706  header.sgtype = GrammarTypeBNF;
707
708#ifdef SREC_ENGINE_VERBOSE_LOGGING
709  PLogMessage("sr_semanticgraph_saveV2() semgraphtype %d", header.sgtype);
710#endif
711  if (pfwrite(&header, 4 /*sz*/, 2/*ni*/, fp) != 2)
712  {
713    rc = ESR_WRITE_ERROR;
714    PLogError(L("ESR_WRITE_ERROR: could not write V2"));
715    goto CLEANUP;
716  }
717
718  if (header.sgtype == GrammarTypeItemList)
719  {
720    arc_token *parser, *atok;
721
722    /* write num_words size array of short script ids
723       this might be just a y=x array, but it could be there
724       are synonyms, eg. NEW_YORK NEW_YORK_CITY -> same script
725    */
726    parser = impl->arc_token_list;
727    parser = ARC_TOKEN_PTR(impl->arc_token_list, parser->first_next_arc);
728    for (i = NUM_ITEMLIST_HDRWDS; i < impl->ilabels->num_words; i++)
729    {
730      for (atok = parser; atok; atok = ARC_TOKEN_PTR(impl->arc_token_list, atok->next_token_index))
731      {
732        if (atok->ilabel == i) break;
733      }
734      if (!atok)
735      {
736        rc = ESR_INVALID_STATE;
737        PLogError("Can't find word %d in semgraph\n", i);
738        goto CLEANUP;
739      }
740      tmp[0] = atok->olabel;
741      if (pfwrite(tmp, sizeof(tmp[0]), 1, fp) != 1)
742      {
743        rc = ESR_WRITE_ERROR;
744        PLogError(L("ESR_WRITE_ERROR: could not write V2"));
745        goto CLEANUP;
746      }
747    }
748    if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
749    {
750      PLogError(ESR_rc2str(rc));
751      goto CLEANUP;
752    }
753  }
754  else
755  {
756
757    i = 0;
758    tmp[i++] = impl->script_olabel_offset;
759    tmp[i++] = impl->scopes_olabel_offset;
760    nfields = i;
761
762    if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
763    {
764      rc = ESR_WRITE_ERROR;
765      PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
766      goto CLEANUP;
767    }
768
769    /* word arcs */
770    if ((rc = serializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
771    {
772      PLogError(ESR_rc2str(rc));
773      goto CLEANUP;
774    }
775
776    /* do not WRITE ilabels... this is a ref to the olabels from rec context */
777
778    /* scopes */
779    if ((rc = serializeWordMapV2(impl->scopes_olabels, fp)) != ESR_SUCCESS)
780    {
781      PLogError(ESR_rc2str(rc));
782      goto CLEANUP;
783    }
784
785    if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
786    {
787      PLogError(ESR_rc2str(rc));
788      goto CLEANUP;
789    }
790#ifdef SREC_ENGINE_VERBOSE_LOGGING
791    PLogMessage("G2G done WR semg %d", pftell(fp));
792#endif
793  }
794  rc = ESR_SUCCESS;
795CLEANUP:
796  if (fp)
797    pfclose (fp);
798  return rc;
799}
800
801arc_token* arc_tokens_find_ilabel(arc_token* base, arc_token* arc_token_list, wordID ilabel)
802{
803  arc_token* p;
804  for (p = arc_token_list; p != NULL; p = ARC_TOKEN_PTR(base, p->next_token_index))
805    if (p->ilabel == ilabel) return p;
806  return NULL;
807}
808
809arc_token* arc_tokens_get_free(arc_token* base, arc_token** arc_token_freelist)
810{
811  arc_token* tmp = (*arc_token_freelist);
812  if (tmp == NULL)
813    return NULL;
814  (*arc_token_freelist) = ARC_TOKEN_PTR(base, tmp->next_token_index);
815  tmp->ilabel = tmp->olabel = 0;
816  tmp->next_token_index = ARC_TOKEN_NULL;
817  tmp->first_next_arc = ARC_TOKEN_NULL;
818  return tmp;
819}
820
821int arc_tokens_list_size(arc_token* base, arc_token* head)
822{
823  arc_token* tmp = head;
824  int count = 0;
825  for (; tmp; tmp = ARC_TOKEN_PTR(base, tmp->next_token_index))
826  {
827    count++;
828  }
829  return count;
830}
831
832void arc_tokens_free_list(arc_token* base, arc_token** arc_token_freelist, arc_token* head)
833{
834  arc_token *tail, *next = (arc_token*)1;
835  if (head == NULL)
836    return;
837  for (tail = head; ; tail = next)
838  {
839    next = ARC_TOKEN_PTR(base, tail->next_token_index);
840    if (next == NULL) break;
841  }
842  tail->next_token_index = ARC_TOKEN_PTR2LNK(base, (*arc_token_freelist));
843  *arc_token_freelist = head;
844}
845
846ESR_ReturnCode find_in_union_of_scripts(const LCHAR* union_script, const LCHAR* script, ESR_BOOL* isFound)
847{
848  const LCHAR* start;
849  const LCHAR* end;
850  const LCHAR* p;
851  const LCHAR* q;
852
853  if (union_script == NULL || script == NULL)
854    return ESR_INVALID_ARGUMENT;
855
856  start = LSTRCHR(union_script, L('\''));
857  if (start == NULL)
858    return ESR_INVALID_ARGUMENT;
859
860  start++; /* point to first char after \' */
861
862  end = LSTRCHR(start, L('\'')); /* point to last \' */
863  if (end == NULL)
864    return ESR_INVALID_ARGUMENT;
865
866  p = start;
867
868  start = LSTRCHR(script, L('\''));
869  if (start == NULL)
870    return ESR_INVALID_ARGUMENT;
871  start++; /* point to first char after \' */
872
873  q = start;
874
875  while (p < end)
876  {
877    if (*p == MULTIPLE_MEANING_JOIN_CHAR) /* if at the end of a meaning (not end of union)
878                                                  and p matched q all the way up to join char then found! */
879    {
880      *isFound = ESR_TRUE;
881      return ESR_SUCCESS;
882    }
883    else if (*p == *q) /* while same keep going */
884    {
885      if (*p == *(end - 1)) /* if at the end and p matched q all the way then found! */
886      {
887        *isFound = ESR_TRUE;
888        return ESR_SUCCESS;
889      }
890      q++;
891    }
892    else /* skip to next meaning after join char */
893    {
894      while (*p != MULTIPLE_MEANING_JOIN_CHAR && p < end)
895        p++;
896      /* reset q */
897      q = start;
898    }
899    p++;
900  }
901
902  *isFound = ESR_FALSE;
903  return ESR_SUCCESS;
904}
905
906#define QUOTE_CHAR L('\'')
907int count_num_literals(const LCHAR* a, const LCHAR** start_points, int max_num_start_points)
908{
909  int num = 0;
910  const LCHAR *p, *q = a;
911  const LCHAR *end = a + LSTRLEN(a);
912  while (1)
913  {
914    /* look for starting QUOTE_CHAR */
915    for (p = q; p < end; p++)
916    {
917      if (*p == ESC_CHAR) p++;
918      else if (*p == QUOTE_CHAR) break;
919    }
920    if (p == end) break;
921    if (num > max_num_start_points) break; /* just abort the counting! */
922    start_points[num] = p;
923    /* look for ending QUOTE_CHAR */
924    for (q = p + 1; q < end; q++)
925    {
926      if (*q == ESC_CHAR) q++;
927      else if (*q == QUOTE_CHAR) break;
928    }
929    if (q == end) /* does not close! */
930      return -1;
931    p = ++q;
932    num++;
933  }
934  return num;
935}
936int union_literal_pair(LCHAR* o, LCHAR* a, LCHAR* b, LCHAR** pptra)
937{
938  LCHAR *enda, *ptra, *endb, *ptrb;
939  LCHAR *p, *ptro;
940  enda = a + LSTRLEN(a);
941  endb = b + LSTRLEN(b);
942  /* capture the data from a to ptra */
943  for (ptra = a + 1; ptra < enda; ptra++)
944  {
945    if (*ptra == ESC_CHAR) ptra++;
946    else if (*ptra == QUOTE_CHAR) break;
947  }
948  /* capture the data from b to ptrb */
949  for (ptrb = b + 1; ptrb < endb; ptrb++)
950  {
951    if (*ptrb == ESC_CHAR) ptrb++;
952    else if (*ptrb == QUOTE_CHAR) break;
953  }
954  /* now make the output */
955  ptro = o;
956  *ptro++ = QUOTE_CHAR;
957  for (p = a + 1; p < ptra; p++) *ptro++ = *p;
958  *ptro++ = MULTIPLE_MEANING_JOIN_CHAR;
959  for (p = b + 1; p < ptrb; p++) *ptro++ = *p;
960  *ptro++ = QUOTE_CHAR;
961  *ptro++ = 0;
962  *pptra = ptra + 1;
963  return 0;
964}
965
966/* now handles n1='52';n2='62'; UNION n1='53';nx='63'; */
967
968ESR_ReturnCode make_union_of_scripts(LCHAR* union_script, const size_t max_len, const LCHAR* a, const LCHAR* b)
969{
970  int i, num_literals_in_a, num_literals_in_b;
971  LCHAR *spa[8], *spb[8], *spo[8], *ptra;
972
973  if (a == NULL || b == NULL)
974    return ESR_INVALID_ARGUMENT;
975
976  num_literals_in_a = count_num_literals(a, (const LCHAR **)spa, 8);
977  num_literals_in_b = count_num_literals(b, (const LCHAR **)spb, 8);
978
979  if (num_literals_in_a == 0 && num_literals_in_b == 0)
980  {
981    if (LSTRLEN(a) > max_len) return ESR_BUFFER_OVERFLOW;
982    else
983    {
984      LSTRCPY(union_script, a);
985      return ESR_SUCCESS;
986    }
987  }
988  else if (num_literals_in_a != num_literals_in_b)
989  {
990    return ESR_INVALID_ARGUMENT;
991  }
992
993  /* V='Springfield_IL' union V='Springfield_MA' is V='Springfield_IL#Springfield_MA' */
994  /* 18               +       18          -2     =  33 + 1 for NULL             */
995  if ((LSTRLEN(a) + LSTRLEN(b) - 2) > max_len)
996  {
997    PLogError("Temp buffer (size %d) to hold union of multiple meanings (size %d) is too small", max_len, (LSTRLEN(a) + LSTRLEN(b) - 2));
998    return ESR_BUFFER_OVERFLOW;
999  }
1000
1001  LSTRCPY(union_script, a);
1002  for (i = 0; i < num_literals_in_a; i++)
1003  {
1004    count_num_literals(union_script, (const LCHAR **)spo, 8);
1005    /* here union_script is n0='52';n1='62'; */
1006    union_literal_pair(spo[i], spa[i], spb[i], &ptra);
1007#ifdef _WIN32
1008    if (LSTRLEN(spo[i]) > MAX_SEMPROC_VALUE)
1009      pfprintf(PSTDOUT, "Warning: won't be able to parse this script! len %d>%d %s\n", LSTRLEN(spo[i]), MAX_SEMPROC_VALUE, spo[i]);
1010#endif
1011    /* here union_script is n0='52#53' */
1012    LSTRCAT(union_script, ptra);
1013    /* here union_script is n0='52#53';n1='62'; */
1014  }
1015  return ESR_SUCCESS;
1016}
1017
1018/**
1019 * Default implementation.
1020 */
1021ESR_ReturnCode SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph* self, const LCHAR* _slot, const LCHAR* word, const LCHAR* script, const ESR_BOOL newWordAddedToFST)
1022{
1023  struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
1024  arc_token *token, *tmp;
1025  arc_token *tmp_arc_token_list;
1026  wordID wdID, scriptID, old_scriptID;
1027  wordID slotID;
1028  LCHAR union_script[MAX_STRING_LEN]; /* sizeof used elsewhere */
1029  ESR_ReturnCode rc; int i;
1030  int tmp_arc_token_list_len;
1031  int offset;
1032#define MAX_WORD_LEN 128
1033  char veslot[MAX_WORD_LEN];
1034
1035  if (script == NULL || *script == L('\0') || !LSTRCMP(script, L("NULL")))
1036    return ESR_SUCCESS; /* no script to add so keep going */
1037
1038  /* find out if the word I am adding already exists. If it already exists, then that means that I
1039     potentially am adding an alternate meaning for the word */
1040  /* the slotname in .PCLG.txt and .map files use __ as the indicator. Xufang */
1041  if(_slot[0] == '@') {
1042    strcpy(veslot,SLOTNAME_INDICATOR);
1043    strcat(veslot,_slot+1);
1044    strcat(veslot,SLOTNAME_INDICATOR);
1045  } else
1046    strcpy(veslot, _slot);
1047
1048  slotID = wordmap_find_rule_index(impl->ilabels, veslot);
1049  if (slotID == MAXwordID)
1050  {
1051    PLogError(L("ESR_NO_MATCH_ERROR: Could not find slotID in wordmap %s"), _slot);
1052    return ESR_NO_MATCH_ERROR;
1053  }
1054  wdID = wordmap_find_index_in_rule(impl->ilabels, word, slotID);
1055  if (wdID == MAXwordID)
1056  {
1057    PLogError(L("ESR_NO_MATCH_ERROR: Could not find wordID/slotID in wordmap %s/%d"), word, slotID);
1058    return ESR_NO_MATCH_ERROR;
1059  }
1060
1061  /* **this is an optimization step** */
1062  /* Is word already added in this slot? if so, get the token pointer, else, token is NULL
1063   *
1064   * the assumption is that FST_AddWordToGrammar will tell us if this word was newly added in the FST, or
1065   * if the word was added at least 1 iteration ago, meaning that I have already added it to my
1066   * semgraph slot at some earlier point
1067   */
1068  if (newWordAddedToFST)
1069    token = NULL;
1070  else
1071    token = arc_tokens_find_ilabel(impl->arc_token_list, impl->arcs_for_slot[slotID], wdID);
1072
1073#define FST_GROW_FACTOR   12/10
1074#define FST_GROWARCS_MIN    100
1075  if (token == NULL) /* new word to add to slot */
1076  {
1077    /* add the script if new  */
1078    scriptID = wordmap_find_index(impl->scripts, script);
1079    if (scriptID == MAXwordID)
1080      scriptID = wordmap_add_word(impl->scripts, script);
1081    if (scriptID == MAXwordID)
1082    {
1083      PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
1084      return ESR_OUT_OF_MEMORY;
1085    }
1086
1087    token = impl->arcs_for_slot[slotID];
1088    tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
1089    if (tmp == NULL)
1090      {
1091#if defined (FST_GROW_FACTOR)
1092	tmp_arc_token_list_len = impl->arc_token_list_len * FST_GROW_FACTOR;
1093	if(tmp_arc_token_list_len - impl->arc_token_list_len <=FST_GROWARCS_MIN)
1094	  tmp_arc_token_list_len+=FST_GROWARCS_MIN;
1095
1096	tmp_arc_token_list= NEW_ARRAY(arc_token,tmp_arc_token_list_len, L("semgraph.wordgraph"));
1097	if(!tmp_arc_token_list) {
1098	  PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of semgraph.wordgraph"));
1099	  return ESR_OUT_OF_MEMORY;
1100	}
1101	memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
1102
1103	for(i=0; i<MAX_NUM_SLOTS;i++)
1104	  {
1105	    if(impl->arcs_for_slot[i] != NULL) {
1106	      offset = impl->arcs_for_slot[i] - impl->arc_token_list;
1107	      impl->arcs_for_slot[i] = tmp_arc_token_list + offset;
1108	    }
1109	  }
1110	token = impl->arcs_for_slot[slotID];
1111
1112	ASSERT( impl->arc_token_freelist == NULL);
1113
1114	impl->arc_token_freelist = tmp_arc_token_list + impl->arc_token_list_len;
1115
1116	FREE(impl->arc_token_list);
1117	impl->arc_token_insert_start = tmp_arc_token_list + (impl->arc_token_insert_start - impl->arc_token_list); //Rabih fix
1118	impl->arc_token_list = tmp_arc_token_list;
1119
1120	for (i = impl->arc_token_list_len; i < tmp_arc_token_list_len - 1; i++)
1121	  {
1122	    impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
1123	    impl->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, (i + 1));
1124	  }
1125	impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
1126	impl->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
1127
1128	impl->arc_token_list_len = tmp_arc_token_list_len;
1129	tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
1130      }
1131#endif
1132    if(tmp == NULL) {
1133      PLogError(L("ESR_OUT_OF_MEMORY: Error adding more arcs to graph\n"));
1134      return ESR_OUT_OF_MEMORY;
1135    }
1136    impl->arcs_for_slot[slotID] = tmp;
1137    tmp->next_token_index = ARC_TOKEN_PTR2LNK(impl->arc_token_list, token);
1138    tmp->ilabel = wdID;
1139    tmp->olabel = (wordID)(impl->script_olabel_offset + scriptID);
1140  }
1141  else
1142  {
1143    old_scriptID = token->olabel - impl->script_olabel_offset;
1144
1145    if (!LSTRCMP(impl->scripts->words[old_scriptID], script))
1146    {
1147      /* nothing to do, we have the word, same meaning again so do nothing */
1148    }
1149    else
1150    {
1151
1152      CHKLOG(rc, make_union_of_scripts(union_script, sizeof(union_script), impl->scripts->words[old_scriptID], script));
1153
1154#ifdef SREC_ENGINE_VERBOSE_LOGGING
1155      PLogMessage(L("Adding alternate meaning %s for word %s (%s) in slot %s\n"), script, word,
1156                  impl->scripts->words[old_scriptID], impl->ilabels->words[slotID]);
1157#endif
1158      /* add the union as if new (if not already there) */
1159      scriptID = wordmap_find_index(impl->scripts, union_script);
1160      if (scriptID == MAXwordID)
1161        scriptID = wordmap_add_word(impl->scripts, union_script);
1162      if (scriptID == MAXwordID)
1163      {
1164        PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
1165        return ESR_OUT_OF_MEMORY;
1166      }
1167
1168      /* make the olabel point to the union */
1169      token->olabel = (wordID)(impl->script_olabel_offset + scriptID);
1170    }
1171  }
1172  return ESR_SUCCESS;
1173CLEANUP:
1174  return rc;
1175}
1176
1177
1178/**
1179 * Default implementation.
1180 */
1181ESR_ReturnCode SR_SemanticGraph_Reset(SR_SemanticGraph* self)
1182{
1183  struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
1184  wordID slotid;
1185  arc_token* tmp;
1186  arc_token *tmp_arc_token_list;
1187
1188  wordmap_reset(impl->scopes_olabels);
1189  wordmap_reset(impl->scripts);
1190  wordmap_reset(impl->ilabels);   //Rabih: I added this
1191  for (slotid = 1; slotid < impl->ilabels->num_slots; slotid++)
1192  {
1193    tmp = impl->arcs_for_slot[slotid];
1194    arc_tokens_free_list(impl->arc_token_list, &(impl->arc_token_freelist), tmp);
1195    impl->arcs_for_slot[slotid] = NULL;
1196#if defined(SANITY_CHECK)
1197    int count;
1198    for (count = 0, tmp = impl->arc_token_freelist; tmp != NULL;
1199         tmp = ARC_TOKEN_PTR(impl->arc_token_list, tmp->next_token_index))
1200    {
1201      ASSERT(tmp->ilabel != 79324);
1202      tmp->ilabel = 79324;
1203      count++;
1204    }
1205    PLogError("after reset freelist size is %d", count);
1206#endif
1207  }
1208
1209  // Rabih : Reset the arc_token_list
1210  if(impl->ilabels->num_words == impl->ilabels->num_base_words)
1211  {}
1212  else{
1213  impl->arc_token_list_len = (size_t)(impl->arc_token_insert_start - impl->arc_token_list);
1214  tmp_arc_token_list= NEW_ARRAY(arc_token,impl->arc_token_list_len, L("semgraph.wordgraph"));
1215  memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
1216
1217  impl->arc_token_freelist = NULL;
1218
1219  FREE(impl->arc_token_list);
1220  impl->arc_token_list = tmp_arc_token_list;
1221  }
1222  return ESR_SUCCESS;
1223}
1224
1225static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
1226    PFile* fp)
1227{
1228  int i;
1229  asr_uint32_t idx;
1230  arcID tmp[32];
1231
1232  if (pfwrite(&impl->arc_token_list_len, 2, 1, fp) != 1)
1233    return ESR_WRITE_ERROR;
1234
1235  idx = PTR_TO_IDX(impl->arc_token_freelist, impl->arc_token_list);
1236
1237  if (pfwrite(&idx, 4, 1, fp) != 1)
1238    return ESR_WRITE_ERROR;
1239
1240  idx = PTR_TO_IDX(impl->arc_token_insert_start, impl->arc_token_list);
1241
1242  if (pfwrite(&idx, 4, 1, fp) != 1)
1243    return ESR_WRITE_ERROR;
1244
1245  idx = 0;
1246  if (pfwrite(&idx, 4, 1, fp) != 1)
1247    return ESR_WRITE_ERROR;
1248
1249  for (i = 0; i < impl->arc_token_list_len; ++i)
1250  {
1251    arc_token* token = &impl->arc_token_list[i];
1252    tmp[0] = token->ilabel;
1253    tmp[1] = token->olabel;
1254    tmp[2] = ARC_TOKEN_IDX(impl->arc_token_list, token->first_next_arc);
1255    tmp[3] = ARC_TOKEN_IDX(impl->arc_token_list, token->next_token_index);
1256    if (pfwrite(tmp, sizeof(tmp[0]), 4, fp) != 4)
1257      return ESR_WRITE_ERROR;
1258  }
1259
1260  /* new, fixes load/save bug 2007 July 31
1261	todo: change 4 to sizeof(asr_uint32) */
1262  if(1) {
1263	asr_uint32_t idx[MAX_NUM_SLOTS];
1264	for(i=0; i<MAX_NUM_SLOTS; i++)
1265		idx[i] = PTR_TO_IDX(impl->arcs_for_slot[i], impl->arc_token_list);
1266	if (pfwrite(&idx, 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS)
1267			return ESR_WRITE_ERROR;
1268  }
1269
1270  return ESR_SUCCESS;
1271}
1272
1273static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
1274    PFile* fp)
1275{
1276  int i;
1277  asr_uint32_t idx;
1278  ESR_ReturnCode rc = ESR_SUCCESS;
1279  arcID tmp[32];
1280
1281  if (pfread(&impl->arc_token_list_len, 2, 1, fp) != 1)
1282  {
1283    rc = ESR_READ_ERROR;
1284    PLogError(L("ESR_READ_ERROR: could not read arc_token_list_len"));
1285    return rc;
1286  }
1287
1288  impl->arc_token_list = NEW_ARRAY(arc_token,
1289                                   impl->arc_token_list_len,
1290                                   L("semgraph.wordgraph"));
1291
1292  if (impl->arc_token_list == NULL)
1293  {
1294    rc = ESR_OUT_OF_MEMORY;
1295    PLogError(ESR_rc2str(rc));
1296    return ESR_OUT_OF_MEMORY;
1297  }
1298
1299  if (pfread(&idx, 4, 1, fp) != 1)
1300  {
1301    rc = ESR_READ_ERROR;
1302    PLogError(ESR_rc2str(rc));
1303    goto CLEANUP;
1304  }
1305
1306  impl->arc_token_freelist = IDX_TO_PTR(idx, impl->arc_token_list);
1307
1308  if (pfread(&idx, 4, 1, fp) != 1)
1309  {
1310    rc = ESR_READ_ERROR;
1311    PLogError(ESR_rc2str(rc));
1312    goto CLEANUP;
1313  }
1314
1315  impl->arc_token_insert_start = IDX_TO_PTR(idx, impl->arc_token_list);
1316  // impl->arc_token_insert_start = impl->arc_token_list + impl->arc_token_list_len; // Rabih's fix
1317
1318  if (pfread(&idx, 4, 1, fp) != 1)
1319  {
1320    rc = ESR_READ_ERROR;
1321    PLogError(ESR_rc2str(rc));
1322    goto CLEANUP;
1323  }
1324  impl->arc_token_insert_end = 0;
1325
1326  for (i = 0; i < impl->arc_token_list_len; ++i)
1327  {
1328    arc_token* token = &impl->arc_token_list[i];
1329    if (pfread(tmp, sizeof(tmp[0]), 4, fp) != 4)
1330    {
1331      rc = ESR_READ_ERROR;
1332      goto CLEANUP;
1333    }
1334    token->ilabel = tmp[0];
1335    token->olabel = tmp[1];
1336    if (tmp[2] == MAXarcID)
1337      token->first_next_arc = ARC_TOKEN_NULL;
1338    else
1339      token->first_next_arc = ARC_TOKEN_LNK(impl->arc_token_list, tmp[2]);
1340    if (tmp[3] == MAXarcID)
1341      token->next_token_index = ARC_TOKEN_NULL;
1342    else
1343      token->next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, tmp[3]);
1344  }
1345
1346  /* new, fixes load/save bug 2007 July 31
1347	todo: change 4 to sizeof(asr_uint32) */
1348  if(1) {
1349		asr_uint32_t idx[MAX_NUM_SLOTS];
1350		if (pfread(&idx[0], 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS) {
1351			rc = ESR_READ_ERROR;
1352			PLogError(ESR_rc2str(rc));
1353			goto CLEANUP;
1354		}
1355		for(i=0; i<MAX_NUM_SLOTS; i++)
1356			impl->arcs_for_slot[i] = IDX_TO_PTR(idx[i], impl->arc_token_list);
1357   }
1358
1359  return ESR_SUCCESS;
1360
1361CLEANUP:
1362  FREE(impl->arc_token_list);
1363  impl->arc_token_list =
1364    impl->arc_token_freelist =
1365      impl->arc_token_insert_start =
1366        impl->arc_token_insert_end = NULL;
1367  return rc;
1368}
1369