1/*---------------------------------------------------------------------------*
2 *  grxmlcompile.cpp  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
#include "ptypes.h"

#include <fstream>
#include <iostream>
#include <unordered_map>
#include <vector>

#include "fst/lib/fst.h"
#include "fst/lib/fstlib.h"
#include "fst/lib/arc.h"
#include "fst/lib/fst-decl.h"
#include "fst/lib/vector-fst.h"
#include "fst/lib/arcsort.h"
#include "fst/lib/invert.h"

#include "fst-io.h"

#include "ESR_Locale.h"
#include "LCHAR.h"
#include "pstdio.h"
#include "PFileSystem.h"
#include "PANSIFileSystem.h"
#include "plog.h"
#include "pmemory.h"
#include "ESR_Session.h"
#include "SR_Session.h"
#include "SR_Vocabulary.h"
#include "srec_arb.h"       // for EPSILON_LABEL etc
#include "tinyxml.h"
#include "grxmldoc.h"
49
50#ifdef MEMTRACE
51#include <mcheck.h>
52#endif
53
54#define OPENFST_ACKNOWLEDGEMENT	\
55	"This tool uses the OpenFst library. \n" \
56 "Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
57" you may not use this file except in compliance with the License.\n" \
58" You may obtain a copy of the License at" \
59"\n" \
60"      http://www.apache.org/licenses/LICENSE-2.0\n" \
61"\n" \
62" Unless required by applicable law or agreed to in writing, software\n" \
63" distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
64" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
65" See the License for the specific language governing permissions and\n" \
66" limitations under the License.\n" \
67"\n" \
68" This library was developed at Google Research (M. Riley, J. Schalkwyk, W. Skut) and NYU's Courant Institute (C. Allauzen, M. Mohri). It is intended to be comprehensive, flexible, efficient and scale well to large problems. It is an open source project distributed under the Apache license. \n"
69
70
71#define TINYXML_ACKNOWLEDGEMENT	\
72	"This tool uses the tinyxml library. \n" \
73"Copyright (c) 2007 Project Admins: leethomason \n" \
74"The TinyXML software is provided 'as-is', without any express or implied\n" \
75"warranty. In no event will the authors be held liable for any damages\n" \
76"arising from the use of this software.\n" \
77"\n" \
78"Permission is granted to anyone to use this software for any purpose,\n" \
79"including commercial applications, and to alter it and redistribute it\n" \
80"freely, subject to the following restrictions:\n"
81
82#define NUANCE_COPYRIGHT \
83"// grxmlcompile\n" \
84"//\n" \
85"// Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
86"// you may not use this file except in compliance with the License.\n" \
87"// You may obtain a copy of the License at\n" \
88"//\n" \
89"//      http://www.apache.org/licenses/LICENSE-2.0\n" \
90"//\n" \
91"// Unless required by applicable law or agreed to in writing, software\n" \
92"// distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
93"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
94"// See the License for the specific language governing permissions and\n" \
95"// limitations under the License.\n" \
96"//\n" \
97"// This program compiles a .grxml grammar into the graphs needed for \n" \
98"// decoding with SREC\n" \
99"// \n"
100
101#define MAX_LINE_LENGTH   256
102#define MAX_PATH_NAME 512
103#define MAX_PRONS_LENGTH 1024
104#define SILENCE_PREFIX_WORD "-pau-"
105#define SILENCE_SUFFIX_WORD "-pau2-"
106#define SLOT_SUFFIX "__"
107#define SLOT_PREFIX "__"
108#define MAX_NUM_SLOTS 12 /* must agree with srec_context.h */
109#define EXTRA_EPSILON_LABEL 39999 // must be higher than the number of models
110#define DEFAULT_WB_COST 40
111#define DEFAULT_WB_COST_STR "40"
112#define SLOT_COUNTER_OFFSET 30000 // must be higher than the number of models
113#define NOISE_PHONEME_CODE 'J'
114
115static int debug = 0;
116static int verbose = 0;
117
118using namespace std;
119
120ESR_ReturnCode make_openfst_graphs(GRXMLDoc* pDoc, /* for metas */
121				   const std::string& grxmlBasename,
122				   const char* vocabFilename,
123				   const char* cfstFilename,
124				   const char* modelmapFilename);
125
/* Returns the text of one line of a text file, for use in diagnostics.
 *
 *   fn        path of the file to read
 *   line_num  zero-based index of the wanted line
 *
 * The returned pointer refers to a static buffer that is overwritten by the
 * next call, so the text must be consumed (or copied) before showline() is
 * called again.  An empty string is returned when the file cannot be opened,
 * when line_num is negative, or when the file has fewer than line_num+1
 * lines.  Lines longer than the buffer are returned truncated. */
const char* showline(const char* fn, int line_num)
{
  static char line[8096] = { 0 };

  // Start from a clean buffer so a failure never hands back the text that
  // was left behind by an earlier call.
  line[0] = '\0';
  if (!fn || line_num < 0)
    return &line[0];

  std::ifstream strm(fn);
  std::string text;
  // std::getline has no length limit, so an over-long line earlier in the
  // file cannot stop the scan before the requested line is reached.
  for (int current = 0; std::getline(strm, text); ++current) {
    if (current == line_num) {
      const std::string::size_type copied = text.copy(line, sizeof(line) - 1);
      line[copied] = '\0';
      break;
    }
  }
  return &line[0];
}
135
/* Returns the part of `full` that follows its last '/' character.
 * A string without any '/' is returned unchanged; a string ending in '/'
 * yields an empty result. */
std::string ExtractFileName(const std::string& full)
{
  const std::string::size_type last_slash = full.rfind('/');
  return (last_slash == std::string::npos) ? full
                                           : full.substr(last_slash + 1);
}
145
146/*-----------------------------------------------------------------------*
147 *                                                                       *
148 *                                                                       *
149 *-----------------------------------------------------------------------*/
150
151int usage_error(const char* prgname)
152{
153  printf("USAGE: -par <par file> -grxml <grxml grammar file> -vocab <dictionary file (.ok)> [-outdir <output directory>]\n");
154  return (int)ESR_INVALID_ARGUMENT;
155}
156
157int main(int argc, char* argv[])
158{
159  ESR_ReturnCode status = ESR_SUCCESS;
160  char *parfile = NULL;
161  char *grxmlfile = NULL;
162  char *cmdline_vocfile = NULL;
163  std::string outdir("."); // default output dir is current directory
164  /* for now, assume char and LCHAR are the same, else fail to compile! */
165  { char zzz[ 1 - (sizeof(LCHAR)!=sizeof(char))]; zzz[0] = 0; }
166
167#ifdef MEMTRACE
168    mtrace();
169#endif
170
171#if defined(GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT)
172    cout << OPENFST_ACKNOWLEDGEMENT <<std::endl;
173    cout << TINYXML_ACKNOWLEDGEMENT <<std::endl;
174    cout << NUANCE_COPYRIGHT <<std::endl;
175#endif
176
177    // Process all XML files given on command line
178
179    if(argc<5){
180      return usage_error(argv[0]);
181    }
182
183    for(int i=1;i<argc;i++)
184    {
185      if(!strcmp(argv[i],"-grxml"))
186        grxmlfile = argv[++i];
187      else if(!strcmp(argv[i],"-debug"))
188        debug++;
189      else if(!strcmp(argv[i],"-verbose"))
190        verbose++;
191      else if(!strcmp(argv[i],"-par") || !strcmp(argv[i],"-parfile"))
192        parfile = argv[++i];
193      else if(!strcmp(argv[i],"-vocab"))
194        cmdline_vocfile = argv[++i];
195      else if(!strcmp(argv[i],"-outdir"))
196        outdir = std::string(argv[++i]);
197      else {
198        printf("error_usage: argument [%s]\n", argv[i]);
199	return usage_error(argv[0]);
200	return (int)ESR_INVALID_ARGUMENT;
201      }
202    }
203
204    //process_xml( std::string(grxmlfile), parfile );
205    std::string filename = std::string(grxmlfile);
206
207    /***************************
208            process xml
209    ***************************/
210
211    cout << "processing [" << filename << "] ..." << endl;
212
213    TiXmlDocument node;
214    bool bLoadedOK = node.LoadFile( filename.c_str() );
215    if(!bLoadedOK || node.Error()) {
216      std::cout << "Error: while creating TiXmlDocument from " << filename << std::endl;
217      std::cout << "Error: " << node.Error() << " id " << node.ErrorId() << " row " << node.ErrorRow() << " col " << node.ErrorCol() << std::endl;
218      std::cout << "Error: " << node.ErrorDesc() <<  std::endl;
219      std::cout << "Error: near " << showline( filename.c_str(), node.ErrorRow()) << std::endl;
220      return (int)ESR_INVALID_ARGUMENT;
221    }
222
223
224    // *************************************************
225    //	Parse the file into a DOM object and create word graph
226    //
227    GRXMLDoc *doc = new (GRXMLDoc);
228    std::string filenameNoPath = ExtractFileName(filename);
229    doc->parseGrammar( node, filenameNoPath );   // THE PARSING AND NETWORK BUILD HAPPENS IN HERE
230    /************************
231      end of xml processing
232    ************************/
233
234    // Create grammar network files. Use prefix of input file for output.
235    std::string s = filename;
236    std::string grxmlbase = outdir + "/" + ExtractFileName(grxmlfile);
237    size_t p1 = grxmlbase.find_last_of(".");
238    if ( p1 != string::npos )
239      grxmlbase.assign( grxmlbase, 0, p1);
240
241    std::string newName;
242    newName = grxmlbase + ".map";
243    doc->writeMapFile( newName );
244    newName = grxmlbase + ".script";
245    doc->writeScriptFile( newName );
246
247    doc->writeGraphFiles( grxmlbase, false );
248
249    //
250    // SR initialization
251    //
252    char vocfile[MAX_PATH_NAME];
253    char cfstfile[MAX_PATH_NAME];
254    char modelmapfile[MAX_PATH_NAME];
255    size_t len;
256
257    PMemInit();
258    printf("info: Using parfile %s\n",parfile);
259    status = SR_SessionCreate((const LCHAR*) parfile);
260    // status = SR_SessionCreate ( parfile );
261    if (  status != ESR_SUCCESS ) {
262      LPRINTF("Error: SR_SessionCreate(%s) %s\n", parfile, ESR_rc2str(status));
263      return (int)status;
264    }
265
266    // vocfile
267    if(cmdline_vocfile) {
268      strcpy( vocfile, cmdline_vocfile);
269    } else {
270      len = MAX_PATH_NAME;
271      ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), (LCHAR*)vocfile, &len );
272      // skip PrefixWithBaseDirectory(), 'tis done inside SR_VocabularyLoad()
273    }
274    printf("info: Using dictionary %s\n",vocfile);
275
276    // modelmapfile
277    len = MAX_PATH_NAME;
278    ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)modelmapfile, &len);
279    len = MAX_PATH_NAME;
280    status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)modelmapfile, &len);
281    char* p = strrchr(modelmapfile,'/');
282    if(!p) p = strrchr(modelmapfile,'\\');
283    if(p) strcpy(p, "/models128x.map");
284
285    // cfstfile
286    len = MAX_PATH_NAME;
287    ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)cfstfile, &len);
288    len = MAX_PATH_NAME;
289    status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)cfstfile, &len);
290    p = strrchr(cfstfile,'/');
291    if(!p) p = strrchr(cfstfile,'\\');
292    if(p) strcpy(p, "/generic.C");
293
294    status = make_openfst_graphs( doc, grxmlbase, (const char*)vocfile, (const char*)cfstfile, (const char*)modelmapfile);
295    if(status != ESR_SUCCESS) {
296      LPRINTF("Error: make_openfst_graphs() returned %s\n",  ESR_rc2str(status));
297    } else {
298      /* make_openfst_graphs() can sometimes call doc->setMeta() to put
299	 Session parameters into the .params file, so writeParamsFile()
300	 should be called after make_openfst_graphs() */
301      newName = grxmlbase + ".params";
302      doc->writeParamsFile( newName );
303    }
304
305    //
306    // SR de-initialization
307    //
308    SR_SessionDestroy();
309    PMemShutdown();
310
311    delete doc;
312    return (int)status;
313}
314
315/*-----------------------------------------------------------------*
316 * utils                                                           *
317 *-----------------------------------------------------------------*/
318
319bool is_slot_symbol( const char* sym)
320{
321  const char* p = strstr(sym,SLOT_PREFIX);
322  int len = strlen(sym);
323  if(len>4 && !strcmp(sym+len-2,SLOT_SUFFIX) && (p-sym)<len-2) {
324    return true;
325  } else
326    return false;
327}
328
329int64 StrToId(const char *s, fst::SymbolTable *syms,
330	      const char *name)
331{
332  int64 n;
333  if (syms) {
334    n = syms->Find(s);
335    if (n < 0) {
336      cerr << "FstReader: Symbol \"" << s
337	   << "\" is not mapped to any integer " << name
338	   << ", symbol table = " << syms->Name();
339    }
340  } else {
341    char *p;
342    n = strtoll(s, &p, 10);
343    if (p < s + strlen(s) || n < 0) {
344      cerr << "FstReader: Bad " << name << " integer = \"" << s;
345    }
346  }
347  return n;
348}
349
350/* FstMergeOLabelsToILabels, FstSplitOLabelsFromILabels
351   are used to make sure the minimization does not go overboard in pushing
352   output labels toward the beginning of the graph.  When that happens
353   then the speech recognition decoder fails! */
354
355ESR_ReturnCode FstMergeOLabelsToILabels( fst::StdVectorFst& fst_, int max_ilabels )
356{
357  fst::StdArc::StateId s = fst_.Start();
358  if (s == fst::kNoStateId)
359    return ESR_INVALID_ARGUMENT;
360  for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
361       !siter.Done(); siter.Next()) {
362    s = siter.Value();
363
364    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
365	!aiter.Done(); aiter.Next()) {
366      fst::StdArc arc = aiter.Value();
367      if( arc.ilabel >= max_ilabels ||
368	  (float)arc.ilabel + ((float)max_ilabels)*arc.olabel > INT_MAX) {
369	std::cout << "Error: internal error in FstMergeOLabelsToILabels() " << std::endl;
370	return ESR_NOT_IMPLEMENTED;
371      }
372      arc.ilabel = arc.ilabel + max_ilabels * arc.olabel;
373      arc.olabel = 0;
374      aiter.SetValue( arc);
375    }
376  }
377  return ESR_SUCCESS;
378}
379
380ESR_ReturnCode FstMergeOLabelsToILabels_GetMax( fst::StdVectorFst& fst_, int& max_ilabel )
381{
382  if (fst_.Start() == fst::kNoStateId) return ESR_INVALID_ARGUMENT;
383  for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
384       !siter.Done(); siter.Next()) {
385    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, siter.Value());
386	!aiter.Done(); aiter.Next()) {
387      if( aiter.Value().ilabel > max_ilabel)
388	max_ilabel = aiter.Value().ilabel;
389    }
390  }
391  max_ilabel++;
392  return ESR_SUCCESS;
393}
394
395ESR_ReturnCode FstSplitOLabelsFromILabels( fst::StdVectorFst& fst_, int max_ilabels )
396{
397  fst::StdArc::StateId s = fst_.Start();
398  if (s == fst::kNoStateId)
399    return ESR_INVALID_ARGUMENT;
400  for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
401       !siter.Done(); siter.Next()) {
402    s = siter.Value();
403
404    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
405	!aiter.Done(); aiter.Next()) {
406      fst::StdArc arc = aiter.Value();
407      arc.olabel = arc.ilabel / max_ilabels;
408      arc.ilabel = arc.ilabel - arc.olabel*max_ilabels;
409      aiter.SetValue( arc);
410    }
411  }
412  return ESR_SUCCESS;
413}
414
415/* this is to replace the "fake" extra epsilon input labels, which were
416   put there to disambiguate homonyms */
417
418ESR_ReturnCode FstReplaceILabel( fst::StdVectorFst& fst_, int from_ilabel, int into_ilabel)
419{
420  fst::StdArc::StateId s = fst_.Start();
421  if (s == fst::kNoStateId)
422    return ESR_INVALID_ARGUMENT;
423  for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
424       !siter.Done(); siter.Next()) {
425    s = siter.Value();
426
427    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
428	!aiter.Done(); aiter.Next()) {
429      fst::StdArc arc = aiter.Value();
430      if(arc.ilabel == from_ilabel) {
431	arc.ilabel = into_ilabel;
432	aiter.SetValue( arc);
433      }
434    }
435  }
436  return ESR_SUCCESS;
437}
438
439/* this pushes the slot labels forward which gives an opportunity for
440   multiple instances of the slot to be merged, eg. lookup NAME
441   vs lookup contact NAME .. if in separate rules, then they will
442   merge thanks to using 3 arcs for the NAME */
443
444ESR_ReturnCode FstPushSlotLikeOLabels( fst::StdVectorFst& fst_, int myMin, int myMax)
445{
446  int i;
447  ESR_ReturnCode rc = ESR_SUCCESS;
448  char done_for_state[2*65536]; // hope this is enough!
449  memset( &done_for_state[0], 0, sizeof(done_for_state));
450
451  fst::StdArc::StateId s = fst_.Start();
452  for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
453       !siter.Done(); siter.Next()) {
454    s = siter.Value();
455
456    if(done_for_state[ s]) continue;
457    done_for_state[ s]++;
458
459    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
460	!aiter.Done(); aiter.Next()) {
461      fst::StdArc arc = aiter.Value();
462      if(arc.olabel >= myMin && arc.olabel < myMax) {
463	fst::StdArc::StateId s2 = arc.nextstate;
464	int slotId = arc.olabel;
465
466	if(verbose)
467	  std::cout << "info: FstPushSlotLikeOLabels() at state " << s << " arc ilabel " << arc.ilabel << " olabel " << arc.olabel << std::endl;
468
469	arc.ilabel = EPSILON_LABEL;
470	arc.olabel = EPSILON_LABEL;
471	arc.weight = 0; // zero weight
472	aiter.SetValue( arc);
473	done_for_state[ s2]++;
474	for(fst::MutableArcIterator<fst::StdVectorFst> aiter2(&fst_, s2);
475	    !aiter2.Done(); aiter2.Next()) {
476	  fst::StdArc arc2 = aiter2.Value();
477	  if(arc2.ilabel == WORD_BOUNDARY) {
478	    std::cout << "Error: FstPushSlotLikeOLabels() failing, there could be confusion between the slot (hack-pron) and a real-pron, the slot olabel may have been pushed by earlier fst operations!" << std::endl;
479	    rc = ESR_INVALID_STATE;
480	  } else
481	    arc2.ilabel = EPSILON_LABEL;
482	  arc2.olabel = slotId;
483	  aiter2.SetValue( arc2);
484	}
485      }
486    }
487  }
488
489  /* check */
490  int *num_pclg_arcs_using_slot = new int[myMax];
491  for(i=0;i<myMax;i++) num_pclg_arcs_using_slot[i] = 0;
492  for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
493       !siter.Done(); siter.Next()) {
494    s = siter.Value();
495
496    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
497	!aiter.Done(); aiter.Next()) {
498      fst::StdArc arc = aiter.Value();
499      if(arc.olabel >= myMin && arc.olabel < myMax)
500	num_pclg_arcs_using_slot[arc.olabel]++;
501    }
502  }
503  for(i=0; i<myMax; i++) {
504    if(num_pclg_arcs_using_slot[i] > 1) {
505      std::cout << "Error: SREC will not support multiply referred slots." << std::endl;
506      std::cout << "Error: Consider re-working your grammar to merge the references into one rule" << std::endl;
507      std::cout << "Error: or use two different slots" << std::endl;
508      rc = ESR_NOT_SUPPORTED;
509    }
510  }
511  delete [] num_pclg_arcs_using_slot;
512
513  return rc;
514}
515
516/* gets the range of slot numbers, myMin inclusive, myMax is exclusive */
517
518void get_slot_olabel_range( const fst::SymbolTable* syms, int* myMin, int* myMax)
519{
520  // assumes slots are at the top of the symbol table
521  fst::SymbolTableIterator iter( *syms);
522  *myMin = *myMax = 0;
523  for(iter.Reset(); !iter.Done(); iter.Next() ) {
524    const char* sym = iter.Symbol();
525    if ( is_slot_symbol( sym)) {
526      if(! (*myMin)) *myMin = iter.Value();
527      *myMax = iter.Value()+1;
528    }
529  }
530}
531
532/* SLOT_COUNTER_OFFSET
533   The cfst is used to turn phonemes into acoustic models, but we're using
534   special phonemes for the slots, and must here add those as pass through
535   in the Cfst, meaning that the slot marker must be unchanged after
536   composition.  To do that we find the places in the Cfst where silence is
537   used, and put the slot marker arcs in parallel.  This also causes the
538   models before the slot to assume silence to the right, and the models after
539   the slot to assume silence to the left, both of which are reasonable */
540
541ESR_ReturnCode FstAddSlotMarkersToCFst( fst::StdVectorFst& cfst_, int myMin, int myMax)
542{
543  int num_silence_arcs_in_cfst = 0;
544  int mimicPhonemeCode = SILENCE_CODE;
545
546  fst::StdArc::StateId s = cfst_.Start();
547  if (s == fst::kNoStateId)
548    return ESR_INVALID_ARGUMENT;
549  for (fst::StateIterator< fst::StdVectorFst> siter(cfst_);
550       !siter.Done(); siter.Next()) {
551    s = siter.Value();
552
553    for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&cfst_, s);
554	!aiter.Done(); aiter.Next()) {
555      fst::StdArc arc = aiter.Value();
556      if( arc.olabel == mimicPhonemeCode) {
557	num_silence_arcs_in_cfst++;
558	for(int i=myMin; i<myMax; i++)
559	  cfst_.AddArc( s, fst::StdArc(SLOT_COUNTER_OFFSET+i /*model*/,
560				       SLOT_COUNTER_OFFSET+i /*phoneme*/, 0.0, arc.nextstate));
561      }
562    }
563  }
564  fst::ArcSort(&cfst_, fst::StdOLabelCompare());
565  if(!num_silence_arcs_in_cfst)
566    return ESR_INVALID_ARGUMENT;
567  else
568    return ESR_SUCCESS;
569}
570
571/*
572 * make the graphs used by the recognition engine during the search.
573 */
574
575ESR_ReturnCode make_openfst_graphs(  GRXMLDoc* pDoc,
576				     const std::string& grxmlBasename,
577				     const char* vocabFilename,
578				     const char* cfstFilename,
579				     const char* modelmapFilename)
580{
581  SR_Vocabulary *vocab = 0;
582  ESR_ReturnCode rc;
583
584  fst::StdVectorFst l_fst;      // .L file, created from the .map and .ok
585
586  int stateSt, stateEn;
587  size_t len;
588  bool do_skip_interword_silence = false;
589  std::unordered_map<string,int> homonym_count;
590  int word_penalty = 0;
591
592  rc = SR_VocabularyLoad(vocabFilename, &vocab);
593  if (rc != ESR_SUCCESS) {
594    cerr << "Error: " <<  ESR_rc2str(rc) << endl;
595    return ESR_INVALID_ARGUMENT; // goto CLEANUP;
596  }
597
598  std::string word_penalty_str;
599  if( pDoc->findMeta(std::string("word_penalty"),word_penalty_str))
600    word_penalty = atoi((const char *)word_penalty_str.c_str());
601  else {
602    rc = ESR_SessionGetInt( L("CREC.Recognizer.wordpen"), &word_penalty);
603    if(rc != ESR_SUCCESS)
604      word_penalty = DEFAULT_WB_COST;
605    word_penalty_str = DEFAULT_WB_COST_STR;
606    pDoc->setMeta( std::string("word_penalty"), word_penalty_str) ;
607    cout << "using word_penalty " << word_penalty << endl;
608  }
609
610  std::string do_skip_interword_silence_str;
611  if( pDoc->findMeta(std::string("do_skip_interword_silence"), do_skip_interword_silence_str))
612    do_skip_interword_silence = ((do_skip_interword_silence_str != "true") ? false : true);
613
614  /*-----------------------------------------------------------------*
615   *   read the .map and .omap created from grxmlcompiler classes    *
616   *-----------------------------------------------------------------*/
617
618  std::string omapFilename = grxmlBasename + std::string(".omap");
619  std::string imapFilename = grxmlBasename + std::string(".map");
620
621  cout << "info: reading word symbols " << imapFilename << endl;
622  fst::SymbolTable *word_syms = fst::SymbolTable::ReadText(imapFilename);
623  if(!word_syms) {
624    cerr << "error: reading word_syms" << endl;
625    return ESR_INVALID_ARGUMENT;
626  }
627  cout << "info: reading parser symbols " << omapFilename << endl;
628  fst::SymbolTable *prsr_syms = fst::SymbolTable::ReadText(omapFilename);
629  if(!prsr_syms) {
630    cerr << "error: reading prsr_syms" << endl;
631    return ESR_INVALID_ARGUMENT;
632  }
633  cout << "info: reading model symbols " << modelmapFilename << endl;
634  fst::SymbolTable *model_syms = fst::SymbolTable::ReadText(modelmapFilename);
635  if(!prsr_syms) {
636    cerr << "error: reading prsr_syms" << endl;
637    return ESR_INVALID_ARGUMENT;
638  }
639  int max_model_sym = 0;
640  /* if(1) {
641     fst::SymbolTableIterator iter( *model_syms);
642     for(iter.Reset(); !iter.Done(); iter.Next() ) max_model_sym++; */
643
644  /*-----------------------------------------------------------------*
645   * create the .L pronunciations transducer                         *
646   *-----------------------------------------------------------------*/
647
648  // Adds state 0 to the initially empty FST and make it the start state.
649  stateSt = l_fst.AddState();
650  stateEn = l_fst.AddState();
651  l_fst.SetStart(stateSt);  // arg is state ID
652  l_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
653  l_fst.AddArc(stateEn, fst::StdArc(EPSILON_LABEL,EPSILON_LABEL,0.0,stateSt));
654
655  int num_slots = 0;
656  fst::SymbolTableIterator iter( *word_syms);
657  for(iter.Reset(); !iter.Done(); iter.Next() ) {
658    ESR_ReturnCode rc;
659    LCHAR prons[MAX_PRONS_LENGTH];
660    const char* phrase = iter.Symbol();
661    int wordId = iter.Value();
662    bool wordId_is_silence = false;
663    bool wordId_is_slot    = false;
664    /* script or scope marker, skip it */
665    /* if( is_scope_marker( phrase) || is_script_marker(phrase))
666       continue; */
667    /* epsilon */
668    if(!strcmp( phrase, SILENCE_PREFIX_WORD)
669       || !strcmp(phrase,SILENCE_SUFFIX_WORD))
670      wordId_is_silence = true;
671    else if( !strcmp( phrase, "eps") && wordId == 0)
672      continue;
673    /* rule markers */
674    else if( strstr( phrase, ".grxml@"))
675      continue;
676    /* script markers */
677    else if( phrase[0]=='_' && strspn(phrase+1,"0123456789")==strlen(phrase+1))
678      continue;
679    else if(is_slot_symbol(phrase)) {
680      cout << "SLOT>> " << phrase << endl;
681      wordId_is_slot = true;
682      num_slots++;
683    }
684
685    if(num_slots > MAX_NUM_SLOTS) {
686      std::cout << "Error: SREC may have trouble with this many slots! (" << num_slots << ")" << std::endl;
687      // return ESR_NOT_SUPPORTED;
688    }
689
690    if(wordId_is_slot) {
691      int stateP = stateSt, statePp1;
692      /* with 2 arcs, we have a better chance to merge the slot if used from
693	 different parts of the grammar, see FstPushSlotLikeOLabels elsewhere */
694      statePp1 = l_fst.AddState();
695      l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, wordId, 0.0, statePp1));
696      stateP = statePp1;
697      statePp1 = l_fst.AddState();
698      l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, EPSILON_LABEL, 0.0, statePp1));
699      stateP = statePp1;
700      l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
701    } else {
702      size_t len_used;
703      LCHAR *pron = 0, *p;
704      /* word is ok, get the pron */
705      len = MAX_PRONS_LENGTH;
706      rc = SR_VocabularyGetPronunciation(vocab, phrase, prons, &len);
707      if (rc != ESR_SUCCESS) {
708	LPRINTF( "ERROR: SR_VocabularyGetPronunciation(*,%s,*,*) returned %s\n", phrase, ESR_rc2str(rc));
709	SR_VocabularyDestroy(vocab);
710	return rc;
711      }
712      for(len_used=0; len_used<len; ) {
713	pron = &prons[0]+len_used;
714	len_used += LSTRLEN(pron)+1;
715	if( *pron == 0) break;
716	int stateP = stateSt, statePp1;
717	int olabel = wordId;
718	LPRINTF("%s : %s\n", phrase, pron);
719	/* main pronunciation */
720	for(p=pron; *p; p++) {
721	  statePp1 = l_fst.AddState();
722	  if(*p == OPTSILENCE_CODE) {
723	    l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, olabel, 0.0, statePp1));
724	    l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, olabel, 0.0, statePp1));
725	  } else {
726	    l_fst.AddArc(stateP, fst::StdArc( *p, olabel, 0.0, statePp1));
727	  }
728	  stateP = statePp1;
729	  olabel = EPSILON_LABEL;
730	}
731	/* add epsilons if this is a homonym */
732	string pron_string = pron;
733	std::unordered_map<string,int>::const_iterator it = homonym_count.find( pron_string);
734	if(it == homonym_count.end()) {
735	  homonym_count[ pron_string] = 0;
736	} else {
737	  homonym_count[ pron_string] = homonym_count[ pron_string]+1;
738	}
739	int extra_epsilons_needed = homonym_count[ pron_string] ;
740	if(wordId_is_silence) extra_epsilons_needed = 0;
741	for(int i=0;i<extra_epsilons_needed;i++) {
742	  statePp1 = l_fst.AddState();
743	  l_fst.AddArc(stateP, fst::StdArc( EXTRA_EPSILON_LABEL, olabel, 0.0, statePp1));
744	  stateP = statePp1;
745	}
746	/* add optional silence after each word */
747	if(!do_skip_interword_silence && !wordId_is_silence && !wordId_is_slot) {
748	  statePp1 = l_fst.AddState();
749	  l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, EPSILON_LABEL, 0.0, statePp1));
750	  l_fst.AddArc(statePp1, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
751	  l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
752	} else if(wordId_is_silence && !strcmp(phrase, SILENCE_SUFFIX_WORD)) {
753	  /* SILENCE_SUFFIX_WORD does not need a terminal .wb */
754	  l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, EPSILON_LABEL, 0.0, stateEn));
755	} else {
756	  l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
757	}
758      } // loop over multiple prons
759    } // slot vs non-slot
760  } /* .map (word_syms) iterator */
761
762  std::string lfstFilename = grxmlBasename + ".L";
763  // We can save this FST to a file with:
764  if(debug) l_fst.Write(lfstFilename.c_str());
765
766  /*-----------------------------------------------------------------*
767   *   read the .P.txt created from grxmlcompiler classes            *
768   *-----------------------------------------------------------------*/
769
770  std::string ptxtFilename = grxmlBasename + std::string(".P.txt");
771  std::ifstream istrm(ptxtFilename.c_str());
772  if(!istrm) {
773    cerr << "error: reading ptxtFilename" << endl;
774    return ESR_INVALID_ARGUMENT;
775  }
776
777  cout << "info: reading parser from text " << ptxtFilename << endl;
778  fst::FstReader<fst::StdArc> reader( istrm, ptxtFilename, word_syms, prsr_syms,
779				      /*state_syms*/ NULL,
780				      /*acceptor*/ false,
781				      /*ikeep*/ false,
782				      /*okeep*/ false,
783				      /*nkeep*/ false);
784  // .P file, created from the .P.txt and .omap
785  const fst::StdVectorFst& p_fst = reader.Fst();
786
787  /*-----------------------------------------------------------------*
788   *   make the helper FSTs                                          *
789   *-----------------------------------------------------------------*/
790
791  cout << "info: creating helper fsts" << endl;
792  fst::StdVectorFst prefix_fst;
793  fst::StdVectorFst suffix_fst;
794  fst::StdVectorFst eps_fst;
795  // int eps_word = StrToId("eps", word_syms, "arc ilabel");
796  int pau_word = StrToId(SILENCE_PREFIX_WORD, word_syms, "arc ilabel");
797  int pau2_word = StrToId(SILENCE_SUFFIX_WORD, word_syms, "arc ilabel");
798  if(pau_word < 0 || pau2_word < 0)
799    return ESR_INVALID_ARGUMENT;
800
801  stateSt = prefix_fst.AddState();
802  stateEn = prefix_fst.AddState();
803  prefix_fst.SetStart(stateSt);  // arg is state ID
804  prefix_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
805  prefix_fst.AddArc(stateSt, fst::StdArc(pau_word, pau_word, 0.0, stateEn));
806
807  stateSt = suffix_fst.AddState();
808  stateEn = suffix_fst.AddState();
809  suffix_fst.SetStart(stateSt);  // arg is state ID
810  suffix_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
811  suffix_fst.AddArc(stateSt, fst::StdArc(pau2_word, pau2_word, 0.0, stateEn));
812
813  stateSt = eps_fst.AddState();
814  stateEn = stateSt; // stateEn = eps_fst.AddState();
815  eps_fst.SetStart(stateSt);  // arg is state ID
816  eps_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
817  // eps_fst.AddArc(stateSt, fst::StdArc(eps_word, eps_word, 0.0, stateEn));
818
819  /*-----------------------------------------------------------------*
820   *    make Grev2.det.txt                                           *
821   *-----------------------------------------------------------------*/
822  cout << "info: creating reverse g fst" << endl;
823  fst::StdVectorFst g_fst = p_fst;   // this is a copy!!
824  fst::StdVectorFst grev_fst;        // reversed
825  fst::StdVectorFst grev_min_fst;    // eps removed and minimized
826  fst::StdVectorFst grev_det_fst;
827
828  fst::Project(&g_fst, fst::PROJECT_INPUT);
829  if(debug) g_fst.Write( grxmlBasename + ".G");
830  fst::Reverse( g_fst, &grev_fst);
831  if(debug) grev_fst.Write( grxmlBasename + ".Grev");
832  fst::RmEpsilon( &grev_fst, /*connect?*/ true );
833  if(debug) grev_fst.Write( grxmlBasename + ".Grevrme");
834  fst::Determinize(grev_fst, &grev_det_fst);
835  if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedet");
836  if(1) fst::Minimize(&grev_det_fst);
837  if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin");
838  fst::Concat( &eps_fst, grev_det_fst);
839  grev_det_fst = eps_fst;
840  if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin2");
841  std::string grevFilename = grxmlBasename + std::string(".Grev2.det.txt");
842
843  cout << "info: writing reverse G fst as text " << grevFilename << endl;
844  ostream* ostrm1 = new ofstream( grevFilename.c_str(), ios_base::out);
845  fst::FstPrinter<fst::StdArc> printer1( grev_det_fst,
846					word_syms, word_syms,
847					 NULL, /*acceptor?*/ true);
848  printer1.Print( ostrm1, grevFilename);
849  delete ostrm1;
850
851  /*-----------------------------------------------------------------*
852   *    make PCLG.txt                                                *
853   *-----------------------------------------------------------------*/
854
855  fst::StdVectorFst* c_fst;
856  fst::StdVectorFst lg_fst;
857  fst::StdVectorFst clg_fst;
858  fst::StdVectorFst clg_det_fst;
859
860  cout << "info: reading model fst " << cfstFilename << endl;
861  c_fst = fst::StdVectorFst::Read( cfstFilename);
862
863  int slot_olabel_min=0, slot_olabel_max=0; // [min,max) .. ie excludes max
864  get_slot_olabel_range( word_syms, &slot_olabel_min, &slot_olabel_max);
865  if(slot_olabel_max > MAX_NUM_SLOTS)
866    std::cout << "Error: SREC may have trouble with this many slots! (" << slot_olabel_max << ")" << std::endl;
867
868  /* add slot markers as if they were silence phonemes, this makes the context
869     for them as if the slot were silence, which is reasonable, although another
870     reasonable thing would be to allow all contexts.  Adding the true context
871     only would add complexity and slow down word addition too much. */
872
873  rc = FstAddSlotMarkersToCFst( *c_fst, slot_olabel_min, slot_olabel_max);
874  if(rc) return rc;
875
876  fst::Concat( &g_fst, suffix_fst);
877  fst::Concat( &prefix_fst, g_fst);
878  if(debug) prefix_fst.Write( grxmlBasename + ".G2");
879  fst::ComposeOptions copts( /*connect?*/ true);
880
881  fst::ArcSort(&l_fst, fst::StdOLabelCompare());
882  fst::ArcSort(&prefix_fst, fst::StdILabelCompare());
883
884  fst::Compose(l_fst, prefix_fst, &lg_fst, copts);
885  if(debug) lg_fst.Write( grxmlBasename + ".LG");
886  fst::ArcSort(&lg_fst, fst::StdILabelCompare());
887  if(debug) lg_fst.Write( grxmlBasename + ".LG2");
888
889  fst::RmEpsilon( &lg_fst, /*connect?*/ true );
890  if(debug) lg_fst.Write( grxmlBasename + ".LGrme");
891  fst::Determinize( lg_fst, &clg_fst); // clg_fst is really lg_det_fst!
892  if(debug) clg_fst.Write( grxmlBasename + ".LGrmedet");
893  rc = FstReplaceILabel( clg_fst, EXTRA_EPSILON_LABEL, EPSILON_LABEL);
894  fst::Compose( *c_fst, clg_fst, &clg_det_fst, copts);
895  if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet");
896
897  rc = FstMergeOLabelsToILabels_GetMax( clg_det_fst, /*int&*/max_model_sym);
898  if(verbose)
899    cout << "info: merging into ilabels I=i+" << max_model_sym << "*o" << endl;
900  rc = FstMergeOLabelsToILabels( clg_det_fst, max_model_sym);
901  if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet2");
902  fst::Minimize( &clg_det_fst);
903  if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet3");
904  if(verbose)
905    cout << "info: splitting from ilabels" << endl;
906  rc = FstSplitOLabelsFromILabels( clg_det_fst, max_model_sym);
907  if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet4");
908
909  rc = FstPushSlotLikeOLabels( clg_det_fst, slot_olabel_min, slot_olabel_max);
910  if(rc != ESR_SUCCESS)
911        std::cout << "Error: FstPushSlotLikeOLabels() failed" << std::endl;
912  if(debug) clg_det_fst.Write( grxmlBasename + ".CLG");
913
914  std::string pclgFilename = grxmlBasename + ".PCLG.txt";
915  ostream* ostrm = new ofstream( pclgFilename.c_str(), ios_base::out);
916  fst::FstPrinter<fst::StdArc> printer( clg_det_fst,
917					model_syms, word_syms,
918					NULL, /*acceptor?*/ false);
919  printer.Print( ostrm, pclgFilename);
920  delete ostrm;
921
922  delete c_fst;
923  delete word_syms;  word_syms = NULL;
924  delete prsr_syms;  prsr_syms = NULL;
925  delete model_syms; model_syms = NULL;
926
927  /*-----------------------------------------------------------------*
928   *    cleanup                                                      *
929   *-----------------------------------------------------------------*/
930
931  if(vocab) {
932    SR_VocabularyDestroy(vocab);
933    vocab = NULL;
934  }
935
936  return rc;
937
938}
939
940
941