1/*---------------------------------------------------------------------------*
2 *  make_ve_grammar.c                                                            *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "plog.h"
26#include "passert.h"
27#include "duk_args.h"
28#include "duk_err.h"
29#include "ptrd.h"
30
31#include "srec_arb.h"
32#include "simapi.h"
33
34#include "PFileSystem.h"
35#include "PANSIFileSystem.h"
36
37#define MAX_FILE_NAME_LEN 64
38#define DEFAULT_WWTRIPHONE_SILMODE 3
39
40
/*
 * Decide whether an allophone name denotes a word-specific phoneme.
 *
 * Only the first character of allo_phoneme is inspected. A word-specific
 * phoneme must not be enrolled into the ve grammar, so callers skip any
 * name for which this returns 1.
 *
 * allo_phoneme must be non-NULL and point to at least one readable char.
 * Returns 1 if the first character is one of the marker characters listed
 * below, 0 otherwise (an empty string yields 0).
 */
int ws_verify(char * allo_phoneme){
  /* Leading characters that mark a word-specific phoneme.
   * NOTE(review): '6' is the only digit not listed -- confirm intentional. */
  static const char ws_markers[] = "(.012345789=>BFGHKMQRWXY[\\|+";
  const char lead = allo_phoneme[0];

  /* strchr() would match the table's terminating NUL, so an empty name
   * has to be rejected explicitly. */
  if (lead == '\0')
    return 0;

  return (strchr(ws_markers, lead) != NULL) ? 1 : 0;
}
75
76int main (int argc, char **argv)
77{
78	int i;
79	char filen[MAX_FILE_NAME_LEN]="";
80	CA_Arbdata *ca_arbdata = NULL;     /* new, link btw acc/syn */
81	char *arbfile = NULL;
82	char *base = NULL;
83
84	FILE* pfile;
85	FILE* pFile_PCLG;
86	FILE* pFile_map;
87	FILE* pFile_P;
88        FILE* pFile_Grev;
89	FILE* pFile_script;
90
91	int num_hmms;
92	int num_wd = 0;
93	int script_line = 0;
94	int cflag = 0, fnode = 0;
95	int sil_model = DEFAULT_WWTRIPHONE_SILMODE;
96	int rc;
97	srec_arbdata *allotree = NULL;
98
99	nodeID startNode       = 0;
100	nodeID pauEndNode      = 1;
101	nodeID modelStartNode  = 2;
102	nodeID modelEndNode    = 3;
103	nodeID pau2StartNode   = 4;
104	nodeID pau2EndNode     = 5;
105	nodeID endNode         = 6;
106
107	/* initial memory */
108	CHKLOG(rc, PMemInit());
109
110	if(argc<5){
111	  printf("USAGE: -swiarb <swiarb file> -base <output base name>\n");
112	  exit(1);
113	}
114
115
116	for(i=1; i<argc; i++) {
117	  if(!strcmp(argv[i],"-swiarb")) {
118	    arbfile = argv[++i];
119	    printf("using swiarb from file %s\n", arbfile);
120	  }
121	  else if(!strcmp(argv[i],"-base")){
122	    base = argv[++i];
123	  }
124	  else {
125	    printf("error_usage: argument [%s]\n", argv[i]);
126	    exit(1);
127	  }
128	}
129
130	/* check arb file exist*/
131	if ( (pfile = fopen(arbfile, "r")) != NULL ){
132	    fclose(pfile);
133	}
134	else{
135	  printf("ERROR: the specified swiarb file does not exist.\n");
136	  exit(1);
137	}
138
139
140	ca_arbdata = CA_LoadArbdata(arbfile);
141
142	allotree = (srec_arbdata*)ca_arbdata;
143	num_hmms = allotree->num_hmms;
144
145
146	/* Dump out VE .PCLG.txt, .Grev2.det.txt, .P.txt, .script and .map files; .P.txt, .script and .map are not necessary for voice enroll, so just dump out to create .g2g file. Xufang */
147
148	printf("Dumping out VE files\n");
149
150	strcat(filen,base);
151	strcat(filen,".PCLG.txt");
152	pFile_PCLG = fopen(filen,"w");
153
154	filen[0]='\0';
155	strcat(filen,base);
156	strcat(filen,".map");
157        pFile_map = fopen(filen,"w");
158
159        filen[0]='\0';
160        strcat(filen,base);
161        strcat(filen,".P.txt");
162        pFile_P = fopen(filen,"w");
163
164        filen[0]='\0';
165        strcat(filen,base);
166        strcat(filen,".Grev2.det.txt");
167        pFile_Grev = fopen(filen,"w");
168
169        filen[0]='\0';
170        strcat(filen,base);
171        strcat(filen,".script");
172        pFile_script = fopen(filen,"w");
173
174        fprintf(pFile_Grev,"0\t1\teps\t80\n");
175        fprintf(pFile_Grev,"1\t2\t%s.grxml@VE_Words\n",base);
176
177	fprintf(pFile_map,"eps %d\n",num_wd++);
178        fprintf(pFile_map,"%s.grxml@ROOT %d\n",base,num_wd++);
179        fprintf(pFile_map,"%s.grxml@VE_Words %d\n",base,num_wd++);
180        fprintf(pFile_map,"-pau- %d\n",num_wd++);
181        fprintf(pFile_map,"-pau2- %d\n",num_wd++);
182        fprintf(pFile_map,"@VE_Words %d\n",num_wd++);
183
184        fprintf(pFile_P,"0\t1\teps\t{\t\n");
185        fprintf(pFile_P,"1\t2\teps\t{\t\n");
186        fprintf(pFile_P,"2\t3\teps\t{\t\n");
187        fprintf(pFile_P,"2\t4\teps\t{\t\n");
188        fprintf(pFile_P,"3\t5\t%s.grxml@VE_Words\t%s.grxml@VE_Words\t\n",base,base);
189        fprintf(pFile_P,"4\t8\teps\t{\t\n");
190        fprintf(pFile_P,"5\t6\teps\t_3\t\n");
191        fprintf(pFile_P,"6\t7\teps\tVE_Words}\t\n");
192        fprintf(pFile_P,"7\t9\teps\t_2\t\n");
193
194        fprintf(pFile_script,"%d type=SENT.type;meaning=SENT.V;\n",script_line++);
195        fprintf(pFile_script,"%d type='NEW';V=UTT.V;\n",script_line++);
196        fprintf(pFile_script,"%d type='OLD';V=VE_Words.V;\n",script_line++);
197	fprintf(pFile_script,"%d V=UTT.V?UTT.V:'--';\n",script_line++);
198        fprintf(pFile_script,"%d V=PHONEME.V\n",script_line++);
199
200	for(i=0;i<num_hmms;i++){
201	  if(ws_verify(allotree->hmm_infos[i].name))
202	    continue;
203	  if(!strcmp(allotree->hmm_infos[i].name,"#")){
204	    sil_model = i;
205	    fprintf(pFile_PCLG,"%d\t%d\thmm%d_#sil#\t-pau-\n", startNode, pauEndNode, i);
206            fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", pauEndNode, modelStartNode);
207          }
208          else{
209            if(strlen(allotree->hmm_infos[i].name)>0){
210	      if(cflag==0){
211		fnode = i;
212		cflag = 1;
213	      }
214              fprintf(pFile_PCLG,"%d\t%d\thmm%d_%s\twd_hmm%d_%s\t40\n", modelStartNode, modelEndNode,
215		      i,allotree->hmm_infos[i].name,i,allotree->hmm_infos[i].name);
216	      fprintf(pFile_map,"wd_hmm%d_%s %d\n",i,allotree->hmm_infos[i].name,num_wd++);
217	      fprintf(pFile_Grev,"1\t3\twd_hmm%d_%s\n",i,allotree->hmm_infos[i].name);
218	      fprintf(pFile_P,"8\t10\twd_hmm%d_%s\t_%d\t\n",i,allotree->hmm_infos[i].name,script_line);
219	      fprintf(pFile_script,"%d V=V?V:'';V=V+'wd_hmm%d_%s';\n",script_line++,i,allotree->hmm_infos[i].name);
220	    }
221          }
222	}
223
224        fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", modelEndNode, modelStartNode);
225        fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", modelEndNode, pau2StartNode);
226        fprintf(pFile_PCLG,"%d\t%d\thmm%d_#sil#\t-pau2-\n",pau2StartNode, pau2EndNode, sil_model);
227        fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", pau2EndNode, endNode);
228        fprintf(pFile_PCLG,"%d\n", endNode);
229
230        fprintf(pFile_Grev,"2\n");
231	for(i=fnode;i<num_hmms;i++){
232          if(ws_verify(allotree->hmm_infos[i].name))
233            continue;
234	  fprintf(pFile_Grev,"3\t3\twd_hmm%d_%s\t40\n",i,allotree->hmm_infos[i].name);
235	}
236        fprintf(pFile_Grev,"3\n");
237
238        fprintf(pFile_P,"9\t11\teps\tSENT}\t\n");
239        fprintf(pFile_P,"10\t12\teps\tPHONEME}\t\n");
240        fprintf(pFile_P,"11\t13\teps\t_0\t\n");
241        fprintf(pFile_P,"12\t14\teps\t_4\t\n");
242        fprintf(pFile_P,"13\t15\teps\tROOT}\t\n");
243        fprintf(pFile_P,"14\t16\teps\teps\t\n");
244        fprintf(pFile_P,"15\t\n");
245        fprintf(pFile_P,"16\t17\teps\tUTT}\t\n");
246        fprintf(pFile_P,"16\t8\teps\t{\t\n");
247        fprintf(pFile_P,"17\t9\teps\t_1\t\n");
248
249	fclose(pFile_PCLG);
250	printf("Creating %s.PCLG.txt...\n",base);
251        fclose(pFile_Grev);
252        printf("Creating %s.Grev2.det.txt...\n",base);
253        fclose(pFile_map);
254	printf("Creating %s.map...\n",base);
255        fclose(pFile_P);
256	printf("Creating %s.P.txt...\n",base);
257	fclose(pFile_script);
258	printf("Creating %s.script...\n",base);
259	printf("SUCCESS!\n");
260
261
262  CA_FreeArbdata( ca_arbdata);
263
264  PMemShutdown();
265  return 0;
266CLEANUP:
267  return 1;
268}
269
270