1/*---------------------------------------------------------------------------*
2 *  srec_arb.c                                                               *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include "pstdio.h"
21#include "passert.h"
22#include "portable.h"
23
24#include<string.h>
25
26#include"portable.h"
27
28#include"sizes.h"
29#include"hmm_desc.h"
30#include"search_network.h"     /* for EPSILON_OFFSET */
31#include"srec_arb.h"
32
/* Compile-time switch for the "#if DEBUG_PRONS" tracing code in this file. */
#define DEBUG_PRONS       0

/* Debug hook that expands to nothing: whatever is passed as X is discarded
   by the preprocessor. */
#define IF_DEBUG_PRONS(X)

/* Revision-control identification string.  The never-taken "0 ? ... :" arm
   mentions rcsid inside its own initializer -- presumably so that the
   otherwise unreferenced variable does not draw an "unused" warning. */
static const char *rcsid =
  0 ? (const char *) &rcsid
    : "$Id: srec_arb.c,v 1.27.4.15 2007/12/14 22:03:51 dahan Exp $";
38
39int question_check(srec_question* quest, phonemeID lphon, phonemeID cphon, phonemeID rphon)
40{
41  asr_int16_t a = 0, b = 0;
42  /* phon = a*16+b */
43  if (quest->qtype == QUESTION_LEFT)
44  {
45    BIT_ADDRESS(lphon, a, b);
46  }
47#if USE_WWTRIPHONE
48  else if(quest->qtype == QUESTION_WBLEFT) {
49    if( lphon == WBPHONEME_CODE) return ANSWER_PASS;
50    else return ANSWER_FAIL;
51  }
52  else if(quest->qtype == QUESTION_WBRIGHT) {
53    if( rphon == WBPHONEME_CODE) return ANSWER_PASS;
54    else return ANSWER_FAIL;
55  }
56#endif
57  else
58  {
59    ASSERT(quest->qtype == QUESTION_RIGHT);
60    BIT_ADDRESS(rphon, a, b);
61  }
62  return (quest->membership_bits[a] & b ? ANSWER_PASS : ANSWER_FAIL);
63}
64
65/* get model id for phoneme in context */
66int get_modelid_for_pic(srec_arbdata* allotree, phonemeID lphon, phonemeID cphon, phonemeID rphon)
67{
68  int ans;
69  tree_node* tnode = allotree->pdata[cphon].model_nodes;
70  while (tnode->node.quest_index >= 0)
71  {
72    ans = question_check(&allotree->questions[tnode->node.quest_index],
73                         lphon, cphon, rphon);
74    tnode = (ans == ANSWER_FAIL ? (tree_node*)tnode->node.fail : (tree_node*)tnode->node.pass);
75  }
76  return tnode->term.pelid;
77}
78
79void read_questions(srec_question** pquestions, asr_int16_t num_questions, char **buffer, PFile *fp)
80{
81  srec_question *q;
82
83  q = *pquestions = (srec_question*)(*buffer);
84
85  *buffer += num_questions * sizeof(srec_question);
86  while (num_questions-- > 0)
87  {
88    pfread(&(q->qtype), sizeof(asr_uint16_t), 1, fp);
89    pfread(&(q->membership_bits), sizeof(asr_uint16_t), PSET_BIT_ARRAY_SIZE, fp);
90    q++;
91  }
92}
93
94/* we need to handle the interword silence here somehow,
   proposal:  we create one supermodel which combines the model
   preceding silence and the model that follows silence, so
97   "boston&mass" .. we'll have "n&m" as a single "supermodel",
98   we'll put that supermodel in the graph but then overlay the
99   actual models there on.   the overlay only needs to be done
100   once.  The number of possible supermodels is 113655 which is
101   larger than what an ilabel can hold, the solution to that is
102   to encode also on the "cost" of the supermodel arc.
103
104   /------SUPER(a&b)---\
105   o----a1---o----b1----o
106   \--a2--o--#--o--b2--/
107
108   cost is 16bits, ilabel is 16bits
109   on ilabel we encode the a1,(a2-a1)
110   on cost we encode b1,(b2-b1)
111   ... a1,b1 use 9 bits (400 models)
112   ... deltas use 6 bits (+/-31 range)
113   That leaves 1 bit left over, which is the top bit to signal this encoding,
114   and make sure the cost is very high.
115*/
116
117
118int get_modelids_for_pron(srec_arbdata* allotree,
119                          const char* phonemes, int num_phonemes,
120                          modelID* acoustic_model_ids)
121{
122  int i;
123  modelID modelid;
124  phonemeID lphon, cphon, rphon;
125
126  if( allotree == NULL)
127	  return 1;
128
129  if (num_phonemes == 0)
130    return 0;
131
132  IF_DEBUG_PRONS(printf("%s get_modelids_for_pron pronunciation %s\n", __FILE__, (char*)phonemes));
133
134#if !USE_WWTRIPHONE
135  lphon = (phonemeID)allotree->phoneme_index[ SILENCE_CODE];
136  cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]];
137#else
138  lphon = WBPHONEME_CODE; //(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE];
139  cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]];
140#endif
141  if(cphon == MAXphonemeID)
142    return 1; /* bad phoneme */
143  for(i=0; i<num_phonemes; i++) {
144#if !USE_WWTRIPHONE
145    rphon = (i==num_phonemes-1 ?
146	     (phonemeID)allotree->phoneme_index[ SILENCE_CODE] :
147	     (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ;
148#else
149    rphon = (i==num_phonemes-1 ?
150	     WBPHONEME_CODE /*(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE] */ :
151	     (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ;
152#endif
153    if (rphon == MAXphonemeID)
154      return 1; /* bad phoneme */
155
156    modelid = (modelID) get_modelid_for_pic(allotree, lphon, cphon, rphon);
157    acoustic_model_ids[i] = modelid;
158#if DEBUG_PRONS
159    printf("%c%c%c hmm%d states", allotree->pdata[lphon].code,
160           allotree->pdata[cphon].code, allotree->pdata[rphon].code,
161           acoustic_model_ids[i]);
162    for (j = 0; j < allotree->hmm_infos[modelid].num_states; j++)
163      printf(" %d", allotree->hmm_infos[modelid].state_indices[j]);
164    printf("\n");
165#endif
166    lphon = cphon;
167    cphon = rphon;
168  }
169  return 0;
170}
171
172/*-----------------------------------------------------------------------*
173 *                                                                       *
174 * phoneme data stream functions                                         *
175 *                                                                       *
176 *-----------------------------------------------------------------------*/
177
178tree_node* read_tree_node_f(char **buffer, PFile *fp)
179{
180  tree_node* tnode = (tree_node*) * buffer;
181  pfread(&(tnode->node.quest_index), sizeof(asr_int16_t), 1, fp);
182  pfread(&(tnode->term.pelid), sizeof(asr_int16_t), 1, fp);
183  pfread(&(tnode->node.fail), sizeof(tree_branch_info*), 1, fp);
184  pfread(&(tnode->node.pass), sizeof(tree_branch_info*), 1, fp);
185
186  /* because tree_node is a union, the actual size maybe large than we have read */
187  ASSERT(sizeof(asr_int16_t)*2 + sizeof(tree_branch_info *)*2 == sizeof(tree_node));
188
189  *buffer += sizeof(tree_node);
190  if (tnode->node.quest_index >= 0)
191  {
192    tnode->node.fail = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
193    tnode->node.pass = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
194  }
195  return tnode;
196}
197
198void read_phoneme_data(phoneme_data** pdata, asr_int16_t num_phonemes, char **buffer,  PFile *fp)
199{
200  int i, ptr;
201  phoneme_data *pd;
202
203  pd = *pdata = (phoneme_data*)(*buffer);
204
205  for (i = 0; i < num_phonemes; i++)
206  {
207    pfread(&(pd->name), sizeof(char), MAX_PHONEME_NAME_LEN, fp);
208    pfread(&(pd->code), sizeof(asr_uint16_t), 1, fp);
209    pfread(&ptr, sizeof(asr_int16_t), 1, fp);
210    pfread(&(pd->model_nodes), sizeof(tree_node *), 1, fp);
211    pfread(&(pd->num_states), sizeof(asr_uint16_t), 1, fp);
212    pfread(&ptr, sizeof(asr_int16_t), 1, fp);
213    pfread(&(pd->state_nodes), sizeof(tree_node *), MAX_PHONE_STATES, fp);
214    pd++;
215  }
216  ASSERT(sizeof(phoneme_data) == MAX_PHONEME_NAME_LEN + sizeof(asr_int16_t)*4 + sizeof(tree_node *)*(1 + MAX_PHONE_STATES));
217  (*buffer) += num_phonemes * sizeof(phoneme_data) / BYTES_PER_ATOM;
218  ASSERT((char *)pd == *buffer);
219
220  for (i = 0; i < num_phonemes; i++)
221  {
222#if STATE_NODES_NEEDED_AT_RUNTIME
223    for (j = 0; j < (*pdata)[i].num_states; j++)
224      (*pdata)[i].state_nodes[j] = read_tree_node_f(buffer);
225#endif
226    (*pdata)[i].model_nodes = read_tree_node_f(buffer, fp);
227  }
228}
229
230/*-----------------------------------------------------------------------*
231 *                                                                       *
232 * hmm info stream functions                                             *
233 *                                                                       *
234 *-----------------------------------------------------------------------*/
235
236void read_hmminfos(srec_arbdata* allotree, char** buffer, PFile *fp)
237{
238  int i, offset, num_atoms, num_hmms = allotree->num_hmms, ptr;
239  HMMInfo* hmm_infos;
240  hmm_infos = (HMMInfo*) * buffer;
241  num_atoms = sizeof(HMMInfo) * num_hmms / BYTES_PER_ATOM;
242  (*buffer) += num_atoms;
243  for (i = 0; i < num_hmms; i++)
244  {
245    pfread(&hmm_infos[i].name[0], sizeof(char), MAX_PHONEME_NAME_LEN, fp);
246    pfread(&(hmm_infos[i].num_states), sizeof(asr_int16_t), 1, fp);
247    pfread(&ptr, sizeof(asr_int16_t), 1, fp);
248    pfread(&(hmm_infos[i].state_indices), sizeof(asr_int16_t*), 1, fp);
249  }
250
251  /* through this and comments below, I was trying to keep the state_indices
252     self-contained, to calculate offsets from saved pointers, but it doesn't
253     appear to work;  so we resort to recovering state offsets from num_states
254     state_indices = hmm_infos[0].state_indices; */
255  pfread(*buffer, sizeof(asr_int16_t), allotree->num_states, fp);
256
257  hmm_infos[0].state_indices = (asr_int16_t*) * buffer;
258  num_atoms = sizeof(hmm_infos[0].state_indices[0]) * allotree->num_states / BYTES_PER_ATOM;
259  (*buffer) += num_atoms;
260
261  for (i = 0, offset = 0; i < num_hmms; i++)
262  {
263    /* int j,offset2 = hmm_infos[i].state_indices - state_indices; */
264    hmm_infos[i].state_indices = hmm_infos[0].state_indices + offset;
265    if (i >= HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1)
266      offset += hmm_infos[i].num_states;
267    /* printf("offset %d %d offset2 %d\n", i, offset, offset2);
268       printf("hmm %d %x states", i, hmm_infos[i].state_indices);
269       for(j=0; j<hmm_infos[i].num_states; j++)
270       printf(" %d", hmm_infos[i].state_indices[j]);
271       printf("\n"); */
272
273  }
274  allotree->hmm_infos = hmm_infos;
275}
276
277/*-----------------------------------------------------------------------*
278 *                                                                       *
279 * top level stream functions                                            *
280 *                                                                       *
281 *-----------------------------------------------------------------------*/
282
283int read_arbdata_from_stream(srec_arbdata** pallotree, char* filename, int buffer_size)
284{
285  char* pbuf;
286  srec_arbdata* allotree;
287  int ptr;
288
289  PFile* fp;
290  long fpos;
291  char* buffer;
292
293  fp = file_must_open(NULL, (char*)filename, L("rb"), ESR_TRUE);
294  if(!fp) {
295    *pallotree = NULL;
296    return 0;
297  }
298  pfseek(fp, 0, SEEK_END);
299  fpos = pftell(fp);
300  buffer = (char*)CALLOC_CLR(fpos, sizeof(char), "srec.arbdata");
301  pfseek(fp, 0, SEEK_SET);
302
303  /* buffer_size = fpos; */
304  pbuf = buffer;
305
306  allotree = (srec_arbdata*)buffer;
307  /* ASSERT(allotree->image_size == buffer_size); hack for now */
308
309  /* read structure arbdata from file */
310  pfread(&allotree->image, sizeof(char *), 1, fp);             /* image */
311  pfread(&allotree->image_size, sizeof(asr_uint16_t), 1, fp);       /* image_szie */
312  pfread(&allotree->num_phonemes, sizeof(asr_int16_t), 1, fp);      /* num_phonemes */
313  pfread(&allotree->pdata, sizeof(phoneme_data *), 1, fp);     /* pdate */
314  pfread(&allotree->num_questions, sizeof(asr_int16_t), 1, fp);     /* num_questions */
315
316  pfread(&ptr, sizeof(asr_int16_t), 1, fp);     /* alignment problem */
317
318  pfread(&allotree->questions, sizeof(srec_question *), 1, fp);/* questions */
319  pfread(&allotree->num_states, sizeof(asr_int16_t), 1, fp);        /* num_states */
320  pfread(&allotree->num_hmms, sizeof(asr_int16_t), 1, fp);          /* num_hmms */
321  pfread(&allotree->hmm_infos, sizeof(HMMInfo *), 1, fp);      /* hmm_infos */
322  pfread(allotree->phoneme_index, sizeof(asr_uint16_t), NUM_PHONEME_INDICES, fp); /* phoneme_index */
323
324  allotree->image = buffer;
325
326  pbuf += sizeof(*allotree) / BYTES_PER_ATOM;
327  pbuf -= sizeof(void*); // PCPinfo
328
329  ASSERT(pftell(fp) == pbuf - buffer);
330
331#ifdef SREC_ENGINE_VERBOSE_LOGGING
332  PLogMessage("read allotree done %d\n", (int)(pbuf - buffer));
333#endif
334
335  allotree->questions = (srec_question *)pbuf;
336  read_questions(&allotree->questions, allotree->num_questions, &pbuf, fp);
337#ifdef SREC_ENGINE_VERBOSE_LOGGING
338  PLogMessage("read_questions done %d\n", (int)(pbuf - buffer));
339#endif
340  ASSERT(pftell(fp) == pbuf - buffer);
341
342  /* readme phoneme_data */
343  read_phoneme_data(&allotree->pdata, allotree->num_phonemes, &pbuf, fp);
344#ifdef SREC_ENGINE_VERBOSE_LOGGING
345  PLogMessage("read_phoneme_data done %d\n", (int)(pbuf - buffer));
346#endif
347  ASSERT(pftell(fp) == pbuf - buffer);
348
349  read_hmminfos(allotree, &pbuf, fp);
350#ifdef SREC_ENGINE_VERBOSE_LOGGING
351  PLogMessage("read_hmminfos done %d\n", (int)(pbuf - buffer));
352#endif
353  ASSERT(pftell(fp) == pbuf - buffer);
354
355  *pallotree = allotree;
356#ifdef SREC_ENGINE_VERBOSE_LOGGING
357  PLogMessage("read arbdata image size %d\n", allotree->image_size);
358#endif
359  ASSERT(pbuf - buffer == buffer_size);
360
361  pfclose(fp);
362
363  return 0;
364}
365
/**
 * Summary record filled in while walking one decision tree.
 *
 * Within this file only nnodes and the four low/high fields are touched
 * (by traverse_tree() and its callers); size, phoneme, node_pos and
 * node_off are not referenced by any code visible here.
 */
typedef struct
{
  unsigned short nnodes;         /* nodes visited; traverse_tree() stops descending once this exceeds 255 */
  unsigned long  size;           /* not used in this file */
  long           phoneme;        /* not used in this file */
  unsigned short node_pos;       /* not used in this file */
  unsigned long  node_off;       /* not used in this file */
  short          low_genone_no;  /* set together with low_pel_no, to the same value */
  short          high_genone_no; /* set together with high_pel_no, to the same value */
  short          low_pel_no;     /* smallest terminal pelid seen below the caller's initial value */
  short          high_pel_no;    /* largest terminal pelid seen above the caller's initial value */
}
tree_head;
382
383static int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes)
384{
385  if (!node) return 0; /* should not happen */
386  if (!tree_topo) return 0; /* should not happen */
387  if (tree_topo->nnodes > 255)
388    return 0; /* should not happen, might indicate infinite looping */
389
390  tree_topo->nnodes++;
391
392  if (node->node.quest_index < 0)
393  {
394    if (num_terminal_nodes)
395    {
396      (*num_terminal_nodes)++;
397    }
398    if (node->term.pelid < tree_topo->low_pel_no)
399    {
400      tree_topo->low_pel_no = node->term.pelid;
401      tree_topo->low_genone_no = node->term.pelid;
402    }
403    if (node->term.pelid > tree_topo->high_pel_no)
404    {
405      tree_topo->high_pel_no = node->term.pelid;
406      tree_topo->high_genone_no = node->term.pelid;
407    }
408  }
409  else
410  {
411    traverse_tree((tree_node*)node->node.fail, tree_topo, num_terminal_nodes);
412    traverse_tree((tree_node*)node->node.pass, tree_topo, num_terminal_nodes);
413  }
414  return 0;
415
416}
417
#if 0
/*
 * Disabled helper: count the nodes of a tree.
 *
 *   node                root of the tree
 *   num_terminal_nodes  receives the number of terminal nodes; must not be
 *                       NULL
 *
 * Returns the number of nodes visited by traverse_tree().
 */
static int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes)
{
  tree_head topo;
  *num_terminal_nodes = 0;
  topo.nnodes = 0;
  /* traverse_tree() compares every terminal pelid against these two fields,
     so they must not be left uninitialised; the seed values are the ones
     version_arbdata_models() uses */
  topo.low_pel_no = 32567;
  topo.high_pel_no = 0;
  traverse_tree(node, &topo, num_terminal_nodes);
  return topo.nnodes;
}
#endif
428
/*
 * Fold one value into a running checksum.
 *
 *   ics   checksum so far
 *   data  value to mix in
 *
 * Returns (ics << 3 | ics >> 29) + data in unsigned arithmetic; with a
 * 32-bit unsigned int the first term is ics rotated left by three bits.
 */
static unsigned int version_arbdata_add(unsigned int ics, int data)
{
  const unsigned int shifted_up = ics << 3;
  const unsigned int wrapped_top = ics >> 29;

  return (shifted_up | wrapped_top) + data;
}
435
436
437unsigned int version_arbdata_models(srec_arbdata* a)
438{
439  int i, num_hmms_in_phoneme;
440
441  tree_head topo;
442  unsigned int checksum = 0;
443  /* if(debug)printf("num_hmms %d\n", a->num_hmms); */
444  /* if(debug)printf("num_phonemes %d\n", a->num_phonemes); */
445  for (i = 0; i < a->num_phonemes; i++)
446  {
447    num_hmms_in_phoneme = 0;
448    topo.low_pel_no = 32567;
449    topo.high_pel_no = 0;
450    topo.nnodes = 0;
451    traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme);
452    /* if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme,
453    topo.low_pel_no, topo.high_pel_no); */
454    if (topo.nnodes == 256) return 0;
455    checksum = version_arbdata_add(checksum, topo.low_pel_no);
456  }
457  return checksum;
458}
459
460
461
462
463