1/*---------------------------------------------------------------------------*
2 *  run_seq_lts.c  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20
21
22#include <stdlib.h>
23#include <string.h>
24#include <math.h>
25#include <ctype.h>
26
27#ifndef NO_STDERR
28#include <stdio.h>
29#else
30extern void PrintError(char *msg, unsigned long p1, unsigned long p2, unsigned long p3);
31#endif
32
33#include "passert.h"
34#include "pmemory.h"
35#include "plog.h"
36#include "phashtable.h"
37#include "lts_error.h"
38#include "lts.h"
39#include "lts_seq_internal.h"
40#include "port_fileio.h"
41#include "platform_utils.h" /* strdup, safe_strtok, etc */
42
43#define ASSERT(x) passert(x)
44
45#ifdef TI_DSP
46#include "tidsp_defines.h"
47#endif
48
/* Debug-build trace switches.  Each one is tested with #if further down in
 * this file; all are 0 here, so the guarded pfprintf() traces are compiled
 * out.  In non-debug builds the names are undefined and #if treats them as 0. */
#ifdef _DEBUG
#define PRINT_LOAD_TREE_SUMMARY 0
#define PRINT_LOAD_TREE 0
#define PRINT_CONS_COMB 0
#define PRINT_DP_LETTER 0
#define PRINT_LTS_WORD 0
#define PRINT_DICT_LOOKUP 0
#endif

/* Marker strings/characters.  LTS_MARKER_PIPESEP_CHAR is the letter looked up
 * in the letter map when a context position falls outside the word or on an
 * unknown character.  NOTE(review): the remaining markers are not used in the
 * visible part of this file -- presumably phone-string markers; confirm. */
#define LTS_MARKER_WORD_START "WS"
#define LTS_MARKER_PRON_START "PS"
#define LTS_MARKER_SYLL_START "SS"
#define LTS_MARKER_SYLL_START_DD "SS%d"
#define LTS_MARKER_PIPESEP "|"
#define LTS_MARKER_PIPESEP_CHAR '|'
64
/* File-local loaders and their matching deallocators.  Each load_* routine
 * reads one section of the LTS data file from fp; the free_* routine with the
 * same suffix releases what that loader allocated. */
static int load_int(PORT_FILE *fp);
static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp);
static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions);
static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap);
static SWIsltsResult free_letter_mapping(LM *lm);
static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap);
static SWIsltsResult free_phone_mapping(PM *pm);
static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp);
static SWIsltsResult free_outputs(char **outputs, char **inputs, int num);
/* load_trees also loads the letter mapping and the questions, and free_trees
 * releases all three. */
static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
                              LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp);
static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, LQUESTION **questions, int num_questions, LM *lm);
static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp);
static SWIsltsResult free_allowable_cons_comb(LTS *lts);
static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp);
static SWIsltsResult free_question_strings(LTS* lts);
/* Look up the letter-map index for a letter, ignoring case.  The letter is
 * converted to unsigned char before toupper() so that a negative plain char
 * (bytes 0x80..0xFF on signed-char platforms) neither invokes undefined
 * behaviour in toupper() nor produces a negative table index.  Both macro
 * arguments are parenthesised so arbitrary expressions may be passed. */
#define find_letter_index( myLet, myLM) ((myLM)->letter_index_for_letter[ toupper((unsigned char)(myLet))])
/* Lookup helpers: find_phone consults the phone hash table; the
 * find_best_* routines search lts->strings for the longest match. */
int find_phone(const char *ph, PM *pm);
int find_best_string(const char *str, LTS* lts);
int find_best_prefix_string(const char *str, LTS* lts);
int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone);
/* in_list: is value myV a member of question myQ's bit set?
 * qmatches: does data point myU's property selected by myQ->type belong to
 * myQ's bit set? */
#define in_list(myV, myQ)   (bitarray_read_bit( myQ->membership, myV))
#define qmatches(myQ, myU)  (in_list( myU->properties[ myQ->type], myQ))
int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) ;
int find_output_for_dp(LTS *lts, int *pbackoff_output);
int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length);
int is_allowable_cons_comb(LTS *lts, const char *cons_string);
void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length);
SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *num_out);
94
95/*------------
96 *
97 * bitarray
98 *
99 *-----------*/
100
/* Read bit iBiT of a bit array stored as 16-bit words.  Evaluates to the
 * masked bit (non-zero when set, 0 when clear), not to a normalised 0/1.
 * Arguments are fully parenthesised so that expressions such as `base + k`
 * select the intended word and bit. */
#define bitarray_read_bit( biTs, iBiT) ( (biTs)[(iBiT)/16] & (1<<((iBiT)%16)) )
106
/* Set (iVal != 0) or clear (iVal == 0) bit iBit in a bit array stored as
 * 16-bit words.  iBit must lie in 0..255; the assertion now also rejects
 * negative indices, which previously passed the check and then indexed
 * before the array and shifted by a negative amount. */
void bitarray_write_bit( unsigned short* bits, int iBit, int iVal)
{
  unsigned short mask;
  ASSERT( iBit>=0 && iBit<256);
  mask = (unsigned short)(1u << (iBit%16));
  if(iVal) {
    bits[iBit/16] |= mask;
  } else {
    bits[iBit/16] &= (unsigned short)~mask;
  }
}
/* Rebuild a 256-bit membership set from a list of byte values.
 *
 *   bits    - array of 16 unsigned shorts (one bit per possible byte value)
 *   list    - the byte values to mark as members
 *   listlen - number of entries in list; values <= 0 leave the set empty
 *
 * All 16 words are cleared first.  (The previous bound,
 * UCHAR_MAX/sizeof(unsigned short)/8, evaluates to 15 and left the last word
 * holding whatever it contained before.)  Each list entry is read as an
 * unsigned char so bytes >= 0x80 cannot become negative bit indices on
 * platforms where plain char is signed. */
void bitarray_populate_from_list(unsigned short* bits, char* list, int listlen)
{
  /* 256 byte values at 16 bits per word */
  const unsigned int num_words = (UCHAR_MAX + 1) / 16;
  unsigned int w;
  int i;

  for(w=0; w<num_words; w++)
    bits[w] = 0;
  for(i=0; i<listlen; i++) {
    const unsigned int value = (unsigned char) list[i];
    /* value is at most 255, so the word index is always within 0..15 */
    bits[value/16] |= (unsigned short)(1u << (value%16));
  }
}
124
125/*-----------
126 *
127 * PHashTable
128 *
129 *-----------*/
130
131static int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
132{ return strcmp((const char*)key1,(const char*)key2); }
/* Hash callback for string keys: h = 31*h + c over every character of the
 * NUL-terminated key, starting from 0, with unsigned wrap-around. */
static unsigned int HashGetCode(const void *key)
{
  const char *cursor = (const char *)key;
  const char *const end = cursor + strlen(cursor);
  unsigned int hash = 0;

  while (cursor != end) {
    hash = 31*hash + (unsigned int)*cursor;
    cursor++;
  }
  return hash;
}
141void* my_PHashTableCreate_FromStrings( const char* strings[], int num_strings,
142				       const LCHAR* hashName)
143{
144  PHashTable* table = NULL;
145  ESR_ReturnCode       rc = ESR_SUCCESS;
146  PHashTableArgs       hashArgs;
147  int i;
148  hashArgs.capacity = 63;
149  hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
150  hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
151  hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
152  rc = PHashTableCreate( &hashArgs, hashName, &table);
153  for(i=0; i<num_strings; i++) {
154    void* old;
155    /* formerly the code used linear lookup, so let's avoid dups to match up */
156    rc = PHashTableGetValue( table, strings[i], (void**)&old);
157    if(rc != ESR_SUCCESS) {
158      rc = PHashTablePutValue( table, strings[i], (const void *)(intptr_t) i, NULL );
159    }
160  }
161  return table;
162}
163
164/*---------
165 *
166 * i/o
167 *
168 *---------*/
169
170static int load_int(PORT_FILE *fp)
171{
172  int v;
173
174  PORT_FREAD_INT16((uint16 *)&v, sizeof(int), 1, fp);
175
176  return v;
177}
178
179static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp)
180{
181  int                  i, num_questions;
182  LQUESTION         ** questions;
183  SWIsltsResult          nRes = SWIsltsSuccess;
184
185  num_questions = load_int(fp);
186
187#if PRINT_LOAD_TREE_SUMMARY
188  pfprintf(PSTDOUT,"loading %d questions\n", num_questions);
189#endif
190
191  *pquestions = questions = (LQUESTION**) lts_alloc(num_questions, sizeof(LQUESTION*));
192  if (questions == NULL) {
193    nRes = SWIsltsErrAllocResource;
194    goto CLEAN_UP;
195  }
196
197  for (i=0;i<num_questions;i++) {
198    questions[i] = (LQUESTION*) lts_alloc(1, sizeof(LQUESTION));
199    if (questions[i] == NULL) {
200      nRes = SWIsltsErrAllocResource;
201      goto CLEAN_UP;
202    }
203
204#if PRINT_LOAD_TREE
205    pfprintf(PSTDOUT,"LOAD_TREE: loading question %d\n", i);
206#endif
207
208    PORT_FREAD_CHAR(&(questions[i]->type), sizeof(char), 1, fp);
209    PORT_FREAD_CHAR(&(questions[i]->num_list), sizeof(char), 1, fp);
210
211    questions[i]->list = (unsigned char*) lts_alloc(questions[i]->num_list, sizeof(unsigned char));
212    if (questions[i]->list == NULL) {
213      nRes = SWIsltsErrAllocResource;
214      goto CLEAN_UP;
215    }
216
217    PORT_FREAD_CHAR(questions[i]->list, sizeof(char), (questions[i]->num_list), fp);
218
219    bitarray_populate_from_list( questions[i]->membership, (char*) questions[i]->list, questions[i]->num_list);
220  }
221
222  *pnum_questions = num_questions;
223  return SWIsltsSuccess;
224
225 CLEAN_UP:
226
227  free_lquestions(questions, num_questions);
228  *pnum_questions = 0;
229  *pquestions = NULL;
230  return nRes;
231}
232
233/* deallocate questions */
234static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions)
235{
236  SWIsltsResult          nRes = SWIsltsSuccess;
237  int                  i;
238
239  if (questions) {
240    for (i=0; i<num_questions; i++) {
241      if (questions[i]->list) {
242        FREE(questions[i]->list);
243        questions[i]->list = NULL;
244      }
245      FREE(questions[i]);
246      questions[i] = NULL;
247    }
248    FREE(questions);
249  }
250  return nRes;
251}
252
253static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap)
254{
255  SWIsltsResult          nRes = SWIsltsSuccess;
256  unsigned char        len;
257  LM                 * lm;
258  int                  i;
259
260  /*  pfprintf(PSTDOUT,"got len %d\n", len);*/
261  lm = (LM*) lts_alloc(1, sizeof(LM));
262  if (lm == NULL) {
263    nRes = SWIsltsErrAllocResource;
264    goto CLEAN_UP;
265  }
266
267  PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
268  lm->num_letters = len;
269
270  lm->letters = (char*) lts_alloc(len, sizeof(char));
271  if (lm->letters == NULL) {
272    nRes = SWIsltsErrAllocResource;
273    goto CLEAN_UP;
274  }
275
276  lm->type = (char*) lts_alloc(len, sizeof(char));
277  if (lm->type == NULL) {
278    nRes = SWIsltsErrAllocResource;
279    goto CLEAN_UP;
280  }
281
282  PORT_FREAD_CHAR(lm->letters, sizeof(char), len, fp);
283  PORT_FREAD_CHAR(lm->type, sizeof(char), len, fp);
284
285  {
286    unsigned int letter;
287    for (letter=0; letter <= UCHAR_MAX; letter++)
288      lm->letter_index_for_letter[letter] = LTS_MAXCHAR;
289  }
290
291  for (i=0;i<len;i++) {
292    char letter = toupper(lm->letters[i]);
293    lm->letters[i] = letter;
294    lm->letter_index_for_letter[(unsigned char)letter] = i;
295  }
296  *ppLetterMap = lm;
297  return SWIsltsSuccess;
298
299 CLEAN_UP:
300  free_letter_mapping(lm);
301  *ppLetterMap = NULL;
302  return nRes;
303}
304
305/* deallocate letter mapping */
306static SWIsltsResult free_letter_mapping(LM *lm)
307{
308  SWIsltsResult          nRes = SWIsltsSuccess;
309
310  if (lm) {
311    if (lm->letters) {
312      FREE(lm->letters);
313      lm->letters = NULL;
314    }
315    if (lm->type) {
316      FREE(lm->type);
317      lm->type = NULL;
318    }
319    lm->num_letters = 0;
320    FREE(lm);
321  }
322  return nRes;
323}
324
325static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap)
326{
327  SWIsltsResult          nRes = SWIsltsSuccess;
328  PM                 * pm;
329  int                  i;
330  unsigned char        len;
331  char               * ph;
332
333  pm = (PM*) lts_alloc(1, sizeof(PM));
334  if (pm == NULL) {
335    nRes = SWIsltsErrAllocResource;
336    goto CLEAN_UP;
337  }
338
339  pm->num_phones = load_int(fp);
340
341  pm->phones = (char**) lts_alloc(pm->num_phones, sizeof(char*));
342  if (pm->phones == NULL) {
343    nRes = SWIsltsErrAllocResource;
344    goto CLEAN_UP;
345  }
346
347  for (i=0;i<pm->num_phones;i++) {
348    PORT_FREAD_CHAR(&len, sizeof(unsigned char), 1, fp);
349
350    pm->phoneH = NULL;
351    pm->phones[i] = ph = (char*) lts_alloc(len+1, sizeof(char));
352    if (ph == NULL) {
353      nRes = SWIsltsErrAllocResource;
354      goto CLEAN_UP;
355    }
356
357    PORT_FREAD_CHAR(ph, sizeof(char), len, fp);
358    ph[len] = '\0';
359  }
360  pm->phoneH = my_PHashTableCreate_FromStrings( (const char**)pm->phones,
361						pm->num_phones,
362						L("lts.phoneH"));
363  if(pm->phoneH == NULL) {
364    nRes = SWIsltsErrAllocResource;
365    goto CLEAN_UP;
366  }
367  *ppPhoneMap = pm;
368  return SWIsltsSuccess;
369
370 CLEAN_UP:
371  free_phone_mapping(pm);
372  *ppPhoneMap = NULL;
373
374  return nRes;
375}
376
377/* deallocate phone mapping */
378static SWIsltsResult free_phone_mapping(PM *pm)
379{
380  SWIsltsResult          nRes = SWIsltsSuccess;
381  int                  i;
382
383  if (pm) {
384    if (pm->phones) {
385      for (i=0; i<pm->num_phones; i++) {
386        if (pm->phones[i]) {
387          FREE(pm->phones[i]);
388          pm->phones[i] = NULL;
389        }
390      }
391      FREE(pm->phones);
392      pm->phones = NULL;
393    }
394    if(pm->phoneH)
395      PHashTableDestroy( (PHashTable*)pm->phoneH);
396    pm->phoneH = NULL;
397    FREE(pm);
398  }
399  return nRes;
400}
401
402
403static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp)
404{
405  SWIsltsResult        nRes = SWIsltsSuccess;
406  int                  i;
407  char              ** outputs = NULL;
408  char              ** inputs = NULL;
409  int                  num;
410  unsigned char        olen;
411  char               * out;
412  unsigned char        ilen;
413  char               * in;
414
415  num = load_int(fp);
416
417  *poutputs = outputs = (char **) lts_alloc(num, sizeof(char*));
418  if (outputs == NULL) {
419    nRes = SWIsltsErrAllocResource;
420    goto CLEAN_UP;
421  }
422
423  *pinputs = inputs = (char **) lts_alloc(num, sizeof(char*));
424  if (inputs == NULL) {
425    nRes = SWIsltsErrAllocResource;
426    goto CLEAN_UP;
427  }
428
429  for (i=0;i<num;i++) {
430    PORT_FREAD_CHAR(&olen, sizeof(char), 1, fp);
431    out = outputs[i] = lts_alloc(olen + 1, sizeof(char));
432    if (out == NULL) {
433      nRes = SWIsltsErrAllocResource;
434      goto CLEAN_UP;
435    }
436
437    if (olen > 0) {
438      PORT_FREAD_CHAR(out, sizeof(char), olen, fp);
439    }
440    out[olen] = '\0';
441    PORT_FREAD_CHAR(&ilen, sizeof(char), 1, fp);
442    in = inputs[i] = lts_alloc(ilen + 1, sizeof(char));
443    if (in == NULL) {
444      nRes = SWIsltsErrAllocResource;
445      goto CLEAN_UP;
446    }
447
448    if (ilen > 0) {
449      PORT_FREAD_CHAR(in, sizeof(char), ilen, fp);
450    }
451    in[ilen] = '\0';
452#if PRINT_LOAD_TREE
453    if (ilen > 0) pfprintf(PSTDOUT,"LOAD_TREE: got input %s out %s\n", in, outputs[i]);
454    pfprintf(PSTDOUT,"LOAD_TREE: outputs[%d] len %d out %x out %s\n", i, olen, outputs[i], outputs[i]);
455#endif
456  }
457
458  *pnum = num;
459  return SWIsltsSuccess;
460
461 CLEAN_UP:
462
463  free_outputs(outputs, inputs, num);
464  *poutputs = NULL;
465  *pinputs = NULL;
466  *pnum = 0;
467
468  return nRes;
469}
470
471static SWIsltsResult free_outputs(char **outputs, char **inputs, int num)
472{
473  SWIsltsResult          nRes = SWIsltsSuccess;
474  int                  i;
475
476  if (outputs) {
477    for (i=0; i<num; i++) {
478      if (outputs[i]) {
479        FREE(outputs[i]);
480        outputs[i] = NULL;
481      }
482    }
483    FREE(outputs);
484  }
485
486  if (inputs) {
487    for (i=0; i<num; i++) {
488      if (inputs[i]) {
489        FREE(inputs[i]);
490        inputs[i] = NULL;
491      }
492    }
493    FREE(inputs);
494  }
495  return nRes;
496}
497
498static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
499                      LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp)
500{
501  SWIsltsResult          nRes = SWIsltsSuccess;
502  int                  let, i;
503  RT_LTREE           * tree = NULL;
504  RT_LTREE          ** trees = NULL;
505
506#if PRINT_LOAD_TREE_SUMMARY
507  pfprintf(PSTDOUT,"loading letter mapping\n");
508#endif
509  *ptrees = NULL;
510  *pquestions = NULL;
511  *plm = NULL;
512
513  nRes = load_letter_mapping(fp, plm);
514  if (nRes != SWIsltsSuccess) {
515    goto CLEAN_UP;
516  }
517
518#if PRINT_LOAD_TREE_SUMMARY
519  pfprintf(PSTDOUT,"loading questions\n");
520#endif
521
522  nRes = load_lquestions(pquestions, num_questions, fp);
523  if (nRes != SWIsltsSuccess) {
524    goto CLEAN_UP;
525  }
526
527  *num_letters = load_int(fp);
528
529  if (*num_letters != (*plm)->num_letters) {
530#ifndef NO_STDERR
531    PLogError(L("Error loading data, num_letters %d doesn't match num from mapping %d\n"),
532            *num_letters, (*plm)->num_letters);
533#endif
534    nRes = SWIsltsInternalErr;
535    goto CLEAN_UP;
536  }
537
538  *ptrees = trees = (RT_LTREE**) lts_alloc(*num_letters, sizeof(RT_LTREE*));
539  if (trees == NULL) {
540    nRes = SWIsltsErrAllocResource;
541    goto CLEAN_UP;
542  }
543
544  for (let=0;let<*num_letters;let++) {
545    /*    pfprintf(PSTDOUT,"loading for t %d\n", t);*/
546
547    trees[let] = tree = (RT_LTREE*) lts_alloc(1, sizeof(RT_LTREE));
548    if (tree == NULL) {
549      nRes = SWIsltsErrAllocResource;
550      goto CLEAN_UP;
551    }
552
553    tree->num_nodes = load_int(fp);
554
555    tree->values_or_question1 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
556    if (tree->values_or_question1 == NULL) {
557      nRes = SWIsltsErrAllocResource;
558      goto CLEAN_UP;
559    }
560
561    tree->question2 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
562    if (tree->question2 == NULL) {
563      nRes = SWIsltsErrAllocResource;
564      goto CLEAN_UP;
565    }
566
567    tree->left_nodes = (short *) lts_alloc(tree->num_nodes, sizeof(short));
568    if (tree->left_nodes == NULL) {
569      nRes = SWIsltsErrAllocResource;
570      goto CLEAN_UP;
571    }
572
573#if PRINT_LOAD_TREE
574    pfprintf(PSTDOUT,"LOAD_TREE: Tree for let %d num_nodes %d\n", let, tree->num_nodes);
575#endif
576
577    for (i=0;i<tree->num_nodes;i++) {
578      PORT_FREAD_INT16(&(tree->left_nodes[i]), sizeof(short), 1, fp);
579      PORT_FREAD_INT16(&(tree->values_or_question1[i]), sizeof(short), 1, fp);
580
581#if PRINT_LOAD_TREE
582      pfprintf(PSTDOUT,"LOAD_TREE:  node[%d] %d %d", i, tree->left_nodes[i], tree->values_or_question1[i]);
583#endif
584
585      PORT_FREAD_INT16(&(tree->question2[i]), sizeof(short), 1, fp);
586      if (tree->left_nodes[i] != NO_NODE) {
587        if (tree->question2[i] == -1) tree->question2[i] = 0;
588#if PRINT_LOAD_TREE
589        pfprintf(PSTDOUT," %x", (unsigned short) tree->question2[i]);
590#endif
591      }
592
593#if PRINT_LOAD_TREE
594      pfprintf(PSTDOUT,"\n");
595#endif
596    }
597  }
598
599  return SWIsltsSuccess;
600
601 CLEAN_UP:
602
603  free_trees(trees, *num_letters, *pquestions, *num_questions, *plm);
604  *ptrees = NULL;
605  *pquestions = NULL;
606  *plm = NULL;
607  *num_letters = 0;
608  *num_questions = 0;
609
610  return nRes;
611}
612
613/* deallocate trees */
614static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters,
615                       LQUESTION **questions, int num_questions, LM *lm)
616{
617  SWIsltsResult          nRes = SWIsltsSuccess;
618  int                  i;
619  RT_LTREE           * tree;
620
621  if (lm) {
622    free_letter_mapping(lm);
623  }
624  if (questions) {
625    free_lquestions(questions, num_questions);
626  }
627
628  if (trees) {
629    for (i=0; i<num_letters; i++) {
630      if (trees[i]) {
631        tree = trees[i];
632        if (tree->values_or_question1) {
633          FREE(tree->values_or_question1);
634          tree->values_or_question1 = NULL;
635        }
636        if (tree->question2) {
637          FREE(tree->question2);
638          tree->question2 = NULL;
639        }
640        if (tree->left_nodes) {
641          FREE(tree->left_nodes);
642          tree->left_nodes = NULL;
643        }
644        FREE(trees[i]);
645        trees[i] = NULL;
646      }
647    }
648    FREE(trees);
649  }
650  return nRes;
651}
652
653static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp)
654{
655  SWIsltsResult          nRes = SWIsltsSuccess;
656  char                line[50];
657  char                tempstr[50];
658  char              * tok;
659  int                 i, toklen;
660  int                 count;
661  char          seps[] = " 	\n";
662
663  lts->num_cons_comb = 0;
664  lts->allowable_cons_combH = NULL;
665
666  while (PORT_FGETS(line, 50, fp)) {
667
668#ifndef TI_DSP
669
670    /*need to get rid of sme crud at the end of the line because it is being read in binary mode*/
671    for (i=strlen(line)-1;i>=0;i--) {
672      if (!isalpha(line[i])) line[i] = ' ';
673    }
674#endif
675    count = 0;
676    tok = safe_strtok(line, seps, &toklen);
677    tempstr[0] = '\0';
678
679    /* get all available sequence of tokens */
680    while(tok && toklen > 0){
681      count += toklen;
682      strncat(tempstr, tok, toklen);
683      tempstr[count+1] = '\0';
684      strcat(tempstr, " ");
685      count++;
686
687      tok = safe_strtok(tok+toklen, seps, &toklen);
688    }
689    if (count > 0) {
690
691        /* delete the final space */
692        tempstr[count-1] = '\0';
693
694        lts->allowable_cons_comb[lts->num_cons_comb] = (char*) lts_alloc(strlen(tempstr)+1, sizeof(char));
695        if (lts->allowable_cons_comb[lts->num_cons_comb] == NULL) {
696          nRes = SWIsltsErrAllocResource;
697          goto CLEAN_UP;
698        }
699
700        strcpy(lts->allowable_cons_comb[lts->num_cons_comb], tempstr);
701
702#if PRINT_CONS_COMB
703        pfprintf(PSTDOUT,"LOAD_TREE: allowable_cons_comb[%d]: %s\n", lts->num_cons_comb, tempstr);
704#endif
705
706        lts->num_cons_comb++;
707        if (lts->num_cons_comb >= MAX_CONS_COMB) {
708#ifndef NO_STDERR
709            PLogError(L("MAX_CONS_COMB %d exceeded\n"), MAX_CONS_COMB);
710#endif
711          nRes = SWIsltsInternalErr;
712          goto CLEAN_UP;
713        }
714    }
715  }
716  if (lts->num_cons_comb == 0) {
717#ifndef NO_STDERR
718    PLogError(L("Warning: the data file is missing consonant combinations - syllable boundaries will be incorrect\n"));
719#endif
720  }
721  lts->allowable_cons_combH = my_PHashTableCreate_FromStrings( (const char**)lts->allowable_cons_comb, lts->num_cons_comb, L("lts.allowable_cons_combH"));
722  if(lts->allowable_cons_combH == NULL) {
723    nRes = SWIsltsErrAllocResource;
724    goto CLEAN_UP;
725  }
726
727#if PRINT_LOAD_TREE_SUMMARY
728  pfprintf(PSTDOUT,"loaded %d cons combinations\n", lts->num_cons_comb);
729#endif
730
731  return SWIsltsSuccess;
732
733 CLEAN_UP:
734
735  free_allowable_cons_comb(lts);
736
737  return nRes;
738}
739
740static SWIsltsResult free_allowable_cons_comb(LTS *lts)
741{
742  SWIsltsResult          nRes = SWIsltsSuccess;
743  int                  i;
744
745  for (i=0; i<lts->num_cons_comb; i++) {
746    if (lts->allowable_cons_comb[i]) {
747      FREE(lts->allowable_cons_comb[i]);
748      lts->allowable_cons_comb[i] = NULL;
749    }
750  }
751  if(lts->allowable_cons_combH)
752    PHashTableDestroy( (PHashTable*)lts->allowable_cons_combH);
753  lts->allowable_cons_combH = NULL;
754  return nRes;
755}
756
757static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp)
758{
759  SWIsltsResult          nRes = SWIsltsSuccess;
760  int                  i;
761  int                  num;
762  unsigned char        len;
763  char              ** strings;
764  char               * str;
765
766  num = load_int(fp);
767
768  lts->strings = strings = (char **) lts_alloc(num, sizeof(char*));
769  lts->string_lens = (char*)lts_alloc(num, sizeof(char));
770
771  if (strings == NULL || lts->string_lens == NULL ) {
772    nRes = SWIsltsErrAllocResource;
773    goto CLEAN_UP;
774  }
775
776  for (i=0;i<num;i++) {
777    PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
778
779    str = strings[i] = lts_alloc(len + 1, sizeof(char));
780    if (str == NULL) {
781      nRes = SWIsltsErrAllocResource;
782      goto CLEAN_UP;
783    }
784
785    if (len > 0) {
786      PORT_FREAD_CHAR(str, sizeof(char), len, fp);
787    }
788    str[len] = '\0';
789
790    bitarray_populate_from_list( lts->membership, lts->strings[i], len);
791    lts->string_lens[i] = strlen(lts->strings[i]);
792  }
793
794  // *pnum = num;
795  lts->num_strings = num;
796
797  return SWIsltsSuccess;
798
799 CLEAN_UP:
800
801  free_question_strings(lts);
802
803  return nRes;
804}
805
806/* deallocate question strings */
807static SWIsltsResult free_question_strings(LTS* lts)
808{
809  SWIsltsResult          nRes = SWIsltsSuccess;
810  int                  i;
811
812  if (lts->strings) {
813    for (i=0;i<lts->num_strings;i++) {
814      if (lts->strings[i]) {
815        FREE(lts->strings[i]);
816        lts->strings[i] = NULL;
817      }
818    }
819    FREE(lts->strings);
820    if(lts->string_lens) FREE(lts->string_lens);
821    lts->strings = NULL;
822    lts->string_lens = NULL;
823  }
824  return nRes;
825}
826
827
828SWIsltsResult create_lts(char *data_filename, LTS_HANDLE *phLts)
829{
830  SWIsltsResult          nRes = SWIsltsSuccess;
831  LTS                * lts;
832
833#ifdef USE_STATIC_SLTS
834  /* TODO: language-specific ID here? */
835  lts = &g_lts;
836
837#else /* !USE_STATIC_SLTS */
838
839  PORT_FILE *fp;
840
841  lts = (LTS*) lts_alloc(1, sizeof(LTS));
842  if (lts == NULL) {
843    nRes = SWIsltsErrAllocResource;
844    goto CLEAN_UP;
845  }
846
847  fp = PORT_FOPEN(data_filename, "rb");
848  if (fp == NULL) {
849#ifndef NO_STDERR
850    PLogError(L("Cannot open %s\n"), data_filename);
851#endif
852    nRes = SWIsltsFileOpenErr;
853    goto CLEAN_UP;
854  }
855   nRes = load_phone_mapping(fp, &lts->phone_mapping);
856   if (nRes != SWIsltsSuccess) {
857     PLogError(L("SWIsltsErr: load_phone_mapping() failed: Err_code = %d\n"), nRes);
858     goto CLEAN_UP;
859   }
860
861   nRes = load_question_strings(lts, fp);
862   if (nRes != SWIsltsSuccess) {
863     PLogError(L("SWIsltsErr: load_question_strings() failed: Err_code = %d\n"), nRes);
864     goto CLEAN_UP;
865   }
866
867   nRes  = load_outputs(&(lts->outputs), &(lts->input_for_output), &lts->num_outputs, fp);
868   if (nRes != SWIsltsSuccess) {
869     PLogError(L("SWIsltsErr: load_outputs() failed: Err_code = %d\n"), nRes);
870     goto CLEAN_UP;
871   }
872
873#if PRINT_LOAD_TREE
874  pfprintf(PSTDOUT,"LOAD_TREE: got %d outputs, loading trees\n", lts->num_outputs);
875#endif
876
877  nRes = load_trees(&(lts->trees), &(lts->num_letters),
878                 &(lts->questions), &(lts->num_questions),
879                 &(lts->letter_mapping),
880                 fp);
881  if (nRes != SWIsltsSuccess) {
882    PLogError(L("SWIsltsErr: load_trees() failed: Err_code = %d\n"), nRes);
883    goto CLEAN_UP;
884  }
885
886  nRes = load_allowable_cons_comb(lts, fp);
887  if (nRes != SWIsltsSuccess) {
888    PLogError(L("SWIsltsErr: load_allowable_cons_comb() failed: Err_code = %d\n"), nRes);
889    goto CLEAN_UP;
890  }
891
892  PORT_FCLOSE(fp);
893
894#endif /* !USE_STATIC_SLTS */
895
896  *phLts = lts;
897  return SWIsltsSuccess;
898
899 CLEAN_UP:
900
901  free_lts(lts);
902  *phLts = NULL;
903  return nRes;
904}
905
906/* deallocates LTS */
907SWIsltsResult free_lts(LTS_HANDLE hlts)
908{
909  SWIsltsResult          nRes = SWIsltsSuccess;
910  LTS                * lts = (LTS *)hlts;
911
912  if (lts) {
913
914#ifndef USE_STATIC_SLTS
915    free_phone_mapping(lts->phone_mapping);
916    free_question_strings(lts);
917    lts->strings = NULL;
918    lts->phone_mapping = NULL;
919
920    free_outputs(lts->outputs, lts->input_for_output, lts->num_outputs);
921    lts->input_for_output = lts->outputs = NULL;
922
923    free_trees(lts->trees, lts->num_letters,
924               lts->questions, lts->num_questions,
925               lts->letter_mapping);
926    lts->trees = NULL;
927    lts->questions = NULL;
928    lts->letter_mapping = NULL;
929
930    free_allowable_cons_comb(lts);
931    FREE(lts);
932#endif /* !USE_STATIC_LTS */
933  }
934
935  return nRes;
936}
937
938
939int find_phone(const char *ph, PM *pm)
940{
941  ESR_ReturnCode rc;
942  int iRet = -1;
943  rc = PHashTableGetValue((PHashTable*)pm->phoneH, ph, (void**)(void*)&iRet);
944  if (rc != ESR_SUCCESS)
945    PLogError("error while in find_phone(%s,%x)\n", ph, pm);
946  return iRet;
947}
948
949int find_best_string(const char *str, LTS* lts)
950{
951  int i, maxlen, maxi, len;
952  int len_str;
953
954  if(str[0] == '\0')   return -1;
955  len_str = strlen(str);
956
957  maxi = -1;
958  maxlen = 0;
959
960  for (i=0;i<lts->num_strings;i++) {
961    len = lts->string_lens[i];
962    if( len > len_str)
963      continue; /* no point in comparison */
964    if (strncmp(str, lts->strings[i], len) == 0) {
965      if (len > maxlen) {
966	maxlen = len;
967        maxi = i;
968      }
969    }
970  }
971  return maxi;
972}
973
974int find_best_prefix_string(const char *str, LTS* lts)
975{
976  int i;
977  int maxlen;
978  int maxi;
979  int len;
980  int prelen;
981
982  maxi = -1;
983  maxlen = 0;
984
985  prelen = strlen(str);
986
987  for (i=0;i<lts->num_strings;i++) {
988    len = lts->string_lens[i];
989    if (len <= prelen) {
990      if (strncmp(str + (prelen - len), lts->strings[i], len) == 0) {
991        if (len > maxlen) {
992          maxlen = len;
993          maxi = i;
994        }
995      }
996    }
997  }
998  return maxi;
999}
1000
1001int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone)
1002{
1003  int i,j;
1004  LDP *dp;
1005  unsigned char letter;
1006  int hit_wb;
1007  LM *lm;
1008  unsigned char word[MAX_WORD_LEN];
1009  char tempstr[MAX_WORD_LEN];
1010  int first_syl_end;
1011  int last_syl_start;
1012
1013  dp = &(lts->dp);
1014  lm = lts->letter_mapping;
1015
1016  /* the LTS decision tree does not seem to be well trained at all for
1017     the letter ' when followed by "s"  ... It seems to result in the
1018	 phoneme 'm', which is wrong.   "'t" seems to be OK though.
1019	 BAD: Kevin's : k6v6nmz ...  pal's : palmz ... paul's : p{lz
1020	 BAD: janice's : jan6s6mz ... tom's house : t)mmz&h?s ... tonya's : t)ny6mz
1021	 BAD: jake's house : jAk6mz&h?s
1022	 Ignoring ' as below we get ...
1023     BETTER: Kevin's : kev6nz  ... pal's : palz ... paul's : p{lz
1024	 BETTER: janice's : jan6s6s ... tom's house : t)mz&h?s ... tonya's : t)ny6s
1025	 BETTER: jake's house : jAk6s&h?s
1026	 The proper solution requires a legitimate text normalizer with special
1027	 handling of cases like 's which would always put a "z" there,
1028	 except if preceded by an unvoiced stop (ptk) which requires a "s" there.
1029	 For now let's just skip the ' letter, which testing shows to be generally
1030	 safe (janice's, jake's etc are better but still not quite right). */
1031
1032  if(input_word[index] == '\'')
1033    return 1; // same as unknown character
1034
1035  letter = find_letter_index(input_word[index], lm);
1036
1037  if (letter == LTS_MAXCHAR) {
1038  /* lisa - we need to decide how to handle this case.  Do we just silently skip unknown
1039    characters or warn the app or user somehow*/
1040#ifdef NO_STDERR
1041    PrintError("unknown character on input %c - skipping\n", input_word[index], NULL, NULL);
1042#else
1043    PLogError(L("unknown character on input %c - skipping\n"), input_word[index]);
1044#endif
1045    return 1;
1046  }
1047
1048  hit_wb = 0;
1049
1050  /*pfprintf(PSTDOUT,"left context\n");*/
1051
1052  for (j=0;j<5;j++) {
1053    if (hit_wb) {
1054      dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1055    } else {
1056      i = index - (j+1);
1057      if (i < 0) dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1058      else {
1059        dp->properties[ Left1+j] = find_letter_index(input_word[i], lm);
1060        if (dp->properties[ Left1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
1061          dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1062          hit_wb = 1;
1063        }
1064      }
1065    }
1066  }
1067
1068  /*pfprintf(PSTDOUT,"right context\n");*/
1069
1070  hit_wb = 0;
1071  for (j=0;j<5;j++) {
1072    if (hit_wb) {
1073      dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1074    } else {
1075      i = index + (j+1);
1076      if (i >= word_len) dp->properties[Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1077      else {
1078        dp->properties[ Right1+j] = find_letter_index(input_word[i], lm);
1079        if (dp->properties[ Right1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
1080          dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1081          hit_wb = 1;
1082        }
1083      }
1084    }
1085  }
1086
1087  dp->letter = letter; // properties[ Letter] = letter;
1088
1089  dp->properties[ LeftPhone1] = left_phone;
1090
1091  /*pfprintf(PSTDOUT,"word stuff\n"); */
1092
1093  /*find word start and end - use unknown character as word boundaries*/
1094
1095  dp->properties[ WordLen] = word_len;
1096
1097  if (index == 0) dp->properties[ LetInWord] = 0;
1098  else if (index == word_len-1) dp->properties[ LetInWord] = 2;
1099  else dp->properties[ LetInWord] = 1;
1100
1101  for (i=0;i<word_len;i++) {
1102    word[i] = find_letter_index(input_word[i], lm);
1103  }
1104
1105  /*figure out syllable in word - not really syllables - just looks to see if is or at first or last vowel*/
1106  /*  pfprintf(PSTDOUT,"syl stuff\n");*/
1107
1108  first_syl_end = word_len;
1109  for (i=0;i<word_len;i++) {
1110    if (lm->type[word[i]] == 1) {
1111      for (j=i+1;j<word_len;j++) {
1112        if (lm->type[word[j]] != 1) break;
1113      }
1114      first_syl_end = j;
1115      break;
1116    }
1117  }
1118  last_syl_start = 0;
1119  for (i=word_len-1;i>=0;i--) {
1120    if (lm->type[word[i]] == 1) {
1121      for (j=i-1;j>=0;j--) {
1122        if (lm->type[word[j]] != 1) break;
1123      }
1124      last_syl_start = j;
1125      break;
1126    }
1127  }
1128
1129#if PRINT_DP_LETTER
1130  pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1131#endif
1132
1133  if (index > last_syl_start) dp->properties[ SylInWord] = 2;
1134  else if (index < first_syl_end) dp->properties[ SylInWord] = 0;
1135  else dp->properties[ SylInWord] = 1;
1136
1137  first_syl_end = word_len;
1138  for (i=0;i<word_len;i++) {
1139    if (lm->type[word[i]] == 1) {
1140      for (j=i+1;j<word_len;j++) {
1141        if (lm->type[word[j]] != 1) break;
1142      }
1143      for (;j<word_len;j++) {
1144        if (lm->type[word[j]] == 1) break;
1145      }
1146      first_syl_end = j;
1147      break;
1148    }
1149  }
1150  last_syl_start = 0;
1151  for (i=word_len-1;i>=0;i--) {
1152    if (lm->type[word[i]] == 1) {
1153      for (j=i-1;j>=0;j--) {
1154        if (lm->type[word[j]] != 1) break;
1155      }
1156      for (;j>=0;j--) {
1157        if (lm->type[word[j]] == 1) break;
1158      }
1159      last_syl_start = j;
1160      break;
1161    }
1162  }
1163
1164#if PRINT_DP_LETTER
1165  pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1166#endif
1167
1168  if (index > last_syl_start) dp->properties[ Syl2InWord] = 2;
1169  else if (index  < first_syl_end) dp->properties[ Syl2InWord] = 0;
1170  else dp->properties[Syl2InWord] = 1;
1171
1172
1173  first_syl_end = word_len;
1174  for (i=root_start;i<root_end;i++) {
1175    if (lm->type[word[i]] == 1) {
1176      for (j=i+1;j<word_len;j++) {
1177        if (lm->type[word[j]] != 1) break;
1178      }
1179      first_syl_end = j;
1180      break;
1181    }
1182  }
1183  last_syl_start = 0;
1184  for (i=root_end-1;i>=root_start;i--) {
1185    if (lm->type[word[i]] == 1) {
1186      for (j=i-1;j>=0;j--) {
1187        if (lm->type[word[j]] != 1) break;
1188      }
1189      last_syl_start = j;
1190      break;
1191    }
1192  }
1193
1194#if PRINT_DP_LETTER
1195  pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1196#endif
1197
1198  if (index > last_syl_start) dp->properties[SylInRoot] = 2;
1199  else if (index < first_syl_end) dp->properties[ SylInRoot] = 0;
1200  else dp->properties[ SylInRoot] = 1;
1201
1202  first_syl_end = word_len;
1203  for (i=root_start;i<root_end;i++) {
1204    if (lm->type[word[i]] == 1) {
1205      for (j=i+1;j<word_len;j++) {
1206        if (lm->type[word[j]] != 1) break;
1207      }
1208      for (;j<word_len;j++) {
1209        if (lm->type[word[j]] == 1) break;
1210      }
1211      first_syl_end = j;
1212      break;
1213    }
1214  }
1215  last_syl_start = 0;
1216  for (i=root_end-1;i>=root_start;i--) {
1217    if (lm->type[word[i]] == 1) {
1218      for (j=i-1;j>=0;j--) {
1219        if (lm->type[word[j]] != 1) break;
1220      }
1221      for (;j>=0;j--) {
1222        if (lm->type[word[j]] == 1) break;
1223      }
1224      last_syl_start = j;
1225      break;
1226    }
1227  }
1228
1229#if PRINT_DP_LETTER
1230  pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1231#endif
1232
1233  if (index > last_syl_start) dp->properties[Syl2InRoot] = 2;
1234  else if (index  < first_syl_end) dp->properties[Syl2InRoot] = 0;
1235  else dp->properties[Syl2InRoot] = 1;
1236
1237
1238  dp->properties[Left_DFRE] = index - root_start;
1239  dp->properties[Right_DFRE] = (root_end - index) - 1;
1240
1241
1242  /*  pfprintf(PSTDOUT,"strings\n");*/
1243#if PRINT_DP_LETTER
1244  pfprintf(PSTDOUT,"input word %s num_strings %d\n", input_word, lts->num_strings);
1245#endif
1246
1247  dp->properties[RightString] = find_best_string(input_word+index+1, lts);
1248  strcpy(tempstr, input_word);
1249  tempstr[index] = '\0';
1250
1251  dp->properties[LeftString] = find_best_prefix_string(tempstr, lts);
1252
1253#if PRINT_DP_LETTER
1254  pfprintf(PSTDOUT,"dp %c ", lm->letters[dp->letter]);
1255
1256  for (i=0;i<word_len;i++) {
1257    pfprintf(PSTDOUT,"%c", lm->letters[word[i]]);
1258  }
1259  pfprintf(PSTDOUT," %c%c%c {%c} %c%c%c liw %d siw %d s2iw %d nw %d sir %d s2ir %d left_DFRE %d right_DFRE %d\n",
1260         lm->letters[dp->left_context[2]],
1261         lm->letters[dp->left_context[1]],
1262         lm->letters[dp->left_context[0]],
1263         lm->letters[dp->letter],
1264         lm->letters[dp->right_context[0]],
1265         lm->letters[dp->right_context[1]],
1266         lm->letters[dp->right_context[2]],
1267         dp->let_in_word,
1268         dp->syl_in_word,
1269         dp->syl2_in_word,
1270         dp->word_len,
1271         dp->syl_in_root,
1272         dp->syl2_in_root,
1273         dp->left_DFRE, dp->right_DFRE);
1274#endif
1275
1276  return 0;
1277}
1278
1279int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp)
1280{
1281  int m1, m2;
1282  switch(type) {
1283  case 0:
1284    return qmatches(q1, dp);
1285  case 1:
1286    m1 = qmatches(q1, dp);
1287    m2 = qmatches(q2, dp);
1288    return(m1 && m2);
1289  case 2:
1290    m1 = qmatches(q1, dp);
1291    m2 = qmatches(q2, dp);
1292    return(m1 && !m2);
1293  case 3:
1294    m1 = qmatches(q1, dp);
1295    m2 = qmatches(q2, dp);
1296    return(!m1 && m2);
1297  case 4:
1298    m1 = qmatches(q1, dp);
1299    m2 = qmatches(q2, dp);
1300    return(!m1 && !m2);
1301  default:
1302    return -1;
1303  }
1304  /* should not come here */
1305  return -1;
1306}
1307
1308int find_output_for_dp(LTS *lts, int *pbackoff_output)
1309{
1310  LDP *dp;
1311  int index;
1312  RT_LTREE *tree;
1313  LQUESTION *q1;
1314  LQUESTION *q2;
1315  int comb_type;
1316  int q2_index;
1317  int left_index;
1318
1319  dp = &(lts->dp);
1320  tree = lts->trees[dp->letter]; // properties[Letter]];
1321
1322  index = 0;
1323
1324  while (1) {
1325    left_index = tree->left_nodes[index];
1326
1327    if (left_index == NO_NODE) { /*means its a leaf node*/
1328      *pbackoff_output = tree->question2[index];
1329      return tree->values_or_question1[index];
1330    }
1331    q1 = lts->questions[tree->values_or_question1[index]];
1332    q2_index = tree->question2[index] & 0x1FFF;
1333    comb_type = (tree->question2[index] & 0xE000) >> 13;
1334
1335    q2 = lts->questions[q2_index];
1336
1337    if (matches(q1, q2, comb_type, dp)) {
1338      index = left_index;
1339    } else {
1340      index = left_index+1;
1341    }
1342  }
1343}
1344int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length)
1345{
1346  char *tok;
1347  int toklen;
1348  char seps[] = " ";
1349
1350  if (strlen(output) == 0) return out_len;
1351
1352  tok = safe_strtok(output, seps, &toklen);
1353  while (tok && toklen) {
1354    if ((toklen > 0) && (strncmp(tok, "null", 4) != 0)) {
1355
1356      if (isdigit(tok[toklen-1])) {
1357        /*means it's a vowel.  So, add a syllable boundary.  It's position
1358          gets adjusted later by adjust_syllable_boundaries()*/
1359        strcpy(output_phone_string[out_len++], LTS_MARKER_SYLL_START);
1360        if (out_len >= max_phone_length) return max_phone_length;
1361      }
1362      strncpy(output_phone_string[out_len], tok, toklen);
1363      output_phone_string[out_len++][toklen] = '\0';
1364      if (out_len >= max_phone_length) return max_phone_length;
1365    }
1366    tok = safe_strtok(tok+toklen, seps, &toklen);
1367  }
1368  return out_len;
1369}
1370
1371int is_allowable_cons_comb(LTS *lts, const char *cons_string)
1372{
1373  /* int i;
1374     for (i=0;i<lts->num_cons_comb;i++) {
1375     #if PRINT_CONS_COMB
1376     pfprintf(PSTDOUT,"checking {%s} vs c[%d] {%s}\n", cons_string, i, lts->allowable_cons_comb[i]);
1377     #endif
1378     if (strcmp(cons_string, lts->allowable_cons_comb[i]) == 0) return 1;
1379     }
1380     return 0;
1381  */
1382  ESR_ReturnCode rc;
1383  void* iVal = NULL;
1384  rc = PHashTableGetValue( (PHashTable*)lts->allowable_cons_combH, cons_string, &iVal);
1385  if(rc == ESR_SUCCESS)
1386    return 1;
1387  else
1388    return 0;
1389}
1390
1391
1392
1393
1394
1395void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length)
1396{
1397  char *out;
1398  int i,j;
1399  int syl_start;
1400  int stress = 0;
1401  int first_syl_bound;
1402
1403  char tempstr[20];
1404
1405  /*there should already be a syllable boundary before each vowel (add_output put one there)*/
1406  /*so just find these, then shift back by allowable consonant combinations and move the syllable mark*/
1407
1408  for (i=0;i<num_out;i++) {
1409    out = output_phone_string[i];
1410    if (strcmp(out, LTS_MARKER_SYLL_START) == 0) { /*means there is a syllable boundary
1411      														 find start of allowable sequence*/
1412
1413      syl_start = 0;
1414
1415      for (j=i-1;j>0;j--) {
1416        out = output_phone_string[j];
1417        if (isdigit(out[strlen(out)-1])) {
1418          syl_start = j+1;
1419          break; /*means it's a vowel*/
1420        }
1421        if (strcmp(out, LTS_MARKER_WORD_START) == 0) {
1422          syl_start = j+1;
1423          break; /*don't push syl boundaries before word boundaries*/
1424        }
1425        if (strcmp(out, LTS_MARKER_PRON_START) == 0) {
1426          syl_start = j+1;
1427          break; /*don't push syl boundaries before phrase boundaries*/
1428        }
1429
1430        /* for sequences longer than 2,
1431           check 3-syllable onset first, then check 2-syllable onset */
1432        if(j > 1){
1433          sprintf(tempstr, "%s %s %s", output_phone_string[j-2], output_phone_string[j-1],
1434            output_phone_string[j]);
1435          if (!is_allowable_cons_comb(lts, tempstr)) {
1436            sprintf(tempstr, "%s %s", output_phone_string[j-1], output_phone_string[j]);
1437            if (!is_allowable_cons_comb(lts, tempstr)) {
1438#if PRINT_CONS_COMB
1439              pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
1440                output_phone_string[j]);
1441#endif
1442              syl_start = j;
1443              break;
1444            }
1445          }
1446        }
1447        /* for sequences shorter than 2 */
1448        else
1449        {
1450          sprintf(tempstr, "%s %s", output_phone_string[j-1], output_phone_string[j]);
1451          if (!is_allowable_cons_comb(lts, tempstr)) {
1452#if PRINT_CONS_COMB
1453            pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
1454              output_phone_string[j]);
1455#endif
1456            syl_start = j;
1457            break;
1458          }
1459        }
1460      } /* end for j=i-1 */
1461
1462      /*shift over stuff between syl_start a gap*/
1463      for (j=i;j>syl_start;j--) {
1464        strcpy(output_phone_string[j], output_phone_string[j-1]);
1465      }
1466      /*now find stress level from phone (and remove it) and add it to syl bound*/
1467
1468      if (i<num_out-1) {
1469        out = output_phone_string[i+1];
1470
1471        if (isdigit(out[strlen(out)-1])) {
1472          stress = atoi(out + strlen(out)-1);
1473        } else {
1474          stress = 0; /*should not happen*/
1475        }
1476      } else {
1477        stress = 0; /*should not happen*/
1478      }
1479
1480      sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
1481    } /* end if (strcmp(out, LTS_MARKER_SYLL_START) == 0) */
1482  } /* end for i=0 */
1483
1484  /*remove all the stress marking from the vowels*/
1485  for (i=0;i<num_out;i++) {
1486    out = output_phone_string[i];
1487    if ((strncmp(out, LTS_MARKER_SYLL_START, 2) != 0) && isdigit(out[strlen(out)-1])) {
1488      out[strlen(out)-1] = '\0'; /*remove the stress from the vowel*/
1489    }
1490  }
1491
1492  /* word boundary must be followed by syllable boundary
1493    if no syllable boundary exists after a word boundary, move the first
1494    syllable boundary to after the word boundary */
1495  first_syl_bound = -1;
1496  syl_start = -1;
1497  for (i=1;i<num_out;i++) {
1498    if ((strcmp(output_phone_string[i-1], LTS_MARKER_WORD_START) == 0) &&
1499      (strncmp(output_phone_string[i], LTS_MARKER_SYLL_START, 2) != 0)) {
1500
1501      syl_start = i;
1502      /* search for first occurance of syllable boundary */
1503      for(j=syl_start+1;j<num_out; j++){
1504        out = output_phone_string[j];
1505        if(strncmp(out, LTS_MARKER_SYLL_START, 2) == 0 && isdigit(out[strlen(out)-1])){
1506            stress = atoi(out + strlen(out)-1);
1507            first_syl_bound = j;
1508            break;
1509        }
1510      }
1511
1512      /* swap entries until syl bound reaches word bound */
1513      if(first_syl_bound >= 0){
1514        for(; j>syl_start; j--){
1515          strcpy(output_phone_string[j], output_phone_string[j-1]);
1516        }
1517        /* put syllable boundary after word boundary */
1518        sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
1519
1520        /* advance i, reset variables */
1521        i = first_syl_bound;
1522        first_syl_bound = syl_start = -1;
1523
1524      }
1525    }
1526  }
1527
1528}
1529
1530
1531SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *pnum_out)
1532{
1533  SWIsltsResult          nRes = SWIsltsSuccess;
1534  int                  i,j;
1535  int                  root_start;
1536  int                  root_end;
1537  int                  output_index;
1538  int                  left_phone;
1539  char               * input_seq;
1540  int                  found_match;
1541  int                  start_num_out;
1542  int                  backoff_output;
1543  int                  num_out;
1544
1545  start_num_out = num_out = *pnum_out;
1546
1547  root_start = 0;
1548  root_end = word_len;
1549
1550  for (i=0;i<word_len;i++) {
1551
1552    if ((i == 0) || (num_out == 0)) {
1553      /*      pfprintf(PSTDOUT,"about to call find_phone1\n");*/
1554      left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1555
1556#if PRINT_LTS_WORD
1557      pfprintf(PSTDOUT,"got phone %d for initial | (LTS_MARKER_PIPESEP)\n", left_phone);
1558#endif
1559      if (left_phone < 0) {
1560
1561#ifdef NO_STDERR
1562        PrintError("Error, cannot find | in phone mappings\n", NULL, NULL, NULL);
1563#else
1564        PLogError(L("Error, cannot find | in phone mappings\n"));
1565#endif
1566        nRes = SWIsltsInternalErr;
1567        goto CLEAN_UP;
1568      }
1569    } else {
1570
1571#if PRINT_LTS_WORD
1572      pfprintf(PSTDOUT,"about to call find_phone2 num_out %d\n", num_out);
1573      pfprintf(PSTDOUT,"out[%d] %s\n", num_out-1, output_phone_string[num_out-1]);
1574#endif
1575
1576      if (strcmp(output_phone_string[num_out-1], LTS_MARKER_PRON_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1577      else if (strcmp(output_phone_string[num_out-1], LTS_MARKER_WORD_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1578      else left_phone = find_phone(output_phone_string[num_out-1], lts->phone_mapping);
1579
1580#if PRINT_LTS_WORD
1581      pfprintf(PSTDOUT,"got phone %d for %s\n", left_phone, output_phone_string[num_out-1]);
1582#endif
1583
1584      if (left_phone < 0) {
1585
1586#ifdef NO_STDERR
1587        PrintError("Error, cannot find %s in phone mappings\n", (unsigned long)output_phone_string[num_out-1], NULL, NULL);
1588#else
1589        PLogError(L("Error, cannot find %s in phone mappings\n"), output_phone_string[num_out-1]);
1590#endif
1591        nRes = SWIsltsInternalErr;
1592        goto CLEAN_UP;
1593      }
1594    }
1595
1596    /*    pfprintf(PSTDOUT,"calling fill up dp\n");*/
1597    if (fill_up_dp_for_letter(lts, word, word_len, i, root_start, root_end, left_phone)) continue;
1598
1599    /*    pfprintf(PSTDOUT,"calling find output\n");*/
1600    output_index = find_output_for_dp(lts, &backoff_output);
1601
1602#if PRINT_LTS_WORD
1603    pfprintf(PSTDOUT,"got output %d\n", output_index);
1604#endif
1605
1606    found_match = 1;
1607
1608    if (strlen(lts->input_for_output[output_index]) > 0) {
1609        /*some extra input string to use up*/
1610#if PRINT_LTS_WORD
1611      pfprintf(PSTDOUT,"GOT INPUT %s for %s letter %c\n", lts->input_for_output[output_index], word, word[i]);
1612#endif
1613
1614      input_seq = lts->input_for_output[output_index];
1615      if (input_seq[0] == '=') {
1616        root_end = i;
1617        input_seq = input_seq+1; /*skip suffix indicator*/
1618      }
1619      for (j=i+1;;j++) {
1620        if (input_seq[j-(i+1)] == '\0') break;
1621        if (input_seq[j-(i+1)] == '-') {
1622          root_start = j;
1623          break;
1624        }
1625        if (j >= word_len) {
1626          found_match = 0;
1627          break;
1628        }
1629
1630        if (input_seq[j-(i+1)] != word[j]) {
1631          found_match = 0;
1632          break;
1633        }
1634      }
1635      if (found_match) {
1636        i = j-1;
1637      }
1638    }
1639
1640    if (!found_match) {
1641#if PRINT_LTS_WORD
1642      pfprintf(PSTDOUT,"using backoff output %s instead of regular %s\n",
1643               lts->outputs[backoff_output],
1644               ts->outputs[output_index]);
1645#endif
1646
1647      num_out = add_output(lts->outputs[backoff_output], output_phone_string, num_out, max_phone_length);
1648    }
1649    else {
1650      num_out = add_output(lts->outputs[output_index], output_phone_string, num_out, max_phone_length);
1651    }
1652    if (num_out >= max_phone_length) {
1653      nRes = SWIsltsMaxInputExceeded;
1654      goto CLEAN_UP;
1655    }
1656  }
1657
1658  *pnum_out = num_out;
1659  return SWIsltsSuccess;
1660
1661 CLEAN_UP:
1662
1663  *pnum_out = 0;
1664  return nRes;
1665}
1666
1667
1668
1669SWIsltsResult run_lts(LTS_HANDLE h, FSM_DICT_HANDLE hdict, char *input_sentence, char **output_phone_string, int *phone_length)
1670{
1671  SWIsltsResult            nRes = SWIsltsSuccess;
1672  int                    i;
1673  int                    len;
1674  int                    num_out = 0;
1675  LTS                  * lts;
1676  int                    was_in_phrase;
1677  char                   word[MAX_WORD_LEN];
1678  int                    num_in_word;
1679  int                    max_phone_length;
1680  int                    pron_len;
1681
1682  max_phone_length = *phone_length;
1683
1684  len = strlen(input_sentence);
1685
1686  lts = (LTS*) h;
1687
1688  was_in_phrase = 0;
1689
1690  /*add a phrase start then word start at beginning*/
1691
1692  strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
1693  if (num_out >= max_phone_length) {
1694    nRes = SWIsltsMaxInputExceeded;
1695    goto CLEAN_UP;
1696  }
1697
1698  num_in_word = 0;
1699  pron_len = 1;    // for the first time through
1700
1701  for (i=0;i<=len;i++) {
1702
1703#if PRINT_LTS_WORD
1704    pfprintf(PSTDOUT,"WORKING on letter %d %c\n", i, input_sentence[i]);
1705#endif
1706
1707    /* Treat hyphen as word delimiter.  Not quite right for German
1708       hyphenated compounds, but still an improvement. */
1709    if ((input_sentence[i] == ' ') || (input_sentence[i] == '-') || (input_sentence[i] == '\t') || (i == len)) {
1710      if (num_in_word>0 ) {
1711        strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
1712        if (num_out >= max_phone_length) {
1713          nRes = SWIsltsMaxInputExceeded;
1714          goto CLEAN_UP;
1715        }
1716
1717        word[num_in_word] = '\0';
1718
1719        if (1) {
1720
1721#if PRINT_DICT_LOOKUP
1722          pfprintf(PSTDOUT,"Did not find %s in dictionary\n", word);
1723#endif
1724		  pron_len = -num_out;
1725          nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
1726		  pron_len += num_out; // now pron_len is the number of phonemes/markers added
1727		  if(pron_len == 0)
1728			  num_out--; // to backspace on the LTS_MARKER_WORD_START !!
1729          if (nRes != SWIsltsSuccess) {
1730            goto CLEAN_UP;
1731          }
1732        }
1733        num_in_word = 0;
1734      }
1735    }
1736    else if ( (input_sentence[i] == '.')
1737                || (input_sentence[i] == ',')
1738                || (input_sentence[i] == '!')
1739                || (input_sentence[i] == '?')
1740                || (input_sentence[i] == '\n')) {
1741      if (was_in_phrase) {
1742        /*add a phrase boundary after lts is called*/
1743        if (num_in_word > 0) {
1744          strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
1745          if (num_out >= max_phone_length) {
1746            nRes = SWIsltsMaxInputExceeded;
1747            goto CLEAN_UP;
1748          }
1749
1750          word[num_in_word] = '\0';
1751
1752          if (1) {
1753            nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
1754            if (nRes != SWIsltsSuccess) {
1755              goto CLEAN_UP;
1756            }
1757          }
1758          num_in_word = 0;
1759        }
1760        strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
1761        if (num_out >= max_phone_length) {
1762          nRes = SWIsltsMaxInputExceeded;
1763          goto CLEAN_UP;
1764        }
1765        was_in_phrase = 0;
1766      }
1767    }
1768    else {
1769      if (num_in_word < MAX_WORD_LEN-1) {
1770        word[num_in_word++] = toupper(input_sentence[i]);
1771        was_in_phrase = 1;
1772      }
1773    }
1774  }
1775  /*adjust syllable boundaries*/
1776  adjust_syllable_boundaries(lts, output_phone_string, num_out, max_phone_length);
1777
1778  *phone_length = num_out;
1779  return SWIsltsSuccess;
1780
1781 CLEAN_UP:
1782
1783  *phone_length = 0;
1784  return nRes;
1785}
1786
1787#ifdef USE_STATIC_SLTS
1788void *lts_alloc(int num, int size)
1789{
1790#ifdef NO_STDERR
1791    PrintError("USE_STATIC_SLTS: lts_alloc should not be called", NULL, NULL, NULL);
1792#else
1793    PLogError(L("USE_STATIC_SLTS: lts_alloc should not be called"));
1794#endif
1795  return NULL;
1796}
1797#else
1798
1799void *lts_alloc(int num, int size)
1800{
1801  void *p;
1802  p = CALLOC(num, size, MTAG);
1803  return p;
1804}
1805#endif /* USE_STATIC_SLTS */
1806