1/*---------------------------------------------------------------------------*
2 *  VocabularyImpl.c                                                         *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include "ESR_Session.h"
21#include "SR_Vocabulary.h"
22#include "SR_VocabularyImpl.h"
23#include "passert.h"
24#include "plog.h"
25#include "ptypes.h"
26#include "pmemory.h"
27
28//#define DEBUG 1
29#define MAX_PRON_LEN 256
30#define MAX_WORD_LEN    40
31#define MTAG NULL
32#define MAX_PHONE_LEN 4
33#define DO_DEFER_LOADING_UNTIL_LOOKUPS 1
34
35static PINLINE LCHAR* get_first_word(LCHAR* curr, LCHAR* end);
36static PINLINE LCHAR* get_next_word(LCHAR* curr, LCHAR* end);
37static ESR_ReturnCode run_ttt(const LCHAR *input_sentence, LCHAR *output_sentence, int *text_length);
38
39#define MAX_NUM_PRONS 4
40#define LSTRDUP(src) LSTRCPY(CALLOC(LSTRLEN(src)+1, sizeof(LCHAR), "srec.Vocabulary.LSTRDUP"), (src))
41#define LSTRFREE(src) FREE(src)
42
/*
 * Helpers that open and close the G2P engine handle (impl->hSlts) of a
 * vocabulary. They are compiled only when USE_TTP is defined.
 */
48#ifdef USE_TTP
49ESR_ReturnCode SR_CreateG2P(SR_Vocabulary* self)
50{
51  ESR_ReturnCode      rc = ESR_SUCCESS;
52  SWIsltsResult       res = SWIsltsSuccess;
53  SR_VocabularyImpl * impl = (SR_VocabularyImpl*) self;
54  LCHAR               szG2PDataFile[P_PATH_MAX];
55  size_t              len = P_PATH_MAX;
56  ESR_BOOL                bG2P = ESR_TRUE;
57
58     rc = ESR_SessionGetBool ( L("G2P.Available"), &bG2P );
59     if ( rc != ESR_SUCCESS )
60       {
61	 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetBool() - G2P.Available fails with return code %d\n"), rc);
62	 return rc;
63       }
64     if ( bG2P == ESR_FALSE )
65       {
66	 impl->hSlts = NULL;
67	 return ESR_SUCCESS;
68       }
69
70     rc = ESR_SessionGetLCHAR ( L("G2P.Data"), szG2PDataFile, &len );
71     if ( rc != ESR_SUCCESS )
72       {
73	 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetLCHAR() - G2P.Data fails with return code %d\n"), rc);
74	 return rc;
75     }
76     rc = ESR_SessionPrefixWithBaseDirectory(szG2PDataFile, &len);
77     if ( rc != ESR_SUCCESS )
78       {
79	 PLogError(L("ESR_FATAL_ERROR: ESR_SessionPrefixWithBaseDirectory() - G2P.Data fails with return code %d\n"), rc);
80	 return rc;
81       }
82
83     res = SWIsltsInit();
84     if (res == SWIsltsSuccess)
85       {
86	 /* data_file: en-US-ttp.data */
87	 res = SWIsltsOpen(&(impl->hSlts), szG2PDataFile);
88	 if (res != SWIsltsSuccess)
89	   {
90	     PLogError(L("ESR_FATAL_ERROR: SWIsltsOpen( ) fails with return code %d\n"), res);
91	     FREE(impl);
92	     return ESR_FATAL_ERROR;
93	   }
94       }
95     else
96     {
97       PLogError(L("ESR_FATAL_ERROR: SWIsltsInit( ) fails with return code %d\n"), res);
98       FREE(impl);
99       return ESR_FATAL_ERROR;
100     }
101     return rc;
102}
103
104ESR_ReturnCode SR_DestroyG2P(SR_Vocabulary* self)
105{
106  ESR_ReturnCode      rc = ESR_SUCCESS;
107  SWIsltsResult       res = SWIsltsSuccess;
108  SR_VocabularyImpl * impl = (SR_VocabularyImpl*) self;
109  ESR_BOOL                bG2P = ESR_TRUE;
110
111  rc = ESR_SessionGetBool ( L("G2P.Available"), &bG2P );
112  if ( rc != ESR_SUCCESS )
113     {
114       PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetBool() - G2P.Available fails with return code %d\n"), rc);
115       return rc;
116     }
117  if ( bG2P == ESR_FALSE || impl->hSlts == NULL)
118    {
119      return ESR_SUCCESS;
120    }
121
122  res = SWIsltsClose(impl->hSlts);
123  if (res == SWIsltsSuccess)
124    {
125      res = SWIsltsTerm();
126      if (res != SWIsltsSuccess)
127	{
128	  PLogError(L("ESR_FATAL_ERROR: SWIsltsTerm( ) fails with return code %d\n"), res);
129	  rc = ESR_FATAL_ERROR;
130          }
131    }
132  else
133    {
134      PLogError(L("ESR_FATAL_ERROR: SWIsltsClose( ) fails with return code %d\n"), res);
135      rc = ESR_FATAL_ERROR;
136    }
137  return rc;
138}
139#endif /* USE_TTP */
140
141/**
142 * Creates a new vocabulary but does not set the locale.
143 *
144 * @param self SR_Vocabulary handle
145 */
146ESR_ReturnCode SR_VocabularyCreateImpl(SR_Vocabulary** self)
147{
148  SR_VocabularyImpl* impl;
149
150  if (self==NULL)
151    {
152      PLogError(L("ESR_INVALID_ARGUMENT"));
153      return ESR_INVALID_ARGUMENT;
154    }
155  impl = NEW(SR_VocabularyImpl, MTAG);
156  if (impl==NULL)
157    {
158      PLogError(L("ESR_OUT_OF_MEMORY"));
159      return ESR_OUT_OF_MEMORY;
160    }
161
162  impl->Interface.save = &SR_VocabularySaveImpl;
163  impl->Interface.getPronunciation = &SR_VocabularyGetPronunciationImpl;
164     impl->Interface.getLanguage = &SR_VocabularyGetLanguageImpl;
165     impl->Interface.destroy = &SR_VocabularyDestroyImpl;
166     impl->vocabulary = NULL;
167
168     *self = (SR_Vocabulary*) impl;
169     impl->hSlts = NULL;
170     return ESR_SUCCESS;
171}
172
173ESR_ReturnCode SR_VocabularyDestroyImpl(SR_Vocabulary* self)
174{
175  SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
176
177#ifdef USE_TTP
178  SR_DestroyG2P(self);
179#endif
180
181     if (impl->vocabulary!=NULL)
182       {
183	 CA_UnloadDictionary(impl->vocabulary);
184	 CA_FreeVocabulary(impl->vocabulary);
185	 impl->vocabulary = NULL;
186       }
187	   LSTRFREE(impl->filename);
188     FREE(impl);
189     return ESR_SUCCESS;
190}
191
192ESR_ReturnCode sr_vocabularyloadimpl_for_real(SR_VocabularyImpl* impl)
193{
194	ESR_ReturnCode rc = ESR_SUCCESS;
195	ESR_BOOL sessionExists = ESR_FALSE;
196  LCHAR vocabulary[P_PATH_MAX];
197  size_t len;
198
199     impl->vocabulary = CA_AllocateVocabulary();
200     if (impl->vocabulary==NULL)
201       {
202	 rc = ESR_OUT_OF_MEMORY;
203	 PLogError(ESR_rc2str(rc));
204	 goto CLEANUP;
205       }
206
207     CHKLOG(rc, ESR_SessionExists(&sessionExists));
208
209     if (sessionExists)
210       {
211          LSTRCPY(vocabulary, impl->filename);
212          len = P_PATH_MAX;
213          CHKLOG(rc, ESR_SessionPrefixWithBaseDirectory(vocabulary, &len));
214       }
215     else
216       LSTRCPY(vocabulary, impl->filename);
217
218     CA_LoadDictionary(impl->vocabulary, vocabulary, L(""), &impl->locale);
219     if(impl->vocabulary->is_loaded == False /*(booldata)*/ ) {
220       CHKLOG(rc, ESR_INVALID_ARGUMENT);
221     }
222     impl->ttp_lang = TTP_LANG(impl->locale);
223
224#ifdef USE_TTP
225     rc = SR_CreateG2P((SR_Vocabulary*)impl);
226	 if (rc != ESR_SUCCESS) {
227          goto CLEANUP;
228     }
229#endif
230
231CLEANUP:
232	 return rc;
233}
234
235ESR_ReturnCode SR_VocabularyLoadImpl(const LCHAR* filename, SR_Vocabulary** self)
236{
237  SR_Vocabulary* Interface;
238  SR_VocabularyImpl* impl;
239  ESR_ReturnCode rc;
240
241     CHK(rc, SR_VocabularyCreateImpl(&Interface));
242     impl = (SR_VocabularyImpl*) Interface;
243#if DO_DEFER_LOADING_UNTIL_LOOKUPS
244	 impl->vocabulary = NULL;
245	 impl->ttp_lang = NULL;
246	 impl->filename = LSTRDUP( filename);
247	 impl->locale = ESR_LOCALE_EN_US; // default really
248	 impl->hSlts = NULL;
249#else
250	 impl->filename = LSTRDUP( filename);
251	 CHKLOG( rc, sr_vocabularyloadimpl_for_real( impl));
252#endif
253
254     *self = Interface;
255     return ESR_SUCCESS;
256 CLEANUP:
257     Interface->destroy(Interface);
258     return rc;
259}
260
261ESR_ReturnCode SR_VocabularySaveImpl(SR_Vocabulary* self, const LCHAR* filename)
262{
263  /* TODO: complete */
264  return ESR_SUCCESS;
265}
266
267/* internal util function prototype */
268/* we split the string on all non-alphanum and "'" which
269is handled below */
270#define LSINGLEQUOTE L('\'')
271int split_on_nonalphanum(LCHAR* toSplit, LCHAR** end, const ESR_Locale locale)
272{
273  int nsplits = 0;
274  LCHAR* _next = toSplit;
275    while(*_next)
276    {
277		do {
278			if(*_next == LSINGLEQUOTE && locale == ESR_LOCALE_EN_US) {
279				if(_next[1] != 't' && _next[1] != 's') break;
280				else if( LISALNUM(_next[2])) break; // LISDIGIT
281				else { *_next++; continue; }
282			}
283			if(!*_next || !LISALNUM(*_next)) break;
284			*_next++;
285		} while(1);
286      // FORMERLY:  while(*_next && LISALNUM(*_next))     _next++;
287
288      /* check if I am at the last word or not */
289      if(*_next)
290      {
291        *_next = 0; /* replace split_char with '\0' the word */
292		nsplits++;
293        _next++;    /* point to first char of next word */
294		*end = _next; /* we'll be push forward later, if there's content here!*/
295      }
296      else
297        *end = _next;
298    }
299	return nsplits;
300}
301
302void join(LCHAR* toJoin, LCHAR* end, LCHAR join_char)
303{
304  LCHAR* _next;
305    for(_next = toJoin; _next<end; _next++)
306		if(*_next == 0) *_next = join_char;
307}
308
309size_t get_num_prons( const LCHAR* word_prons, const LCHAR** word_pron_ptr, int max_num_prons)
310{
311  int num_prons = 0;
312  while(word_prons && *word_prons) {
313    word_pron_ptr[ num_prons++] = word_prons;
314    if(num_prons >= max_num_prons) break;
315    while( *word_prons) word_prons++;
316    word_prons++;
317  }
318  return num_prons;
319}
320
321/* This function is used from multi-word phrases, such as "mike smith".  We
322   build up the pronunication of the phrase, by appending the pronunciation
323   of each word.  We need to handle the cases of multiple prons for "mike"
324   and multiple prons for "smith".  For simple cases we try to run faster
325   code. */
326
327int append_to_each_with_joiner( LCHAR* phrase_prons, const LCHAR* word_prons, const LCHAR joiner, size_t max_len, size_t* len)
328{
329  LCHAR* word_pron_ptr[MAX_NUM_PRONS];
330  LCHAR* phrase_pron_ptr[MAX_NUM_PRONS];
331  LCHAR *dst, *max_dst;
332  const LCHAR *src;
333  size_t nphrase_prons = get_num_prons( phrase_prons, (const LCHAR**)phrase_pron_ptr, MAX_NUM_PRONS);
334  size_t nword_prons = get_num_prons( word_prons, (const LCHAR**)word_pron_ptr, MAX_NUM_PRONS);
335  max_dst = phrase_prons+max_len-3;
336
337  if( nword_prons == 0)
338    return 0;
339  else if(nphrase_prons == 0) {
340	for(src=word_prons,dst=phrase_prons; src && *src; ) {
341		for( ; *src && dst<max_dst; ) {
342			*dst++ = *src++;
343		}
344      *dst++ = *src++; // copy the null
345    }
346    *dst = 0; // add a double-null
347	*len = dst-phrase_prons;
348    return 0;
349  }
350  else if(nphrase_prons == 1 && nword_prons == 1) {
351    for(dst=phrase_prons; *dst; ) dst++;
352    if(joiner!=L('\0')) *dst++ = joiner;
353    for(src=word_prons; *src && dst<max_dst; ) *dst++ = *src++;
354    *dst++ = 0;
355    *dst = 0; // add a double-null
356	*len = dst-phrase_prons;
357    return 0;
358  }
359  else  {
360    size_t i,j;
361    LCHAR *phrase_pron_dups[MAX_NUM_PRONS];
362    LCHAR *dst_good_end = phrase_prons+1;
363    for(i=0;i<nphrase_prons; i++)
364      phrase_pron_dups[i] = LSTRDUP( phrase_pron_ptr[i]);
365    dst = phrase_prons;
366    for(i=0;i<nphrase_prons; i++) {
367      for(j=0; j<nword_prons; j++) {
368	for(src=phrase_pron_dups[i]; *src && dst<max_dst; ) *dst++=*src++;
369	if(dst>max_dst) break;
370	if(joiner!=L('\0')) *dst++ = joiner;
371	for(src=word_pron_ptr[j]; *src && dst<max_dst; ) *dst++=*src++;
372	if(dst>max_dst) break;
373	*dst++ = 0;
374	dst_good_end = dst;
375      }
376    }
377    *dst_good_end++ = 0; // double-null terminator
378    for(i=0; i<nphrase_prons; i++) LSTRFREE( phrase_pron_dups[i]);
379    return 0;
380  }
381}
382
383PINLINE LCHAR* get_first_word(LCHAR* curr, LCHAR* end)
384{
385  while(*curr==L('\0') && curr<end) curr++;
386  return curr;
387}
388
389PINLINE LCHAR* get_next_word(LCHAR* curr, LCHAR* end)
390{
391  while(*curr) curr++;
392  if(curr<end)  curr++;
393  while( !*curr && curr<end) curr++;
394  return curr;
395}
396
397/*
398  For each word in a phrase (words separated by spaces)
399
400  if the complete word is in the dictionary
401  return pron
402  else
403  if the word contains '_', split the word into parts
404  and check if parts are in the dictionary.
405  if none of the parts are in the dictionary,
406  reassemble the parts and pass the whole thing to TTP
407  else
408  build the pron by concat of TTP pron and dictionary pron for individual parts
409*/
410ESR_ReturnCode SR_VocabularyGetPronunciationImpl(SR_Vocabulary* self, const LCHAR* phrase, LCHAR* pronunciation, size_t* pronunciation_len)
411{
412  SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
413  /* copy of phrase */
414  LCHAR copy_of_phrase[MAX_PRON_LEN];
415
416  /* pointer to curr phoneme output */
417  LCHAR* curr_phoneme = pronunciation;
418  // size_t pronunciation_len = *len;
419
420  ESR_ReturnCode nEsrRes = ESR_SUCCESS;
421  int text_length;
422  size_t len;
423  int nsplits;
424
425#ifdef USE_TTP
426  SWIsltsResult      res = SWIsltsSuccess;
427  SWIsltsTranscription  *pTranscriptions = NULL;
428  int nNbrOfTranscriptions = 0;
429#endif /* USE_TTP */
430  /* full inf pron after conversion */
431  LCHAR infpron[MAX_PRON_LEN];
432  LCHAR* p_infpron;
433  LCHAR* curr;     /* pointer to current word */
434  LCHAR* end = 0;   /* pointer to end of phrase */
435
436  if(self == NULL || phrase == NULL)
437    {
438      PLogError(L("ESR_INVALID_ARGUMENT"));
439      return ESR_INVALID_ARGUMENT;
440    }
441
442  if( LSTRLEN(phrase) >= MAX_PRON_LEN)
443	return ESR_ARGUMENT_OUT_OF_BOUNDS;
444
445#if DO_DEFER_LOADING_UNTIL_LOOKUPS
446  if( impl->vocabulary == NULL) {
447    CHKLOG( nEsrRes, sr_vocabularyloadimpl_for_real( impl));
448  }
449#endif
450
451  /* by default, check the whole word entry first (regardless of underscores) */
452  if( CA_GetEntryInDictionary(impl->vocabulary, phrase, pronunciation, (int*)&len, MAX_PRON_LEN)) {
453    // len includes the final null, but not the double-null
454    *pronunciation_len = LSTRLEN(pronunciation)+1;
455    // look for double-null terminator
456    while( pronunciation[ (*pronunciation_len)] != L('\0'))
457      *pronunciation_len += LSTRLEN( pronunciation + (*pronunciation_len)) + 1;
458
459    return ESR_SUCCESS;
460  }
461
462  /*************************/
463  /* split digit strings */
464  text_length = MAX_PRON_LEN;
465  nEsrRes = run_ttt(phrase, copy_of_phrase, &text_length);
466  if (nEsrRes != ESR_SUCCESS)
467    {
468      PLogError(L("ESR_FATAL_ERROR: run_ttt( ) fails with return code %d\n"), nEsrRes);
469      return nEsrRes;
470    }
471
472  len = 0;
473  *curr_phoneme = L('\0');
474  if( *pronunciation_len>=12) curr_phoneme[1] = L('\0');
475  else return ESR_INVALID_ARGUMENT;
476
477  /*************************/
478  /* split into word parts */
479  nsplits = split_on_nonalphanum(copy_of_phrase, &end, impl->locale);
480
481  /******************************************************/
482  /* if none of the words are found in the dictionary, then
483     reassemble and get the TTP pron for the whole thing */
484  curr=get_first_word(copy_of_phrase,end);
485  /* check if there are any valid characters at all */
486  if(!curr || !*curr)
487    return ESR_INVALID_ARGUMENT;
488  /* now loop over all words in the phrase */
489  for(   ; *curr; curr = get_next_word(curr,end))
490    {
491      LCHAR* squote = NULL;
492      p_infpron = infpron;
493
494      /* by default, check the whole word entry first (regardless of LSINGLEQUOTE) */
495      if(CA_GetEntryInDictionary(impl->vocabulary, curr, p_infpron, (int*)&len, MAX_PRON_LEN))
496        {
497          /* concatenate, and insert join_char between words */
498          append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
499        }
500      else {
501        p_infpron[0] = 0;
502        /* if this is English AND we're dealing with a quote (possessive or a
503           contraction), then we use the dictionary for the stuff before the
504           quote, and use the TTP to find out what single phoneme should
505           correspond the the thing after the quote ('s' or 't').  This keeps
506           the code clean (no phoneme codes here), and maps 's' to 's' or 'z'
507           with the intelligence of the G2P engine */
508        if( impl->locale == ESR_LOCALE_EN_US) {
509          if( (squote=LSTRCHR(curr,LSINGLEQUOTE))==NULL) {}
510          else {
511            *squote = L('\0');   // temporary
512            if( CA_GetEntryInDictionary(impl->vocabulary, curr, p_infpron, (int*)&len, MAX_PRON_LEN)) {
513            } else
514              p_infpron[0] = 0;
515            *squote = LSINGLEQUOTE; // undo temporary
516          }
517        }
518#ifdef USE_TTP
519        pTranscriptions = NULL;
520        if (impl->hSlts)
521          {
522            res = SWIsltsG2PGetWordTranscriptions(impl->hSlts, curr, &pTranscriptions, &nNbrOfTranscriptions);
523            if (res != SWIsltsSuccess) {
524              PLogError(L("ESR_FATAL_ERROR: SWIsltsG2PGetWordTranscriptions( ) fails with return code %d\n"), res);
525              return ESR_FATAL_ERROR;
526            }
527            if( impl->locale == ESR_LOCALE_EN_US && p_infpron[0] && squote!=L('\0')) {
528              const LCHAR* lastPhoneme = pTranscriptions[0].pBuffer;
529              while(lastPhoneme && *lastPhoneme && lastPhoneme[1]!=L('\0'))
530                lastPhoneme++;
531              append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
532              append_to_each_with_joiner( pronunciation, lastPhoneme, L('\0'), MAX_PRON_LEN, &len);
533            } else {
534              /* only one transcription available from seti */
535              p_infpron = pTranscriptions[0].pBuffer;
536              append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
537#if defined(SREC_ENGINE_VERBOSE_LOGGING)
538              PLogError("L: used G2P for %s", curr);
539#endif
540
541            }
542            if (pTranscriptions) {
543              res = SWIsltsG2PFreeWordTranscriptions(impl->hSlts, pTranscriptions);
544              pTranscriptions = NULL;
545              if (res != SWIsltsSuccess) {
546                PLogError(L("ESR_FATAL_ERROR: SWIsltsG2PFreeWordTranscriptions( ) fails with return code %d\n"), res);
547                return ESR_FATAL_ERROR;
548              }
549            }
550          } else {
551            nEsrRes = ESR_INVALID_ARGUMENT;
552            PLogError(L("ESR_INVALID_ARGUMENT: impl->hSlts was not configured!"));
553            return nEsrRes;
554          }
555#else /* USE_TTP */
556        nEsrRes = ESR_INVALID_ARGUMENT;
557        PLogError(L("ESR_INVALID_ARGUMENT: need USE_TTP build to guess pronunciations!"));
558        return nEsrRes;
559#endif
560      } /* multi-word phrase */
561    } /* loop over words in phrase */
562  len = LSTRLEN(pronunciation)+1;
563  // look for double-null terminator
564  while( pronunciation[ len] != L('\0'))
565    len += LSTRLEN( pronunciation + len) + 1;
566  *pronunciation_len = len;
567  nEsrRes = ESR_SUCCESS;
568 CLEANUP:
569  return nEsrRes;
570}
571
572ESR_ReturnCode SR_VocabularyGetLanguageImpl(SR_Vocabulary* self, ESR_Locale* locale)
573{
574  SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
575
576  *locale = impl->locale;
577  return ESR_SUCCESS;
578}
579
580/* simple text normalization rountine for splitting up any digit string */
581static ESR_ReturnCode run_ttt(const LCHAR *input_sentence, LCHAR *output_sentence, int *text_length)
582{
583  ESR_ReturnCode         nRes = ESR_SUCCESS;
584  int                    num_out = 0;
585  int                    max_text_length = *text_length / sizeof(LCHAR) - 1;
586  ESR_BOOL                   bDigit = False;
587
588  while (*input_sentence != L('\0')) {
589    if (num_out + 2 >= max_text_length) {
590      nRes = ESR_FATAL_ERROR;
591      goto CLEAN_UP;
592    }
593
594    if (L('0') <= *input_sentence && *input_sentence <= L('9')) {
595      if (num_out > 0 && !LISSPACE(output_sentence[num_out-1]) ) {
596		  // put 1 space before digits
597        output_sentence[num_out] = L(' ');
598        num_out++;
599		while( LISSPACE(*input_sentence) ) input_sentence++;
600      }
601      output_sentence[num_out] = *input_sentence;
602      num_out++;
603      bDigit = True;
604    }
605    else {
606      if (bDigit == True && !LISSPACE(output_sentence[num_out-1])) {
607		// put 1 space after digits
608        output_sentence[num_out] = L(' ');
609        num_out++;
610		while( LISSPACE(*input_sentence)) input_sentence++;
611      }
612		output_sentence[num_out] = *input_sentence;
613		num_out++;
614      bDigit = False;
615    }
616    input_sentence++;
617	if( LISSPACE(output_sentence[num_out-1]))
618		while(LISSPACE(*input_sentence )) input_sentence++; // remove repeated spaces
619  }
620
621  output_sentence[num_out] = L('\0');
622  *text_length = num_out * sizeof(LCHAR);
623  return ESR_SUCCESS;
624
625 CLEAN_UP:
626
627  *output_sentence = L('\0');
628  *text_length = 0;
629  return nRes;
630}
631