1/*---------------------------------------------------------------------------*
2 *  Vocabulary.c                                                             *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#include "plog.h"
21#include "SR_Vocabulary.h"
22#include "SR_VocabularyImpl.h"
23
24
25ESR_ReturnCode SR_VocabularyCreate(ESR_Locale locale, SR_Vocabulary** self)
26{
27     SR_Vocabulary* Interface;
28     SR_VocabularyImpl* impl;
29     ESR_ReturnCode rc;
30
31     CHK(rc, SR_VocabularyCreateImpl(&Interface));
32     impl = (SR_VocabularyImpl*) Interface;
33     impl->locale = locale;
34     impl->ttp_lang = TTP_LANG(locale);
35
36#ifdef USE_TTP
37     /* impl->ttp_lang should be set to the current language before G2P is created */
38     rc = SR_CreateG2P(Interface);
39     if (rc != ESR_SUCCESS)
40     {
41          SR_VocabularyDestroyImpl(Interface);
42          goto CLEANUP;
43     }
44#endif
45
46     *self = Interface;
47     return ESR_SUCCESS;
48 CLEANUP:
49     return rc;
50}
51
52ESR_ReturnCode SR_VocabularyLoad(const LCHAR* filename, SR_Vocabulary** self)
53{
54     SR_Vocabulary* Interface;
55     ESR_ReturnCode rc;
56
57     CHK(rc, SR_VocabularyLoadImpl(filename, &Interface));
58
59     *self = Interface;
60     return ESR_SUCCESS;
61 CLEANUP:
62     return rc;
63}
64
65ESR_ReturnCode SR_VocabularySave(SR_Vocabulary* self, const LCHAR* filename)
66{
67  if (self==NULL)
68  {
69    PLogError(L("ESR_INVALID_ARGUMENT"));
70    return ESR_INVALID_ARGUMENT;
71  }
72  return self->save(self, filename);
73}
74
75ESR_ReturnCode SR_VocabularyGetLanguage(SR_Vocabulary* self, ESR_Locale* locale)
76{
77  if (self==NULL)
78  {
79    PLogError(L("ESR_INVALID_ARGUMENT"));
80    return ESR_INVALID_ARGUMENT;
81  }
82  return self->getLanguage(self, locale);
83}
84
85ESR_ReturnCode SR_VocabularyDestroy(SR_Vocabulary* self)
86{
87  if (self==NULL)
88  {
89    PLogError(L("ESR_INVALID_ARGUMENT"));
90    return ESR_INVALID_ARGUMENT;
91  }
92  return self->destroy(self);
93}
94
95ESR_ReturnCode SR_VocabularyGetPronunciation(SR_Vocabulary* self, const LCHAR* word, LCHAR* phoneme, size_t* len)
96{
97  if (self==NULL)
98  {
99    PLogError(L("ESR_INVALID_ARGUMENT"));
100    return ESR_INVALID_ARGUMENT;
101  }
102  return self->getPronunciation(self, word, phoneme, len);
103}
104
105/****************************
106 * ETI to INFINITIVE Phoneme conversion stuff
107 */
108
/* Number of slots in a phoneme conversion table; tables are indexed by
 * character code, so valid codes are 0 .. CH_MAX-1.
 * Declared as an enum constant rather than a `static const int` object: in C
 * only the former is an integer constant expression, so arrays declared as
 * `m[CH_MAX]` are ordinary fixed-size arrays instead of variable-length ones. */
enum { CH_MAX = 128 };
110
/**
 * Fills a conversion table that maps single-character phoneme codes to
 * multi-character phoneme names for the given locale.
 *
 * The table is indexed by character code. Every one of the CH_MAX slots is
 * first set to the empty string, so codes without a mapping yield "".
 *
 * @param locale Locale selecting which mapping is installed. A locale that
 *               has no case in the switch gets only the two entries set
 *               after the switch.
 * @param m      [out] Array with room for at least CH_MAX entries; each entry
 *               is pointed at a string literal (nothing to free)
 * @return ESR_NOT_SUPPORTED for ESR_LOCALE_JA_JP (the table has then only
 *         been cleared to empty strings); ESR_SUCCESS otherwise
 */
static ESR_ReturnCode getTable(ESR_Locale locale, const LCHAR* m[])
{
     int i;
     /* Start from an all-empty table so unmapped codes convert to "". */
     for(i = 0; i< CH_MAX; i++) m[i] = "";

     switch (locale)
     {
     case ESR_LOCALE_EN_US:
     case ESR_LOCALE_EN_GB:
          /* enu_d2f_fray_g.pht */
          m['}']="um";  m['?']="OW";  m['~']="un";  m['@']="uh";  m['A']="EY";
          m['C']="ch";  m['D']="dh";  m['E']="EE";  m['I']="AY";  m['J']="jnk";
          m['L']="ul";  m['N']="ng";  m['O']="OH";  m['P']="ur";  m['S']="sh";
          m['T']="th";  m['U']="OOH"; m['V']="UR";  m['Z']="zh";  m[']']="oh";
          m['^']="ENV"; m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="eh";
          m['d']="d";   m['e']="EH";  m['f']="f";   m[')']="AH";  m['g']="g";
          m['h']="h";   m['i']="IH";  m['j']="j";   m[',']="AE";  m['k']="k";
          m['l']="l";   m['m']="m";   m['/']="ee";  m['n']="n";   m['o']="AW";
          m['p']="p";   m['q']="OO";  m['r']="r";   m['s']="s";   m['t']="t";
          m['6']="ih";  m['u']="UH";  m['v']="v";   m['w']="w";   m['y']="y";
          m['z']="z";   m['<']="OY";  m['{']="AWH";
          break;
     case ESR_LOCALE_FR_FR:
          /* fra_t22_m.pht */
          /* Some codes in this table are raw numeric character values
             (3, 6, 16, 19, 31) rather than printable characters. */
          m['A']="ACI"; m[3]="OEE";   m[6]="OEN";   m['E']="EAC"; m['J']="jnk";
          m['M']="gn";  m[16]="QQ";   m['N']="ng";  m['O']="OCI"; m[19]="AE";
          m['S']="sh";  m['U']="UY";  m['W']="yw";  m['Y']="EN";  m['Z']="ge";
          m[31]="OE";   m['^']="ENV"; m['#']="sil"; m['a']="AGR"; m['b']="b";
          m['d']="d";   m['e']="ECI"; m['f']="f";   m[')']="AN";  m['g']="g";
          m['i']="II";  m['k']="k";   m['l']="l";   m['m']="m";   m['n']="n";
          m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";   m['t']="t";
          m['u']="UGR"; m['v']="v";   m['w']="w";   m['y']="y";   m['z']="z";
          m['{']="ON";
          break;

     case ESR_LOCALE_DE_DE:
          /* German mapping; codes 4 and 6 are raw numeric character values. */
          m['@']="utt"; m['A']="AH";  m[4]="eu";    m['C']="ich"; m[6]="EU";
          m['E']="EH";  m['H']="ue";  m['I']="IH";  m['J']="jnk"; m['K']="ach";
          m['N']="ng";  m['O']="OH";  m['S']="sch"; m['T']="hr";  m['U']="UH";
          m['V']="UEH"; m['W']="wu";  m['Z']="zh";  m['[']="ott"; m['^']="ENV";
          m['!']="att"; m['#']="sil"; m['a']="ATT"; m['b']="b";   m['c']="ett";
          m['d']="d";   m['e']="ETT"; m['f']="f";   m['g']="g";   m['h']="h";
          m['i']="ITT"; m['j']="j";   m[',']="AEH"; m['k']="k";   m['l']="l";
          m['m']="m";   m['n']="n";   m['o']="OTT"; m['p']="p";   m['q']="UE";
          m['r']="r";   m['s']="s";   m['t']="t";   m['6']="itt"; m['u']="UTT";
          m['w']="w";   m['x']="@@";  m[':']="oe";  m['z']="z";   m['<']="OE";
          m['{']="OEH";
          break;
     case ESR_LOCALE_ES_ES:
          /* Spanish mapping. */
          m['@']="uu";  m['C']="ch";  m['D']="rr";  m['E']="EY";  m['J']="jnk";
          m['M']="ks";  m['N']="nn";  m['T']="Z";   m['[']="oo";  m['^']="ENV";
          m['!']="aa";  m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="ee";
          m['d']="d";   m['e']="EE";  m['f']="f";   m[')']="AU";  m['g']="g";
          m['i']="II";  m['j']="j";   m['k']="k";   m['l']="l";   m['m']="m";
          m['n']="n";   m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";
          m['6']="ii";  m['t']="t";   m['u']="UU";  m['w']="w";   m['y']="y";
          break;
     case ESR_LOCALE_NL_NL:
          /* Dutch mapping. */
          m['S']="S";   m['a']="a";   m['N']="nK";  m['d']="d";   m['E']="E";
          m['2']="ep";  m['j']="j";   m['y']="y";   m['Z']="Z";   m['u']="u";
          m['1']="AA";  m['k']="k";   m['g']="g";   m['t']="t";   m['e']="e";
          m['J']="jnk"; m['v']="v";   m['s']="s";   m['^']="ENV"; m['b']="b";
          m['I']="I";   m['G']="G";   m['z']="z";   m['w']="w";   m['$']="$";
          m['r']="r";   m['x']="x";   m['h']="h";   m['f']="f";   m['i']="i";
          m['A']="A";   m['6']="A%t"; m['O']="O";   m['n']="n";   m['3']="Ei";
          m['#']="sil"; m['m']="m";   m['8']="O%t"; m['l']="l";   m['4']="yy";
          m['p']="p";   m['5']="Au";  m['o']="o";
          break;
     case ESR_LOCALE_IT_IT:
          /* Italian mapping; code 21 is a raw numeric character value. */
          m['@']="uu";  m['A']="AI";  m['C']="ci";  m['E']="EI";  m['J']="jnk";
          m['K']="rr";  m['M']="gi";  m['N']="gn";  m['O']="OI";  m[21]="gl";
          m['S']="sci"; m['Y']="ETT"; m['[']="oo";  m['^']="ENV"; m['!']="aa";
          m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="ee";  m['d']="d";
          m['e']="EE";  m['f']="f";   m[')']="AU";  m['g']="g";   m['i']="II";
          m['j']="j";   m['k']="k";   m['l']="l";   m['m']="m";   m['n']="n";
          m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";   m['t']="t";
          m['6']="ii";  m['u']="UU";  m['v']="v";   m['w']="w";   m['z']="z";
          m['{']="OTT";
          break;
     case ESR_LOCALE_PT_PT:
          /* Portuguese mapping. */
          m['A']="ao";  m['B']="ojn"; m['E']="eh";  m['I']="ix";  m['J']="jnk";
          m['L']="lj";  m['N']="nj";  m['O']="on";  m['R']="rr";  m['S']="sh";
          m['U']="un";  m['Z']="zh";  m['^']="ENV"; m['#']="sil"; m['a']="a";
          m['b']="b";   m['c']="ew";  m['d']="d";   m['e']="e";   m['f']="f";
          m['g']="g";   m['h']="in";  m['i']="i";   m['j']="j";   m['k']="k";
          m['l']="l";   m['m']="m";   m['n']="n";   m['1']="aj";  m['o']="o";
          m['p']="p";   m['2']="ajn"; m['3']="an";  m['q']="iw";  m['r']="r";
          m['4']="aw";  m['s']="s";   m['5']="awn"; m['t']="t";   m['6']="ax";
          m['u']="u";   m['7']="axn"; m['v']="v";   m['8']="ej";  m['w']="w";
          m['9']="en";  m['x']="ls";  m['y']="oj";  m['z']="z";
          break;
     case ESR_LOCALE_JA_JP:
          /* No table for Japanese: report it instead of returning an empty mapping. */
          return ESR_NOT_SUPPORTED;
          break;
     }
     /* Entries applied for every locale that reaches this point.
        NOTE(review): this unconditionally replaces the per-locale "sil"
        mapping of '#' with "iwt" -- confirm that the override is intended. */
     m['#']="iwt"; m['&']="&";

     return ESR_SUCCESS;
}
210
211ESR_ReturnCode SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale, const LCHAR* single, LCHAR* multi, size_t max_len)
212{
213    const LCHAR* m[CH_MAX];
214
215    ESR_ReturnCode rc = getTable(locale, m);
216    if (rc != ESR_SUCCESS) return rc;
217
218    for (*multi='\0'; *single; ++single)
219    {
220        LSTRCAT(multi, m[(int)*single]);
221        if (*(single+1)) LSTRCAT(multi, " ");
222    }
223    return ESR_SUCCESS;
224}
225
226ESR_ReturnCode SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale, const LCHAR* multi, LCHAR* single)
227{
228    const LCHAR* m[CH_MAX];
229    int i;
230
231    ESR_ReturnCode rc = getTable(locale, m);
232    if (rc != ESR_SUCCESS) return rc;
233
234    for (i = 0; i < CH_MAX; i++) {
235        if (!LSTRCMP(m[i], multi)) {
236            *single = (LCHAR)i;
237            return ESR_SUCCESS;
238        }
239    }
240    return ESR_NO_MATCH_ERROR;
241}
242