1/* com_svox_picottsengine.cpp
2
3 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *   This is the Manager layer.  It sits on top of the native Pico engine
18 *   and provides the interface to the defined Google TTS engine API.
19 *   The Google engine API is the boundary to allow a TTS engine to be swapped.
 *   The Manager layer also provides the SSML tag interpretation.
21 *   The supported SSML tags are mapped to corresponding tags natively supported by Pico.
22 *   Native Pico functions always begin with picoXXX.
23 *
 *   In the Pico engine, the language cannot be changed independently of the voice.
25 *   If either the voice or locale/language are changed, a new resource is loaded.
26 *
27 *   Only a subset of SSML 1.0 tags are supported.
28 *   Some SSML tags involve significant complexity.
29 *   If the language is changed through an SSML tag, there is a latency for the load.
30 *
31 */
32//#define LOG_NDEBUG 0
33
34#include <stdio.h>
35#include <unistd.h>
36#include <stdlib.h>
37
38#define LOG_TAG "SVOX Pico Engine"
39
40#include <utils/Log.h>
41#include <utils/String16.h>                     /* for strlen16 */
42#include <android_runtime/AndroidRuntime.h>
43#include <TtsEngine.h>
44
45#include <cutils/jstring.h>
46#include <picoapi.h>
47#include <picodefs.h>
48
49#include "svox_ssml_parser.h"
50
51using namespace android;
52
/* ---- adaptation layer defines ---- */

/* size in bytes of the memory area handed to pico_initialize() */
#define PICO_MEM_SIZE       2500000

/* speaking rate: lower bound, upper bound, default */
#define PICO_MIN_RATE        20
#define PICO_MAX_RATE       500
#define PICO_DEF_RATE       100

/* speaking pitch: lower bound, upper bound, default */
#define PICO_MIN_PITCH       50
#define PICO_MAX_PITCH      200
#define PICO_DEF_PITCH      100

/* speaking volume: lower bound, upper bound, default */
#define PICO_MIN_VOLUME       0
#define PICO_MAX_VOLUME     500
#define PICO_DEF_VOLUME     100

/* ---- buffer size and string constants ---- */
#define MAX_OUTBUF_SIZE     128

/* lingware locations and the name of the single Pico voice definition */
const char * PICO_SYSTEM_LINGWARE_PATH      = "/system/tts/lang_pico/";
const char * PICO_LINGWARE_PATH             = "/sdcard/svox/";
const char * PICO_VOICE_NAME                = "PicoVoice";

/* Pico markup tags; the *_OPEN_TAG strings for speed, pitch and volume are
   printf-style formats taking the integer level as their only argument */
const char * PICO_SPEED_OPEN_TAG            = "<speed level='%d'>";
const char * PICO_SPEED_CLOSE_TAG           = "</speed>";
const char * PICO_PITCH_OPEN_TAG            = "<pitch level='%d'>";
const char * PICO_PITCH_CLOSE_TAG           = "</pitch>";
const char * PICO_VOLUME_OPEN_TAG           = "<volume level='%d'>";
const char * PICO_VOLUME_CLOSE_TAG          = "</volume>";

/* the phoneme tag is opened, filled with the phoneme string, then closed */
const char * PICO_PHONEME_OPEN_TAG          = "<phoneme ph='";
const char * PICO_PHONEME_CLOSE_TAG         = "'/>";
81
/* supported voices
   Pico does not separately specify the voice and locale.
   Each column below describes one voice; the same index selects the matching
   entry in every table.                                                        */
const char * picoSupportedLangIso3[]        = { "eng",              "eng",              "deu",              "spa",              "fra",              "ita" };
const char * picoSupportedCountryIso3[]     = { "USA",              "GBR",              "DEU",              "ESP",              "FRA",              "ITA" };
const char * picoSupportedLang[]            = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalLang[]             = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalTaLingware[]       = { "en-US_ta.bin",     "en-GB_ta.bin",     "de-DE_ta.bin",     "es-ES_ta.bin",     "fr-FR_ta.bin",     "it-IT_ta.bin" };
const char * picoInternalSgLingware[]       = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
const char * picoInternalUtppLingware[]     = { "en-US_utpp.bin",   "en-GB_utpp.bin",   "de-DE_utpp.bin",   "es-ES_utpp.bin",   "fr-FR_utpp.bin",   "it-IT_utpp.bin" };

/* Number of voices, derived from the table so that adding or removing a voice
   cannot leave the count out of date.                                          */
const int picoNumSupportedVocs              = (int) (sizeof(picoSupportedLang) / sizeof(picoSupportedLang[0]));

/* Compile-time guard: all per-voice tables hold the same element type, so equal
   sizes mean equal entry counts. A mismatch yields a negative array size and
   stops the build.                                                             */
typedef char picoVoiceTablesInSync[
    ( (sizeof(picoSupportedLangIso3)    == sizeof(picoSupportedLang)) &&
      (sizeof(picoSupportedCountryIso3) == sizeof(picoSupportedLang)) &&
      (sizeof(picoInternalLang)         == sizeof(picoSupportedLang)) &&
      (sizeof(picoInternalTaLingware)   == sizeof(picoSupportedLang)) &&
      (sizeof(picoInternalSgLingware)   == sizeof(picoSupportedLang)) &&
      (sizeof(picoInternalUtppLingware) == sizeof(picoSupportedLang)) ) ? 1 : -1 ];

/* supported properties */
const char * picoSupportedProperties[]      = { "language", "rate", "pitch", "volume" };
const int    picoNumSupportedProperties     = (int) (sizeof(picoSupportedProperties) / sizeof(picoSupportedProperties[0]));
96
97
98/* adapation layer global variables */
99synthDoneCB_t * picoSynthDoneCBPtr;
100void *          picoMemArea         = NULL;
101pico_System     picoSystem          = NULL;
102pico_Resource   picoTaResource      = NULL;
103pico_Resource   picoSgResource      = NULL;
104pico_Resource   picoUtppResource    = NULL;
105pico_Engine     picoEngine          = NULL;
106pico_Char *     picoTaFileName      = NULL;
107pico_Char *     picoSgFileName      = NULL;
108pico_Char *     picoUtppFileName    = NULL;
109pico_Char *     picoTaResourceName  = NULL;
110pico_Char *     picoSgResourceName  = NULL;
111pico_Char *     picoUtppResourceName = NULL;
112int     picoSynthAbort = 0;
113char *  picoProp_currLang   = NULL;                 /* current language */
114int     picoProp_currRate   = PICO_DEF_RATE;        /* current rate     */
115int     picoProp_currPitch  = PICO_DEF_PITCH;       /* current pitch    */
116int     picoProp_currVolume = PICO_DEF_VOLUME;      /* current volume   */
117
118int picoCurrentLangIndex = -1;
119
120char * pico_alt_lingware_path = NULL;
121
122
123/* internal helper functions */
124
125/** checkForLocale
126 *  Check whether the requested locale is among the supported locales.
127 *  @locale -  the locale to check, either in xx or xx-YY format
128 *  return index of the locale, or -1 if not supported.
129*/
130static int checkForLocale( const char * locale )
131{
132     int found = -1;                                         /* language not found   */
133     int i;
134     if (locale == NULL) {
135        ALOGE("checkForLocale called with NULL language");
136        return found;
137     }
138
139    /* Verify that the requested locale is a locale that we support.    */
140    for (i = 0; i < picoNumSupportedVocs; i ++) {
141        if (strcmp(locale, picoSupportedLang[i]) == 0) { /* in array */
142            found = i;
143            break;
144        }
145    };
146
147    /* The exact locale was not found.    */
148    if (found < 0) {
149        /* We didn't find an exact match; it may have been specified with only the first 2 characters.
150           This could overmatch ISO 639-3 language codes.%%                                   */
151
152        /* check whether the current language matches the locale's language */
153        if ((picoCurrentLangIndex > -1) &&
154                (strncmp(locale, picoSupportedLang[picoCurrentLangIndex], 2) == 0)) {
155            /* the current language matches the requested language, let's use it */
156            found = picoCurrentLangIndex;
157        } else {
158            /* check whether we can find a match at least on the language */
159            for (i = 0; i < picoNumSupportedVocs; i ++) {
160                if (strncmp(locale, picoSupportedLang[i], 2) == 0) {
161                    found = i;
162                    break;
163                }
164            }
165        }
166
167        if (found < 0) {
168            ALOGE("TtsEngine::set language called with unsupported locale %s", locale);
169        }
170    };
171    return found;
172}
173
174
175/** cleanResources
176 *  Unloads any loaded Pico resources.
177*/
178static void cleanResources( void )
179{
180    if (picoEngine) {
181        pico_disposeEngine( picoSystem, &picoEngine );
182        pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
183        picoEngine = NULL;
184    }
185    if (picoUtppResource) {
186        pico_unloadResource( picoSystem, &picoUtppResource );
187        picoUtppResource = NULL;
188    }
189    if (picoTaResource) {
190        pico_unloadResource( picoSystem, &picoTaResource );
191        picoTaResource = NULL;
192    }
193    if (picoSgResource) {
194        pico_unloadResource( picoSystem, &picoSgResource );
195        picoSgResource = NULL;
196    }
197
198    if (picoSystem) {
199        pico_terminate(&picoSystem);
200        picoSystem = NULL;
201    }
202    picoCurrentLangIndex = -1;
203}
204
205
206/** cleanFiles
207 *  Frees any memory allocated for file and resource strings.
208*/
209static void cleanFiles( void )
210{
211    if (picoProp_currLang) {
212        free( picoProp_currLang );
213        picoProp_currLang = NULL;
214    }
215
216    if (picoTaFileName) {
217        free( picoTaFileName );
218        picoTaFileName = NULL;
219    }
220
221    if (picoSgFileName) {
222        free( picoSgFileName );
223        picoSgFileName = NULL;
224    }
225
226    if (picoUtppFileName) {
227        free( picoUtppFileName );
228        picoUtppFileName = NULL;
229    }
230
231    if (picoTaResourceName) {
232        free( picoTaResourceName );
233        picoTaResourceName = NULL;
234    }
235
236    if (picoSgResourceName) {
237        free( picoSgResourceName );
238        picoSgResourceName = NULL;
239    }
240
241    if (picoUtppResourceName) {
242        free( picoUtppResourceName );
243        picoUtppResourceName = NULL;
244    }
245}
246
247/** hasResourcesForLanguage
248 *  Check to see if the resources required to load the language at the specified index
249 *  are properly installed
250 *  @langIndex - the index of the language to check the resources for. The index is valid.
251 *  return true if the required resources are installed, false otherwise
252 */
253static bool hasResourcesForLanguage(int langIndex) {
254    FILE * pFile;
255    char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
256
257    /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */
258    strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
259    strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
260    pFile = fopen(fileName, "r");
261    if (pFile != NULL) {
262        /* "ta" file found. */
263        fclose (pFile);
264        /* now look for "sg" file. */
265        strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
266        strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
267        pFile = fopen(fileName, "r");
268        if (pFile != NULL) {
269            /* "sg" file found, no need to continue checking, return success. */
270            fclose(pFile);
271            free(fileName);
272            return true;
273        }
274    }
275
276    /* resources not found on system, check resources on alternative location */
277    /* (under pico_alt_lingware_path).                                            */
278    strcpy((char*)fileName, pico_alt_lingware_path);
279    strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
280    pFile = fopen(fileName, "r");
281    if (pFile == NULL) {
282        free(fileName);
283        return false;
284    } else {
285        fclose (pFile);
286    }
287
288    strcpy((char*)fileName, pico_alt_lingware_path);
289    strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
290    pFile = fopen(fileName, "r");
291    if (pFile == NULL) {
292        free(fileName);
293        return false;
294    } else {
295        fclose(pFile);
296        free(fileName);
297        return true;
298    }
299}
300
301/** doLanguageSwitchFromLangIndex
302 *  Switch to the requested locale.
303 *  If the locale is already loaded, it returns immediately.
304 *  If another locale is already is loaded, it will first be unloaded and the new one then loaded.
305 *  If no locale is loaded, the requested locale will be loaded.
306 *  @langIndex -  the index of the locale/voice to load, which is guaranteed to be supported.
307 *  return TTS_SUCCESS or TTS_FAILURE
308 */
309static tts_result doLanguageSwitchFromLangIndex( int langIndex )
310{
311    int ret;                                        /* function result code */
312
313    if (langIndex>=0) {
314        /* If we already have a loaded locale, check whether it is the same one as requested.   */
315        if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) {
316            //ALOGI("Language already loaded (%s == %s)", picoProp_currLang,
317            //        picoSupportedLang[langIndex]);
318            return TTS_SUCCESS;
319        }
320    }
321
322    /* It is not the same locale; unload the current one first. Also invalidates the system object*/
323    cleanResources();
324
325    /* Allocate memory for file and resource names.     */
326    cleanFiles();
327
328    if (picoSystem==NULL) {
329        /*re-init system object*/
330        ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
331        if (PICO_OK != ret) {
332            ALOGE("Failed to initialize the pico system object\n");
333            return TTS_FAILURE;
334        }
335    }
336
337    picoProp_currLang   = (char *)      malloc( 10 );
338    picoTaFileName      = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
339    picoSgFileName      = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
340    picoUtppFileName    = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
341    picoTaResourceName  = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
342    picoSgResourceName  = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
343    picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
344
345    if (
346        (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) ||
347        (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) ||
348        (picoUtppResourceName==NULL)
349        ) {
350        ALOGE("Failed to allocate memory for internal strings\n");
351        cleanResources();
352        return TTS_FAILURE;
353    }
354
355    /* Find where to load the resource files from: system or alternative location              */
356    /* based on availability of the Ta file. Try the alternative location first, this is where */
357    /* more recent language file updates would be installed (under pico_alt_lingware_path).        */
358    bool bUseSystemPath = true;
359    FILE * pFile;
360    char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
361    strcpy((char*)tmpFileName, pico_alt_lingware_path);
362    strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]);
363    pFile = fopen(tmpFileName, "r");
364    if (pFile != NULL) {
365        /* "ta" file found under pico_alt_lingware_path, don't use the system path. */
366        fclose (pFile);
367        bUseSystemPath = false;
368    }
369    free(tmpFileName);
370
371    /* Set the path and file names for resource files.  */
372    if (bUseSystemPath) {
373        strcpy((char *) picoTaFileName,   PICO_SYSTEM_LINGWARE_PATH);
374        strcpy((char *) picoSgFileName,   PICO_SYSTEM_LINGWARE_PATH);
375        strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH);
376    } else {
377        strcpy((char *) picoTaFileName,   pico_alt_lingware_path);
378        strcpy((char *) picoSgFileName,   pico_alt_lingware_path);
379        strcpy((char *) picoUtppFileName, pico_alt_lingware_path);
380    }
381    strcat((char *) picoTaFileName,   (const char *) picoInternalTaLingware[langIndex]);
382    strcat((char *) picoSgFileName,   (const char *) picoInternalSgLingware[langIndex]);
383    strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
384
385    /* Load the text analysis Lingware resource file.   */
386    ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource );
387    if (PICO_OK != ret) {
388        ALOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret);
389        cleanResources();
390        cleanFiles();
391        return TTS_FAILURE;
392    }
393
394    /* Load the signal generation Lingware resource file.   */
395    ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource );
396    if (PICO_OK != ret) {
397        ALOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret);
398        cleanResources();
399        cleanFiles();
400        return TTS_FAILURE;
401    }
402
403    /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
404       and is currently not used. Loading is only attempted for future compatibility.
405       If this file is not present the loading will still succeed.                      */
406    ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
407    if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) {
408        ALOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret);
409        cleanResources();
410        cleanFiles();
411        return TTS_FAILURE;
412    }
413
414    /* Get the text analysis resource name.     */
415    ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName );
416    if (PICO_OK != ret) {
417        ALOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret);
418        cleanResources();
419        cleanFiles();
420        return TTS_FAILURE;
421    }
422
423    /* Get the signal generation resource name. */
424    ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName );
425    if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
426        /* Get utpp resource name - optional: see note above.   */
427        ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName );
428        if (PICO_OK != ret)  {
429            ALOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret);
430            cleanResources();
431            cleanFiles();
432            return TTS_FAILURE;
433        }
434    }
435    if (PICO_OK != ret) {
436        ALOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret);
437        cleanResources();
438        cleanFiles();
439        return TTS_FAILURE;
440    }
441
442    /* Create a voice definition.   */
443    ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME );
444    if (PICO_OK != ret) {
445        ALOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret);
446        cleanResources();
447        cleanFiles();
448        return TTS_FAILURE;
449    }
450
451    /* Add the text analysis resource to the voice. */
452    ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName );
453    if (PICO_OK != ret) {
454        ALOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
455        cleanResources();
456        cleanFiles();
457        return TTS_FAILURE;
458    }
459
460    /* Add the signal generation resource to the voice. */
461    ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName );
462    if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
463        /* Add utpp resource to voice - optional: see note above.   */
464        ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName );
465        if (PICO_OK != ret) {
466            ALOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
467            cleanResources();
468            cleanFiles();
469            return TTS_FAILURE;
470        }
471    }
472
473    if (PICO_OK != ret) {
474        ALOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
475        cleanResources();
476        cleanFiles();
477        return TTS_FAILURE;
478    }
479
480    ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine );
481    if (PICO_OK != ret) {
482        ALOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret);
483        cleanResources();
484        cleanFiles();
485        return TTS_FAILURE;
486    }
487
488    /* Set the current locale/voice.    */
489    strcpy( picoProp_currLang, picoSupportedLang[langIndex] );
490    picoCurrentLangIndex = langIndex;
491    ALOGI("loaded %s successfully", picoProp_currLang);
492    return TTS_SUCCESS;
493}
494
495
496/** doLanguageSwitch
497 *  Switch to the requested locale.
498 *  If this locale is already loaded, it returns immediately.
499 *  If another locale is already loaded, this will first be unloaded
500 *  and the new one then loaded.
501 *  If no locale is loaded, the requested will be loaded.
502 *  @locale -  the locale to check, either in xx or xx-YY format (i.e "en" or "en-US")
503 *  return TTS_SUCCESS or TTS_FAILURE
504*/
505static tts_result doLanguageSwitch( const char * locale )
506{
507    int loclIndex;                              /* locale index */
508
509    /* Load the new locale. */
510    loclIndex = checkForLocale( locale );
511    if (loclIndex < 0)  {
512        ALOGE("Tried to swith to non-supported locale %s", locale);
513        return TTS_FAILURE;
514    }
515    //ALOGI("Found supported locale %s", picoSupportedLang[loclIndex]);
516    return doLanguageSwitchFromLangIndex( loclIndex );
517}
518
519
520/** doAddProperties
521 *  Add <speed>, <pitch> and <volume> tags to the text,
522 *  if the properties have been set to non-default values, and return the new string.
523 *  The calling function is responsible for freeing the returned string.
524 *  @str - text to apply tags to
525 *  return new string with tags applied
526*/
527static char * doAddProperties( const char * str )
528{
529    char *  data = NULL;
530    int     haspitch, hasspeed, hasvol;                 /* parameters           */
531    int     textlen;                                    /* property string length   */
532    haspitch = 0; hasspeed = 0; hasvol = 0;
533    textlen = strlen(str) + 1;
534    if (picoProp_currPitch != PICO_DEF_PITCH) {          /* non-default pitch    */
535        textlen += strlen(PICO_PITCH_OPEN_TAG) + 5;
536        textlen += strlen(PICO_PITCH_CLOSE_TAG);
537        haspitch = 1;
538    }
539    if (picoProp_currRate != PICO_DEF_RATE) {            /* non-default rate     */
540        textlen += strlen(PICO_SPEED_OPEN_TAG) + 5;
541        textlen += strlen(PICO_SPEED_CLOSE_TAG);
542        hasspeed = 1;
543    }
544
545    if (picoProp_currVolume != PICO_DEF_VOLUME) {        /* non-default volume   */
546        textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5;
547        textlen += strlen(PICO_VOLUME_CLOSE_TAG);
548        hasvol = 1;
549    }
550
551    /* Compose the property strings.    */
552    data = (char *) malloc( textlen );                  /* allocate string      */
553    if (!data) {
554        return NULL;
555    }
556    memset(data, 0, textlen);                           /* clear it             */
557    if (haspitch) {
558        char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5);
559        sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch);
560        strcat(data, tmp);
561        free(tmp);
562    }
563
564    if (hasspeed) {
565        char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5);
566        sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate);
567        strcat(data, tmp);
568        free(tmp);
569    }
570
571    if (hasvol) {
572        char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5);
573        sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume);
574        strcat(data, tmp);
575        free(tmp);
576    }
577
578    strcat(data, str);
579    if (hasvol) {
580        strcat(data, PICO_VOLUME_CLOSE_TAG);
581    }
582
583    if (hasspeed) {
584        strcat(data, PICO_SPEED_CLOSE_TAG);
585    }
586
587    if (haspitch) {
588        strcat(data, PICO_PITCH_CLOSE_TAG);
589    }
590    return data;
591}
592
593
/** get_tok
 *  Finds the next separator-delimited token in a string.
 *  @str - text to be processed
 *  @pos - position of first character to be searched in str
 *  @textlen - position one past the last character to be searched
 *  @tokstart - address of a variable to receive the start of the token found
 *  @toklen - address of a variable to receive the length of the token found
 *  return : 1=token found, 0=token not found (outputs are left untouched)
 *  notes : the token separator set could be enlarged adding characters in "seps".
 *          strchr also matches the terminating NUL of "seps", so a '\0' inside
 *          the searched range is treated as a separator as well.
*/
static int  get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen)
{
    const char * seps = " ";
    int cur = pos;

    /* step over leading separators */
    for (; cur < textlen; cur++) {
        if (strchr(seps, str[cur]) == NULL) {
            break;
        }
    }
    if (cur == textlen) {
        /* nothing but separators up to the end of the range */
        return 0;
    }

    *tokstart = cur;

    /* advance to the first separator after the token, or to the end of the range */
    for (; cur < textlen; cur++) {
        if (strchr(seps, str[cur]) != NULL) {
            break;
        }
    }
    *toklen = cur - *tokstart;
    return 1;
}/*get_tok*/
624
625
/** get_sub_tok
 *  Finds the next sub token in a token having a compound structure with camel case like "xxxYyyy".
 *  A sub token is a (possibly empty) run of upper-case letters followed by a
 *  (possibly empty) run of other characters; it ends right before the next
 *  upper-case letter or at the end of the range.
 *  @str - text to be processed
 *  @pos - position of first character to be searched in str
 *  @textlen - position one past the last character to be searched in str
 *  @tokstart - address of a variable to receive the start of the sub token found
 *  @toklen - address of a variable to receive the length of the sub token found
 *  return : 1=sub token found, 0=sub token not found (pos is already at textlen)
 *  notes : the sub token separator set could be enlarged adding characters in "seps"
*/
static int  get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) {

    const char * seps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    int cur = pos;

    if (cur == textlen) {
        return 0;
    }

    /* the sub token always starts at the requested position */
    *tokstart = cur;

    /* leading run of separator characters belongs to this sub token */
    for (; cur < textlen; cur++) {
        if (strchr(seps, str[cur]) == NULL) {
            break;
        }
    }

    /* followed by everything up to (not including) the next separator;
       skipped entirely when the range is already exhausted              */
    for (; cur < textlen; cur++) {
        if (strchr(seps, str[cur]) != NULL) {
            break;
        }
    }

    *toklen = cur - *tokstart;
    return 1;
}/*get_sub_tok*/
668
669
670/** doCamelCase
671 *  Searches for tokens having a compound structure with camel case and transforms them as follows :
672 *        "XxxxYyyy" -->> "Xxxx Yyyy",
673 *        "xxxYyyy"  -->> "xxx Yyyy",
674 *        "XXXYyyy"  -->> "XXXYyyy"
675 *        etc....
676 *  The calling function is responsible for freeing the returned string.
677 *  @str - text to be processed
678 *  return new string with text processed
679*/
680static char * doCamelCase( const char * str )
681{
682    int     textlen;             /* input string length   */
683    int     totlen;              /* output string length   */
684    int     tlen_2, nsubtok;     /* nuber of subtokens   */
685    int     toklen, tokstart;    /*legnth and start of generic token*/
686    int     stoklen, stokstart;  /*legnth and start of generic sub-token*/
687    int     pos, tokpos, outpos; /*postion of current char in input string and token and output*/
688    char    *data;               /*pointer of the returned string*/
689
690    pos = 0;
691    tokpos = 0;
692    toklen = 0;
693    stoklen = 0;
694    tlen_2 = 0;
695    totlen = 0;
696
697    textlen = strlen(str) + 1;
698
699    /*counting characters after sub token splitting including spaces*/
700    //while ((pos<textlen) && (str[pos]!=0)) {
701    while (get_tok(str, pos, textlen, &tokstart, &toklen)) {
702        tokpos = tokstart;
703        tlen_2 = 0;
704        nsubtok = 0;
705        while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
706            totlen += stoklen;
707            tlen_2 += stoklen;
708            tokpos = stokstart + stoklen;
709            nsubtok += 1;
710        }
711        totlen += nsubtok;    /*add spaces between subtokens*/
712        pos = tokstart + tlen_2;
713    }
714    //}
715    /* Allocate the return string */
716
717    data = (char *) malloc( totlen );                  /* allocate string      */
718    if (!data) {
719        return NULL;
720    }
721    memset(data, 0, totlen);                           /* clear it             */
722    outpos = 0;
723    pos = 0;
724    /*copying characters*/
725    //while ((pos<textlen) && (str[pos]!=0)) {
726    while (get_tok  (str, pos, textlen, &tokstart, &toklen)) {
727        tokpos = tokstart;
728        tlen_2 = 0;
729        nsubtok = 0;
730        while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
731            strncpy(&(data[outpos]), &(str[stokstart]), stoklen);
732            outpos += stoklen;
733            strncpy(&(data[outpos]), " ", 1);
734            tlen_2 += stoklen;
735            outpos += 1;
736            tokpos = stokstart + stoklen;
737        }
738        pos=tokstart+tlen_2;
739    }
740    //}
741    if (outpos == 0) {
742        outpos = 1;
743    }
744    data[outpos-1] = 0;
745    return data;
746}/*doCamelCase*/
747
748
749/** createPhonemeString
750 *  Wrap all individual words in <phoneme> tags.
751 *  The Pico <phoneme> tag only supports one word in each tag,
752 *  therefore they must be individually wrapped!
753 *  @xsampa - text to convert to Pico phomene string
754 *  @length - length of the input string
755 *  return new string with tags applied
756*/
757extern char * createPhonemeString( const char * xsampa, int length )
758{
759    char *  convstring = NULL;
760    int     origStrLen = strlen(xsampa);
761    int     numWords   = 1;
762    int     start, totalLength, i, j;
763
764    for (i = 0; i < origStrLen; i ++) {
765        if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
766            numWords ++;
767        }
768    }
769
770    if (numWords == 1) {
771        convstring = new char[origStrLen + 17];
772        convstring[0] = '\0';
773        strcat(convstring, PICO_PHONEME_OPEN_TAG);
774        strcat(convstring, xsampa);
775        strcat(convstring, PICO_PHONEME_CLOSE_TAG);
776    } else {
777        char * words[numWords];
778        start = 0; totalLength = 0; i = 0; j = 0;
779        for (i=0, j=0; i < origStrLen; i++) {
780            if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
781                words[j]    = new char[i+1-start+17];
782                words[j][0] = '\0';
783                strcat( words[j], PICO_PHONEME_OPEN_TAG);
784                strncat(words[j], xsampa+start, i-start);
785                strcat( words[j], PICO_PHONEME_CLOSE_TAG);
786                start = i + 1;
787                j++;
788                totalLength += strlen(words[j-1]);
789            }
790        }
791        words[j]    = new char[i+1-start+17];
792        words[j][0] = '\0';
793        strcat(words[j], PICO_PHONEME_OPEN_TAG);
794        strcat(words[j], xsampa+start);
795        strcat(words[j], PICO_PHONEME_CLOSE_TAG);
796        totalLength += strlen(words[j]);
797        convstring = new char[totalLength + 1];
798        convstring[0] = '\0';
799        for (i=0 ; i < numWords ; i++) {
800            strcat(convstring, words[i]);
801            delete [] words[i];
802        }
803    }
804
805    return convstring;
806}
807
/* One row of the IPA -> XSAMPA conversion table.
   An XSAMPA spelling can take as many as 5 characters for a single IPA code,
   hence the 6-byte field (5 characters plus the terminating NUL).            */
struct tagPhnArr
{
    char16_t    strIPA;             /* IPA Unicode symbol (one UTF-16 unit) */
    char        strXSAMPA[6];       /* NUL-terminated XSAMPA sequence       */
};
typedef struct tagPhnArr PArr;
814
/* Number of rows in PhnAry; must match the initializer list below
   (23 vowels + 60 consonants + 37 diacritics + 15 suprasegmentals
   + 6 affricates = 141).                                                    */
#define phn_cnt (134+7)

PArr    PhnAry[phn_cnt] = {

    /* XSAMPA conversion table
       This maps a single IPA symbol to a sequence representing XSAMPA.
       This relies upon a direct one-to-one correspondence
       including diphthongs and affricates.
       The table is searched linearly by CnvIPAPnt(), first match wins.
       NOTE(review): most entries spell the XSAMPA backslash as "\\\\"
       (two backslash characters at run time) while a few use "\\" (one);
       presumably the doubled form is an escape for the consumer of the
       string - confirm before normalizing either way.                       */

    /* Vowels (23) complete     */
    {0x025B,        "E"},
    {0x0251,        "A"},
    {0x0254,        "O"},
    {0x00F8,        "2"},
    {0x0153,        "9"},
    {0x0276,        "&"},
    {0x0252,        "Q"},
    {0x028C,        "V"},
    {0x0264,        "7"},
    {0x026F,        "M"},
    {0x0268,        "1"},
    {0x0289,        "}"},
    {0x026A,        "I"},
    {0x028F,        "Y"},
    {0x028A,        "U"},
    {0x0259,        "@"},
    {0x0275,        "8"},
    {0x0250,        "6"},
    {0x00E6,        "{"},
    {0x025C,        "3"},
    {0x025A,        "@`"},
    {0x025E,        "3\\\\"},
    {0x0258,        "@\\\\"},

    /* Consonants (60) complete */
    {0x0288,        "t`"},
    {0x0256,        "d`"},
    {0x025F,        "J\\\\"},
    {0x0261,        "g"},
    {0x0262,        "G\\\\"},
    {0x0294,        "?"},
    {0x0271,        "F"},
    {0x0273,        "n`"},
    {0x0272,        "J"},
    {0x014B,        "N"},
    {0x0274,        "N\\\\"},
    {0x0299,        "B\\\\"},
    {0x0280,        "R\\\\"},
    {0x027E,        "4"},
    {0x027D,        "r`"},
    {0x0278,        "p\\\\"},
    {0x03B2,        "B"},
    {0x03B8,        "T"},
    {0x00F0,        "D"},
    {0x0283,        "S"},
    {0x0292,        "Z"},
    {0x0282,        "s`"},
    {0x0290,        "z`"},
    {0x00E7,        "C"},
    {0x029D,        "j\\\\"},
    {0x0263,        "G"},
    {0x03C7,        "X"},
    {0x0281,        "R"},
    {0x0127,        "X\\\\"},
    {0x0295,        "?\\\\"},
    {0x0266,        "h\\\\"},
    {0x026C,        "K"},
    {0x026E,        "K\\\\"},
    {0x028B,        "P"},
    {0x0279,        "r\\\\"},
    {0x027B,        "r\\\\'"},
    {0x0270,        "M\\\\"},
    {0x026D,        "l`"},
    {0x028E,        "L"},
    {0x029F,        "L\\\\"},
    {0x0253,        "b_<"},
    {0x0257,        "d_<"},
    {0x0284,        "J\\_<"},
    {0x0260,        "g_<"},
    {0x029B,        "G\\_<"},
    {0x028D,        "W"},
    {0x0265,        "H"},
    {0x029C,        "H\\\\"},
    {0x02A1,        ">\\\\"},
    {0x02A2,        "<\\\\"},
    {0x0267,        "x\\\\"},       /* hooktop heng */
    {0x0298,        "O\\\\"},
    {0x01C0,        "|\\\\"},
    {0x01C3,        "!\\\\"},
    {0x01C2,        "=\\"},
    {0x01C1,        "|\\|\\"},
    {0x027A,        "l\\\\"},
    {0x0255,        "s\\\\"},
    {0x0291,        "z\\\\"},
    {0x026B,        "l_G"},


    /* Diacritics (37) complete */
    {0x02BC,        "_>"},
    {0x0325,        "_0"},
    {0x030A,        "_0"},
    {0x032C,        "_v"},
    {0x02B0,        "_h"},
    {0x0324,        "_t"},
    {0x0330,        "_k"},
    {0x033C,        "_N"},
    {0x032A,        "_d"},
    {0x033A,        "_a"},
    {0x033B,        "_m"},
    {0x0339,        "_O"},
    {0x031C,        "_c"},
    {0x031F,        "_+"},
    {0x0320,        "_-"},
    {0x0308,        "_\""},     /* centralized */
    {0x033D,        "_x"},
    {0x0318,        "_A"},
    {0x0319,        "_q"},
    {0x02DE,        "`"},
    {0x02B7,        "_w"},
    {0x02B2,        "_j"},
    {0x02E0,        "_G"},
    {0x02E4,        "_?\\\\"},      /* pharyngealized */
    {0x0303,        "~"},       /* nasalized */
    {0x207F,        "_n"},
    {0x02E1,        "_l"},
    {0x031A,        "_}"},
    {0x0334,        "_e"},
    {0x031D,        "_r"},      /* raised  equivalent to 02D4 */
    {0x02D4,        "_r"},      /* raised  equivalent to 031D */
    {0x031E,        "_o"},      /* lowered equivalent to 02D5 */
    {0x02D5,        "_o"},      /* lowered equivalent to 031E */
    {0x0329,        "="},       /* syllabic */
    {0x032F,        "_^"},      /* non-syllabic */
    {0x0361,        "_"},       /* top tie bar */
    {0x035C,        "_"},

    /* Suprasegmental (15, counting the non-IPA entry at the end) incomplete */
    {0x02C8,        "\""},      /* primary   stress */
    {0x02CC,        "%"},       /* secondary stress */
    {0x02D0,        ":"},       /* long */
    {0x02D1,        ":\\\\"},       /* half-long */
    {0x0306,        "_X"},      /* extra short */

    {0x2016,        "||"},      /* major group */
    {0x203F,        "-\\\\"},       /* bottom tie bar */
    {0x2197,        "<R>"},     /* global rise */
    {0x2198,        "<F>"},     /* global fall */
    {0x2193,        "<D>"},     /* downstep */
    {0x2191,        "<U>"},     /* upstep */
    {0x02E5,        "<T>"},     /* extra high level */
    {0x02E7,        "<M>"},     /* mid level */
    {0x02E9,        "<B>"},     /* extra low level */

    {0x025D,        "3`:"},     /* non-IPA %% */

    /* Affricates (6) complete  */
    {0x02A3,        "d_z"},
    {0x02A4,        "d_Z"},
    {0x02A5,        "d_z\\\\"},
    {0x02A6,        "t_s"},
    {0x02A7,        "t_S"},
    {0x02A8,        "t_s\\\\"}
    };
978
979
980void CnvIPAPnt( const char16_t IPnt, char * XPnt )
981{
982    char16_t        ThisPnt = IPnt;                     /* local copy of single IPA codepoint   */
983    int             idx;                                /* index into table         */
984
985    /* Convert an individual IPA codepoint.
986       A single IPA code could map to a string.
987       Search the table.  If it is not found, use the same character.
988       Since most codepoints can be contained within 16 bits,
989       they are represented as wide chars.              */
990    XPnt[0] = 0;                                        /* clear the result string  */
991
992    /* Search the table for the conversion. */
993    for (idx = 0; idx < phn_cnt; idx ++) {               /* for each item in table   */
994        if (IPnt == PhnAry[idx].strIPA) {                /* matches IPA code         */
995            strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string   */
996            return;
997        }
998    }
999    strcat(XPnt, (const char *)&ThisPnt);               /* just copy it             */
1000}
1001
1002
1003/** cnvIpaToXsampa
1004 *  Convert an IPA character string to an XSAMPA character string.
1005 *  @ipaString - input IPA string to convert
1006 *  @outXsampaString - converted XSAMPA string is passed back in this parameter
1007 *  return size of the new string
1008*/
1009
1010int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString )
1011{
1012    size_t xsize;                                  /* size of result               */
1013    size_t ipidx;                                  /* index into IPA string        */
1014    char * XPnt;                                   /* short XSAMPA char sequence   */
1015
1016    /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString.
1017       It is the responsibility of the caller to free the allocated string.
1018       Increment through the string.  For each base & combination convert it to the XSAMP equivalent.
1019       Because of the XSAMPA limitations, not all IPA characters will be covered.       */
1020    XPnt = (char *) malloc(6);
1021    xsize   = (4 * ipaStringSize) + 8;          /* assume more than double size */
1022    *outXsampaString = (char *) malloc( xsize );/* allocate return string   */
1023    *outXsampaString[0] = 0;
1024    xsize = 0;                                  /* clear final              */
1025
1026    for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code        */
1027        CnvIPAPnt( ipaString[ipidx], XPnt );           /* get converted character  */
1028        strcat((char *)*outXsampaString, XPnt );       /* concatenate XSAMPA       */
1029    }
1030    free(XPnt);
1031    xsize = strlen(*outXsampaString);                  /* get the final length     */
1032    return xsize;
1033}
1034
1035
1036/* Google Engine API function implementations */
1037
1038/** init
1039 *  Allocates Pico memory block and initializes the Pico system.
1040 *  synthDoneCBPtr - Pointer to callback function which will receive generated samples
1041 *  config - the engine configuration parameters, here only contains the non-system path
1042 *      for the lingware location
1043 *  return tts_result
1044*/
1045tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config )
1046{
1047    if (synthDoneCBPtr == NULL) {
1048        ALOGE("Callback pointer is NULL");
1049        return TTS_FAILURE;
1050    }
1051
1052    picoMemArea = malloc( PICO_MEM_SIZE );
1053    if (!picoMemArea) {
1054        ALOGE("Failed to allocate memory for Pico system");
1055        return TTS_FAILURE;
1056    }
1057
1058    pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
1059    if (PICO_OK != ret) {
1060        ALOGE("Failed to initialize Pico system");
1061        free( picoMemArea );
1062        picoMemArea = NULL;
1063        return TTS_FAILURE;
1064    }
1065
1066    picoSynthDoneCBPtr = synthDoneCBPtr;
1067
1068    picoCurrentLangIndex = -1;
1069
1070    // was the initialization given an alternative path for the lingware location?
1071    if ((config != NULL) && (strlen(config) > 0)) {
1072        pico_alt_lingware_path = (char*)malloc(strlen(config));
1073        strcpy((char*)pico_alt_lingware_path, config);
1074        ALOGV("Alternative lingware path %s", pico_alt_lingware_path);
1075    } else {
1076        pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH) + 1);
1077        strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH);
1078        ALOGV("Using predefined lingware path %s", pico_alt_lingware_path);
1079    }
1080
1081    return TTS_SUCCESS;
1082}
1083
1084
1085/** shutdown
1086 *  Unloads all Pico resources; terminates Pico system and frees Pico memory block.
1087 *  return tts_result
1088*/
1089tts_result TtsEngine::shutdown( void )
1090{
1091    cleanResources();
1092
1093    if (picoSystem) {
1094        pico_terminate(&picoSystem);
1095        picoSystem = NULL;
1096    }
1097    if (picoMemArea) {
1098        free(picoMemArea);
1099        picoMemArea = NULL;
1100    }
1101
1102    cleanFiles();
1103    return TTS_SUCCESS;
1104}
1105
1106
1107/** loadLanguage
1108 *  Load a new language.
1109 *  @lang - string with ISO 3 letter language code.
1110 *  @country - string with ISO 3 letter country code .
1111 *  @variant - string with language variant for that language and country pair.
1112 *  return tts_result
1113*/
1114tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
1115{
1116    return TTS_FAILURE;
1117    //return setProperty("language", value, size);
1118}
1119
1120
1121/** setLanguage
1122 *  Load a new language (locale).  Use the ISO 639-3 language codes.
1123 *  @lang - string with ISO 639-3 language code.
1124 *  @country - string with ISO 3 letter country code.
1125 *  @variant - string with language variant for that language and country pair.
1126 *  return tts_result
1127 */
1128tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
1129{
1130    //ALOGI("TtsEngine::setLanguage %s %s %s", lang, country, variant);
1131    int langIndex;
1132    int countryIndex;
1133    int i;
1134
1135    if (lang == NULL)
1136        {
1137        ALOGE("TtsEngine::setLanguage called with NULL language");
1138        return TTS_FAILURE;
1139        }
1140
1141    /* We look for a match on the language first
1142       then we look for a match on the country.
1143       If no match on the language:
1144             return an error.
1145       If match on the language, but no match on the country:
1146             load the language found for the language match.
1147       If match on the language, and match on the country:
1148             load the language found for the country match.     */
1149
1150    /* Find a match on the language.    */
1151    langIndex = -1;                                     /* no match */
1152    for (i = 0; i < picoNumSupportedVocs; i ++)
1153        {
1154        if (strcmp(lang, picoSupportedLangIso3[i]) == 0)
1155            {
1156            langIndex = i;
1157            break;
1158            }
1159        }
1160    if (langIndex < 0)
1161        {
1162        /* The language isn't supported.    */
1163        ALOGE("TtsEngine::setLanguage called with unsupported language");
1164        return TTS_FAILURE;
1165        }
1166
1167    /* Find a match on the country, if there is one.    */
1168    if (country != NULL)
1169        {
1170        countryIndex = -1;
1171        for (i = langIndex; i < picoNumSupportedVocs; i ++)
1172            {
1173            if (   (strcmp(lang,    picoSupportedLangIso3[i])    == 0)
1174                && (strcmp(country, picoSupportedCountryIso3[i]) == 0))
1175                {
1176                countryIndex = i;
1177                break;
1178                }
1179            }
1180
1181        if (countryIndex < 0)
1182            {
1183            /* We didn't find a match on the country, but we had a match on the language.
1184               Use that language.                                                       */
1185            ALOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).",
1186                    lang, country);
1187            }
1188        else
1189            {
1190            /* We have a match on both the language and the country.    */
1191            langIndex = countryIndex;
1192            }
1193        }
1194
1195    return doLanguageSwitchFromLangIndex( langIndex );      /* switch the language  */
1196}
1197
1198
1199/** isLanguageAvailable
1200 *  Returns the level of support for a language.
1201 *  @lang - string with ISO 3 letter language code.
1202 *  @country - string with ISO 3 letter country code .
1203 *  @variant - string with language variant for that language and country pair.
1204 *  return tts_support_result
1205*/
1206tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
1207            const char *variant) {
1208    int langIndex = -1;
1209    int countryIndex = -1;
1210    //-------------------------
1211    // language matching
1212    // if no language specified
1213    if (lang == NULL)  {
1214        ALOGE("TtsEngine::isLanguageAvailable called with no language");
1215        return TTS_LANG_NOT_SUPPORTED;
1216    }
1217
1218    // find a match on the language
1219    for (int i = 0; i < picoNumSupportedVocs; i++)
1220    {
1221        if (strcmp(lang, picoSupportedLangIso3[i]) == 0) {
1222            langIndex = i;
1223            break;
1224        }
1225    }
1226    if (langIndex < 0) {
1227        // language isn't supported
1228        ALOGV("TtsEngine::isLanguageAvailable called with unsupported language");
1229        return TTS_LANG_NOT_SUPPORTED;
1230    }
1231
1232    //-------------------------
1233    // country matching
1234    // if no country specified
1235    if ((country == NULL) || (strlen(country) == 0)) {
1236        // check installation of matched language
1237        return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1238    }
1239
1240    // find a match on the country
1241    for (int i = langIndex; i < picoNumSupportedVocs; i++) {
1242        if ((strcmp(lang, picoSupportedLangIso3[i]) == 0)
1243                && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) {
1244            countryIndex = i;
1245            break;
1246        }
1247    }
1248    if (countryIndex < 0)  {
1249        // we didn't find a match on the country, but we had a match on the language
1250        // check installation of matched language
1251        return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1252    } else {
1253        // we have a match on the language and the country
1254        langIndex = countryIndex;
1255        // check installation of matched language + country
1256        return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA);
1257    }
1258
1259    // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned.
1260}
1261
1262
1263/** getLanguage
1264 *  Get the currently loaded language - if any.
1265 *  @lang - string with current ISO 3 letter language code, empty string if no loaded language.
1266 *  @country - string with current ISO 3 letter country code, empty string if no loaded language.
1267 *  @variant - string with current language variant, empty string if no loaded language.
1268 *  return tts_result
1269*/
1270tts_result TtsEngine::getLanguage(char *language, char *country, char *variant)
1271{
1272    if (picoCurrentLangIndex == -1) {
1273        strcpy(language, "\0");
1274        strcpy(country, "\0");
1275        strcpy(variant, "\0");
1276    } else {
1277        strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]);
1278        strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]);
1279        // no variant in this implementation
1280        strcpy(variant, "\0");
1281    }
1282    return TTS_SUCCESS;
1283}
1284
1285
1286/** setAudioFormat
1287 * sets the audio format to use for synthesis, returns what is actually used.
1288 * @encoding - reference to encoding format
1289 * @rate - reference to sample rate
1290 * @channels - reference to number of channels
1291 * return tts_result
1292 * */
1293tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate,
1294            int& channels)
1295{
1296    // ignore the input parameters, the enforced audio parameters are fixed here
1297    encoding = TTS_AUDIO_FORMAT_PCM_16_BIT;
1298    rate = 16000;
1299    channels = 1;
1300    return TTS_SUCCESS;
1301}
1302
1303
1304/** setProperty
1305 *  Set property. The supported properties are:  language, rate, pitch and volume.
1306 *  @property - name of property to set
1307 *  @value - value to set
1308 *  @size - size of value
1309 *  return tts_result
1310*/
1311tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size )
1312{
1313    int rate;
1314    int pitch;
1315    int volume;
1316
1317    /* Set a specific property for the engine.
1318       Supported properties include: language (locale), rate, pitch, volume.    */
1319    /* Sanity check */
1320    if (property == NULL) {
1321        ALOGE("setProperty called with property NULL");
1322        return TTS_PROPERTY_UNSUPPORTED;
1323    }
1324
1325    if (value == NULL) {
1326        ALOGE("setProperty called with value NULL");
1327        return TTS_VALUE_INVALID;
1328    }
1329
1330    if (strncmp(property, "language", 8) == 0) {
1331        /* Verify it's in correct format.   */
1332        if (strlen(value) != 2 && strlen(value) != 6) {
1333            ALOGE("change language called with incorrect format");
1334            return TTS_VALUE_INVALID;
1335        }
1336
1337        /* Try to switch to specified language. */
1338        if (doLanguageSwitch(value) == TTS_FAILURE) {
1339            ALOGE("failed to load language");
1340            return TTS_FAILURE;
1341        } else {
1342            return TTS_SUCCESS;
1343        }
1344    } else if (strncmp(property, "rate", 4) == 0) {
1345        rate = atoi(value);
1346        if (rate < PICO_MIN_RATE) {
1347            rate = PICO_MIN_RATE;
1348        }
1349        if (rate > PICO_MAX_RATE) {
1350            rate = PICO_MAX_RATE;
1351        }
1352        picoProp_currRate = rate;
1353        return TTS_SUCCESS;
1354    } else if (strncmp(property, "pitch", 5) == 0) {
1355        pitch = atoi(value);
1356        if (pitch < PICO_MIN_PITCH) {
1357            pitch = PICO_MIN_PITCH;
1358        }
1359        if (pitch > PICO_MAX_PITCH) {
1360            pitch = PICO_MAX_PITCH;
1361        }
1362        picoProp_currPitch = pitch;
1363        return TTS_SUCCESS;
1364    } else if (strncmp(property, "volume", 6) == 0) {
1365        volume = atoi(value);
1366        if (volume < PICO_MIN_VOLUME) {
1367            volume = PICO_MIN_VOLUME;
1368        }
1369        if (volume > PICO_MAX_VOLUME) {
1370            volume = PICO_MAX_VOLUME;
1371        }
1372        picoProp_currVolume = volume;
1373        return TTS_SUCCESS;
1374    }
1375
1376    return TTS_PROPERTY_UNSUPPORTED;
1377}
1378
1379
1380/** getProperty
1381 *  Get the property.  Supported properties are:  language, rate, pitch and volume.
1382 *  @property - name of property to get
1383 *  @value    - buffer which will receive value of property
1384 *  @iosize   - size of value - if size is too small on return this will contain actual size needed
1385 *  return tts_result
1386*/
1387tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize )
1388{
1389    /* Get the property for the engine.
1390       This property was previously set by setProperty or by default.       */
1391    /* sanity check */
1392    if (property == NULL) {
1393        ALOGE("getProperty called with property NULL");
1394        return TTS_PROPERTY_UNSUPPORTED;
1395    }
1396
1397    if (value == NULL) {
1398        ALOGE("getProperty called with value NULL");
1399        return TTS_VALUE_INVALID;
1400    }
1401
1402    if (strncmp(property, "language", 8) == 0) {
1403        if (picoProp_currLang == NULL) {
1404            strcpy(value, "");
1405        } else {
1406            if (*iosize < strlen(picoProp_currLang)+1)  {
1407                *iosize = strlen(picoProp_currLang) + 1;
1408                return TTS_PROPERTY_SIZE_TOO_SMALL;
1409            }
1410            strcpy(value, picoProp_currLang);
1411        }
1412        return TTS_SUCCESS;
1413    } else if (strncmp(property, "rate", 4) == 0) {
1414        char tmprate[4];
1415        sprintf(tmprate, "%d", picoProp_currRate);
1416        if (*iosize < strlen(tmprate)+1) {
1417            *iosize = strlen(tmprate) + 1;
1418            return TTS_PROPERTY_SIZE_TOO_SMALL;
1419        }
1420        strcpy(value, tmprate);
1421        return TTS_SUCCESS;
1422    } else if (strncmp(property, "pitch", 5) == 0) {
1423        char tmppitch[4];
1424        sprintf(tmppitch, "%d", picoProp_currPitch);
1425        if (*iosize < strlen(tmppitch)+1) {
1426            *iosize = strlen(tmppitch) + 1;
1427            return TTS_PROPERTY_SIZE_TOO_SMALL;
1428        }
1429        strcpy(value, tmppitch);
1430        return TTS_SUCCESS;
1431    } else if (strncmp(property, "volume", 6) == 0) {
1432        char tmpvol[4];
1433        sprintf(tmpvol, "%d", picoProp_currVolume);
1434        if (*iosize < strlen(tmpvol)+1) {
1435            *iosize = strlen(tmpvol) + 1;
1436            return TTS_PROPERTY_SIZE_TOO_SMALL;
1437        }
1438        strcpy(value, tmpvol);
1439        return TTS_SUCCESS;
1440    }
1441
1442    /* Unknown property */
1443    ALOGE("Unsupported property");
1444    return TTS_PROPERTY_UNSUPPORTED;
1445}
1446
1447
1448/** synthesizeText
1449 *  Synthesizes a text string.
1450 *  The text string could be annotated with SSML tags.
1451 *  @text     - text to synthesize
1452 *  @buffer   - buffer which will receive generated samples
1453 *  @bufferSize - size of buffer
1454 *  @userdata - pointer to user data which will be passed back to callback function
1455 *  return tts_result
1456*/
1457tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
1458{
1459    int         err;
1460    int         cbret;
1461    pico_Char * inp = NULL;
1462    char *      expanded_text = NULL;
1463    pico_Char * local_text = NULL;
1464    short       outbuf[MAX_OUTBUF_SIZE/2];
1465    pico_Int16  bytes_sent, bytes_recv, text_remaining, out_data_type;
1466    pico_Status ret;
1467    SvoxSsmlParser * parser = NULL;
1468
1469    picoSynthAbort = 0;
1470    if (text == NULL) {
1471        ALOGE("synthesizeText called with NULL string");
1472        return TTS_FAILURE;
1473    }
1474
1475    if (strlen(text) == 0) {
1476        return TTS_SUCCESS;
1477    }
1478
1479    if (buffer == NULL) {
1480        ALOGE("synthesizeText called with NULL buffer");
1481        return TTS_FAILURE;
1482    }
1483
1484    if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) {
1485        /* SSML input */
1486        parser = new SvoxSsmlParser();
1487        if (parser && parser->initSuccessful()) {
1488            err = parser->parseDocument(text, 1);
1489            if (err == XML_STATUS_ERROR) {
1490                /* Note: for some reason expat always thinks the input document has an error
1491                   at the end, even when the XML document is perfectly formed */
1492                ALOGI("Warning: SSML document parsed with errors");
1493            }
1494            char * parsed_text = parser->getParsedDocument();
1495            if (parsed_text) {
1496                /* Add property tags to the string - if any.    */
1497                local_text = (pico_Char *) doAddProperties( parsed_text );
1498                if (!local_text) {
1499                    ALOGE("Failed to allocate memory for text string");
1500                    delete parser;
1501                    return TTS_FAILURE;
1502                }
1503                char * lang = parser->getParsedDocumentLanguage();
1504                if (lang != NULL) {
1505                    if (doLanguageSwitch(lang) == TTS_FAILURE) {
1506                        ALOGE("Failed to switch to language (%s) specified in SSML document.", lang);
1507                        delete parser;
1508                        return TTS_FAILURE;
1509                    }
1510                } else {
1511                    // lang is NULL, pick a language so the synthesis can be performed
1512                    if (picoCurrentLangIndex == -1) {
1513                        // no current language loaded, pick the first one and load it
1514                        if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) {
1515                            ALOGE("Failed to switch to default language.");
1516                            delete parser;
1517                            return TTS_FAILURE;
1518                        }
1519                    }
1520                    //ALOGI("No language in SSML, using current language (%s).", picoProp_currLang);
1521                }
1522                delete parser;
1523            } else {
1524                ALOGE("Failed to parse SSML document");
1525                delete parser;
1526                return TTS_FAILURE;
1527            }
1528        } else {
1529            ALOGE("Failed to create SSML parser");
1530            if (parser) {
1531                delete parser;
1532            }
1533            return TTS_FAILURE;
1534        }
1535    } else {
1536        /* camelCase pre-processing */
1537        expanded_text = doCamelCase(text);
1538        /* Add property tags to the string - if any.    */
1539        local_text = (pico_Char *) doAddProperties( expanded_text );
1540        if (expanded_text) {
1541            free( expanded_text );
1542        }
1543        if (!local_text) {
1544            ALOGE("Failed to allocate memory for text string");
1545            return TTS_FAILURE;
1546        }
1547    }
1548
1549    text_remaining = strlen((const char *) local_text) + 1;
1550
1551    inp = (pico_Char *) local_text;
1552
1553    size_t bufused = 0;
1554
1555    /* synthesis loop   */
1556    while (text_remaining) {
1557        if (picoSynthAbort) {
1558            ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1559            break;
1560        }
1561
1562        /* Feed the text into the engine.   */
1563        ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
1564        if (ret != PICO_OK) {
1565            ALOGE("Error synthesizing string '%s': [%d]", text, ret);
1566            if (local_text) {
1567                free( local_text );
1568            }
1569            return TTS_FAILURE;
1570        }
1571
1572        text_remaining -= bytes_sent;
1573        inp += bytes_sent;
1574        do {
1575            if (picoSynthAbort) {
1576                ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1577                break;
1578            }
1579            /* Retrieve the samples and add them to the buffer. */
1580            ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv,
1581                    &out_data_type );
1582            if (bytes_recv) {
1583                if ((bufused + bytes_recv) <= bufferSize) {
1584                    memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
1585                    bufused += bytes_recv;
1586                } else {
1587                    /* The buffer filled; pass this on to the callback function.    */
1588                    cbret = picoSynthDoneCBPtr(userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer,
1589                            bufused, TTS_SYNTH_PENDING);
1590                    if (cbret == TTS_CALLBACK_HALT) {
1591                        ALOGI("Halt requested by caller. Halting.");
1592                        picoSynthAbort = 1;
1593                        ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1594                        break;
1595                    }
1596                    bufused = 0;
1597                    memcpy(buffer, (int8_t *) outbuf, bytes_recv);
1598                    bufused += bytes_recv;
1599                }
1600            }
1601        } while (PICO_STEP_BUSY == ret);
1602
1603        /* This chunk of synthesis is finished; pass the remaining samples.
1604           Use 16 KHz, 16-bit samples.                                              */
1605        if (!picoSynthAbort) {
1606            picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1607                    TTS_SYNTH_PENDING);
1608        }
1609        picoSynthAbort = 0;
1610
1611        if (ret != PICO_STEP_IDLE) {
1612            if (ret != 0){
1613                ALOGE("Error occurred during synthesis [%d]", ret);
1614            }
1615            if (local_text) {
1616                free(local_text);
1617            }
1618            ALOGV("Synth loop: sending TTS_SYNTH_DONE after error");
1619            picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1620                    TTS_SYNTH_DONE);
1621            pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1622            return TTS_FAILURE;
1623        }
1624    }
1625
1626    /* Synthesis is done; notify the caller */
1627    ALOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop");
1628    picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1629            TTS_SYNTH_DONE);
1630
1631    if (local_text) {
1632        free( local_text );
1633    }
1634    return TTS_SUCCESS;
1635}
1636
1637
1638
1639/** stop
1640 *  Aborts the running synthesis.
1641 *  return tts_result
1642*/
1643tts_result TtsEngine::stop( void )
1644{
1645    picoSynthAbort = 1;
1646    return TTS_SUCCESS;
1647}
1648
1649
1650#ifdef __cplusplus
1651extern "C" {
1652#endif
1653
1654TtsEngine * getTtsEngine( void )
1655{
1656    return new TtsEngine();
1657}
1658
1659#ifdef __cplusplus
1660}
1661#endif
1662