/* com_svox_picottsengine.cpp

 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This is the Manager layer.  It sits on top of the native Pico engine
 * and provides the interface to the defined Google TTS engine API.
 * The Google engine API is the boundary to allow a TTS engine to be swapped.
 * The Manager layer also provides the SSML tag interpretation.
 * The supported SSML tags are mapped to corresponding tags natively supported by Pico.
 * Native Pico functions always begin with picoXXX.
 *
 * In the Pico engine, the language cannot be changed independently of the voice.
 * If either the voice or locale/language are changed, a new resource is loaded.
 *
 * Only a subset of SSML 1.0 tags are supported.
 * Some SSML tags involve significant complexity.
 * If the language is changed through an SSML tag, there is a latency for the load.
*/
//#define LOG_NDEBUG 0

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>

#define LOG_TAG "SVOX Pico Engine"

#include <utils/Log.h>
#include <utils/String16.h>            /* for strlen16 */
#include <TtsEngine.h>

#include <cutils/jstring.h>
#include <picoapi.h>
#include <picodefs.h>

#include "svox_ssml_parser.h"

/* NOTE(review): strlen/strcmp/strcat/memset are used throughout this file but <string.h>
   is not included here; presumably it is pulled in transitively - confirm. */

using namespace android;

/* adaptation layer defines */
#define PICO_MEM_SIZE       2500000     /* size in bytes of the block handed to pico_initialize */
/* speaking rate    */
#define PICO_MIN_RATE        20
#define PICO_MAX_RATE       500
#define PICO_DEF_RATE       100
/* speaking pitch   */
#define PICO_MIN_PITCH       50
#define PICO_MAX_PITCH      200
#define PICO_DEF_PITCH      100
/* speaking volume  */
#define PICO_MIN_VOLUME       0
#define PICO_MAX_VOLUME     500
#define PICO_DEF_VOLUME     100

/* string constants */
#define MAX_OUTBUF_SIZE     128
const char * PICO_SYSTEM_LINGWARE_PATH      = "/system/tts/lang_pico/";    /* lingware shipped with the system */
const char * PICO_LINGWARE_PATH             = "/sdcard/svox/";
const char * PICO_VOICE_NAME                = "PicoVoice";
/* The *_OPEN_TAG strings below containing %d are printf-style formats taking the level value. */
const char * PICO_SPEED_OPEN_TAG            = "<speed level='%d'>";
const char * PICO_SPEED_CLOSE_TAG           = "</speed>";
const char * PICO_PITCH_OPEN_TAG            = "<pitch level='%d'>";
const char * PICO_PITCH_CLOSE_TAG           = "</pitch>";
const char * PICO_VOLUME_OPEN_TAG           = "<volume level='%d'>";
const char * PICO_VOLUME_CLOSE_TAG          = "</volume>";
const char * PICO_PHONEME_OPEN_TAG          = "<phoneme ph='";
const char * PICO_PHONEME_CLOSE_TAG         = "'/>";

/* supported voices
   Pico does not separately specify the voice and locale.
   The tables below are parallel: entry i of each array describes the same voice,
   and each holds picoNumSupportedVocs entries.   */
const char * picoSupportedLangIso3[]        = { "eng",              "eng",              "deu",              "spa",              "fra",              "ita" };
const char * picoSupportedCountryIso3[]     = { "USA",              "GBR",              "DEU",              "ESP",              "FRA",              "ITA" };
const char * picoSupportedLang[]            = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalLang[]             = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalTaLingware[]       = { "en-US_ta.bin",     "en-GB_ta.bin",     "de-DE_ta.bin",     "es-ES_ta.bin",     "fr-FR_ta.bin",     "it-IT_ta.bin" };
const char * picoInternalSgLingware[]       = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
const char * picoInternalUtppLingware[]     = { "en-US_utpp.bin",   "en-GB_utpp.bin",   "de-DE_utpp.bin",   "es-ES_utpp.bin",   "fr-FR_utpp.bin",   "it-IT_utpp.bin" };
const int picoNumSupportedVocs              = 6;

/* supported properties */
const char * picoSupportedProperties[]      = { "language", "rate", "pitch", "volume" };
const int    picoNumSupportedProperties     = 4;


/* adaptation layer global variables */
synthDoneCB_t * picoSynthDoneCBPtr;
void *          picoMemArea         = NULL;     /* memory block passed to pico_initialize */
pico_System     picoSystem          = NULL;
pico_Resource   picoTaResource      = NULL;     /* text analysis lingware */
pico_Resource   picoSgResource      = NULL;     /* signal generation lingware */
pico_Resource   picoUtppResource    = NULL;     /* optional utpp lingware */
pico_Engine     picoEngine          = NULL;
pico_Char *     picoTaFileName      = NULL;
pico_Char *     picoSgFileName      = NULL;
pico_Char *     picoUtppFileName    = NULL;
pico_Char *     picoTaResourceName  = NULL;
pico_Char *     picoSgResourceName  = NULL;
pico_Char *     picoUtppResourceName = NULL;
int     picoSynthAbort = 0;
char *  picoProp_currLang   = NULL;                 /* current language */
int     picoProp_currRate   = PICO_DEF_RATE;        /* current rate     */
int     picoProp_currPitch  = PICO_DEF_PITCH;       /* current pitch    */
int     picoProp_currVolume = PICO_DEF_VOLUME;      /* current volume   */

int picoCurrentLangIndex = -1;                      /* index of the loaded voice in the tables above, -1 if none */

char * pico_alt_lingware_path = NULL;               /* alternative (non-system) lingware directory, may be NULL */


/* internal helper functions */

/**
checkForLocale 125 * Check whether the requested locale is among the supported locales. 126 * @locale - the locale to check, either in xx or xx-YY format 127 * return index of the locale, or -1 if not supported. 128*/ 129static int checkForLocale( const char * locale ) 130{ 131 int found = -1; /* language not found */ 132 int i; 133 if (locale == NULL) { 134 ALOGE("checkForLocale called with NULL language"); 135 return found; 136 } 137 138 /* Verify that the requested locale is a locale that we support. */ 139 for (i = 0; i < picoNumSupportedVocs; i ++) { 140 if (strcmp(locale, picoSupportedLang[i]) == 0) { /* in array */ 141 found = i; 142 break; 143 } 144 }; 145 146 /* The exact locale was not found. */ 147 if (found < 0) { 148 /* We didn't find an exact match; it may have been specified with only the first 2 characters. 149 This could overmatch ISO 639-3 language codes.%% */ 150 151 /* check whether the current language matches the locale's language */ 152 if ((picoCurrentLangIndex > -1) && 153 (strncmp(locale, picoSupportedLang[picoCurrentLangIndex], 2) == 0)) { 154 /* the current language matches the requested language, let's use it */ 155 found = picoCurrentLangIndex; 156 } else { 157 /* check whether we can find a match at least on the language */ 158 for (i = 0; i < picoNumSupportedVocs; i ++) { 159 if (strncmp(locale, picoSupportedLang[i], 2) == 0) { 160 found = i; 161 break; 162 } 163 } 164 } 165 166 if (found < 0) { 167 ALOGE("TtsEngine::set language called with unsupported locale %s", locale); 168 } 169 }; 170 return found; 171} 172 173 174/** cleanResources 175 * Unloads any loaded Pico resources. 
176*/ 177static void cleanResources( void ) 178{ 179 if (picoEngine) { 180 pico_disposeEngine( picoSystem, &picoEngine ); 181 pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME ); 182 picoEngine = NULL; 183 } 184 if (picoUtppResource) { 185 pico_unloadResource( picoSystem, &picoUtppResource ); 186 picoUtppResource = NULL; 187 } 188 if (picoTaResource) { 189 pico_unloadResource( picoSystem, &picoTaResource ); 190 picoTaResource = NULL; 191 } 192 if (picoSgResource) { 193 pico_unloadResource( picoSystem, &picoSgResource ); 194 picoSgResource = NULL; 195 } 196 197 if (picoSystem) { 198 pico_terminate(&picoSystem); 199 picoSystem = NULL; 200 } 201 picoCurrentLangIndex = -1; 202} 203 204 205/** cleanFiles 206 * Frees any memory allocated for file and resource strings. 207*/ 208static void cleanFiles( void ) 209{ 210 if (picoProp_currLang) { 211 free( picoProp_currLang ); 212 picoProp_currLang = NULL; 213 } 214 215 if (picoTaFileName) { 216 free( picoTaFileName ); 217 picoTaFileName = NULL; 218 } 219 220 if (picoSgFileName) { 221 free( picoSgFileName ); 222 picoSgFileName = NULL; 223 } 224 225 if (picoUtppFileName) { 226 free( picoUtppFileName ); 227 picoUtppFileName = NULL; 228 } 229 230 if (picoTaResourceName) { 231 free( picoTaResourceName ); 232 picoTaResourceName = NULL; 233 } 234 235 if (picoSgResourceName) { 236 free( picoSgResourceName ); 237 picoSgResourceName = NULL; 238 } 239 240 if (picoUtppResourceName) { 241 free( picoUtppResourceName ); 242 picoUtppResourceName = NULL; 243 } 244} 245 246/** hasResourcesForLanguage 247 * Check to see if the resources required to load the language at the specified index 248 * are properly installed 249 * @langIndex - the index of the language to check the resources for. The index is valid. 
250 * return true if the required resources are installed, false otherwise 251 */ 252static bool hasResourcesForLanguage(int langIndex) { 253 FILE * pFile; 254 char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); 255 256 /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */ 257 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH); 258 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]); 259 pFile = fopen(fileName, "r"); 260 if (pFile != NULL) { 261 /* "ta" file found. */ 262 fclose (pFile); 263 /* now look for "sg" file. */ 264 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH); 265 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]); 266 pFile = fopen(fileName, "r"); 267 if (pFile != NULL) { 268 /* "sg" file found, no need to continue checking, return success. */ 269 fclose(pFile); 270 free(fileName); 271 return true; 272 } 273 } 274 275 /* resources not found on system, check resources on alternative location */ 276 /* (under pico_alt_lingware_path). */ 277 strcpy((char*)fileName, pico_alt_lingware_path); 278 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]); 279 pFile = fopen(fileName, "r"); 280 if (pFile == NULL) { 281 free(fileName); 282 return false; 283 } else { 284 fclose (pFile); 285 } 286 287 strcpy((char*)fileName, pico_alt_lingware_path); 288 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]); 289 pFile = fopen(fileName, "r"); 290 if (pFile == NULL) { 291 free(fileName); 292 return false; 293 } else { 294 fclose(pFile); 295 free(fileName); 296 return true; 297 } 298} 299 300/** doLanguageSwitchFromLangIndex 301 * Switch to the requested locale. 302 * If the locale is already loaded, it returns immediately. 303 * If another locale is already is loaded, it will first be unloaded and the new one then loaded. 304 * If no locale is loaded, the requested locale will be loaded. 
305 * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported. 306 * return TTS_SUCCESS or TTS_FAILURE 307 */ 308static tts_result doLanguageSwitchFromLangIndex( int langIndex ) 309{ 310 int ret; /* function result code */ 311 312 if (langIndex>=0) { 313 /* If we already have a loaded locale, check whether it is the same one as requested. */ 314 if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) { 315 //ALOGI("Language already loaded (%s == %s)", picoProp_currLang, 316 // picoSupportedLang[langIndex]); 317 return TTS_SUCCESS; 318 } 319 } 320 321 /* It is not the same locale; unload the current one first. Also invalidates the system object*/ 322 cleanResources(); 323 324 /* Allocate memory for file and resource names. */ 325 cleanFiles(); 326 327 if (picoSystem==NULL) { 328 /*re-init system object*/ 329 ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); 330 if (PICO_OK != ret) { 331 ALOGE("Failed to initialize the pico system object\n"); 332 return TTS_FAILURE; 333 } 334 } 335 336 picoProp_currLang = (char *) malloc( 10 ); 337 picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 338 picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 339 picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 340 picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 341 picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 342 picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 343 344 if ( 345 (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) || 346 (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) || 347 (picoUtppResourceName==NULL) 348 ) { 349 ALOGE("Failed to allocate memory for internal strings\n"); 350 cleanResources(); 351 return TTS_FAILURE; 
352 } 353 354 /* Find where to load the resource files from: system or alternative location */ 355 /* based on availability of the Ta file. Try the alternative location first, this is where */ 356 /* more recent language file updates would be installed (under pico_alt_lingware_path). */ 357 bool bUseSystemPath = true; 358 FILE * pFile; 359 char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); 360 strcpy((char*)tmpFileName, pico_alt_lingware_path); 361 strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]); 362 pFile = fopen(tmpFileName, "r"); 363 if (pFile != NULL) { 364 /* "ta" file found under pico_alt_lingware_path, don't use the system path. */ 365 fclose (pFile); 366 bUseSystemPath = false; 367 } 368 free(tmpFileName); 369 370 /* Set the path and file names for resource files. */ 371 if (bUseSystemPath) { 372 strcpy((char *) picoTaFileName, PICO_SYSTEM_LINGWARE_PATH); 373 strcpy((char *) picoSgFileName, PICO_SYSTEM_LINGWARE_PATH); 374 strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH); 375 } else { 376 strcpy((char *) picoTaFileName, pico_alt_lingware_path); 377 strcpy((char *) picoSgFileName, pico_alt_lingware_path); 378 strcpy((char *) picoUtppFileName, pico_alt_lingware_path); 379 } 380 strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]); 381 strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]); 382 strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]); 383 384 /* Load the text analysis Lingware resource file. */ 385 ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ); 386 if (PICO_OK != ret) { 387 ALOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret); 388 cleanResources(); 389 cleanFiles(); 390 return TTS_FAILURE; 391 } 392 393 /* Load the signal generation Lingware resource file. 
*/ 394 ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ); 395 if (PICO_OK != ret) { 396 ALOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret); 397 cleanResources(); 398 cleanFiles(); 399 return TTS_FAILURE; 400 } 401 402 /* Load the utpp Lingware resource file if exists - NOTE: this file is optional 403 and is currently not used. Loading is only attempted for future compatibility. 404 If this file is not present the loading will still succeed. */ 405 ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource ); 406 if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) { 407 ALOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret); 408 cleanResources(); 409 cleanFiles(); 410 return TTS_FAILURE; 411 } 412 413 /* Get the text analysis resource name. */ 414 ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ); 415 if (PICO_OK != ret) { 416 ALOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret); 417 cleanResources(); 418 cleanFiles(); 419 return TTS_FAILURE; 420 } 421 422 /* Get the signal generation resource name. */ 423 ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ); 424 if ((PICO_OK == ret) && (picoUtppResource != NULL)) { 425 /* Get utpp resource name - optional: see note above. */ 426 ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName ); 427 if (PICO_OK != ret) { 428 ALOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret); 429 cleanResources(); 430 cleanFiles(); 431 return TTS_FAILURE; 432 } 433 } 434 if (PICO_OK != ret) { 435 ALOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret); 436 cleanResources(); 437 cleanFiles(); 438 return TTS_FAILURE; 439 } 440 441 /* Create a voice definition. 
*/ 442 ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ); 443 if (PICO_OK != ret) { 444 ALOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret); 445 cleanResources(); 446 cleanFiles(); 447 return TTS_FAILURE; 448 } 449 450 /* Add the text analysis resource to the voice. */ 451 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ); 452 if (PICO_OK != ret) { 453 ALOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 454 cleanResources(); 455 cleanFiles(); 456 return TTS_FAILURE; 457 } 458 459 /* Add the signal generation resource to the voice. */ 460 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ); 461 if ((PICO_OK == ret) && (picoUtppResource != NULL)) { 462 /* Add utpp resource to voice - optional: see note above. */ 463 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName ); 464 if (PICO_OK != ret) { 465 ALOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 466 cleanResources(); 467 cleanFiles(); 468 return TTS_FAILURE; 469 } 470 } 471 472 if (PICO_OK != ret) { 473 ALOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 474 cleanResources(); 475 cleanFiles(); 476 return TTS_FAILURE; 477 } 478 479 ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ); 480 if (PICO_OK != ret) { 481 ALOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret); 482 cleanResources(); 483 cleanFiles(); 484 return TTS_FAILURE; 485 } 486 487 /* Set the current locale/voice. 
*/ 488 strcpy( picoProp_currLang, picoSupportedLang[langIndex] ); 489 picoCurrentLangIndex = langIndex; 490 ALOGI("loaded %s successfully", picoProp_currLang); 491 return TTS_SUCCESS; 492} 493 494 495/** doLanguageSwitch 496 * Switch to the requested locale. 497 * If this locale is already loaded, it returns immediately. 498 * If another locale is already loaded, this will first be unloaded 499 * and the new one then loaded. 500 * If no locale is loaded, the requested will be loaded. 501 * @locale - the locale to check, either in xx or xx-YY format (i.e "en" or "en-US") 502 * return TTS_SUCCESS or TTS_FAILURE 503*/ 504static tts_result doLanguageSwitch( const char * locale ) 505{ 506 int loclIndex; /* locale index */ 507 508 /* Load the new locale. */ 509 loclIndex = checkForLocale( locale ); 510 if (loclIndex < 0) { 511 ALOGE("Tried to swith to non-supported locale %s", locale); 512 return TTS_FAILURE; 513 } 514 //ALOGI("Found supported locale %s", picoSupportedLang[loclIndex]); 515 return doLanguageSwitchFromLangIndex( loclIndex ); 516} 517 518 519/** doAddProperties 520 * Add <speed>, <pitch> and <volume> tags to the text, 521 * if the properties have been set to non-default values, and return the new string. 522 * The calling function is responsible for freeing the returned string. 
523 * @str - text to apply tags to 524 * return new string with tags applied 525*/ 526static char * doAddProperties( const char * str ) 527{ 528 char * data = NULL; 529 int haspitch, hasspeed, hasvol; /* parameters */ 530 int textlen; /* property string length */ 531 haspitch = 0; hasspeed = 0; hasvol = 0; 532 textlen = strlen(str) + 1; 533 if (picoProp_currPitch != PICO_DEF_PITCH) { /* non-default pitch */ 534 textlen += strlen(PICO_PITCH_OPEN_TAG) + 5; 535 textlen += strlen(PICO_PITCH_CLOSE_TAG); 536 haspitch = 1; 537 } 538 if (picoProp_currRate != PICO_DEF_RATE) { /* non-default rate */ 539 textlen += strlen(PICO_SPEED_OPEN_TAG) + 5; 540 textlen += strlen(PICO_SPEED_CLOSE_TAG); 541 hasspeed = 1; 542 } 543 544 if (picoProp_currVolume != PICO_DEF_VOLUME) { /* non-default volume */ 545 textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5; 546 textlen += strlen(PICO_VOLUME_CLOSE_TAG); 547 hasvol = 1; 548 } 549 550 /* Compose the property strings. */ 551 data = (char *) malloc( textlen ); /* allocate string */ 552 if (!data) { 553 return NULL; 554 } 555 memset(data, 0, textlen); /* clear it */ 556 if (haspitch) { 557 char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5); 558 sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch); 559 strcat(data, tmp); 560 free(tmp); 561 } 562 563 if (hasspeed) { 564 char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5); 565 sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate); 566 strcat(data, tmp); 567 free(tmp); 568 } 569 570 if (hasvol) { 571 char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5); 572 sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume); 573 strcat(data, tmp); 574 free(tmp); 575 } 576 577 strcat(data, str); 578 if (hasvol) { 579 strcat(data, PICO_VOLUME_CLOSE_TAG); 580 } 581 582 if (hasspeed) { 583 strcat(data, PICO_SPEED_CLOSE_TAG); 584 } 585 586 if (haspitch) { 587 strcat(data, PICO_PITCH_CLOSE_TAG); 588 } 589 
return data; 590} 591 592 593/** get_tok 594 * Searches for tokens in a string 595 * @str - text to be processed 596 * @pos - position of first character to be searched in str 597 * @textlen - postion of last character to be searched 598 * @tokstart - address of a variable to receive the start of the token found 599 * @tokstart - address of a variable to receive the length of the token found 600 * return : 1=token found, 0=token not found 601 * notes : the token separator set could be enlarged adding characters in "seps" 602*/ 603static int get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) 604{ 605 const char * seps = " "; 606 607 /*look for start*/ 608 while ((pos<textlen) && (strchr(seps,str[pos]) != NULL)) { 609 pos++; 610 } 611 if (pos == textlen) { 612 /*no characters != seps found whithin string*/ 613 return 0; 614 } 615 *tokstart = pos; 616 /*look for end*/ 617 while ((pos<textlen) && (strchr(seps,str[pos]) == NULL)) { 618 pos++; 619 } 620 *toklen = pos - *tokstart; 621 return 1; 622}/*get_tok*/ 623 624 625/** get_sub_tok 626 * Searches for subtokens in a token having a compound structure with camel case like "xxxYyyy" 627 * @str - text to be processed 628 * @pos - position of first character to be searched in str 629 * @textlen - postion of last character to be searched in str 630 * @tokstart - address of a variable to receive the start of the sub token found 631 * @tokstart - address of a variable to receive the length of the sub token found 632 * return : 1=sub token found, 0=sub token not found 633 * notes : the sub token separator set could be enlarged adding characters in "seps" 634*/ 635static int get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) { 636 637 const char * seps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 638 639 if (pos == textlen) { 640 return 0; 641 } 642 643 /*first char != space*/ 644 *tokstart = pos; 645 /*finding first non separator*/ 646 while ((pos < textlen) && (strchr(seps, str[pos]) 
!= NULL)) { 647 pos++; 648 } 649 if (pos == textlen) { 650 /*characters all in seps found whithin string : return full token*/ 651 *toklen = pos - *tokstart; 652 return 1; 653 } 654 /*pos should be pointing to first non seps and more chars are there*/ 655 /*finding first separator*/ 656 while ((pos < textlen) && (strchr(seps, str[pos]) == NULL)) { 657 pos++; 658 } 659 if (pos == textlen) { 660 /*transition non seps->seps not found : return full token*/ 661 *toklen = pos - *tokstart; 662 return 1; 663 } 664 *toklen = pos - *tokstart; 665 return 1; 666}/*get_sub_tok*/ 667 668 669/** doCamelCase 670 * Searches for tokens having a compound structure with camel case and transforms them as follows : 671 * "XxxxYyyy" -->> "Xxxx Yyyy", 672 * "xxxYyyy" -->> "xxx Yyyy", 673 * "XXXYyyy" -->> "XXXYyyy" 674 * etc.... 675 * The calling function is responsible for freeing the returned string. 676 * @str - text to be processed 677 * return new string with text processed 678*/ 679static char * doCamelCase( const char * str ) 680{ 681 int textlen; /* input string length */ 682 int totlen; /* output string length */ 683 int tlen_2, nsubtok; /* nuber of subtokens */ 684 int toklen, tokstart; /*legnth and start of generic token*/ 685 int stoklen, stokstart; /*legnth and start of generic sub-token*/ 686 int pos, tokpos, outpos; /*postion of current char in input string and token and output*/ 687 char *data; /*pointer of the returned string*/ 688 689 pos = 0; 690 tokpos = 0; 691 toklen = 0; 692 stoklen = 0; 693 tlen_2 = 0; 694 totlen = 0; 695 696 textlen = strlen(str) + 1; 697 698 /*counting characters after sub token splitting including spaces*/ 699 //while ((pos<textlen) && (str[pos]!=0)) { 700 while (get_tok(str, pos, textlen, &tokstart, &toklen)) { 701 tokpos = tokstart; 702 tlen_2 = 0; 703 nsubtok = 0; 704 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) { 705 totlen += stoklen; 706 tlen_2 += stoklen; 707 tokpos = stokstart + stoklen; 708 nsubtok += 1; 709 } 
710 totlen += nsubtok; /*add spaces between subtokens*/ 711 pos = tokstart + tlen_2; 712 } 713 //} 714 /* Allocate the return string */ 715 716 data = (char *) malloc( totlen ); /* allocate string */ 717 if (!data) { 718 return NULL; 719 } 720 memset(data, 0, totlen); /* clear it */ 721 outpos = 0; 722 pos = 0; 723 /*copying characters*/ 724 //while ((pos<textlen) && (str[pos]!=0)) { 725 while (get_tok (str, pos, textlen, &tokstart, &toklen)) { 726 tokpos = tokstart; 727 tlen_2 = 0; 728 nsubtok = 0; 729 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) { 730 strncpy(&(data[outpos]), &(str[stokstart]), stoklen); 731 outpos += stoklen; 732 strncpy(&(data[outpos]), " ", 1); 733 tlen_2 += stoklen; 734 outpos += 1; 735 tokpos = stokstart + stoklen; 736 } 737 pos=tokstart+tlen_2; 738 } 739 //} 740 if (outpos == 0) { 741 outpos = 1; 742 } 743 data[outpos-1] = 0; 744 return data; 745}/*doCamelCase*/ 746 747 748/** createPhonemeString 749 * Wrap all individual words in <phoneme> tags. 750 * The Pico <phoneme> tag only supports one word in each tag, 751 * therefore they must be individually wrapped! 
752 * @xsampa - text to convert to Pico phomene string 753 * @length - length of the input string 754 * return new string with tags applied 755*/ 756extern char * createPhonemeString( const char * xsampa, int length ) 757{ 758 char * convstring = NULL; 759 int origStrLen = strlen(xsampa); 760 int numWords = 1; 761 int start, totalLength, i, j; 762 763 for (i = 0; i < origStrLen; i ++) { 764 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) { 765 numWords ++; 766 } 767 } 768 769 if (numWords == 1) { 770 convstring = new char[origStrLen + 17]; 771 convstring[0] = '\0'; 772 strcat(convstring, PICO_PHONEME_OPEN_TAG); 773 strcat(convstring, xsampa); 774 strcat(convstring, PICO_PHONEME_CLOSE_TAG); 775 } else { 776 char * words[numWords]; 777 start = 0; totalLength = 0; i = 0; j = 0; 778 for (i=0, j=0; i < origStrLen; i++) { 779 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) { 780 words[j] = new char[i+1-start+17]; 781 words[j][0] = '\0'; 782 strcat( words[j], PICO_PHONEME_OPEN_TAG); 783 strncat(words[j], xsampa+start, i-start); 784 strcat( words[j], PICO_PHONEME_CLOSE_TAG); 785 start = i + 1; 786 j++; 787 totalLength += strlen(words[j-1]); 788 } 789 } 790 words[j] = new char[i+1-start+17]; 791 words[j][0] = '\0'; 792 strcat(words[j], PICO_PHONEME_OPEN_TAG); 793 strcat(words[j], xsampa+start); 794 strcat(words[j], PICO_PHONEME_CLOSE_TAG); 795 totalLength += strlen(words[j]); 796 convstring = new char[totalLength + 1]; 797 convstring[0] = '\0'; 798 for (i=0 ; i < numWords ; i++) { 799 strcat(convstring, words[i]); 800 delete [] words[i]; 801 } 802 } 803 804 return convstring; 805} 806 807/* The XSAMPA uses as many as 5 characters to represent a single IPA code. */ 808typedef struct tagPhnArr 809{ 810 char16_t strIPA; /* IPA Unicode symbol */ 811 char strXSAMPA[6]; /* SAMPA sequence */ 812} PArr; 813 814#define phn_cnt (134+7) 815 816PArr PhnAry[phn_cnt] = { 817 818 /* XSAMPA conversion table 819 This maps a single IPA symbol to a sequence representing XSAMPA. 
820 This relies upon a direct one-to-one correspondance 821 including diphthongs and affricates. */ 822 823 /* Vowels (23) complete */ 824 {0x025B, "E"}, 825 {0x0251, "A"}, 826 {0x0254, "O"}, 827 {0x00F8, "2"}, 828 {0x0153, "9"}, 829 {0x0276, "&"}, 830 {0x0252, "Q"}, 831 {0x028C, "V"}, 832 {0x0264, "7"}, 833 {0x026F, "M"}, 834 {0x0268, "1"}, 835 {0x0289, "}"}, 836 {0x026A, "I"}, 837 {0x028F, "Y"}, 838 {0x028A, "U"}, 839 {0x0259, "@"}, 840 {0x0275, "8"}, 841 {0x0250, "6"}, 842 {0x00E6, "{"}, 843 {0x025C, "3"}, 844 {0x025A, "@`"}, 845 {0x025E, "3\\\\"}, 846 {0x0258, "@\\\\"}, 847 848 /* Consonants (60) complete */ 849 {0x0288, "t`"}, 850 {0x0256, "d`"}, 851 {0x025F, "J\\\\"}, 852 {0x0261, "g"}, 853 {0x0262, "G\\\\"}, 854 {0x0294, "?"}, 855 {0x0271, "F"}, 856 {0x0273, "n`"}, 857 {0x0272, "J"}, 858 {0x014B, "N"}, 859 {0x0274, "N\\\\"}, 860 {0x0299, "B\\\\"}, 861 {0x0280, "R\\\\"}, 862 {0x027E, "4"}, 863 {0x027D, "r`"}, 864 {0x0278, "p\\\\"}, 865 {0x03B2, "B"}, 866 {0x03B8, "T"}, 867 {0x00F0, "D"}, 868 {0x0283, "S"}, 869 {0x0292, "Z"}, 870 {0x0282, "s`"}, 871 {0x0290, "z`"}, 872 {0x00E7, "C"}, 873 {0x029D, "j\\\\"}, 874 {0x0263, "G"}, 875 {0x03C7, "X"}, 876 {0x0281, "R"}, 877 {0x0127, "X\\\\"}, 878 {0x0295, "?\\\\"}, 879 {0x0266, "h\\\\"}, 880 {0x026C, "K"}, 881 {0x026E, "K\\\\"}, 882 {0x028B, "P"}, 883 {0x0279, "r\\\\"}, 884 {0x027B, "r\\\\'"}, 885 {0x0270, "M\\\\"}, 886 {0x026D, "l`"}, 887 {0x028E, "L"}, 888 {0x029F, "L\\\\"}, 889 {0x0253, "b_<"}, 890 {0x0257, "d_<"}, 891 {0x0284, "J\\_<"}, 892 {0x0260, "g_<"}, 893 {0x029B, "G\\_<"}, 894 {0x028D, "W"}, 895 {0x0265, "H"}, 896 {0x029C, "H\\\\"}, 897 {0x02A1, ">\\\\"}, 898 {0x02A2, "<\\\\"}, 899 {0x0267, "x\\\\"}, /* hooktop heng */ 900 {0x0298, "O\\\\"}, 901 {0x01C0, "|\\\\"}, 902 {0x01C3, "!\\\\"}, 903 {0x01C2, "=\\"}, 904 {0x01C1, "|\\|\\"}, 905 {0x027A, "l\\\\"}, 906 {0x0255, "s\\\\"}, 907 {0x0291, "z\\\\"}, 908 {0x026B, "l_G"}, 909 910 911 /* Diacritics (37) complete */ 912 {0x02BC, "_>"}, 913 {0x0325, "_0"}, 914 
{0x030A, "_0"}, 915 {0x032C, "_v"}, 916 {0x02B0, "_h"}, 917 {0x0324, "_t"}, 918 {0x0330, "_k"}, 919 {0x033C, "_N"}, 920 {0x032A, "_d"}, 921 {0x033A, "_a"}, 922 {0x033B, "_m"}, 923 {0x0339, "_O"}, 924 {0x031C, "_c"}, 925 {0x031F, "_+"}, 926 {0x0320, "_-"}, 927 {0x0308, "_\""}, /* centralized */ 928 {0x033D, "_x"}, 929 {0x0318, "_A"}, 930 {0x0319, "_q"}, 931 {0x02DE, "`"}, 932 {0x02B7, "_w"}, 933 {0x02B2, "_j"}, 934 {0x02E0, "_G"}, 935 {0x02E4, "_?\\\\"}, /* pharyngealized */ 936 {0x0303, "~"}, /* nasalized */ 937 {0x207F, "_n"}, 938 {0x02E1, "_l"}, 939 {0x031A, "_}"}, 940 {0x0334, "_e"}, 941 {0x031D, "_r"}, /* raised equivalent to 02D4 */ 942 {0x02D4, "_r"}, /* raised equivalent to 031D */ 943 {0x031E, "_o"}, /* lowered equivalent to 02D5 */ 944 {0x02D5, "_o"}, /* lowered equivalent to 031E */ 945 {0x0329, "="}, /* sylabic */ 946 {0x032F, "_^"}, /* non-sylabic */ 947 {0x0361, "_"}, /* top tie bar */ 948 {0x035C, "_"}, 949 950 /* Suprasegmental (15) incomplete */ 951 {0x02C8, "\""}, /* primary stress */ 952 {0x02CC, "%"}, /* secondary stress */ 953 {0x02D0, ":"}, /* long */ 954 {0x02D1, ":\\\\"}, /* half-long */ 955 {0x0306, "_X"}, /* extra short */ 956 957 {0x2016, "||"}, /* major group */ 958 {0x203F, "-\\\\"}, /* bottom tie bar */ 959 {0x2197, "<R>"}, /* global rise */ 960 {0x2198, "<F>"}, /* global fall */ 961 {0x2193, "<D>"}, /* downstep */ 962 {0x2191, "<U>"}, /* upstep */ 963 {0x02E5, "<T>"}, /* extra high level */ 964 {0x02E7, "<M>"}, /* mid level */ 965 {0x02E9, "<B>"}, /* extra low level */ 966 967 {0x025D, "3`:"}, /* non-IPA %% */ 968 969 /* Affricates (6) complete */ 970 {0x02A3, "d_z"}, 971 {0x02A4, "d_Z"}, 972 {0x02A5, "d_z\\\\"}, 973 {0x02A6, "t_s"}, 974 {0x02A7, "t_S"}, 975 {0x02A8, "t_s\\\\"} 976 }; 977 978 979void CnvIPAPnt( const char16_t IPnt, char * XPnt ) 980{ 981 char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */ 982 int idx; /* index into table */ 983 984 /* Convert an individual IPA codepoint. 
985 A single IPA code could map to a string. 986 Search the table. If it is not found, use the same character. 987 Since most codepoints can be contained within 16 bits, 988 they are represented as wide chars. */ 989 XPnt[0] = 0; /* clear the result string */ 990 991 /* Search the table for the conversion. */ 992 for (idx = 0; idx < phn_cnt; idx ++) { /* for each item in table */ 993 if (IPnt == PhnAry[idx].strIPA) { /* matches IPA code */ 994 strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */ 995 return; 996 } 997 } 998 strcat(XPnt, (const char *)&ThisPnt); /* just copy it */ 999} 1000 1001 1002/** cnvIpaToXsampa 1003 * Convert an IPA character string to an XSAMPA character string. 1004 * @ipaString - input IPA string to convert 1005 * @outXsampaString - converted XSAMPA string is passed back in this parameter 1006 * return size of the new string 1007*/ 1008 1009int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString ) 1010{ 1011 size_t xsize; /* size of result */ 1012 size_t ipidx; /* index into IPA string */ 1013 char * XPnt; /* short XSAMPA char sequence */ 1014 1015 /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString. 1016 It is the responsibility of the caller to free the allocated string. 1017 Increment through the string. For each base & combination convert it to the XSAMP equivalent. 1018 Because of the XSAMPA limitations, not all IPA characters will be covered. 
*/ 1019 XPnt = (char *) malloc(6); 1020 xsize = (4 * ipaStringSize) + 8; /* assume more than double size */ 1021 *outXsampaString = (char *) malloc( xsize );/* allocate return string */ 1022 *outXsampaString[0] = 0; 1023 xsize = 0; /* clear final */ 1024 1025 for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code */ 1026 CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted character */ 1027 strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */ 1028 } 1029 free(XPnt); 1030 xsize = strlen(*outXsampaString); /* get the final length */ 1031 return xsize; 1032} 1033 1034 1035/* Google Engine API function implementations */ 1036 1037/** init 1038 * Allocates Pico memory block and initializes the Pico system. 1039 * synthDoneCBPtr - Pointer to callback function which will receive generated samples 1040 * config - the engine configuration parameters, here only contains the non-system path 1041 * for the lingware location 1042 * return tts_result 1043*/ 1044tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config ) 1045{ 1046 if (synthDoneCBPtr == NULL) { 1047 ALOGE("Callback pointer is NULL"); 1048 return TTS_FAILURE; 1049 } 1050 1051 picoMemArea = malloc( PICO_MEM_SIZE ); 1052 if (!picoMemArea) { 1053 ALOGE("Failed to allocate memory for Pico system"); 1054 return TTS_FAILURE; 1055 } 1056 1057 pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); 1058 if (PICO_OK != ret) { 1059 ALOGE("Failed to initialize Pico system"); 1060 free( picoMemArea ); 1061 picoMemArea = NULL; 1062 return TTS_FAILURE; 1063 } 1064 1065 picoSynthDoneCBPtr = synthDoneCBPtr; 1066 1067 picoCurrentLangIndex = -1; 1068 1069 // was the initialization given an alternative path for the lingware location? 
1070 if ((config != NULL) && (strlen(config) > 0)) { 1071 int max_filename_length = PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE; 1072 if (strlen(config) >= max_filename_length) { 1073 ALOGE("The length of engine config is too long (should be less than %d bytes).", 1074 max_filename_length); 1075 return TTS_FAILURE; 1076 } 1077 pico_alt_lingware_path = (char*)malloc(strlen(config) + 1); 1078 strcpy((char*)pico_alt_lingware_path, config); 1079 ALOGV("Alternative lingware path %s", pico_alt_lingware_path); 1080 } else { 1081 pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH) + 1); 1082 strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH); 1083 ALOGV("Using predefined lingware path %s", pico_alt_lingware_path); 1084 } 1085 1086 return TTS_SUCCESS; 1087} 1088 1089 1090/** shutdown 1091 * Unloads all Pico resources; terminates Pico system and frees Pico memory block. 1092 * return tts_result 1093*/ 1094tts_result TtsEngine::shutdown( void ) 1095{ 1096 cleanResources(); 1097 1098 if (picoSystem) { 1099 pico_terminate(&picoSystem); 1100 picoSystem = NULL; 1101 } 1102 if (picoMemArea) { 1103 free(picoMemArea); 1104 picoMemArea = NULL; 1105 } 1106 1107 cleanFiles(); 1108 return TTS_SUCCESS; 1109} 1110 1111 1112/** loadLanguage 1113 * Load a new language. 1114 * @lang - string with ISO 3 letter language code. 1115 * @country - string with ISO 3 letter country code . 1116 * @variant - string with language variant for that language and country pair. 1117 * return tts_result 1118*/ 1119tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) 1120{ 1121 return TTS_FAILURE; 1122 //return setProperty("language", value, size); 1123} 1124 1125 1126/** setLanguage 1127 * Load a new language (locale). Use the ISO 639-3 language codes. 1128 * @lang - string with ISO 639-3 language code. 1129 * @country - string with ISO 3 letter country code. 
1130 * @variant - string with language variant for that language and country pair. 1131 * return tts_result 1132 */ 1133tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant ) 1134{ 1135 //ALOGI("TtsEngine::setLanguage %s %s %s", lang, country, variant); 1136 int langIndex; 1137 int countryIndex; 1138 int i; 1139 1140 if (lang == NULL) 1141 { 1142 ALOGE("TtsEngine::setLanguage called with NULL language"); 1143 return TTS_FAILURE; 1144 } 1145 1146 /* We look for a match on the language first 1147 then we look for a match on the country. 1148 If no match on the language: 1149 return an error. 1150 If match on the language, but no match on the country: 1151 load the language found for the language match. 1152 If match on the language, and match on the country: 1153 load the language found for the country match. */ 1154 1155 /* Find a match on the language. */ 1156 langIndex = -1; /* no match */ 1157 for (i = 0; i < picoNumSupportedVocs; i ++) 1158 { 1159 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) 1160 { 1161 langIndex = i; 1162 break; 1163 } 1164 } 1165 if (langIndex < 0) 1166 { 1167 /* The language isn't supported. */ 1168 ALOGE("TtsEngine::setLanguage called with unsupported language"); 1169 return TTS_FAILURE; 1170 } 1171 1172 /* Find a match on the country, if there is one. */ 1173 if (country != NULL) 1174 { 1175 countryIndex = -1; 1176 for (i = langIndex; i < picoNumSupportedVocs; i ++) 1177 { 1178 if ( (strcmp(lang, picoSupportedLangIso3[i]) == 0) 1179 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) 1180 { 1181 countryIndex = i; 1182 break; 1183 } 1184 } 1185 1186 if (countryIndex < 0) 1187 { 1188 /* We didn't find a match on the country, but we had a match on the language. 1189 Use that language. */ 1190 ALOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).", 1191 lang, country); 1192 } 1193 else 1194 { 1195 /* We have a match on both the language and the country. 
*/ 1196 langIndex = countryIndex; 1197 } 1198 } 1199 1200 return doLanguageSwitchFromLangIndex( langIndex ); /* switch the language */ 1201} 1202 1203 1204/** isLanguageAvailable 1205 * Returns the level of support for a language. 1206 * @lang - string with ISO 3 letter language code. 1207 * @country - string with ISO 3 letter country code . 1208 * @variant - string with language variant for that language and country pair. 1209 * return tts_support_result 1210*/ 1211tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country, 1212 const char *variant) { 1213 int langIndex = -1; 1214 int countryIndex = -1; 1215 //------------------------- 1216 // language matching 1217 // if no language specified 1218 if (lang == NULL) { 1219 ALOGE("TtsEngine::isLanguageAvailable called with no language"); 1220 return TTS_LANG_NOT_SUPPORTED; 1221 } 1222 1223 // find a match on the language 1224 for (int i = 0; i < picoNumSupportedVocs; i++) 1225 { 1226 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) { 1227 langIndex = i; 1228 break; 1229 } 1230 } 1231 if (langIndex < 0) { 1232 // language isn't supported 1233 ALOGV("TtsEngine::isLanguageAvailable called with unsupported language"); 1234 return TTS_LANG_NOT_SUPPORTED; 1235 } 1236 1237 //------------------------- 1238 // country matching 1239 // if no country specified 1240 if ((country == NULL) || (strlen(country) == 0)) { 1241 // check installation of matched language 1242 return (hasResourcesForLanguage(langIndex) ? 
TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA); 1243 } 1244 1245 // find a match on the country 1246 for (int i = langIndex; i < picoNumSupportedVocs; i++) { 1247 if ((strcmp(lang, picoSupportedLangIso3[i]) == 0) 1248 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) { 1249 countryIndex = i; 1250 break; 1251 } 1252 } 1253 if (countryIndex < 0) { 1254 // we didn't find a match on the country, but we had a match on the language 1255 // check installation of matched language 1256 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA); 1257 } else { 1258 // we have a match on the language and the country 1259 langIndex = countryIndex; 1260 // check installation of matched language + country 1261 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA); 1262 } 1263 1264 // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned. 1265} 1266 1267 1268/** getLanguage 1269 * Get the currently loaded language - if any. 1270 * @lang - string with current ISO 3 letter language code, empty string if no loaded language. 1271 * @country - string with current ISO 3 letter country code, empty string if no loaded language. 1272 * @variant - string with current language variant, empty string if no loaded language. 1273 * return tts_result 1274*/ 1275tts_result TtsEngine::getLanguage(char *language, char *country, char *variant) 1276{ 1277 if (picoCurrentLangIndex == -1) { 1278 strcpy(language, "\0"); 1279 strcpy(country, "\0"); 1280 strcpy(variant, "\0"); 1281 } else { 1282 strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]); 1283 strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]); 1284 // no variant in this implementation 1285 strcpy(variant, "\0"); 1286 } 1287 return TTS_SUCCESS; 1288} 1289 1290 1291/** setAudioFormat 1292 * sets the audio format to use for synthesis, returns what is actually used. 
1293 * @encoding - reference to encoding format 1294 * @rate - reference to sample rate 1295 * @channels - reference to number of channels 1296 * return tts_result 1297 * */ 1298tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate, 1299 int& channels) 1300{ 1301 // ignore the input parameters, the enforced audio parameters are fixed here 1302 encoding = TTS_AUDIO_FORMAT_PCM_16_BIT; 1303 rate = 16000; 1304 channels = 1; 1305 return TTS_SUCCESS; 1306} 1307 1308 1309/** setProperty 1310 * Set property. The supported properties are: language, rate, pitch and volume. 1311 * @property - name of property to set 1312 * @value - value to set 1313 * @size - size of value 1314 * return tts_result 1315*/ 1316tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size ) 1317{ 1318 int rate; 1319 int pitch; 1320 int volume; 1321 1322 /* Set a specific property for the engine. 1323 Supported properties include: language (locale), rate, pitch, volume. */ 1324 /* Sanity check */ 1325 if (property == NULL) { 1326 ALOGE("setProperty called with property NULL"); 1327 return TTS_PROPERTY_UNSUPPORTED; 1328 } 1329 1330 if (value == NULL) { 1331 ALOGE("setProperty called with value NULL"); 1332 return TTS_VALUE_INVALID; 1333 } 1334 1335 if (strncmp(property, "language", 8) == 0) { 1336 /* Verify it's in correct format. */ 1337 if (strlen(value) != 2 && strlen(value) != 6) { 1338 ALOGE("change language called with incorrect format"); 1339 return TTS_VALUE_INVALID; 1340 } 1341 1342 /* Try to switch to specified language. 
*/ 1343 if (doLanguageSwitch(value) == TTS_FAILURE) { 1344 ALOGE("failed to load language"); 1345 return TTS_FAILURE; 1346 } else { 1347 return TTS_SUCCESS; 1348 } 1349 } else if (strncmp(property, "rate", 4) == 0) { 1350 rate = atoi(value); 1351 if (rate < PICO_MIN_RATE) { 1352 rate = PICO_MIN_RATE; 1353 } 1354 if (rate > PICO_MAX_RATE) { 1355 rate = PICO_MAX_RATE; 1356 } 1357 picoProp_currRate = rate; 1358 return TTS_SUCCESS; 1359 } else if (strncmp(property, "pitch", 5) == 0) { 1360 pitch = atoi(value); 1361 if (pitch < PICO_MIN_PITCH) { 1362 pitch = PICO_MIN_PITCH; 1363 } 1364 if (pitch > PICO_MAX_PITCH) { 1365 pitch = PICO_MAX_PITCH; 1366 } 1367 picoProp_currPitch = pitch; 1368 return TTS_SUCCESS; 1369 } else if (strncmp(property, "volume", 6) == 0) { 1370 volume = atoi(value); 1371 if (volume < PICO_MIN_VOLUME) { 1372 volume = PICO_MIN_VOLUME; 1373 } 1374 if (volume > PICO_MAX_VOLUME) { 1375 volume = PICO_MAX_VOLUME; 1376 } 1377 picoProp_currVolume = volume; 1378 return TTS_SUCCESS; 1379 } 1380 1381 return TTS_PROPERTY_UNSUPPORTED; 1382} 1383 1384 1385/** getProperty 1386 * Get the property. Supported properties are: language, rate, pitch and volume. 1387 * @property - name of property to get 1388 * @value - buffer which will receive value of property 1389 * @iosize - size of value - if size is too small on return this will contain actual size needed 1390 * return tts_result 1391*/ 1392tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize ) 1393{ 1394 /* Get the property for the engine. 1395 This property was previously set by setProperty or by default. 
*/ 1396 /* sanity check */ 1397 if (property == NULL) { 1398 ALOGE("getProperty called with property NULL"); 1399 return TTS_PROPERTY_UNSUPPORTED; 1400 } 1401 1402 if (value == NULL) { 1403 ALOGE("getProperty called with value NULL"); 1404 return TTS_VALUE_INVALID; 1405 } 1406 1407 if (strncmp(property, "language", 8) == 0) { 1408 if (picoProp_currLang == NULL) { 1409 strcpy(value, ""); 1410 } else { 1411 if (*iosize < strlen(picoProp_currLang)+1) { 1412 *iosize = strlen(picoProp_currLang) + 1; 1413 return TTS_PROPERTY_SIZE_TOO_SMALL; 1414 } 1415 strcpy(value, picoProp_currLang); 1416 } 1417 return TTS_SUCCESS; 1418 } else if (strncmp(property, "rate", 4) == 0) { 1419 char tmprate[4]; 1420 sprintf(tmprate, "%d", picoProp_currRate); 1421 if (*iosize < strlen(tmprate)+1) { 1422 *iosize = strlen(tmprate) + 1; 1423 return TTS_PROPERTY_SIZE_TOO_SMALL; 1424 } 1425 strcpy(value, tmprate); 1426 return TTS_SUCCESS; 1427 } else if (strncmp(property, "pitch", 5) == 0) { 1428 char tmppitch[4]; 1429 sprintf(tmppitch, "%d", picoProp_currPitch); 1430 if (*iosize < strlen(tmppitch)+1) { 1431 *iosize = strlen(tmppitch) + 1; 1432 return TTS_PROPERTY_SIZE_TOO_SMALL; 1433 } 1434 strcpy(value, tmppitch); 1435 return TTS_SUCCESS; 1436 } else if (strncmp(property, "volume", 6) == 0) { 1437 char tmpvol[4]; 1438 sprintf(tmpvol, "%d", picoProp_currVolume); 1439 if (*iosize < strlen(tmpvol)+1) { 1440 *iosize = strlen(tmpvol) + 1; 1441 return TTS_PROPERTY_SIZE_TOO_SMALL; 1442 } 1443 strcpy(value, tmpvol); 1444 return TTS_SUCCESS; 1445 } 1446 1447 /* Unknown property */ 1448 ALOGE("Unsupported property"); 1449 return TTS_PROPERTY_UNSUPPORTED; 1450} 1451 1452 1453/** synthesizeText 1454 * Synthesizes a text string. 1455 * The text string could be annotated with SSML tags. 
1456 * @text - text to synthesize 1457 * @buffer - buffer which will receive generated samples 1458 * @bufferSize - size of buffer 1459 * @userdata - pointer to user data which will be passed back to callback function 1460 * return tts_result 1461*/ 1462tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata ) 1463{ 1464 int err; 1465 int cbret; 1466 pico_Char * inp = NULL; 1467 char * expanded_text = NULL; 1468 pico_Char * local_text = NULL; 1469 short outbuf[MAX_OUTBUF_SIZE/2]; 1470 pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; 1471 pico_Status ret; 1472 SvoxSsmlParser * parser = NULL; 1473 1474 picoSynthAbort = 0; 1475 if (text == NULL) { 1476 ALOGE("synthesizeText called with NULL string"); 1477 return TTS_FAILURE; 1478 } 1479 1480 if (strlen(text) == 0) { 1481 return TTS_SUCCESS; 1482 } 1483 1484 if (buffer == NULL) { 1485 ALOGE("synthesizeText called with NULL buffer"); 1486 return TTS_FAILURE; 1487 } 1488 1489 if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) { 1490 /* SSML input */ 1491 parser = new SvoxSsmlParser(); 1492 if (parser && parser->initSuccessful()) { 1493 err = parser->parseDocument(text, 1); 1494 if (err == XML_STATUS_ERROR) { 1495 /* Note: for some reason expat always thinks the input document has an error 1496 at the end, even when the XML document is perfectly formed */ 1497 ALOGI("Warning: SSML document parsed with errors"); 1498 } 1499 char * parsed_text = parser->getParsedDocument(); 1500 if (parsed_text) { 1501 /* Add property tags to the string - if any. 
*/ 1502 local_text = (pico_Char *) doAddProperties( parsed_text ); 1503 if (!local_text) { 1504 ALOGE("Failed to allocate memory for text string"); 1505 delete parser; 1506 return TTS_FAILURE; 1507 } 1508 char * lang = parser->getParsedDocumentLanguage(); 1509 if (lang != NULL) { 1510 if (doLanguageSwitch(lang) == TTS_FAILURE) { 1511 ALOGE("Failed to switch to language (%s) specified in SSML document.", lang); 1512 delete parser; 1513 return TTS_FAILURE; 1514 } 1515 } else { 1516 // lang is NULL, pick a language so the synthesis can be performed 1517 if (picoCurrentLangIndex == -1) { 1518 // no current language loaded, pick the first one and load it 1519 if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) { 1520 ALOGE("Failed to switch to default language."); 1521 delete parser; 1522 return TTS_FAILURE; 1523 } 1524 } 1525 //ALOGI("No language in SSML, using current language (%s).", picoProp_currLang); 1526 } 1527 delete parser; 1528 } else { 1529 ALOGE("Failed to parse SSML document"); 1530 delete parser; 1531 return TTS_FAILURE; 1532 } 1533 } else { 1534 ALOGE("Failed to create SSML parser"); 1535 if (parser) { 1536 delete parser; 1537 } 1538 return TTS_FAILURE; 1539 } 1540 } else { 1541 /* camelCase pre-processing */ 1542 expanded_text = doCamelCase(text); 1543 /* Add property tags to the string - if any. */ 1544 local_text = (pico_Char *) doAddProperties( expanded_text ); 1545 if (expanded_text) { 1546 free( expanded_text ); 1547 } 1548 if (!local_text) { 1549 ALOGE("Failed to allocate memory for text string"); 1550 return TTS_FAILURE; 1551 } 1552 } 1553 1554 text_remaining = strlen((const char *) local_text) + 1; 1555 1556 inp = (pico_Char *) local_text; 1557 1558 size_t bufused = 0; 1559 1560 /* synthesis loop */ 1561 while (text_remaining) { 1562 if (picoSynthAbort) { 1563 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1564 break; 1565 } 1566 1567 /* Feed the text into the engine. 
*/ 1568 ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ); 1569 if (ret != PICO_OK) { 1570 ALOGE("Error synthesizing string '%s': [%d]", text, ret); 1571 if (local_text) { 1572 free( local_text ); 1573 } 1574 return TTS_FAILURE; 1575 } 1576 1577 text_remaining -= bytes_sent; 1578 inp += bytes_sent; 1579 do { 1580 if (picoSynthAbort) { 1581 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1582 break; 1583 } 1584 /* Retrieve the samples and add them to the buffer. */ 1585 ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv, 1586 &out_data_type ); 1587 if (bytes_recv) { 1588 if ((bufused + bytes_recv) <= bufferSize) { 1589 memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv); 1590 bufused += bytes_recv; 1591 } else { 1592 /* The buffer filled; pass this on to the callback function. */ 1593 cbret = picoSynthDoneCBPtr(userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, 1594 bufused, TTS_SYNTH_PENDING); 1595 if (cbret == TTS_CALLBACK_HALT) { 1596 ALOGI("Halt requested by caller. Halting."); 1597 picoSynthAbort = 1; 1598 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1599 break; 1600 } 1601 bufused = 0; 1602 memcpy(buffer, (int8_t *) outbuf, bytes_recv); 1603 bufused += bytes_recv; 1604 } 1605 } 1606 } while (PICO_STEP_BUSY == ret); 1607 1608 /* This chunk of synthesis is finished; pass the remaining samples. 1609 Use 16 KHz, 16-bit samples. 
*/ 1610 if (!picoSynthAbort) { 1611 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused, 1612 TTS_SYNTH_PENDING); 1613 } 1614 picoSynthAbort = 0; 1615 1616 if (ret != PICO_STEP_IDLE) { 1617 if (ret != 0){ 1618 ALOGE("Error occurred during synthesis [%d]", ret); 1619 } 1620 if (local_text) { 1621 free(local_text); 1622 } 1623 ALOGV("Synth loop: sending TTS_SYNTH_DONE after error"); 1624 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused, 1625 TTS_SYNTH_DONE); 1626 pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1627 return TTS_FAILURE; 1628 } 1629 } 1630 1631 /* Synthesis is done; notify the caller */ 1632 ALOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop"); 1633 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused, 1634 TTS_SYNTH_DONE); 1635 1636 if (local_text) { 1637 free( local_text ); 1638 } 1639 return TTS_SUCCESS; 1640} 1641 1642 1643 1644/** stop 1645 * Aborts the running synthesis. 1646 * return tts_result 1647*/ 1648tts_result TtsEngine::stop( void ) 1649{ 1650 picoSynthAbort = 1; 1651 return TTS_SUCCESS; 1652} 1653 1654 1655#ifdef __cplusplus 1656extern "C" { 1657#endif 1658 1659TtsEngine * getTtsEngine( void ) 1660{ 1661 return new TtsEngine(); 1662} 1663 1664#ifdef __cplusplus 1665} 1666#endif 1667