/* com_svox_picottsengine.cpp

 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This is the Manager layer.  It sits on top of the native Pico engine
 * and provides the interface to the defined Google TTS engine API.
 * The Google engine API is the boundary to allow a TTS engine to be swapped.
 * The Manager layer also provides the SSML tag interpretation.
 * The supported SSML tags are mapped to corresponding tags natively supported by Pico.
 * Native Pico functions always begin with picoXXX.
 *
 * In the Pico engine, the language cannot be changed independently of the voice.
 * If either the voice or locale/language are changed, a new resource is loaded.
 *
 * Only a subset of SSML 1.0 tags are supported.
 * Some SSML tags involve significant complexity.
 * If the language is changed through an SSML tag, there is a latency for the load.
 *
 */
//#define LOG_NDEBUG 0

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>

/* Tag attached to every log line emitted through the Android log macros. */
#define LOG_TAG "SVOX Pico Engine"

#include <utils/Log.h>
#include <utils/String16.h>     /* for strlen16 */
#include <android_runtime/AndroidRuntime.h>
#include <tts/TtsEngine.h>
#include <cutils/jstring.h>
#include <picoapi.h>
#include <picodefs.h>
#include "svox_ssml_parser.h"

using namespace android;

/* adaptation layer defines */
/* Size in bytes of the memory block handed to pico_initialize(). */
#define PICO_MEM_SIZE       2500000
/* speaking rate: lower bound, upper bound and default */
#define PICO_MIN_RATE        20
#define PICO_MAX_RATE       500
#define PICO_DEF_RATE       100
/* speaking pitch: lower bound, upper bound and default */
#define PICO_MIN_PITCH       50
#define PICO_MAX_PITCH      200
#define PICO_DEF_PITCH      100
/* speaking volume: lower bound, upper bound and default */
#define PICO_MIN_VOLUME       0
#define PICO_MAX_VOLUME     500
#define PICO_DEF_VOLUME     100

/* string constants */
#define MAX_OUTBUF_SIZE     128
/* Directories holding the lingware files; both end with a '/' so that a file name
   can be appended directly. */
const char * PICO_SYSTEM_LINGWARE_PATH      = "/system/tts/lang_pico/";
const char * PICO_LINGWARE_PATH             = "/sdcard/svox/";
/* Name under which the single voice definition is created and released. */
const char * PICO_VOICE_NAME                = "PicoVoice";
/* Pico markup used to wrap text; the open tags for speed, pitch and volume are
   printf-style formats taking one int. */
const char * PICO_SPEED_OPEN_TAG            = "<speed level='%d'>";
const char * PICO_SPEED_CLOSE_TAG           = "</speed>";
const char * PICO_PITCH_OPEN_TAG            = "<pitch level='%d'>";
const char * PICO_PITCH_CLOSE_TAG           = "</pitch>";
const char * PICO_VOLUME_OPEN_TAG           = "<volume level='%d'>";
const char * PICO_VOLUME_CLOSE_TAG          = "</volume>";
const char * PICO_PHONEME_OPEN_TAG          = "<phoneme ph='";
const char * PICO_PHONEME_CLOSE_TAG         = "'/>";

/* supported voices
   Pico does not separately specify the voice and locale.
   The tables below are parallel: the same index selects the same voice in each of them. */
const char * picoSupportedLangIso3[]        = { "eng",              "eng",              "deu",              "spa",              "fra",              "ita" };
const char * picoSupportedCountryIso3[]     = { "USA",              "GBR",              "DEU",              "ESP",              "FRA",              "ITA" };
const char * picoSupportedLang[]            = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalLang[]             = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
/* File names of the text analysis ("ta"), signal generation ("sg") and optional
   "utpp" lingware resources for each voice. */
const char * picoInternalTaLingware[]       = { "en-US_ta.bin",     "en-GB_ta.bin",     "de-DE_ta.bin",     "es-ES_ta.bin",     "fr-FR_ta.bin",     "it-IT_ta.bin" };
const char * picoInternalSgLingware[]       = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
const char * picoInternalUtppLingware[]     = { "en-US_utpp.bin",   "en-GB_utpp.bin",   "de-DE_utpp.bin",   "es-ES_utpp.bin",   "fr-FR_utpp.bin",   "it-IT_utpp.bin" };
/* Number of entries in each of the voice tables above. */
const int picoNumSupportedVocs              = 6;

/* supported properties */
const char * picoSupportedProperties[]      = { "language", "rate", "pitch", "volume" };
const int    picoNumSupportedProperties     = 4;


/* adaptation layer global variables */
synthDoneCB_t * picoSynthDoneCBPtr;                     /* callback registered in TtsEngine::init() */
void *          picoMemArea         = NULL;             /* memory block given to pico_initialize() */
pico_System     picoSystem          = NULL;             /* Pico system object */
pico_Resource   picoTaResource      = NULL;             /* loaded text analysis resource */
pico_Resource   picoSgResource      = NULL;             /* loaded signal generation resource */
pico_Resource   picoUtppResource    = NULL;             /* loaded utpp resource (optional) */
pico_Engine     picoEngine          = NULL;             /* engine created for PICO_VOICE_NAME */
/* Heap-allocated file and resource name strings, released by cleanFiles(). */
pico_Char *     picoTaFileName      = NULL;
pico_Char *     picoSgFileName      = NULL;
pico_Char *     picoUtppFileName    = NULL;
pico_Char *     picoTaResourceName  = NULL;
pico_Char *     picoSgResourceName  = NULL;
pico_Char *     picoUtppResourceName = NULL;
int     picoSynthAbort = 0;
char *  picoProp_currLang   = NULL;                     /* current language */
int     picoProp_currRate   = PICO_DEF_RATE;            /* current rate     */
int     picoProp_currPitch  = PICO_DEF_PITCH;           /* current pitch    */
int     picoProp_currVolume = PICO_DEF_VOLUME;          /* current volume   */

/* Index of the loaded voice in the voice tables, or -1 when none is loaded. */
int picoCurrentLangIndex = -1;

/* Alternative directory searched for lingware files in addition to
   PICO_SYSTEM_LINGWARE_PATH; NULL until it is set elsewhere in this file. */
char * pico_alt_lingware_path = NULL;


/* internal helper functions */

/**
checkForLocale
 *  Look up a locale in the table of supported locales.
 *  @locale - the locale to look up, either in xx or xx-YY format
 *  return the index of the matching table entry, or -1 if the locale is not supported.
*/
static int checkForLocale( const char * locale )
{
    int idx;

    if (locale == NULL) {
        LOGE("checkForLocale called with NULL language");
        return -1;
    }

    /* First pass: the complete name has to match a supported locale exactly. */
    for (idx = 0; idx < picoNumSupportedVocs; idx++) {
        if (strcmp(locale, picoSupportedLang[idx]) == 0) {
            return idx;
        }
    }

    /* Second pass: no exact match, so the locale may have been given with only its
       first 2 characters.  The first table entry sharing that prefix wins.
       Note that this could overmatch ISO 639-3 language codes. */
    for (idx = 0; idx < picoNumSupportedVocs; idx++) {
        if (strncmp(locale, picoSupportedLang[idx], 2) == 0) {
            return idx;
        }
    }

    LOGE("TtsEngine::set language called with unsupported locale");
    return -1;
}


/** cleanResources
 *  Unloads any loaded Pico resources.
165*/ 166static void cleanResources( void ) 167{ 168 if (picoEngine) { 169 pico_disposeEngine( picoSystem, &picoEngine ); 170 pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME ); 171 picoEngine = NULL; 172 } 173 if (picoUtppResource) { 174 pico_unloadResource( picoSystem, &picoUtppResource ); 175 picoUtppResource = NULL; 176 } 177 if (picoTaResource) { 178 pico_unloadResource( picoSystem, &picoTaResource ); 179 picoTaResource = NULL; 180 } 181 if (picoSgResource) { 182 pico_unloadResource( picoSystem, &picoSgResource ); 183 picoSgResource = NULL; 184 } 185 186 if (picoSystem) { 187 pico_terminate(&picoSystem); 188 picoSystem = NULL; 189 } 190 picoCurrentLangIndex = -1; 191} 192 193 194/** cleanFiles 195 * Frees any memory allocated for file and resource strings. 196*/ 197static void cleanFiles( void ) 198{ 199 if (picoProp_currLang) { 200 free( picoProp_currLang ); 201 picoProp_currLang = NULL; 202 } 203 204 if (picoTaFileName) { 205 free( picoTaFileName ); 206 picoTaFileName = NULL; 207 } 208 209 if (picoSgFileName) { 210 free( picoSgFileName ); 211 picoSgFileName = NULL; 212 } 213 214 if (picoUtppFileName) { 215 free( picoUtppFileName ); 216 picoUtppFileName = NULL; 217 } 218 219 if (picoTaResourceName) { 220 free( picoTaResourceName ); 221 picoTaResourceName = NULL; 222 } 223 224 if (picoSgResourceName) { 225 free( picoSgResourceName ); 226 picoSgResourceName = NULL; 227 } 228 229 if (picoUtppResourceName) { 230 free( picoUtppResourceName ); 231 picoUtppResourceName = NULL; 232 } 233} 234 235/** hasResourcesForLanguage 236 * Check to see if the resources required to load the language at the specified index 237 * are properly installed 238 * @langIndex - the index of the language to check the resources for. The index is valid. 
239 * return true if the required resources are installed, false otherwise 240 */ 241static bool hasResourcesForLanguage(int langIndex) { 242 FILE * pFile; 243 char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); 244 245 /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */ 246 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH); 247 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]); 248 pFile = fopen(fileName, "r"); 249 if (pFile != NULL) { 250 /* "ta" file found. */ 251 fclose (pFile); 252 /* now look for "sg" file. */ 253 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH); 254 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]); 255 pFile = fopen(fileName, "r"); 256 if (pFile != NULL) { 257 /* "sg" file found, no need to continue checking, return success. */ 258 fclose(pFile); 259 free(fileName); 260 return true; 261 } 262 } 263 264 /* resources not found on system, check resources on alternative location */ 265 /* (under pico_alt_lingware_path). */ 266 strcpy((char*)fileName, pico_alt_lingware_path); 267 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]); 268 pFile = fopen(fileName, "r"); 269 if (pFile == NULL) { 270 free(fileName); 271 return false; 272 } else { 273 fclose (pFile); 274 } 275 276 strcpy((char*)fileName, pico_alt_lingware_path); 277 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]); 278 pFile = fopen(fileName, "r"); 279 if (pFile == NULL) { 280 free(fileName); 281 return false; 282 } else { 283 fclose(pFile); 284 free(fileName); 285 return true; 286 } 287} 288 289/** doLanguageSwitchFromLangIndex 290 * Switch to the requested locale. 291 * If the locale is already loaded, it returns immediately. 292 * If another locale is already is loaded, it will first be unloaded and the new one then loaded. 293 * If no locale is loaded, the requested locale will be loaded. 
294 * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported. 295 * return TTS_SUCCESS or TTS_FAILURE 296 */ 297static tts_result doLanguageSwitchFromLangIndex( int langIndex ) 298{ 299 int ret; /* function result code */ 300 301 if (langIndex>=0) { 302 /* If we already have a loaded locale, check whether it is the same one as requested. */ 303 if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) { 304 LOGI("Language already loaded (%s == %s)", picoProp_currLang, 305 picoSupportedLang[langIndex]); 306 return TTS_SUCCESS; 307 } 308 } 309 310 /* It is not the same locale; unload the current one first. Also invalidates the system object*/ 311 cleanResources(); 312 313 /* Allocate memory for file and resource names. */ 314 cleanFiles(); 315 316 if (picoSystem==NULL) { 317 /*re-init system object*/ 318 ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); 319 if (PICO_OK != ret) { 320 LOGE("Failed to initialize the pico system object\n"); 321 return TTS_FAILURE; 322 } 323 } 324 325 picoProp_currLang = (char *) malloc( 10 ); 326 picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 327 picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 328 picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); 329 picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 330 picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 331 picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); 332 333 if ( 334 (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) || 335 (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) || 336 (picoUtppResourceName==NULL) 337 ) { 338 LOGE("Failed to allocate memory for internal strings\n"); 339 cleanResources(); 340 return TTS_FAILURE; 341 } 
342 343 /* Find where to load the resource files from: system or alternative location */ 344 /* based on availability of the Ta file. Try the alternative location first, this is where */ 345 /* more recent language file updates would be installed (under pico_alt_lingware_path). */ 346 bool bUseSystemPath = true; 347 FILE * pFile; 348 char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); 349 strcpy((char*)tmpFileName, pico_alt_lingware_path); 350 strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]); 351 pFile = fopen(tmpFileName, "r"); 352 if (pFile != NULL) { 353 /* "ta" file found under pico_alt_lingware_path, don't use the system path. */ 354 fclose (pFile); 355 bUseSystemPath = false; 356 } 357 free(tmpFileName); 358 359 /* Set the path and file names for resource files. */ 360 if (bUseSystemPath) { 361 strcpy((char *) picoTaFileName, PICO_SYSTEM_LINGWARE_PATH); 362 strcpy((char *) picoSgFileName, PICO_SYSTEM_LINGWARE_PATH); 363 strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH); 364 } else { 365 strcpy((char *) picoTaFileName, pico_alt_lingware_path); 366 strcpy((char *) picoSgFileName, pico_alt_lingware_path); 367 strcpy((char *) picoUtppFileName, pico_alt_lingware_path); 368 } 369 strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]); 370 strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]); 371 strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]); 372 373 /* Load the text analysis Lingware resource file. */ 374 ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ); 375 if (PICO_OK != ret) { 376 LOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret); 377 cleanResources(); 378 cleanFiles(); 379 return TTS_FAILURE; 380 } 381 382 /* Load the signal generation Lingware resource file. 
*/ 383 ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ); 384 if (PICO_OK != ret) { 385 LOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret); 386 cleanResources(); 387 cleanFiles(); 388 return TTS_FAILURE; 389 } 390 391 /* Load the utpp Lingware resource file if exists - NOTE: this file is optional 392 and is currently not used. Loading is only attempted for future compatibility. 393 If this file is not present the loading will still succeed. */ 394 ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource ); 395 if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) { 396 LOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret); 397 cleanResources(); 398 cleanFiles(); 399 return TTS_FAILURE; 400 } 401 402 /* Get the text analysis resource name. */ 403 ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ); 404 if (PICO_OK != ret) { 405 LOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret); 406 cleanResources(); 407 cleanFiles(); 408 return TTS_FAILURE; 409 } 410 411 /* Get the signal generation resource name. */ 412 ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ); 413 if ((PICO_OK == ret) && (picoUtppResource != NULL)) { 414 /* Get utpp resource name - optional: see note above. */ 415 ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName ); 416 if (PICO_OK != ret) { 417 LOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret); 418 cleanResources(); 419 cleanFiles(); 420 return TTS_FAILURE; 421 } 422 } 423 if (PICO_OK != ret) { 424 LOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret); 425 cleanResources(); 426 cleanFiles(); 427 return TTS_FAILURE; 428 } 429 430 /* Create a voice definition. 
*/ 431 ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ); 432 if (PICO_OK != ret) { 433 LOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret); 434 cleanResources(); 435 cleanFiles(); 436 return TTS_FAILURE; 437 } 438 439 /* Add the text analysis resource to the voice. */ 440 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ); 441 if (PICO_OK != ret) { 442 LOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 443 cleanResources(); 444 cleanFiles(); 445 return TTS_FAILURE; 446 } 447 448 /* Add the signal generation resource to the voice. */ 449 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ); 450 if ((PICO_OK == ret) && (picoUtppResource != NULL)) { 451 /* Add utpp resource to voice - optional: see note above. */ 452 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName ); 453 if (PICO_OK != ret) { 454 LOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 455 cleanResources(); 456 cleanFiles(); 457 return TTS_FAILURE; 458 } 459 } 460 461 if (PICO_OK != ret) { 462 LOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); 463 cleanResources(); 464 cleanFiles(); 465 return TTS_FAILURE; 466 } 467 468 ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ); 469 if (PICO_OK != ret) { 470 LOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret); 471 cleanResources(); 472 cleanFiles(); 473 return TTS_FAILURE; 474 } 475 476 /* Set the current locale/voice. 
*/ 477 strcpy( picoProp_currLang, picoSupportedLang[langIndex] ); 478 picoCurrentLangIndex = langIndex; 479 LOGI("loaded %s successfully", picoProp_currLang); 480 return TTS_SUCCESS; 481} 482 483 484/** doLanguageSwitch 485 * Switch to the requested locale. 486 * If this locale is already loaded, it returns immediately. 487 * If another locale is already loaded, this will first be unloaded 488 * and the new one then loaded. 489 * If no locale is loaded, the requested will be loaded. 490 * @locale - the locale to check, either in xx or xx-YY format (i.e "en" or "en-US") 491 * return TTS_SUCCESS or TTS_FAILURE 492*/ 493static tts_result doLanguageSwitch( const char * locale ) 494{ 495 int loclIndex; /* locale index */ 496 497 /* Load the new locale. */ 498 loclIndex = checkForLocale( locale ); 499 if (loclIndex < 0) { 500 LOGE("Tried to swith to non-supported locale %s", locale); 501 return TTS_FAILURE; 502 } 503 LOGI("Found supported locale %s", picoSupportedLang[loclIndex]); 504 return doLanguageSwitchFromLangIndex( loclIndex ); 505} 506 507 508/** doAddProperties 509 * Add <speed>, <pitch> and <volume> tags to the text, 510 * if the properties have been set to non-default values, and return the new string. 511 * The calling function is responsible for freeing the returned string. 
512 * @str - text to apply tags to 513 * return new string with tags applied 514*/ 515static char * doAddProperties( const char * str ) 516{ 517 char * data = NULL; 518 int haspitch, hasspeed, hasvol; /* parameters */ 519 int textlen; /* property string length */ 520 haspitch = 0; hasspeed = 0; hasvol = 0; 521 textlen = strlen(str) + 1; 522 if (picoProp_currPitch != PICO_DEF_PITCH) { /* non-default pitch */ 523 textlen += strlen(PICO_PITCH_OPEN_TAG) + 5; 524 textlen += strlen(PICO_PITCH_CLOSE_TAG); 525 haspitch = 1; 526 } 527 if (picoProp_currRate != PICO_DEF_RATE) { /* non-default rate */ 528 textlen += strlen(PICO_SPEED_OPEN_TAG) + 5; 529 textlen += strlen(PICO_SPEED_CLOSE_TAG); 530 hasspeed = 1; 531 } 532 533 if (picoProp_currVolume != PICO_DEF_VOLUME) { /* non-default volume */ 534 textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5; 535 textlen += strlen(PICO_VOLUME_CLOSE_TAG); 536 hasvol = 1; 537 } 538 539 /* Compose the property strings. */ 540 data = (char *) malloc( textlen ); /* allocate string */ 541 if (!data) { 542 return NULL; 543 } 544 memset(data, 0, textlen); /* clear it */ 545 if (haspitch) { 546 char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5); 547 sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch); 548 strcat(data, tmp); 549 free(tmp); 550 } 551 552 if (hasspeed) { 553 char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5); 554 sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate); 555 strcat(data, tmp); 556 free(tmp); 557 } 558 559 if (hasvol) { 560 char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5); 561 sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume); 562 strcat(data, tmp); 563 free(tmp); 564 } 565 566 strcat(data, str); 567 if (hasvol) { 568 strcat(data, PICO_VOLUME_CLOSE_TAG); 569 } 570 571 if (hasspeed) { 572 strcat(data, PICO_SPEED_CLOSE_TAG); 573 } 574 575 if (haspitch) { 576 strcat(data, PICO_PITCH_CLOSE_TAG); 577 } 578 
return data; 579} 580 581 582/** get_tok 583 * Searches for tokens in a string 584 * @str - text to be processed 585 * @pos - position of first character to be searched in str 586 * @textlen - postion of last character to be searched 587 * @tokstart - address of a variable to receive the start of the token found 588 * @tokstart - address of a variable to receive the length of the token found 589 * return : 1=token found, 0=token not found 590 * notes : the token separator set could be enlarged adding characters in "seps" 591*/ 592static int get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) 593{ 594 const char * seps = " "; 595 596 /*look for start*/ 597 while ((pos<textlen) && (strchr(seps,str[pos]) != NULL)) { 598 pos++; 599 } 600 if (pos == textlen) { 601 /*no characters != seps found whithin string*/ 602 return 0; 603 } 604 *tokstart = pos; 605 /*look for end*/ 606 while ((pos<textlen) && (strchr(seps,str[pos]) == NULL)) { 607 pos++; 608 } 609 *toklen = pos - *tokstart; 610 return 1; 611}/*get_tok*/ 612 613 614/** get_sub_tok 615 * Searches for subtokens in a token having a compound structure with camel case like "xxxYyyy" 616 * @str - text to be processed 617 * @pos - position of first character to be searched in str 618 * @textlen - postion of last character to be searched in str 619 * @tokstart - address of a variable to receive the start of the sub token found 620 * @tokstart - address of a variable to receive the length of the sub token found 621 * return : 1=sub token found, 0=sub token not found 622 * notes : the sub token separator set could be enlarged adding characters in "seps" 623*/ 624static int get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) { 625 626 const char * seps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 627 628 if (pos == textlen) { 629 return 0; 630 } 631 632 /*first char != space*/ 633 *tokstart = pos; 634 /*finding first non separator*/ 635 while ((pos < textlen) && (strchr(seps, str[pos]) 
!= NULL)) { 636 pos++; 637 } 638 if (pos == textlen) { 639 /*characters all in seps found whithin string : return full token*/ 640 *toklen = pos - *tokstart; 641 return 1; 642 } 643 /*pos should be pointing to first non seps and more chars are there*/ 644 /*finding first separator*/ 645 while ((pos < textlen) && (strchr(seps, str[pos]) == NULL)) { 646 pos++; 647 } 648 if (pos == textlen) { 649 /*transition non seps->seps not found : return full token*/ 650 *toklen = pos - *tokstart; 651 return 1; 652 } 653 *toklen = pos - *tokstart; 654 return 1; 655}/*get_sub_tok*/ 656 657 658/** doCamelCase 659 * Searches for tokens having a compound structure with camel case and transforms them as follows : 660 * "XxxxYyyy" -->> "Xxxx Yyyy", 661 * "xxxYyyy" -->> "xxx Yyyy", 662 * "XXXYyyy" -->> "XXXYyyy" 663 * etc.... 664 * The calling function is responsible for freeing the returned string. 665 * @str - text to be processed 666 * return new string with text processed 667*/ 668static char * doCamelCase( const char * str ) 669{ 670 int textlen; /* input string length */ 671 int totlen; /* output string length */ 672 int tlen_2, nsubtok; /* nuber of subtokens */ 673 int toklen, tokstart; /*legnth and start of generic token*/ 674 int stoklen, stokstart; /*legnth and start of generic sub-token*/ 675 int pos, tokpos, outpos; /*postion of current char in input string and token and output*/ 676 char *data; /*pointer of the returned string*/ 677 678 pos = 0; 679 tokpos = 0; 680 toklen = 0; 681 stoklen = 0; 682 tlen_2 = 0; 683 totlen = 0; 684 685 textlen = strlen(str) + 1; 686 687 /*counting characters after sub token splitting including spaces*/ 688 //while ((pos<textlen) && (str[pos]!=0)) { 689 while (get_tok(str, pos, textlen, &tokstart, &toklen)) { 690 tokpos = tokstart; 691 tlen_2 = 0; 692 nsubtok = 0; 693 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) { 694 totlen += stoklen; 695 tlen_2 += stoklen; 696 tokpos = stokstart + stoklen; 697 nsubtok += 1; 698 } 
699 totlen += nsubtok; /*add spaces between subtokens*/ 700 pos = tokstart + tlen_2; 701 } 702 //} 703 /* Allocate the return string */ 704 705 data = (char *) malloc( totlen ); /* allocate string */ 706 if (!data) { 707 return NULL; 708 } 709 memset(data, 0, totlen); /* clear it */ 710 outpos = 0; 711 pos = 0; 712 /*copying characters*/ 713 //while ((pos<textlen) && (str[pos]!=0)) { 714 while (get_tok (str, pos, textlen, &tokstart, &toklen)) { 715 tokpos = tokstart; 716 tlen_2 = 0; 717 nsubtok = 0; 718 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) { 719 strncpy(&(data[outpos]), &(str[stokstart]), stoklen); 720 outpos += stoklen; 721 strncpy(&(data[outpos]), " ", 1); 722 tlen_2 += stoklen; 723 outpos += 1; 724 tokpos = stokstart + stoklen; 725 } 726 pos=tokstart+tlen_2; 727 } 728 //} 729 if (outpos == 0) { 730 outpos = 1; 731 } 732 data[outpos-1] = 0; 733 return data; 734}/*doCamelCase*/ 735 736 737/** createPhonemeString 738 * Wrap all individual words in <phoneme> tags. 739 * The Pico <phoneme> tag only supports one word in each tag, 740 * therefore they must be individually wrapped! 
741 * @xsampa - text to convert to Pico phomene string 742 * @length - length of the input string 743 * return new string with tags applied 744*/ 745extern char * createPhonemeString( const char * xsampa, int length ) 746{ 747 char * convstring = NULL; 748 int origStrLen = strlen(xsampa); 749 int numWords = 1; 750 int start, totalLength, i, j; 751 752 for (i = 0; i < origStrLen; i ++) { 753 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) { 754 numWords ++; 755 } 756 } 757 758 if (numWords == 1) { 759 convstring = new char[origStrLen + 17]; 760 convstring[0] = '\0'; 761 strcat(convstring, PICO_PHONEME_OPEN_TAG); 762 strcat(convstring, xsampa); 763 strcat(convstring, PICO_PHONEME_CLOSE_TAG); 764 } else { 765 char * words[numWords]; 766 start = 0; totalLength = 0; i = 0; j = 0; 767 for (i=0, j=0; i < origStrLen; i++) { 768 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) { 769 words[j] = new char[i+1-start+17]; 770 words[j][0] = '\0'; 771 strcat( words[j], PICO_PHONEME_OPEN_TAG); 772 strncat(words[j], xsampa+start, i-start); 773 strcat( words[j], PICO_PHONEME_CLOSE_TAG); 774 start = i + 1; 775 j++; 776 totalLength += strlen(words[j-1]); 777 } 778 } 779 words[j] = new char[i+1-start+17]; 780 words[j][0] = '\0'; 781 strcat(words[j], PICO_PHONEME_OPEN_TAG); 782 strcat(words[j], xsampa+start); 783 strcat(words[j], PICO_PHONEME_CLOSE_TAG); 784 totalLength += strlen(words[j]); 785 convstring = new char[totalLength + 1]; 786 convstring[0] = '\0'; 787 for (i=0 ; i < numWords ; i++) { 788 strcat(convstring, words[i]); 789 delete [] words[i]; 790 } 791 } 792 793 return convstring; 794} 795 796/* The XSAMPA uses as many as 5 characters to represent a single IPA code. */ 797typedef struct tagPhnArr 798{ 799 char16_t strIPA; /* IPA Unicode symbol */ 800 char strXSAMPA[6]; /* SAMPA sequence */ 801} PArr; 802 803#define phn_cnt (134+7) 804 805PArr PhnAry[phn_cnt] = { 806 807 /* XSAMPA conversion table 808 This maps a single IPA symbol to a sequence representing XSAMPA. 
809 This relies upon a direct one-to-one correspondance 810 including diphthongs and affricates. */ 811 812 /* Vowels (23) complete */ 813 {0x025B, "E"}, 814 {0x0251, "A"}, 815 {0x0254, "O"}, 816 {0x00F8, "2"}, 817 {0x0153, "9"}, 818 {0x0276, "&"}, 819 {0x0252, "Q"}, 820 {0x028C, "V"}, 821 {0x0264, "7"}, 822 {0x026F, "M"}, 823 {0x0268, "1"}, 824 {0x0289, "}"}, 825 {0x026A, "I"}, 826 {0x028F, "Y"}, 827 {0x028A, "U"}, 828 {0x0259, "@"}, 829 {0x0275, "8"}, 830 {0x0250, "6"}, 831 {0x00E6, "{"}, 832 {0x025C, "3"}, 833 {0x025A, "@`"}, 834 {0x025E, "3\\\\"}, 835 {0x0258, "@\\\\"}, 836 837 /* Consonants (60) complete */ 838 {0x0288, "t`"}, 839 {0x0256, "d`"}, 840 {0x025F, "J\\\\"}, 841 {0x0261, "g"}, 842 {0x0262, "G\\\\"}, 843 {0x0294, "?"}, 844 {0x0271, "F"}, 845 {0x0273, "n`"}, 846 {0x0272, "J"}, 847 {0x014B, "N"}, 848 {0x0274, "N\\\\"}, 849 {0x0299, "B\\\\"}, 850 {0x0280, "R\\\\"}, 851 {0x027E, "4"}, 852 {0x027D, "r`"}, 853 {0x0278, "p\\\\"}, 854 {0x03B2, "B"}, 855 {0x03B8, "T"}, 856 {0x00F0, "D"}, 857 {0x0283, "S"}, 858 {0x0292, "Z"}, 859 {0x0282, "s`"}, 860 {0x0290, "z`"}, 861 {0x00E7, "C"}, 862 {0x029D, "j\\\\"}, 863 {0x0263, "G"}, 864 {0x03C7, "X"}, 865 {0x0281, "R"}, 866 {0x0127, "X\\\\"}, 867 {0x0295, "?\\\\"}, 868 {0x0266, "h\\\\"}, 869 {0x026C, "K"}, 870 {0x026E, "K\\\\"}, 871 {0x028B, "P"}, 872 {0x0279, "r\\\\"}, 873 {0x027B, "r\\\\'"}, 874 {0x0270, "M\\\\"}, 875 {0x026D, "l`"}, 876 {0x028E, "L"}, 877 {0x029F, "L\\\\"}, 878 {0x0253, "b_<"}, 879 {0x0257, "d_<"}, 880 {0x0284, "J\\_<"}, 881 {0x0260, "g_<"}, 882 {0x029B, "G\\_<"}, 883 {0x028D, "W"}, 884 {0x0265, "H"}, 885 {0x029C, "H\\\\"}, 886 {0x02A1, ">\\\\"}, 887 {0x02A2, "<\\\\"}, 888 {0x0267, "x\\\\"}, /* hooktop heng */ 889 {0x0298, "O\\\\"}, 890 {0x01C0, "|\\\\"}, 891 {0x01C3, "!\\\\"}, 892 {0x01C2, "=\\"}, 893 {0x01C1, "|\\|\\"}, 894 {0x027A, "l\\\\"}, 895 {0x0255, "s\\\\"}, 896 {0x0291, "z\\\\"}, 897 {0x026B, "l_G"}, 898 899 900 /* Diacritics (37) complete */ 901 {0x02BC, "_>"}, 902 {0x0325, "_0"}, 903 
{0x030A, "_0"}, 904 {0x032C, "_v"}, 905 {0x02B0, "_h"}, 906 {0x0324, "_t"}, 907 {0x0330, "_k"}, 908 {0x033C, "_N"}, 909 {0x032A, "_d"}, 910 {0x033A, "_a"}, 911 {0x033B, "_m"}, 912 {0x0339, "_O"}, 913 {0x031C, "_c"}, 914 {0x031F, "_+"}, 915 {0x0320, "_-"}, 916 {0x0308, "_\""}, /* centralized */ 917 {0x033D, "_x"}, 918 {0x0318, "_A"}, 919 {0x0319, "_q"}, 920 {0x02DE, "`"}, 921 {0x02B7, "_w"}, 922 {0x02B2, "_j"}, 923 {0x02E0, "_G"}, 924 {0x02E4, "_?\\\\"}, /* pharyngealized */ 925 {0x0303, "~"}, /* nasalized */ 926 {0x207F, "_n"}, 927 {0x02E1, "_l"}, 928 {0x031A, "_}"}, 929 {0x0334, "_e"}, 930 {0x031D, "_r"}, /* raised equivalent to 02D4 */ 931 {0x02D4, "_r"}, /* raised equivalent to 031D */ 932 {0x031E, "_o"}, /* lowered equivalent to 02D5 */ 933 {0x02D5, "_o"}, /* lowered equivalent to 031E */ 934 {0x0329, "="}, /* sylabic */ 935 {0x032F, "_^"}, /* non-sylabic */ 936 {0x0361, "_"}, /* top tie bar */ 937 {0x035C, "_"}, 938 939 /* Suprasegmental (15) incomplete */ 940 {0x02C8, "\""}, /* primary stress */ 941 {0x02CC, "%"}, /* secondary stress */ 942 {0x02D0, ":"}, /* long */ 943 {0x02D1, ":\\\\"}, /* half-long */ 944 {0x0306, "_X"}, /* extra short */ 945 946 {0x2016, "||"}, /* major group */ 947 {0x203F, "-\\\\"}, /* bottom tie bar */ 948 {0x2197, "<R>"}, /* global rise */ 949 {0x2198, "<F>"}, /* global fall */ 950 {0x2193, "<D>"}, /* downstep */ 951 {0x2191, "<U>"}, /* upstep */ 952 {0x02E5, "<T>"}, /* extra high level */ 953 {0x02E7, "<M>"}, /* mid level */ 954 {0x02E9, "<B>"}, /* extra low level */ 955 956 {0x025D, "3`:"}, /* non-IPA %% */ 957 958 /* Affricates (6) complete */ 959 {0x02A3, "d_z"}, 960 {0x02A4, "d_Z"}, 961 {0x02A5, "d_z\\\\"}, 962 {0x02A6, "t_s"}, 963 {0x02A7, "t_S"}, 964 {0x02A8, "t_s\\\\"} 965 }; 966 967 968void CnvIPAPnt( const char16_t IPnt, char * XPnt ) 969{ 970 char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */ 971 int idx; /* index into table */ 972 973 /* Convert an individual IPA codepoint. 
974 A single IPA code could map to a string. 975 Search the table. If it is not found, use the same character. 976 Since most codepoints can be contained within 16 bits, 977 they are represented as wide chars. */ 978 XPnt[0] = 0; /* clear the result string */ 979 980 /* Search the table for the conversion. */ 981 for (idx = 0; idx < phn_cnt; idx ++) { /* for each item in table */ 982 if (IPnt == PhnAry[idx].strIPA) { /* matches IPA code */ 983 strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */ 984 return; 985 } 986 } 987 strcat(XPnt, (const char *)&ThisPnt); /* just copy it */ 988} 989 990 991/** cnvIpaToXsampa 992 * Convert an IPA character string to an XSAMPA character string. 993 * @ipaString - input IPA string to convert 994 * @outXsampaString - converted XSAMPA string is passed back in this parameter 995 * return size of the new string 996*/ 997 998int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString ) 999{ 1000 size_t xsize; /* size of result */ 1001 size_t ipidx; /* index into IPA string */ 1002 char * XPnt; /* short XSAMPA char sequence */ 1003 1004 /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString. 1005 It is the responsibility of the caller to free the allocated string. 1006 Increment through the string. For each base & combination convert it to the XSAMP equivalent. 1007 Because of the XSAMPA limitations, not all IPA characters will be covered. 
*/ 1008 XPnt = (char *) malloc(6); 1009 xsize = (4 * ipaStringSize) + 8; /* assume more than double size */ 1010 *outXsampaString = (char *) malloc( xsize );/* allocate return string */ 1011 *outXsampaString[0] = 0; 1012 xsize = 0; /* clear final */ 1013 1014 for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code */ 1015 CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted character */ 1016 strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */ 1017 } 1018 free(XPnt); 1019 xsize = strlen(*outXsampaString); /* get the final length */ 1020 return xsize; 1021} 1022 1023 1024/* Google Engine API function implementations */ 1025 1026/** init 1027 * Allocates Pico memory block and initializes the Pico system. 1028 * synthDoneCBPtr - Pointer to callback function which will receive generated samples 1029 * config - the engine configuration parameters, here only contains the non-system path 1030 * for the lingware location 1031 * return tts_result 1032*/ 1033tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config ) 1034{ 1035 if (synthDoneCBPtr == NULL) { 1036 LOGE("Callback pointer is NULL"); 1037 return TTS_FAILURE; 1038 } 1039 1040 picoMemArea = malloc( PICO_MEM_SIZE ); 1041 if (!picoMemArea) { 1042 LOGE("Failed to allocate memory for Pico system"); 1043 return TTS_FAILURE; 1044 } 1045 1046 pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); 1047 if (PICO_OK != ret) { 1048 LOGE("Failed to initialize Pico system"); 1049 free( picoMemArea ); 1050 picoMemArea = NULL; 1051 return TTS_FAILURE; 1052 } 1053 1054 picoSynthDoneCBPtr = synthDoneCBPtr; 1055 1056 picoCurrentLangIndex = -1; 1057 1058 // was the initialization given an alternative path for the lingware location? 
1059 if ((config != NULL) && (strlen(config) > 0)) { 1060 pico_alt_lingware_path = (char*)malloc(strlen(config)); 1061 strcpy((char*)pico_alt_lingware_path, config); 1062 LOGV("Alternative lingware path %s", pico_alt_lingware_path); 1063 } else { 1064 pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH)); 1065 strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH); 1066 LOGV("Using predefined lingware path %s", pico_alt_lingware_path); 1067 } 1068 1069 return TTS_SUCCESS; 1070} 1071 1072 1073/** shutdown 1074 * Unloads all Pico resources; terminates Pico system and frees Pico memory block. 1075 * return tts_result 1076*/ 1077tts_result TtsEngine::shutdown( void ) 1078{ 1079 cleanResources(); 1080 1081 if (picoSystem) { 1082 pico_terminate(&picoSystem); 1083 picoSystem = NULL; 1084 } 1085 if (picoMemArea) { 1086 free(picoMemArea); 1087 picoMemArea = NULL; 1088 } 1089 1090 cleanFiles(); 1091 return TTS_SUCCESS; 1092} 1093 1094 1095/** loadLanguage 1096 * Load a new language. 1097 * @lang - string with ISO 3 letter language code. 1098 * @country - string with ISO 3 letter country code . 1099 * @variant - string with language variant for that language and country pair. 1100 * return tts_result 1101*/ 1102tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) 1103{ 1104 return TTS_FAILURE; 1105 //return setProperty("language", value, size); 1106} 1107 1108 1109/** setLanguage 1110 * Load a new language (locale). Use the ISO 639-3 language codes. 1111 * @lang - string with ISO 639-3 language code. 1112 * @country - string with ISO 3 letter country code. 1113 * @variant - string with language variant for that language and country pair. 
1114 * return tts_result 1115 */ 1116tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant ) 1117{ 1118 int langIndex; 1119 int countryIndex; 1120 int i; 1121 1122 if (lang == NULL) 1123 { 1124 LOGE("TtsEngine::setLanguage called with NULL language"); 1125 return TTS_FAILURE; 1126 } 1127 1128 /* We look for a match on the language first 1129 then we look for a match on the country. 1130 If no match on the language: 1131 return an error. 1132 If match on the language, but no match on the country: 1133 load the language found for the language match. 1134 If match on the language, and match on the country: 1135 load the language found for the country match. */ 1136 1137 /* Find a match on the language. */ 1138 langIndex = -1; /* no match */ 1139 for (i = 0; i < picoNumSupportedVocs; i ++) 1140 { 1141 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) 1142 { 1143 langIndex = i; 1144 break; 1145 } 1146 } 1147 if (langIndex < 0) 1148 { 1149 /* The language isn't supported. */ 1150 LOGE("TtsEngine::setLanguage called with unsupported language"); 1151 return TTS_FAILURE; 1152 } 1153 1154 /* Find a match on the country, if there is one. */ 1155 if (country != NULL) 1156 { 1157 countryIndex = -1; 1158 for (i = langIndex; i < picoNumSupportedVocs; i ++) 1159 { 1160 if ( (strcmp(lang, picoSupportedLangIso3[i]) == 0) 1161 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) 1162 { 1163 countryIndex = i; 1164 break; 1165 } 1166 } 1167 1168 if (countryIndex < 0) 1169 { 1170 /* We didn't find a match on the country, but we had a match on the language. 1171 Use that language. */ 1172 LOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).", 1173 lang, country); 1174 } 1175 else 1176 { 1177 /* We have a match on both the language and the country. 
*/ 1178 langIndex = countryIndex; 1179 } 1180 } 1181 1182 return doLanguageSwitchFromLangIndex( langIndex ); /* switch the language */ 1183} 1184 1185 1186/** isLanguageAvailable 1187 * Returns the level of support for a language. 1188 * @lang - string with ISO 3 letter language code. 1189 * @country - string with ISO 3 letter country code . 1190 * @variant - string with language variant for that language and country pair. 1191 * return tts_support_result 1192*/ 1193tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country, 1194 const char *variant) { 1195 int langIndex = -1; 1196 int countryIndex = -1; 1197 //------------------------- 1198 // language matching 1199 // if no language specified 1200 if (lang == NULL) { 1201 LOGE("TtsEngine::isLanguageAvailable called with no language"); 1202 return TTS_LANG_NOT_SUPPORTED; 1203 } 1204 1205 // find a match on the language 1206 for (int i = 0; i < picoNumSupportedVocs; i++) 1207 { 1208 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) { 1209 langIndex = i; 1210 break; 1211 } 1212 } 1213 if (langIndex < 0) { 1214 // language isn't supported 1215 LOGV("TtsEngine::isLanguageAvailable called with unsupported language"); 1216 return TTS_LANG_NOT_SUPPORTED; 1217 } 1218 1219 //------------------------- 1220 // country matching 1221 // if no country specified 1222 if ((country == NULL) || (strlen(country) == 0)) { 1223 // check installation of matched language 1224 return (hasResourcesForLanguage(langIndex) ? 
TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA); 1225 } 1226 1227 // find a match on the country 1228 for (int i = langIndex; i < picoNumSupportedVocs; i++) { 1229 if ((strcmp(lang, picoSupportedLangIso3[i]) == 0) 1230 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) { 1231 countryIndex = i; 1232 break; 1233 } 1234 } 1235 if (countryIndex < 0) { 1236 // we didn't find a match on the country, but we had a match on the language 1237 // check installation of matched language 1238 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA); 1239 } else { 1240 // we have a match on the language and the country 1241 langIndex = countryIndex; 1242 // check installation of matched language + country 1243 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA); 1244 } 1245 1246 // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned. 1247} 1248 1249 1250/** getLanguage 1251 * Get the currently loaded language - if any. 1252 * @lang - string with current ISO 3 letter language code, empty string if no loaded language. 1253 * @country - string with current ISO 3 letter country code, empty string if no loaded language. 1254 * @variant - string with current language variant, empty string if no loaded language. 1255 * return tts_result 1256*/ 1257tts_result TtsEngine::getLanguage(char *language, char *country, char *variant) 1258{ 1259 if (picoCurrentLangIndex == -1) { 1260 strcpy(language, "\0"); 1261 strcpy(country, "\0"); 1262 strcpy(variant, "\0"); 1263 } else { 1264 strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]); 1265 strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]); 1266 // no variant in this implementation 1267 strcpy(variant, "\0"); 1268 } 1269 return TTS_SUCCESS; 1270} 1271 1272 1273/** setAudioFormat 1274 * sets the audio format to use for synthesis, returns what is actually used. 
1275 * @encoding - reference to encoding format 1276 * @rate - reference to sample rate 1277 * @channels - reference to number of channels 1278 * return tts_result 1279 * */ 1280tts_result TtsEngine::setAudioFormat(AudioSystem::audio_format& encoding, uint32_t& rate, 1281 int& channels) 1282{ 1283 // ignore the input parameters, the enforced audio parameters are fixed here 1284 encoding = AudioSystem::PCM_16_BIT; 1285 rate = 16000; 1286 channels = 1; 1287 return TTS_SUCCESS; 1288} 1289 1290 1291/** setProperty 1292 * Set property. The supported properties are: language, rate, pitch and volume. 1293 * @property - name of property to set 1294 * @value - value to set 1295 * @size - size of value 1296 * return tts_result 1297*/ 1298tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size ) 1299{ 1300 int rate; 1301 int pitch; 1302 int volume; 1303 1304 /* Set a specific property for the engine. 1305 Supported properties include: language (locale), rate, pitch, volume. */ 1306 /* Sanity check */ 1307 if (property == NULL) { 1308 LOGE("setProperty called with property NULL"); 1309 return TTS_PROPERTY_UNSUPPORTED; 1310 } 1311 1312 if (value == NULL) { 1313 LOGE("setProperty called with value NULL"); 1314 return TTS_VALUE_INVALID; 1315 } 1316 1317 if (strncmp(property, "language", 8) == 0) { 1318 /* Verify it's in correct format. */ 1319 if (strlen(value) != 2 && strlen(value) != 6) { 1320 LOGE("change language called with incorrect format"); 1321 return TTS_VALUE_INVALID; 1322 } 1323 1324 /* Try to switch to specified language. 
*/ 1325 if (doLanguageSwitch(value) == TTS_FAILURE) { 1326 LOGE("failed to load language"); 1327 return TTS_FAILURE; 1328 } else { 1329 return TTS_SUCCESS; 1330 } 1331 } else if (strncmp(property, "rate", 4) == 0) { 1332 rate = atoi(value); 1333 if (rate < PICO_MIN_RATE) { 1334 rate = PICO_MIN_RATE; 1335 } 1336 if (rate > PICO_MAX_RATE) { 1337 rate = PICO_MAX_RATE; 1338 } 1339 picoProp_currRate = rate; 1340 return TTS_SUCCESS; 1341 } else if (strncmp(property, "pitch", 5) == 0) { 1342 pitch = atoi(value); 1343 if (pitch < PICO_MIN_PITCH) { 1344 pitch = PICO_MIN_PITCH; 1345 } 1346 if (pitch > PICO_MAX_PITCH) { 1347 pitch = PICO_MAX_PITCH; 1348 } 1349 picoProp_currPitch = pitch; 1350 return TTS_SUCCESS; 1351 } else if (strncmp(property, "volume", 6) == 0) { 1352 volume = atoi(value); 1353 if (volume < PICO_MIN_VOLUME) { 1354 volume = PICO_MIN_VOLUME; 1355 } 1356 if (volume > PICO_MAX_VOLUME) { 1357 volume = PICO_MAX_VOLUME; 1358 } 1359 picoProp_currVolume = volume; 1360 return TTS_SUCCESS; 1361 } 1362 1363 return TTS_PROPERTY_UNSUPPORTED; 1364} 1365 1366 1367/** getProperty 1368 * Get the property. Supported properties are: language, rate, pitch and volume. 1369 * @property - name of property to get 1370 * @value - buffer which will receive value of property 1371 * @iosize - size of value - if size is too small on return this will contain actual size needed 1372 * return tts_result 1373*/ 1374tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize ) 1375{ 1376 /* Get the property for the engine. 1377 This property was previously set by setProperty or by default. 
*/ 1378 /* sanity check */ 1379 if (property == NULL) { 1380 LOGE("getProperty called with property NULL"); 1381 return TTS_PROPERTY_UNSUPPORTED; 1382 } 1383 1384 if (value == NULL) { 1385 LOGE("getProperty called with value NULL"); 1386 return TTS_VALUE_INVALID; 1387 } 1388 1389 if (strncmp(property, "language", 8) == 0) { 1390 if (picoProp_currLang == NULL) { 1391 strcpy(value, ""); 1392 } else { 1393 if (*iosize < strlen(picoProp_currLang)+1) { 1394 *iosize = strlen(picoProp_currLang) + 1; 1395 return TTS_PROPERTY_SIZE_TOO_SMALL; 1396 } 1397 strcpy(value, picoProp_currLang); 1398 } 1399 return TTS_SUCCESS; 1400 } else if (strncmp(property, "rate", 4) == 0) { 1401 char tmprate[4]; 1402 sprintf(tmprate, "%d", picoProp_currRate); 1403 if (*iosize < strlen(tmprate)+1) { 1404 *iosize = strlen(tmprate) + 1; 1405 return TTS_PROPERTY_SIZE_TOO_SMALL; 1406 } 1407 strcpy(value, tmprate); 1408 return TTS_SUCCESS; 1409 } else if (strncmp(property, "pitch", 5) == 0) { 1410 char tmppitch[4]; 1411 sprintf(tmppitch, "%d", picoProp_currPitch); 1412 if (*iosize < strlen(tmppitch)+1) { 1413 *iosize = strlen(tmppitch) + 1; 1414 return TTS_PROPERTY_SIZE_TOO_SMALL; 1415 } 1416 strcpy(value, tmppitch); 1417 return TTS_SUCCESS; 1418 } else if (strncmp(property, "volume", 6) == 0) { 1419 char tmpvol[4]; 1420 sprintf(tmpvol, "%d", picoProp_currVolume); 1421 if (*iosize < strlen(tmpvol)+1) { 1422 *iosize = strlen(tmpvol) + 1; 1423 return TTS_PROPERTY_SIZE_TOO_SMALL; 1424 } 1425 strcpy(value, tmpvol); 1426 return TTS_SUCCESS; 1427 } 1428 1429 /* Unknown property */ 1430 LOGE("Unsupported property"); 1431 return TTS_PROPERTY_UNSUPPORTED; 1432} 1433 1434 1435/** synthesizeText 1436 * Synthesizes a text string. 1437 * The text string could be annotated with SSML tags. 
1438 * @text - text to synthesize 1439 * @buffer - buffer which will receive generated samples 1440 * @bufferSize - size of buffer 1441 * @userdata - pointer to user data which will be passed back to callback function 1442 * return tts_result 1443*/ 1444tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata ) 1445{ 1446 int err; 1447 int cbret; 1448 pico_Char * inp = NULL; 1449 char * expanded_text = NULL; 1450 pico_Char * local_text = NULL; 1451 short outbuf[MAX_OUTBUF_SIZE/2]; 1452 pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; 1453 pico_Status ret; 1454 SvoxSsmlParser * parser = NULL; 1455 1456 picoSynthAbort = 0; 1457 if (text == NULL) { 1458 LOGE("synthesizeText called with NULL string"); 1459 return TTS_FAILURE; 1460 } 1461 1462 if (strlen(text) == 0) { 1463 return TTS_SUCCESS; 1464 } 1465 1466 if (buffer == NULL) { 1467 LOGE("synthesizeText called with NULL buffer"); 1468 return TTS_FAILURE; 1469 } 1470 1471 if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) { 1472 /* SSML input */ 1473 parser = new SvoxSsmlParser(); 1474 if (parser && parser->initSuccessful()) { 1475 err = parser->parseDocument(text, 1); 1476 if (err == XML_STATUS_ERROR) { 1477 /* Note: for some reason expat always thinks the input document has an error 1478 at the end, even when the XML document is perfectly formed */ 1479 LOGI("Warning: SSML document parsed with errors"); 1480 } 1481 char * parsed_text = parser->getParsedDocument(); 1482 if (parsed_text) { 1483 /* Add property tags to the string - if any. 
*/ 1484 local_text = (pico_Char *) doAddProperties( parsed_text ); 1485 if (!local_text) { 1486 LOGE("Failed to allocate memory for text string"); 1487 delete parser; 1488 return TTS_FAILURE; 1489 } 1490 char * lang = parser->getParsedDocumentLanguage(); 1491 if (lang != NULL) { 1492 if (doLanguageSwitch(lang) == TTS_FAILURE) { 1493 LOGE("Failed to switch to language (%s) specified in SSML document.", lang); 1494 delete parser; 1495 return TTS_FAILURE; 1496 } 1497 } else { 1498 // lang is NULL, pick a language so the synthesis can be performed 1499 if (picoCurrentLangIndex == -1) { 1500 // no current language loaded, pick the first one and load it 1501 if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) { 1502 LOGE("Failed to switch to default language."); 1503 delete parser; 1504 return TTS_FAILURE; 1505 } 1506 } 1507 LOGE("No language in SSML, using current language (%s).", picoProp_currLang); 1508 } 1509 delete parser; 1510 } else { 1511 LOGE("Failed to parse SSML document"); 1512 delete parser; 1513 return TTS_FAILURE; 1514 } 1515 } else { 1516 LOGE("Failed to create SSML parser"); 1517 if (parser) { 1518 delete parser; 1519 } 1520 return TTS_FAILURE; 1521 } 1522 } else { 1523 /* camelCase pre-processing */ 1524 expanded_text = doCamelCase(text); 1525 /* Add property tags to the string - if any. */ 1526 local_text = (pico_Char *) doAddProperties( expanded_text ); 1527 if (expanded_text) { 1528 free( expanded_text ); 1529 } 1530 if (!local_text) { 1531 LOGE("Failed to allocate memory for text string"); 1532 return TTS_FAILURE; 1533 } 1534 } 1535 1536 text_remaining = strlen((const char *) local_text) + 1; 1537 1538 inp = (pico_Char *) local_text; 1539 1540 size_t bufused = 0; 1541 1542 /* synthesis loop */ 1543 while (text_remaining) { 1544 if (picoSynthAbort) { 1545 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1546 break; 1547 } 1548 1549 /* Feed the text into the engine. 
*/ 1550 ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ); 1551 if (ret != PICO_OK) { 1552 LOGE("Error synthesizing string '%s': [%d]", text, ret); 1553 if (local_text) { 1554 free( local_text ); 1555 } 1556 return TTS_FAILURE; 1557 } 1558 1559 text_remaining -= bytes_sent; 1560 inp += bytes_sent; 1561 do { 1562 if (picoSynthAbort) { 1563 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1564 break; 1565 } 1566 /* Retrieve the samples and add them to the buffer. */ 1567 ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv, 1568 &out_data_type ); 1569 if (bytes_recv) { 1570 if ((bufused + bytes_recv) <= bufferSize) { 1571 memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv); 1572 bufused += bytes_recv; 1573 } else { 1574 /* The buffer filled; pass this on to the callback function. */ 1575 cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, 1576 bufused, TTS_SYNTH_PENDING); 1577 if (cbret == TTS_CALLBACK_HALT) { 1578 LOGI("Halt requested by caller. Halting."); 1579 picoSynthAbort = 1; 1580 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1581 break; 1582 } 1583 bufused = 0; 1584 memcpy(buffer, (int8_t *) outbuf, bytes_recv); 1585 bufused += bytes_recv; 1586 } 1587 } 1588 } while (PICO_STEP_BUSY == ret); 1589 1590 /* This chunk of synthesis is finished; pass the remaining samples. 1591 Use 16 KHz, 16-bit samples. 
*/ 1592 if (!picoSynthAbort) { 1593 picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, 1594 TTS_SYNTH_PENDING); 1595 } 1596 picoSynthAbort = 0; 1597 1598 if (ret != PICO_STEP_IDLE) { 1599 if (ret != 0){ 1600 LOGE("Error occurred during synthesis [%d]", ret); 1601 } 1602 if (local_text) { 1603 free(local_text); 1604 } 1605 LOGV("Synth loop: sending TTS_SYNTH_DONE after error"); 1606 picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, 1607 TTS_SYNTH_DONE); 1608 pico_resetEngine( picoEngine, PICO_RESET_SOFT ); 1609 return TTS_FAILURE; 1610 } 1611 } 1612 1613 /* Synthesis is done; notify the caller */ 1614 LOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop"); 1615 picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, 1616 TTS_SYNTH_DONE); 1617 1618 if (local_text) { 1619 free( local_text ); 1620 } 1621 return TTS_SUCCESS; 1622} 1623 1624 1625 1626/** stop 1627 * Aborts the running synthesis. 1628 * return tts_result 1629*/ 1630tts_result TtsEngine::stop( void ) 1631{ 1632 picoSynthAbort = 1; 1633 return TTS_SUCCESS; 1634} 1635 1636 1637#ifdef __cplusplus 1638extern "C" { 1639#endif 1640 1641TtsEngine * getTtsEngine( void ) 1642{ 1643 return new TtsEngine(); 1644} 1645 1646#ifdef __cplusplus 1647} 1648#endif 1649 1650