1/* 2 * Copyright (C) 2009 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <stdio.h> 18#include <unistd.h> 19 20#define LOG_TAG "SynthProxy" 21 22#include <utils/Log.h> 23#include <nativehelper/jni.h> 24#include <nativehelper/JNIHelp.h> 25#include <android_runtime/AndroidRuntime.h> 26#include <tts/TtsEngine.h> 27#include <media/AudioTrack.h> 28#include <math.h> 29 30#include <dlfcn.h> 31 32#define DEFAULT_TTS_RATE 16000 33#define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT 34#define DEFAULT_TTS_NB_CHANNELS 1 35#define DEFAULT_TTS_BUFFERSIZE 2048 36// TODO use the TTS stream type when available 37#define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC 38 39// EQ + BOOST parameters 40#define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB 41#define FILTER_TRANSITION_FREQ 1100.0f // in Hz 42#define FILTER_SHELF_SLOPE 1.0f // Q 43#define FILTER_GAIN 5.5f // linear gain 44 45#define USAGEMODE_PLAY_IMMEDIATELY 0 46#define USAGEMODE_WRITE_TO_FILE 1 47 48#define SYNTHPLAYSTATE_IS_STOPPED 0 49#define SYNTHPLAYSTATE_IS_PLAYING 1 50 51using namespace android; 52 53// ---------------------------------------------------------------------------- 54struct fields_t { 55 jfieldID synthProxyFieldJniData; 56 jclass synthProxyClass; 57 jmethodID synthProxyMethodPost; 58}; 59 60// structure to hold the data that is used each time the TTS engine has synthesized more data 61struct afterSynthData_t { 62 jint jniStorage; 63 int usageMode; 64 FILE* outputFile; 65 AudioSystem::stream_type streamType; 66}; 67 68// ---------------------------------------------------------------------------- 69// EQ data 70double amp; 71double w; 72double sinw; 73double cosw; 74double beta; 75double a0, a1, a2, b0, b1, b2; 76double m_fa, m_fb, m_fc, m_fd, m_fe; 77double x0; // x[n] 78double x1; // x[n-1] 79double x2; // x[n-2] 80double out0;// y[n] 81double out1;// y[n-1] 82double out2;// y[n-2] 83 84static float fFilterLowshelfAttenuation = FILTER_LOWSHELF_ATTENUATION; 85static float fFilterTransitionFreq = FILTER_TRANSITION_FREQ; 86static float fFilterShelfSlope = FILTER_SHELF_SLOPE; 87static float fFilterGain = FILTER_GAIN; 88static bool bUseFilter = false; 89 90void initializeEQ() { 91 92 amp = float(pow(10.0, fFilterLowshelfAttenuation / 40.0)); 93 w = 2.0 * M_PI * (fFilterTransitionFreq / DEFAULT_TTS_RATE); 94 sinw = float(sin(w)); 95 cosw = float(cos(w)); 96 beta = float(sqrt(amp)/fFilterShelfSlope); 97 98 // initialize low-shelf parameters 99 b0 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) + (beta*sinw)); 100 b1 = 2.0F * amp * ((amp-1.0F) - ((amp+1.0F)*cosw)); 101 b2 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) - (beta*sinw)); 102 a0 = (amp+1.0F) + ((amp-1.0F)*cosw) + (beta*sinw); 103 a1 = 2.0F * ((amp-1.0F) + ((amp+1.0F)*cosw)); 104 a2 = -((amp+1.0F) + ((amp-1.0F)*cosw) - (beta*sinw)); 105 106 m_fa = fFilterGain * b0/a0; 107 m_fb = fFilterGain * b1/a0; 108 m_fc = fFilterGain * b2/a0; 109 m_fd = a1/a0; 110 m_fe = a2/a0; 111} 112 113void initializeFilter() { 114 x0 = 0.0f; 115 x1 = 0.0f; 116 x2 = 0.0f; 117 out0 = 0.0f; 118 out1 = 0.0f; 119 out2 = 0.0f; 120} 121 122void applyFilter(int16_t* buffer, size_t sampleCount) { 123 124 for (size_t i=0 ; i<sampleCount ; i++) { 125 126 x0 = (double) buffer[i]; 127 128 out0 = (m_fa*x0) + (m_fb*x1) + (m_fc*x2) + (m_fd*out1) + (m_fe*out2); 129 130 x2 = x1; 131 x1 = x0; 132 133 out2 = out1; 134 out1 = out0; 135 136 if (out0 > 32767.0f) { 137 buffer[i] = 32767; 138 } else if (out0 < -32768.0f) { 139 buffer[i] = -32768; 140 } else { 141 buffer[i] = (int16_t) out0; 142 } 143 } 144} 145 146 147// ---------------------------------------------------------------------------- 148static fields_t javaTTSFields; 149 150// TODO move to synth member once we have multiple simultaneous engines running 151static Mutex engineMutex; 152 153// ---------------------------------------------------------------------------- 154class SynthProxyJniStorage { 155 public : 156 jobject tts_ref; 157 TtsEngine* mNativeSynthInterface; 158 void* mEngineLibHandle; 159 AudioTrack* mAudioOut; 160 int8_t mPlayState; 161 Mutex mPlayLock; 162 AudioSystem::stream_type mStreamType; 163 uint32_t mSampleRate; 164 uint32_t mAudFormat; 165 int mNbChannels; 166 int8_t * mBuffer; 167 size_t mBufferSize; 168 169 SynthProxyJniStorage() { 170 tts_ref = NULL; 171 mNativeSynthInterface = NULL; 172 mEngineLibHandle = NULL; 173 mAudioOut = NULL; 174 mPlayState = SYNTHPLAYSTATE_IS_STOPPED; 175 mStreamType = DEFAULT_TTS_STREAM_TYPE; 176 mSampleRate = DEFAULT_TTS_RATE; 177 mAudFormat = DEFAULT_TTS_FORMAT; 178 mNbChannels = DEFAULT_TTS_NB_CHANNELS; 179 mBufferSize = DEFAULT_TTS_BUFFERSIZE; 180 mBuffer = new int8_t[mBufferSize]; 181 memset(mBuffer, 0, mBufferSize); 182 } 183 184 ~SynthProxyJniStorage() { 185 //LOGV("entering ~SynthProxyJniStorage()"); 186 killAudio(); 187 if (mNativeSynthInterface) { 188 mNativeSynthInterface->shutdown(); 189 mNativeSynthInterface = NULL; 190 } 191 if (mEngineLibHandle) { 192 //LOGE("~SynthProxyJniStorage(): before close library"); 193 int res = dlclose(mEngineLibHandle); 194 LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res); 195 } 196 delete mBuffer; 197 } 198 199 void killAudio() { 200 if (mAudioOut) { 201 mAudioOut->stop(); 202 delete mAudioOut; 203 mAudioOut = NULL; 204 } 205 } 206 207 void createAudioOut(AudioSystem::stream_type streamType, uint32_t rate, 208 AudioSystem::audio_format format, int channel) { 209 mSampleRate = rate; 210 mAudFormat = format; 211 mNbChannels = channel; 212 mStreamType = streamType; 213 214 // retrieve system properties to ensure successful creation of the 215 // AudioTrack object for playback 216 int afSampleRate; 217 if (AudioSystem::getOutputSamplingRate(&afSampleRate, mStreamType) != NO_ERROR) { 218 afSampleRate = 44100; 219 } 220 int afFrameCount; 221 if (AudioSystem::getOutputFrameCount(&afFrameCount, mStreamType) != NO_ERROR) { 222 afFrameCount = 2048; 223 } 224 uint32_t afLatency; 225 if (AudioSystem::getOutputLatency(&afLatency, mStreamType) != NO_ERROR) { 226 afLatency = 500; 227 } 228 uint32_t minBufCount = afLatency / ((1000 * afFrameCount)/afSampleRate); 229 if (minBufCount < 2) minBufCount = 2; 230 int minFrameCount = (afFrameCount * rate * minBufCount)/afSampleRate; 231 232 mPlayLock.lock(); 233 mAudioOut = new AudioTrack(mStreamType, rate, format, 234 (channel == 2) ? AudioSystem::CHANNEL_OUT_STEREO : AudioSystem::CHANNEL_OUT_MONO, 235 minFrameCount > 4096 ? minFrameCount : 4096, 236 0, 0, 0, 0); // not using an AudioTrack callback 237 238 if (mAudioOut->initCheck() != NO_ERROR) { 239 LOGE("createAudioOut(): AudioTrack error"); 240 delete mAudioOut; 241 mAudioOut = NULL; 242 } else { 243 //LOGI("AudioTrack OK"); 244 mAudioOut->setVolume(1.0f, 1.0f); 245 LOGV("AudioTrack ready"); 246 } 247 mPlayLock.unlock(); 248 } 249}; 250 251 252// ---------------------------------------------------------------------------- 253void prepAudioTrack(SynthProxyJniStorage* pJniData, AudioSystem::stream_type streamType, 254 uint32_t rate, AudioSystem::audio_format format, int channel) { 255 // Don't bother creating a new audiotrack object if the current 256 // object is already initialized with the same audio parameters. 257 if ( pJniData->mAudioOut && 258 (rate == pJniData->mSampleRate) && 259 (format == pJniData->mAudFormat) && 260 (channel == pJniData->mNbChannels) && 261 (streamType == pJniData->mStreamType) ){ 262 return; 263 } 264 if (pJniData->mAudioOut){ 265 pJniData->killAudio(); 266 } 267 pJniData->createAudioOut(streamType, rate, format, channel); 268} 269 270 271// ---------------------------------------------------------------------------- 272/* 273 * Callback from TTS engine. 274 * Directly speaks using AudioTrack or write to file 275 */ 276static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate, 277 uint32_t format, int channel, 278 int8_t *&wav, size_t &bufferSize, tts_synth_status status) { 279 //LOGV("ttsSynthDoneCallback: %d bytes", bufferSize); 280 281 if (userdata == NULL){ 282 LOGE("userdata == NULL"); 283 return TTS_CALLBACK_HALT; 284 } 285 afterSynthData_t* pForAfter = (afterSynthData_t*)userdata; 286 SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage); 287 288 if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){ 289 //LOGV("Direct speech"); 290 291 if (wav == NULL) { 292 delete pForAfter; 293 LOGV("Null: speech has completed"); 294 } 295 296 if (bufferSize > 0) { 297 prepAudioTrack(pJniData, pForAfter->streamType, rate, (AudioSystem::audio_format)format, channel); 298 if (pJniData->mAudioOut) { 299 pJniData->mPlayLock.lock(); 300 if(pJniData->mAudioOut->stopped() 301 && (pJniData->mPlayState == SYNTHPLAYSTATE_IS_PLAYING)) { 302 pJniData->mAudioOut->start(); 303 } 304 pJniData->mPlayLock.unlock(); 305 if (bUseFilter) { 306 applyFilter((int16_t*)wav, bufferSize/2); 307 } 308 pJniData->mAudioOut->write(wav, bufferSize); 309 memset(wav, 0, bufferSize); 310 //LOGV("AudioTrack wrote: %d bytes", bufferSize); 311 } else { 312 LOGE("Can't play, null audiotrack"); 313 } 314 } 315 } else if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) { 316 //LOGV("Save to file"); 317 if (wav == NULL) { 318 delete pForAfter; 319 LOGV("Null: speech has completed"); 320 return TTS_CALLBACK_HALT; 321 } 322 if (bufferSize > 0){ 323 if (bUseFilter) { 324 applyFilter((int16_t*)wav, bufferSize/2); 325 } 326 fwrite(wav, 1, bufferSize, pForAfter->outputFile); 327 memset(wav, 0, bufferSize); 328 } 329 } 330 // Future update: 331 // For sync points in the speech, call back into the SynthProxy class through the 332 // javaTTSFields.synthProxyMethodPost methode to notify 333 // playback has completed if the synthesis is done or if a marker has been reached. 334 335 if (status == TTS_SYNTH_DONE) { 336 // this struct was allocated in the original android_tts_SynthProxy_speak call, 337 // all processing matching this call is now done. 338 LOGV("Speech synthesis done."); 339 if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY) { 340 // only delete for direct playback. When writing to a file, we still have work to do 341 // in android_tts_SynthProxy_synthesizeToFile. The struct will be deleted there. 342 delete pForAfter; 343 pForAfter = NULL; 344 } 345 return TTS_CALLBACK_HALT; 346 } 347 348 // we don't update the wav (output) parameter as we'll let the next callback 349 // write at the same location, we've consumed the data already, but we need 350 // to update bufferSize to let the TTS engine know how much it can write the 351 // next time it calls this function. 352 bufferSize = pJniData->mBufferSize; 353 354 return TTS_CALLBACK_CONTINUE; 355} 356 357 358// ---------------------------------------------------------------------------- 359static int 360android_tts_SynthProxy_setLowShelf(JNIEnv *env, jobject thiz, jboolean applyFilter, 361 jfloat filterGain, jfloat attenuationInDb, jfloat freqInHz, jfloat slope) 362{ 363 int result = TTS_SUCCESS; 364 365 bUseFilter = applyFilter; 366 if (applyFilter) { 367 fFilterLowshelfAttenuation = attenuationInDb; 368 fFilterTransitionFreq = freqInHz; 369 fFilterShelfSlope = slope; 370 fFilterGain = filterGain; 371 372 if (fFilterShelfSlope != 0.0f) { 373 initializeEQ(); 374 } else { 375 LOGE("Invalid slope, can't be null"); 376 result = TTS_FAILURE; 377 } 378 } 379 380 return result; 381} 382 383// ---------------------------------------------------------------------------- 384static int 385android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz, 386 jobject weak_this, jstring nativeSoLib, jstring engConfig) 387{ 388 int result = TTS_FAILURE; 389 390 bUseFilter = false; 391 392 SynthProxyJniStorage* pJniStorage = new SynthProxyJniStorage(); 393 394 prepAudioTrack(pJniStorage, 395 DEFAULT_TTS_STREAM_TYPE, DEFAULT_TTS_RATE, DEFAULT_TTS_FORMAT, DEFAULT_TTS_NB_CHANNELS); 396 397 const char *nativeSoLibNativeString = env->GetStringUTFChars(nativeSoLib, 0); 398 const char *engConfigString = env->GetStringUTFChars(engConfig, 0); 399 400 void *engine_lib_handle = dlopen(nativeSoLibNativeString, 401 RTLD_NOW | RTLD_LOCAL); 402 if (engine_lib_handle == NULL) { 403 LOGE("android_tts_SynthProxy_native_setup(): engine_lib_handle == NULL"); 404 } else { 405 TtsEngine *(*get_TtsEngine)() = 406 reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine")); 407 408 pJniStorage->mNativeSynthInterface = (*get_TtsEngine)(); 409 pJniStorage->mEngineLibHandle = engine_lib_handle; 410 411 if (pJniStorage->mNativeSynthInterface) { 412 Mutex::Autolock l(engineMutex); 413 pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB, engConfigString); 414 } 415 416 result = TTS_SUCCESS; 417 } 418 419 // we use a weak reference so the SynthProxy object can be garbage collected. 420 pJniStorage->tts_ref = env->NewGlobalRef(weak_this); 421 422 // save the JNI resources so we can use them (and free them) later 423 env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, (int)pJniStorage); 424 425 env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString); 426 env->ReleaseStringUTFChars(engConfig, engConfigString); 427 428 return result; 429} 430 431 432static void 433android_tts_SynthProxy_native_finalize(JNIEnv *env, jobject thiz, jint jniData) 434{ 435 //LOGV("entering android_tts_SynthProxy_finalize()"); 436 if (jniData == 0) { 437 //LOGE("android_tts_SynthProxy_native_finalize(): invalid JNI data"); 438 return; 439 } 440 441 Mutex::Autolock l(engineMutex); 442 443 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 444 env->DeleteGlobalRef(pSynthData->tts_ref); 445 delete pSynthData; 446 447 env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, 0); 448} 449 450 451static void 452android_tts_SynthProxy_shutdown(JNIEnv *env, jobject thiz, jint jniData) 453{ 454 //LOGV("entering android_tts_SynthProxy_shutdown()"); 455 456 // do everything a call to finalize would 457 android_tts_SynthProxy_native_finalize(env, thiz, jniData); 458} 459 460 461static int 462android_tts_SynthProxy_isLanguageAvailable(JNIEnv *env, jobject thiz, jint jniData, 463 jstring language, jstring country, jstring variant) 464{ 465 int result = TTS_LANG_NOT_SUPPORTED; 466 467 if (jniData == 0) { 468 LOGE("android_tts_SynthProxy_isLanguageAvailable(): invalid JNI data"); 469 return result; 470 } 471 472 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 473 const char *langNativeString = env->GetStringUTFChars(language, 0); 474 const char *countryNativeString = env->GetStringUTFChars(country, 0); 475 const char *variantNativeString = env->GetStringUTFChars(variant, 0); 476 477 if (pSynthData->mNativeSynthInterface) { 478 result = pSynthData->mNativeSynthInterface->isLanguageAvailable(langNativeString, 479 countryNativeString, variantNativeString); 480 } 481 env->ReleaseStringUTFChars(language, langNativeString); 482 env->ReleaseStringUTFChars(country, countryNativeString); 483 env->ReleaseStringUTFChars(variant, variantNativeString); 484 return result; 485} 486 487static int 488android_tts_SynthProxy_setConfig(JNIEnv *env, jobject thiz, jint jniData, jstring engineConfig) 489{ 490 int result = TTS_FAILURE; 491 492 if (jniData == 0) { 493 LOGE("android_tts_SynthProxy_setConfig(): invalid JNI data"); 494 return result; 495 } 496 497 Mutex::Autolock l(engineMutex); 498 499 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 500 const char *engineConfigNativeString = env->GetStringUTFChars(engineConfig, 0); 501 502 if (pSynthData->mNativeSynthInterface) { 503 result = pSynthData->mNativeSynthInterface->setProperty(ANDROID_TTS_ENGINE_PROPERTY_CONFIG, 504 engineConfigNativeString, strlen(engineConfigNativeString)); 505 } 506 env->ReleaseStringUTFChars(engineConfig, engineConfigNativeString); 507 508 return result; 509} 510 511static int 512android_tts_SynthProxy_setLanguage(JNIEnv *env, jobject thiz, jint jniData, 513 jstring language, jstring country, jstring variant) 514{ 515 int result = TTS_LANG_NOT_SUPPORTED; 516 517 if (jniData == 0) { 518 LOGE("android_tts_SynthProxy_setLanguage(): invalid JNI data"); 519 return result; 520 } 521 522 Mutex::Autolock l(engineMutex); 523 524 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 525 const char *langNativeString = env->GetStringUTFChars(language, 0); 526 const char *countryNativeString = env->GetStringUTFChars(country, 0); 527 const char *variantNativeString = env->GetStringUTFChars(variant, 0); 528 529 if (pSynthData->mNativeSynthInterface) { 530 result = pSynthData->mNativeSynthInterface->setLanguage(langNativeString, 531 countryNativeString, variantNativeString); 532 } 533 env->ReleaseStringUTFChars(language, langNativeString); 534 env->ReleaseStringUTFChars(country, countryNativeString); 535 env->ReleaseStringUTFChars(variant, variantNativeString); 536 return result; 537} 538 539 540static int 541android_tts_SynthProxy_loadLanguage(JNIEnv *env, jobject thiz, jint jniData, 542 jstring language, jstring country, jstring variant) 543{ 544 int result = TTS_LANG_NOT_SUPPORTED; 545 546 if (jniData == 0) { 547 LOGE("android_tts_SynthProxy_loadLanguage(): invalid JNI data"); 548 return result; 549 } 550 551 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 552 const char *langNativeString = env->GetStringUTFChars(language, 0); 553 const char *countryNativeString = env->GetStringUTFChars(country, 0); 554 const char *variantNativeString = env->GetStringUTFChars(variant, 0); 555 556 if (pSynthData->mNativeSynthInterface) { 557 result = pSynthData->mNativeSynthInterface->loadLanguage(langNativeString, 558 countryNativeString, variantNativeString); 559 } 560 env->ReleaseStringUTFChars(language, langNativeString); 561 env->ReleaseStringUTFChars(country, countryNativeString); 562 env->ReleaseStringUTFChars(variant, variantNativeString); 563 564 return result; 565} 566 567 568static int 569android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData, 570 jint speechRate) 571{ 572 int result = TTS_FAILURE; 573 574 if (jniData == 0) { 575 LOGE("android_tts_SynthProxy_setSpeechRate(): invalid JNI data"); 576 return result; 577 } 578 579 int bufSize = 12; 580 char buffer [bufSize]; 581 sprintf(buffer, "%d", speechRate); 582 583 Mutex::Autolock l(engineMutex); 584 585 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 586 LOGI("setting speech rate to %d", speechRate); 587 588 if (pSynthData->mNativeSynthInterface) { 589 result = pSynthData->mNativeSynthInterface->setProperty("rate", buffer, bufSize); 590 } 591 592 return result; 593} 594 595 596static int 597android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData, 598 jint pitch) 599{ 600 int result = TTS_FAILURE; 601 602 if (jniData == 0) { 603 LOGE("android_tts_SynthProxy_setPitch(): invalid JNI data"); 604 return result; 605 } 606 607 Mutex::Autolock l(engineMutex); 608 609 int bufSize = 12; 610 char buffer [bufSize]; 611 sprintf(buffer, "%d", pitch); 612 613 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 614 LOGI("setting pitch to %d", pitch); 615 616 if (pSynthData->mNativeSynthInterface) { 617 result = pSynthData->mNativeSynthInterface->setProperty("pitch", buffer, bufSize); 618 } 619 620 return result; 621} 622 623 624static int 625android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData, 626 jstring textJavaString, jstring filenameJavaString) 627{ 628 int result = TTS_FAILURE; 629 630 if (jniData == 0) { 631 LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid JNI data"); 632 return result; 633 } 634 635 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 636 if (!pSynthData->mNativeSynthInterface) { 637 LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid engine handle"); 638 return result; 639 } 640 641 initializeFilter(); 642 643 Mutex::Autolock l(engineMutex); 644 645 // Retrieve audio parameters before writing the file header 646 AudioSystem::audio_format encoding = DEFAULT_TTS_FORMAT; 647 uint32_t rate = DEFAULT_TTS_RATE; 648 int channels = DEFAULT_TTS_NB_CHANNELS; 649 pSynthData->mNativeSynthInterface->setAudioFormat(encoding, rate, channels); 650 651 if ((encoding != AudioSystem::PCM_16_BIT) && (encoding != AudioSystem::PCM_8_BIT)) { 652 LOGE("android_tts_SynthProxy_synthesizeToFile(): engine uses invalid format"); 653 return result; 654 } 655 656 const char *filenameNativeString = 657 env->GetStringUTFChars(filenameJavaString, 0); 658 const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); 659 660 afterSynthData_t* pForAfter = new (afterSynthData_t); 661 pForAfter->jniStorage = jniData; 662 pForAfter->usageMode = USAGEMODE_WRITE_TO_FILE; 663 664 pForAfter->outputFile = fopen(filenameNativeString, "wb"); 665 666 if (pForAfter->outputFile == NULL) { 667 LOGE("android_tts_SynthProxy_synthesizeToFile(): error creating output file"); 668 delete pForAfter; 669 return result; 670 } 671 672 // Write 44 blank bytes for WAV header, then come back and fill them in 673 // after we've written the audio data 674 char header[44]; 675 fwrite(header, 1, 44, pForAfter->outputFile); 676 677 unsigned int unique_identifier; 678 679 memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize); 680 result = pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, 681 pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter); 682 683 long filelen = ftell(pForAfter->outputFile); 684 685 int samples = (((int)filelen) - 44) / 2; 686 header[0] = 'R'; 687 header[1] = 'I'; 688 header[2] = 'F'; 689 header[3] = 'F'; 690 ((uint32_t *)(&header[4]))[0] = filelen - 8; 691 header[8] = 'W'; 692 header[9] = 'A'; 693 header[10] = 'V'; 694 header[11] = 'E'; 695 696 header[12] = 'f'; 697 header[13] = 'm'; 698 header[14] = 't'; 699 header[15] = ' '; 700 701 ((uint32_t *)(&header[16]))[0] = 16; // size of fmt 702 703 int sampleSizeInByte = (encoding == AudioSystem::PCM_16_BIT ? 2 : 1); 704 705 ((unsigned short *)(&header[20]))[0] = 1; // format 706 ((unsigned short *)(&header[22]))[0] = channels; // channels 707 ((uint32_t *)(&header[24]))[0] = rate; // samplerate 708 ((uint32_t *)(&header[28]))[0] = rate * sampleSizeInByte * channels;// byterate 709 ((unsigned short *)(&header[32]))[0] = sampleSizeInByte * channels; // block align 710 ((unsigned short *)(&header[34]))[0] = sampleSizeInByte * 8; // bits per sample 711 712 header[36] = 'd'; 713 header[37] = 'a'; 714 header[38] = 't'; 715 header[39] = 'a'; 716 717 ((uint32_t *)(&header[40]))[0] = samples * 2; // size of data 718 719 // Skip back to the beginning and rewrite the header 720 fseek(pForAfter->outputFile, 0, SEEK_SET); 721 fwrite(header, 1, 44, pForAfter->outputFile); 722 723 fflush(pForAfter->outputFile); 724 fclose(pForAfter->outputFile); 725 726 delete pForAfter; 727 pForAfter = NULL; 728 729 env->ReleaseStringUTFChars(textJavaString, textNativeString); 730 env->ReleaseStringUTFChars(filenameJavaString, filenameNativeString); 731 732 return result; 733} 734 735 736static int 737android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData, 738 jstring textJavaString, jint javaStreamType) 739{ 740 int result = TTS_FAILURE; 741 742 if (jniData == 0) { 743 LOGE("android_tts_SynthProxy_speak(): invalid JNI data"); 744 return result; 745 } 746 747 initializeFilter(); 748 749 Mutex::Autolock l(engineMutex); 750 751 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 752 753 pSynthData->mPlayLock.lock(); 754 pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING; 755 pSynthData->mPlayLock.unlock(); 756 757 afterSynthData_t* pForAfter = new (afterSynthData_t); 758 pForAfter->jniStorage = jniData; 759 pForAfter->usageMode = USAGEMODE_PLAY_IMMEDIATELY; 760 pForAfter->streamType = (AudioSystem::stream_type) javaStreamType; 761 762 if (pSynthData->mNativeSynthInterface) { 763 const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); 764 memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize); 765 result = pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, 766 pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter); 767 env->ReleaseStringUTFChars(textJavaString, textNativeString); 768 } 769 770 return result; 771} 772 773 774static int 775android_tts_SynthProxy_stop(JNIEnv *env, jobject thiz, jint jniData) 776{ 777 int result = TTS_FAILURE; 778 779 if (jniData == 0) { 780 LOGE("android_tts_SynthProxy_stop(): invalid JNI data"); 781 return result; 782 } 783 784 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 785 786 pSynthData->mPlayLock.lock(); 787 pSynthData->mPlayState = SYNTHPLAYSTATE_IS_STOPPED; 788 if (pSynthData->mAudioOut) { 789 pSynthData->mAudioOut->stop(); 790 } 791 pSynthData->mPlayLock.unlock(); 792 793 if (pSynthData->mNativeSynthInterface) { 794 result = pSynthData->mNativeSynthInterface->stop(); 795 } 796 797 return result; 798} 799 800 801static int 802android_tts_SynthProxy_stopSync(JNIEnv *env, jobject thiz, jint jniData) 803{ 804 int result = TTS_FAILURE; 805 806 if (jniData == 0) { 807 LOGE("android_tts_SynthProxy_stop(): invalid JNI data"); 808 return result; 809 } 810 811 // perform a regular stop 812 result = android_tts_SynthProxy_stop(env, thiz, jniData); 813 // but wait on the engine having released the engine mutex which protects 814 // the synthesizer resources. 815 engineMutex.lock(); 816 engineMutex.unlock(); 817 818 return result; 819} 820 821 822static jobjectArray 823android_tts_SynthProxy_getLanguage(JNIEnv *env, jobject thiz, jint jniData) 824{ 825 if (jniData == 0) { 826 LOGE("android_tts_SynthProxy_getLanguage(): invalid JNI data"); 827 return NULL; 828 } 829 830 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 831 832 if (pSynthData->mNativeSynthInterface) { 833 size_t bufSize = 100; 834 char lang[bufSize]; 835 char country[bufSize]; 836 char variant[bufSize]; 837 memset(lang, 0, bufSize); 838 memset(country, 0, bufSize); 839 memset(variant, 0, bufSize); 840 jobjectArray retLocale = (jobjectArray)env->NewObjectArray(3, 841 env->FindClass("java/lang/String"), env->NewStringUTF("")); 842 pSynthData->mNativeSynthInterface->getLanguage(lang, country, variant); 843 env->SetObjectArrayElement(retLocale, 0, env->NewStringUTF(lang)); 844 env->SetObjectArrayElement(retLocale, 1, env->NewStringUTF(country)); 845 env->SetObjectArrayElement(retLocale, 2, env->NewStringUTF(variant)); 846 return retLocale; 847 } else { 848 return NULL; 849 } 850} 851 852 853JNIEXPORT int JNICALL 854android_tts_SynthProxy_getRate(JNIEnv *env, jobject thiz, jint jniData) 855{ 856 if (jniData == 0) { 857 LOGE("android_tts_SynthProxy_getRate(): invalid JNI data"); 858 return 0; 859 } 860 861 SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; 862 size_t bufSize = 100; 863 864 char buf[bufSize]; 865 memset(buf, 0, bufSize); 866 // TODO check return codes 867 if (pSynthData->mNativeSynthInterface) { 868 pSynthData->mNativeSynthInterface->getProperty("rate", buf, &bufSize); 869 } 870 return atoi(buf); 871} 872 873// Dalvik VM type signatures 874static JNINativeMethod gMethods[] = { 875 { "native_stop", 876 "(I)I", 877 (void*)android_tts_SynthProxy_stop 878 }, 879 { "native_stopSync", 880 "(I)I", 881 (void*)android_tts_SynthProxy_stopSync 882 }, 883 { "native_speak", 884 "(ILjava/lang/String;I)I", 885 (void*)android_tts_SynthProxy_speak 886 }, 887 { "native_synthesizeToFile", 888 "(ILjava/lang/String;Ljava/lang/String;)I", 889 (void*)android_tts_SynthProxy_synthesizeToFile 890 }, 891 { "native_isLanguageAvailable", 892 "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I", 893 (void*)android_tts_SynthProxy_isLanguageAvailable 894 }, 895 { "native_setConfig", 896 "(ILjava/lang/String;)I", 897 (void*)android_tts_SynthProxy_setConfig 898 }, 899 { "native_setLanguage", 900 "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I", 901 (void*)android_tts_SynthProxy_setLanguage 902 }, 903 { "native_loadLanguage", 904 "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I", 905 (void*)android_tts_SynthProxy_loadLanguage 906 }, 907 { "native_setSpeechRate", 908 "(II)I", 909 (void*)android_tts_SynthProxy_setSpeechRate 910 }, 911 { "native_setPitch", 912 "(II)I", 913 (void*)android_tts_SynthProxy_setPitch 914 }, 915 { "native_getLanguage", 916 "(I)[Ljava/lang/String;", 917 (void*)android_tts_SynthProxy_getLanguage 918 }, 919 { "native_getRate", 920 "(I)I", 921 (void*)android_tts_SynthProxy_getRate 922 }, 923 { "native_shutdown", 924 "(I)V", 925 (void*)android_tts_SynthProxy_shutdown 926 }, 927 { "native_setup", 928 "(Ljava/lang/Object;Ljava/lang/String;Ljava/lang/String;)I", 929 (void*)android_tts_SynthProxy_native_setup 930 }, 931 { "native_setLowShelf", 932 "(ZFFFF)I", 933 (void*)android_tts_SynthProxy_setLowShelf 934 }, 935 { "native_finalize", 936 "(I)V", 937 (void*)android_tts_SynthProxy_native_finalize 938 } 939}; 940 941#define SP_JNIDATA_FIELD_NAME "mJniData" 942#define SP_POSTSPEECHSYNTHESIZED_METHOD_NAME "postNativeSpeechSynthesizedInJava" 943 944static const char* const kClassPathName = "android/tts/SynthProxy"; 945 946jint JNI_OnLoad(JavaVM* vm, void* reserved) 947{ 948 JNIEnv* env = NULL; 949 jint result = -1; 950 jclass clazz; 951 952 if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) { 953 LOGE("ERROR: GetEnv failed\n"); 954 goto bail; 955 } 956 assert(env != NULL); 957 958 clazz = env->FindClass(kClassPathName); 959 if (clazz == NULL) { 960 LOGE("Can't find %s", kClassPathName); 961 goto bail; 962 } 963 964 javaTTSFields.synthProxyClass = clazz; 965 javaTTSFields.synthProxyFieldJniData = NULL; 966 javaTTSFields.synthProxyMethodPost = NULL; 967 968 javaTTSFields.synthProxyFieldJniData = env->GetFieldID(clazz, 969 SP_JNIDATA_FIELD_NAME, "I"); 970 if (javaTTSFields.synthProxyFieldJniData == NULL) { 971 LOGE("Can't find %s.%s field", kClassPathName, SP_JNIDATA_FIELD_NAME); 972 goto bail; 973 } 974 975 javaTTSFields.synthProxyMethodPost = env->GetStaticMethodID(clazz, 976 SP_POSTSPEECHSYNTHESIZED_METHOD_NAME, "(Ljava/lang/Object;II)V"); 977 if (javaTTSFields.synthProxyMethodPost == NULL) { 978 LOGE("Can't find %s.%s method", kClassPathName, SP_POSTSPEECHSYNTHESIZED_METHOD_NAME); 979 goto bail; 980 } 981 982 if (jniRegisterNativeMethods( 983 env, kClassPathName, gMethods, NELEM(gMethods)) < 0) 984 goto bail; 985 986 /* success -- return valid version number */ 987 result = JNI_VERSION_1_4; 988 989 bail: 990 return result; 991} 992