1/*
2 * Copyright (C) 2009 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <stdio.h>
18#include <unistd.h>
19
20#define LOG_TAG "SynthProxy"
21
22#include <utils/Log.h>
23#include <nativehelper/jni.h>
24#include <nativehelper/JNIHelp.h>
25#include <android_runtime/AndroidRuntime.h>
26#include <tts/TtsEngine.h>
27#include <media/AudioTrack.h>
28#include <math.h>
29
30#include <dlfcn.h>
31
// Audio defaults applied to a freshly created SynthProxyJniStorage and to the
// AudioTrack prepared in native_setup. The buffer size is in bytes.
#define DEFAULT_TTS_RATE        16000
#define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_NB_CHANNELS 1
#define DEFAULT_TTS_BUFFERSIZE  2048
// TODO use the TTS stream type when available
#define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC

// EQ + BOOST parameters
// Initial values of the low-shelf filter settings; they can be replaced at
// runtime through native_setLowShelf.
#define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
#define FILTER_TRANSITION_FREQ 1100.0f     // in Hz
#define FILTER_SHELF_SLOPE 1.0f            // Q
#define FILTER_GAIN 5.5f // linear gain

// Values of afterSynthData_t::usageMode: what the synthesis callback does
// with each buffer of audio.
#define USAGEMODE_PLAY_IMMEDIATELY 0
#define USAGEMODE_WRITE_TO_FILE    1

// Values of SynthProxyJniStorage::mPlayState.
#define SYNTHPLAYSTATE_IS_STOPPED 0
#define SYNTHPLAYSTATE_IS_PLAYING 1
50
51using namespace android;
52
53// ----------------------------------------------------------------------------
// JNI identifiers of the Java SynthProxy class, looked up once in JNI_OnLoad.
struct fields_t {
    jfieldID    synthProxyFieldJniData;  // int field that stores the native SynthProxyJniStorage pointer
    jclass      synthProxyClass;         // the SynthProxy class itself
    jmethodID   synthProxyMethodPost;    // static callback method; looked up but not invoked in this file
};
59
// structure to hold the data that is used each time the TTS engine has synthesized more data
// One instance is allocated per speak / synthesizeToFile request and handed to
// the engine as the opaque "userdata" that comes back in ttsSynthDoneCB.
struct afterSynthData_t {
    jint jniStorage;                      // SynthProxyJniStorage pointer carried as an integer
    int  usageMode;                       // USAGEMODE_PLAY_IMMEDIATELY or USAGEMODE_WRITE_TO_FILE
    FILE* outputFile;                     // destination file; only set for USAGEMODE_WRITE_TO_FILE
    AudioSystem::stream_type streamType;  // playback stream; only set for USAGEMODE_PLAY_IMMEDIATELY
};
67
68// ----------------------------------------------------------------------------
// EQ data
// File-wide state of the single low-shelf biquad filter. There is one shared
// copy, so it is common to every SynthProxy instance in the process.

// Intermediate values computed by initializeEQ().
double amp;   // 10^(attenuation in dB / 40)
double w;     // 2*pi * transition frequency / DEFAULT_TTS_RATE
double sinw;  // sin(w)
double cosw;  // cos(w)
double beta;  // sqrt(amp) / shelf slope
// Raw biquad coefficients before normalisation by a0.
double a0, a1, a2, b0, b1, b2;
// Coefficients actually used by applyFilter(): m_fa..m_fc weight x[n]..x[n-2]
// (and include the gain), m_fd and m_fe weight y[n-1] and y[n-2].
double m_fa, m_fb, m_fc, m_fd, m_fe;
// Filter history, cleared by initializeFilter() and updated by applyFilter().
double x0;  // x[n]
double x1;  // x[n-1]
double x2;  // x[n-2]
double out0;// y[n]
double out1;// y[n-1]
double out2;// y[n-2]

// Current filter settings; overwritten by android_tts_SynthProxy_setLowShelf.
static float fFilterLowshelfAttenuation = FILTER_LOWSHELF_ATTENUATION;
static float fFilterTransitionFreq = FILTER_TRANSITION_FREQ;
static float fFilterShelfSlope = FILTER_SHELF_SLOPE;
static float fFilterGain = FILTER_GAIN;
// When true, ttsSynthDoneCB runs applyFilter() on each buffer before output.
static bool  bUseFilter = false;
89
90void initializeEQ() {
91
92    amp = float(pow(10.0, fFilterLowshelfAttenuation / 40.0));
93    w = 2.0 * M_PI * (fFilterTransitionFreq / DEFAULT_TTS_RATE);
94    sinw = float(sin(w));
95    cosw = float(cos(w));
96    beta = float(sqrt(amp)/fFilterShelfSlope);
97
98    // initialize low-shelf parameters
99    b0 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) + (beta*sinw));
100    b1 = 2.0F * amp * ((amp-1.0F) - ((amp+1.0F)*cosw));
101    b2 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) - (beta*sinw));
102    a0 = (amp+1.0F) + ((amp-1.0F)*cosw) + (beta*sinw);
103    a1 = 2.0F * ((amp-1.0F) + ((amp+1.0F)*cosw));
104    a2 = -((amp+1.0F) + ((amp-1.0F)*cosw) - (beta*sinw));
105
106    m_fa = fFilterGain * b0/a0;
107    m_fb = fFilterGain * b1/a0;
108    m_fc = fFilterGain * b2/a0;
109    m_fd = a1/a0;
110    m_fe = a2/a0;
111}
112
113void initializeFilter() {
114    x0 = 0.0f;
115    x1 = 0.0f;
116    x2 = 0.0f;
117    out0 = 0.0f;
118    out1 = 0.0f;
119    out2 = 0.0f;
120}
121
122void applyFilter(int16_t* buffer, size_t sampleCount) {
123
124    for (size_t i=0 ; i<sampleCount ; i++) {
125
126        x0 = (double) buffer[i];
127
128        out0 = (m_fa*x0) + (m_fb*x1) + (m_fc*x2) + (m_fd*out1) + (m_fe*out2);
129
130        x2 = x1;
131        x1 = x0;
132
133        out2 = out1;
134        out1 = out0;
135
136        if (out0 > 32767.0f) {
137            buffer[i] = 32767;
138        } else if (out0 < -32768.0f) {
139            buffer[i] = -32768;
140        } else {
141            buffer[i] = (int16_t) out0;
142        }
143    }
144}
145
146
147// ----------------------------------------------------------------------------
// JNI identifiers of the Java SynthProxy class, populated by JNI_OnLoad.
static fields_t javaTTSFields;

// TODO move to synth member once we have multiple simultaneous engines running
// Serializes the native calls that take it (setup, finalize, setConfig,
// setLanguage, setSpeechRate, setPitch, speak, synthesizeToFile); stopSync
// acquires it to wait for such a call to finish.
static Mutex engineMutex;
152
153// ----------------------------------------------------------------------------
154class SynthProxyJniStorage {
155    public :
156        jobject                   tts_ref;
157        TtsEngine*                mNativeSynthInterface;
158        void*                     mEngineLibHandle;
159        AudioTrack*               mAudioOut;
160        int8_t                    mPlayState;
161        Mutex                     mPlayLock;
162        AudioSystem::stream_type  mStreamType;
163        uint32_t                  mSampleRate;
164        uint32_t                  mAudFormat;
165        int                       mNbChannels;
166        int8_t *                  mBuffer;
167        size_t                    mBufferSize;
168
169        SynthProxyJniStorage() {
170            tts_ref = NULL;
171            mNativeSynthInterface = NULL;
172            mEngineLibHandle = NULL;
173            mAudioOut = NULL;
174            mPlayState =  SYNTHPLAYSTATE_IS_STOPPED;
175            mStreamType = DEFAULT_TTS_STREAM_TYPE;
176            mSampleRate = DEFAULT_TTS_RATE;
177            mAudFormat  = DEFAULT_TTS_FORMAT;
178            mNbChannels = DEFAULT_TTS_NB_CHANNELS;
179            mBufferSize = DEFAULT_TTS_BUFFERSIZE;
180            mBuffer = new int8_t[mBufferSize];
181            memset(mBuffer, 0, mBufferSize);
182        }
183
184        ~SynthProxyJniStorage() {
185            //LOGV("entering ~SynthProxyJniStorage()");
186            killAudio();
187            if (mNativeSynthInterface) {
188                mNativeSynthInterface->shutdown();
189                mNativeSynthInterface = NULL;
190            }
191            if (mEngineLibHandle) {
192                //LOGE("~SynthProxyJniStorage(): before close library");
193                int res = dlclose(mEngineLibHandle);
194                LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res);
195            }
196            delete mBuffer;
197        }
198
199        void killAudio() {
200            if (mAudioOut) {
201                mAudioOut->stop();
202                delete mAudioOut;
203                mAudioOut = NULL;
204            }
205        }
206
207        void createAudioOut(AudioSystem::stream_type streamType, uint32_t rate,
208                AudioSystem::audio_format format, int channel) {
209            mSampleRate = rate;
210            mAudFormat  = format;
211            mNbChannels = channel;
212            mStreamType = streamType;
213
214            // retrieve system properties to ensure successful creation of the
215            // AudioTrack object for playback
216            int afSampleRate;
217            if (AudioSystem::getOutputSamplingRate(&afSampleRate, mStreamType) != NO_ERROR) {
218                afSampleRate = 44100;
219            }
220            int afFrameCount;
221            if (AudioSystem::getOutputFrameCount(&afFrameCount, mStreamType) != NO_ERROR) {
222                afFrameCount = 2048;
223            }
224            uint32_t afLatency;
225            if (AudioSystem::getOutputLatency(&afLatency, mStreamType) != NO_ERROR) {
226                afLatency = 500;
227            }
228            uint32_t minBufCount = afLatency / ((1000 * afFrameCount)/afSampleRate);
229            if (minBufCount < 2) minBufCount = 2;
230            int minFrameCount = (afFrameCount * rate * minBufCount)/afSampleRate;
231
232            mPlayLock.lock();
233            mAudioOut = new AudioTrack(mStreamType, rate, format,
234                    (channel == 2) ? AudioSystem::CHANNEL_OUT_STEREO : AudioSystem::CHANNEL_OUT_MONO,
235                    minFrameCount > 4096 ? minFrameCount : 4096,
236                    0, 0, 0, 0); // not using an AudioTrack callback
237
238            if (mAudioOut->initCheck() != NO_ERROR) {
239              LOGE("createAudioOut(): AudioTrack error");
240              delete mAudioOut;
241              mAudioOut = NULL;
242            } else {
243              //LOGI("AudioTrack OK");
244              mAudioOut->setVolume(1.0f, 1.0f);
245              LOGV("AudioTrack ready");
246            }
247            mPlayLock.unlock();
248        }
249};
250
251
252// ----------------------------------------------------------------------------
253void prepAudioTrack(SynthProxyJniStorage* pJniData, AudioSystem::stream_type streamType,
254        uint32_t rate, AudioSystem::audio_format format, int channel) {
255    // Don't bother creating a new audiotrack object if the current
256    // object is already initialized with the same audio parameters.
257    if ( pJniData->mAudioOut &&
258         (rate == pJniData->mSampleRate) &&
259         (format == pJniData->mAudFormat) &&
260         (channel == pJniData->mNbChannels) &&
261         (streamType == pJniData->mStreamType) ){
262        return;
263    }
264    if (pJniData->mAudioOut){
265        pJniData->killAudio();
266    }
267    pJniData->createAudioOut(streamType, rate, format, channel);
268}
269
270
271// ----------------------------------------------------------------------------
272/*
273 * Callback from TTS engine.
274 * Directly speaks using AudioTrack or write to file
275 */
276static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
277                           uint32_t format, int channel,
278                           int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
279    //LOGV("ttsSynthDoneCallback: %d bytes", bufferSize);
280
281    if (userdata == NULL){
282        LOGE("userdata == NULL");
283        return TTS_CALLBACK_HALT;
284    }
285    afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
286    SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);
287
288    if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
289        //LOGV("Direct speech");
290
291        if (wav == NULL) {
292            delete pForAfter;
293            LOGV("Null: speech has completed");
294        }
295
296        if (bufferSize > 0) {
297            prepAudioTrack(pJniData, pForAfter->streamType, rate, (AudioSystem::audio_format)format, channel);
298            if (pJniData->mAudioOut) {
299                pJniData->mPlayLock.lock();
300                if(pJniData->mAudioOut->stopped()
301                        && (pJniData->mPlayState == SYNTHPLAYSTATE_IS_PLAYING)) {
302                    pJniData->mAudioOut->start();
303                }
304                pJniData->mPlayLock.unlock();
305                if (bUseFilter) {
306                    applyFilter((int16_t*)wav, bufferSize/2);
307                }
308                pJniData->mAudioOut->write(wav, bufferSize);
309                memset(wav, 0, bufferSize);
310                //LOGV("AudioTrack wrote: %d bytes", bufferSize);
311            } else {
312                LOGE("Can't play, null audiotrack");
313            }
314        }
315    } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
316        //LOGV("Save to file");
317        if (wav == NULL) {
318            delete pForAfter;
319            LOGV("Null: speech has completed");
320            return TTS_CALLBACK_HALT;
321        }
322        if (bufferSize > 0){
323            if (bUseFilter) {
324                applyFilter((int16_t*)wav, bufferSize/2);
325            }
326            fwrite(wav, 1, bufferSize, pForAfter->outputFile);
327            memset(wav, 0, bufferSize);
328        }
329    }
330    // Future update:
331    //      For sync points in the speech, call back into the SynthProxy class through the
332    //      javaTTSFields.synthProxyMethodPost methode to notify
333    //      playback has completed if the synthesis is done or if a marker has been reached.
334
335    if (status == TTS_SYNTH_DONE) {
336        // this struct was allocated in the original android_tts_SynthProxy_speak call,
337        // all processing matching this call is now done.
338        LOGV("Speech synthesis done.");
339        if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY) {
340            // only delete for direct playback. When writing to a file, we still have work to do
341            // in android_tts_SynthProxy_synthesizeToFile. The struct will be deleted there.
342            delete pForAfter;
343            pForAfter = NULL;
344        }
345        return TTS_CALLBACK_HALT;
346    }
347
348    // we don't update the wav (output) parameter as we'll let the next callback
349    // write at the same location, we've consumed the data already, but we need
350    // to update bufferSize to let the TTS engine know how much it can write the
351    // next time it calls this function.
352    bufferSize = pJniData->mBufferSize;
353
354    return TTS_CALLBACK_CONTINUE;
355}
356
357
358// ----------------------------------------------------------------------------
359static int
360android_tts_SynthProxy_setLowShelf(JNIEnv *env, jobject thiz, jboolean applyFilter,
361        jfloat filterGain, jfloat attenuationInDb, jfloat freqInHz, jfloat slope)
362{
363    int result = TTS_SUCCESS;
364
365    bUseFilter = applyFilter;
366    if (applyFilter) {
367        fFilterLowshelfAttenuation = attenuationInDb;
368        fFilterTransitionFreq = freqInHz;
369        fFilterShelfSlope = slope;
370        fFilterGain = filterGain;
371
372        if (fFilterShelfSlope != 0.0f) {
373            initializeEQ();
374        } else {
375            LOGE("Invalid slope, can't be null");
376            result = TTS_FAILURE;
377        }
378    }
379
380    return result;
381}
382
383// ----------------------------------------------------------------------------
384static int
385android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
386        jobject weak_this, jstring nativeSoLib, jstring engConfig)
387{
388    int result = TTS_FAILURE;
389
390    bUseFilter = false;
391
392    SynthProxyJniStorage* pJniStorage = new SynthProxyJniStorage();
393
394    prepAudioTrack(pJniStorage,
395            DEFAULT_TTS_STREAM_TYPE, DEFAULT_TTS_RATE, DEFAULT_TTS_FORMAT, DEFAULT_TTS_NB_CHANNELS);
396
397    const char *nativeSoLibNativeString =  env->GetStringUTFChars(nativeSoLib, 0);
398    const char *engConfigString = env->GetStringUTFChars(engConfig, 0);
399
400    void *engine_lib_handle = dlopen(nativeSoLibNativeString,
401            RTLD_NOW | RTLD_LOCAL);
402    if (engine_lib_handle == NULL) {
403       LOGE("android_tts_SynthProxy_native_setup(): engine_lib_handle == NULL");
404    } else {
405        TtsEngine *(*get_TtsEngine)() =
406            reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
407
408        pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
409        pJniStorage->mEngineLibHandle = engine_lib_handle;
410
411        if (pJniStorage->mNativeSynthInterface) {
412            Mutex::Autolock l(engineMutex);
413            pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB, engConfigString);
414        }
415
416        result = TTS_SUCCESS;
417    }
418
419    // we use a weak reference so the SynthProxy object can be garbage collected.
420    pJniStorage->tts_ref = env->NewGlobalRef(weak_this);
421
422    // save the JNI resources so we can use them (and free them) later
423    env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, (int)pJniStorage);
424
425    env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString);
426    env->ReleaseStringUTFChars(engConfig, engConfigString);
427
428    return result;
429}
430
431
432static void
433android_tts_SynthProxy_native_finalize(JNIEnv *env, jobject thiz, jint jniData)
434{
435    //LOGV("entering android_tts_SynthProxy_finalize()");
436    if (jniData == 0) {
437        //LOGE("android_tts_SynthProxy_native_finalize(): invalid JNI data");
438        return;
439    }
440
441    Mutex::Autolock l(engineMutex);
442
443    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
444    env->DeleteGlobalRef(pSynthData->tts_ref);
445    delete pSynthData;
446
447    env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, 0);
448}
449
450
/*
 * Explicit shutdown requested from Java. Releases the same native resources
 * as native_finalize by delegating to it, which also resets the Java
 * object's mJniData field to 0.
 */
static void
android_tts_SynthProxy_shutdown(JNIEnv *env, jobject thiz, jint jniData)
{
    //LOGV("entering android_tts_SynthProxy_shutdown()");

    // do everything a call to finalize would
    android_tts_SynthProxy_native_finalize(env, thiz, jniData);
}
459
460
461static int
462android_tts_SynthProxy_isLanguageAvailable(JNIEnv *env, jobject thiz, jint jniData,
463        jstring language, jstring country, jstring variant)
464{
465    int result = TTS_LANG_NOT_SUPPORTED;
466
467    if (jniData == 0) {
468        LOGE("android_tts_SynthProxy_isLanguageAvailable(): invalid JNI data");
469        return result;
470    }
471
472    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
473    const char *langNativeString = env->GetStringUTFChars(language, 0);
474    const char *countryNativeString = env->GetStringUTFChars(country, 0);
475    const char *variantNativeString = env->GetStringUTFChars(variant, 0);
476
477    if (pSynthData->mNativeSynthInterface) {
478        result = pSynthData->mNativeSynthInterface->isLanguageAvailable(langNativeString,
479                countryNativeString, variantNativeString);
480    }
481    env->ReleaseStringUTFChars(language, langNativeString);
482    env->ReleaseStringUTFChars(country, countryNativeString);
483    env->ReleaseStringUTFChars(variant, variantNativeString);
484    return result;
485}
486
487static int
488android_tts_SynthProxy_setConfig(JNIEnv *env, jobject thiz, jint jniData, jstring engineConfig)
489{
490    int result = TTS_FAILURE;
491
492    if (jniData == 0) {
493        LOGE("android_tts_SynthProxy_setConfig(): invalid JNI data");
494        return result;
495    }
496
497    Mutex::Autolock l(engineMutex);
498
499    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
500    const char *engineConfigNativeString = env->GetStringUTFChars(engineConfig, 0);
501
502    if (pSynthData->mNativeSynthInterface) {
503        result = pSynthData->mNativeSynthInterface->setProperty(ANDROID_TTS_ENGINE_PROPERTY_CONFIG,
504                engineConfigNativeString, strlen(engineConfigNativeString));
505    }
506    env->ReleaseStringUTFChars(engineConfig, engineConfigNativeString);
507
508    return result;
509}
510
511static int
512android_tts_SynthProxy_setLanguage(JNIEnv *env, jobject thiz, jint jniData,
513        jstring language, jstring country, jstring variant)
514{
515    int result = TTS_LANG_NOT_SUPPORTED;
516
517    if (jniData == 0) {
518        LOGE("android_tts_SynthProxy_setLanguage(): invalid JNI data");
519        return result;
520    }
521
522    Mutex::Autolock l(engineMutex);
523
524    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
525    const char *langNativeString = env->GetStringUTFChars(language, 0);
526    const char *countryNativeString = env->GetStringUTFChars(country, 0);
527    const char *variantNativeString = env->GetStringUTFChars(variant, 0);
528
529    if (pSynthData->mNativeSynthInterface) {
530        result = pSynthData->mNativeSynthInterface->setLanguage(langNativeString,
531                countryNativeString, variantNativeString);
532    }
533    env->ReleaseStringUTFChars(language, langNativeString);
534    env->ReleaseStringUTFChars(country, countryNativeString);
535    env->ReleaseStringUTFChars(variant, variantNativeString);
536    return result;
537}
538
539
540static int
541android_tts_SynthProxy_loadLanguage(JNIEnv *env, jobject thiz, jint jniData,
542        jstring language, jstring country, jstring variant)
543{
544    int result = TTS_LANG_NOT_SUPPORTED;
545
546    if (jniData == 0) {
547        LOGE("android_tts_SynthProxy_loadLanguage(): invalid JNI data");
548        return result;
549    }
550
551    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
552    const char *langNativeString = env->GetStringUTFChars(language, 0);
553    const char *countryNativeString = env->GetStringUTFChars(country, 0);
554    const char *variantNativeString = env->GetStringUTFChars(variant, 0);
555
556    if (pSynthData->mNativeSynthInterface) {
557        result = pSynthData->mNativeSynthInterface->loadLanguage(langNativeString,
558                countryNativeString, variantNativeString);
559    }
560    env->ReleaseStringUTFChars(language, langNativeString);
561    env->ReleaseStringUTFChars(country, countryNativeString);
562    env->ReleaseStringUTFChars(variant, variantNativeString);
563
564    return result;
565}
566
567
568static int
569android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
570        jint speechRate)
571{
572    int result = TTS_FAILURE;
573
574    if (jniData == 0) {
575        LOGE("android_tts_SynthProxy_setSpeechRate(): invalid JNI data");
576        return result;
577    }
578
579    int bufSize = 12;
580    char buffer [bufSize];
581    sprintf(buffer, "%d", speechRate);
582
583    Mutex::Autolock l(engineMutex);
584
585    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
586    LOGI("setting speech rate to %d", speechRate);
587
588    if (pSynthData->mNativeSynthInterface) {
589        result = pSynthData->mNativeSynthInterface->setProperty("rate", buffer, bufSize);
590    }
591
592    return result;
593}
594
595
596static int
597android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData,
598        jint pitch)
599{
600    int result = TTS_FAILURE;
601
602    if (jniData == 0) {
603        LOGE("android_tts_SynthProxy_setPitch(): invalid JNI data");
604        return result;
605    }
606
607    Mutex::Autolock l(engineMutex);
608
609    int bufSize = 12;
610    char buffer [bufSize];
611    sprintf(buffer, "%d", pitch);
612
613    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
614    LOGI("setting pitch to %d", pitch);
615
616    if (pSynthData->mNativeSynthInterface) {
617        result = pSynthData->mNativeSynthInterface->setProperty("pitch", buffer, bufSize);
618    }
619
620    return result;
621}
622
623
624static int
625android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
626        jstring textJavaString, jstring filenameJavaString)
627{
628    int result = TTS_FAILURE;
629
630    if (jniData == 0) {
631        LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid JNI data");
632        return result;
633    }
634
635    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
636    if (!pSynthData->mNativeSynthInterface) {
637        LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid engine handle");
638        return result;
639    }
640
641    initializeFilter();
642
643    Mutex::Autolock l(engineMutex);
644
645    // Retrieve audio parameters before writing the file header
646    AudioSystem::audio_format encoding = DEFAULT_TTS_FORMAT;
647    uint32_t rate = DEFAULT_TTS_RATE;
648    int channels = DEFAULT_TTS_NB_CHANNELS;
649    pSynthData->mNativeSynthInterface->setAudioFormat(encoding, rate, channels);
650
651    if ((encoding != AudioSystem::PCM_16_BIT) && (encoding != AudioSystem::PCM_8_BIT)) {
652        LOGE("android_tts_SynthProxy_synthesizeToFile(): engine uses invalid format");
653        return result;
654    }
655
656    const char *filenameNativeString =
657            env->GetStringUTFChars(filenameJavaString, 0);
658    const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
659
660    afterSynthData_t* pForAfter = new (afterSynthData_t);
661    pForAfter->jniStorage = jniData;
662    pForAfter->usageMode  = USAGEMODE_WRITE_TO_FILE;
663
664    pForAfter->outputFile = fopen(filenameNativeString, "wb");
665
666    if (pForAfter->outputFile == NULL) {
667        LOGE("android_tts_SynthProxy_synthesizeToFile(): error creating output file");
668        delete pForAfter;
669        return result;
670    }
671
672    // Write 44 blank bytes for WAV header, then come back and fill them in
673    // after we've written the audio data
674    char header[44];
675    fwrite(header, 1, 44, pForAfter->outputFile);
676
677    unsigned int unique_identifier;
678
679    memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize);
680    result = pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
681            pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter);
682
683    long filelen = ftell(pForAfter->outputFile);
684
685    int samples = (((int)filelen) - 44) / 2;
686    header[0] = 'R';
687    header[1] = 'I';
688    header[2] = 'F';
689    header[3] = 'F';
690    ((uint32_t *)(&header[4]))[0] = filelen - 8;
691    header[8] = 'W';
692    header[9] = 'A';
693    header[10] = 'V';
694    header[11] = 'E';
695
696    header[12] = 'f';
697    header[13] = 'm';
698    header[14] = 't';
699    header[15] = ' ';
700
701    ((uint32_t *)(&header[16]))[0] = 16;  // size of fmt
702
703    int sampleSizeInByte = (encoding == AudioSystem::PCM_16_BIT ? 2 : 1);
704
705    ((unsigned short *)(&header[20]))[0] = 1;  // format
706    ((unsigned short *)(&header[22]))[0] = channels;  // channels
707    ((uint32_t *)(&header[24]))[0] = rate;  // samplerate
708    ((uint32_t *)(&header[28]))[0] = rate * sampleSizeInByte * channels;// byterate
709    ((unsigned short *)(&header[32]))[0] = sampleSizeInByte * channels;  // block align
710    ((unsigned short *)(&header[34]))[0] = sampleSizeInByte * 8;  // bits per sample
711
712    header[36] = 'd';
713    header[37] = 'a';
714    header[38] = 't';
715    header[39] = 'a';
716
717    ((uint32_t *)(&header[40]))[0] = samples * 2;  // size of data
718
719    // Skip back to the beginning and rewrite the header
720    fseek(pForAfter->outputFile, 0, SEEK_SET);
721    fwrite(header, 1, 44, pForAfter->outputFile);
722
723    fflush(pForAfter->outputFile);
724    fclose(pForAfter->outputFile);
725
726    delete pForAfter;
727    pForAfter = NULL;
728
729    env->ReleaseStringUTFChars(textJavaString, textNativeString);
730    env->ReleaseStringUTFChars(filenameJavaString, filenameNativeString);
731
732    return result;
733}
734
735
736static int
737android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
738        jstring textJavaString, jint javaStreamType)
739{
740    int result = TTS_FAILURE;
741
742    if (jniData == 0) {
743        LOGE("android_tts_SynthProxy_speak(): invalid JNI data");
744        return result;
745    }
746
747    initializeFilter();
748
749    Mutex::Autolock l(engineMutex);
750
751    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
752
753    pSynthData->mPlayLock.lock();
754    pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
755    pSynthData->mPlayLock.unlock();
756
757    afterSynthData_t* pForAfter = new (afterSynthData_t);
758    pForAfter->jniStorage = jniData;
759    pForAfter->usageMode  = USAGEMODE_PLAY_IMMEDIATELY;
760    pForAfter->streamType = (AudioSystem::stream_type) javaStreamType;
761
762    if (pSynthData->mNativeSynthInterface) {
763        const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
764        memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize);
765        result = pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
766                pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter);
767        env->ReleaseStringUTFChars(textJavaString, textNativeString);
768    }
769
770    return result;
771}
772
773
774static int
775android_tts_SynthProxy_stop(JNIEnv *env, jobject thiz, jint jniData)
776{
777    int result = TTS_FAILURE;
778
779    if (jniData == 0) {
780        LOGE("android_tts_SynthProxy_stop(): invalid JNI data");
781        return result;
782    }
783
784    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
785
786    pSynthData->mPlayLock.lock();
787    pSynthData->mPlayState = SYNTHPLAYSTATE_IS_STOPPED;
788    if (pSynthData->mAudioOut) {
789        pSynthData->mAudioOut->stop();
790    }
791    pSynthData->mPlayLock.unlock();
792
793    if (pSynthData->mNativeSynthInterface) {
794        result = pSynthData->mNativeSynthInterface->stop();
795    }
796
797    return result;
798}
799
800
801static int
802android_tts_SynthProxy_stopSync(JNIEnv *env, jobject thiz, jint jniData)
803{
804    int result = TTS_FAILURE;
805
806    if (jniData == 0) {
807        LOGE("android_tts_SynthProxy_stop(): invalid JNI data");
808        return result;
809    }
810
811    // perform a regular stop
812    result = android_tts_SynthProxy_stop(env, thiz, jniData);
813    // but wait on the engine having released the engine mutex which protects
814    // the synthesizer resources.
815    engineMutex.lock();
816    engineMutex.unlock();
817
818    return result;
819}
820
821
822static jobjectArray
823android_tts_SynthProxy_getLanguage(JNIEnv *env, jobject thiz, jint jniData)
824{
825    if (jniData == 0) {
826        LOGE("android_tts_SynthProxy_getLanguage(): invalid JNI data");
827        return NULL;
828    }
829
830    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
831
832    if (pSynthData->mNativeSynthInterface) {
833        size_t bufSize = 100;
834        char lang[bufSize];
835        char country[bufSize];
836        char variant[bufSize];
837        memset(lang, 0, bufSize);
838        memset(country, 0, bufSize);
839        memset(variant, 0, bufSize);
840        jobjectArray retLocale = (jobjectArray)env->NewObjectArray(3,
841                env->FindClass("java/lang/String"), env->NewStringUTF(""));
842        pSynthData->mNativeSynthInterface->getLanguage(lang, country, variant);
843        env->SetObjectArrayElement(retLocale, 0, env->NewStringUTF(lang));
844        env->SetObjectArrayElement(retLocale, 1, env->NewStringUTF(country));
845        env->SetObjectArrayElement(retLocale, 2, env->NewStringUTF(variant));
846        return retLocale;
847    } else {
848        return NULL;
849    }
850}
851
852
853JNIEXPORT int JNICALL
854android_tts_SynthProxy_getRate(JNIEnv *env, jobject thiz, jint jniData)
855{
856    if (jniData == 0) {
857        LOGE("android_tts_SynthProxy_getRate(): invalid JNI data");
858        return 0;
859    }
860
861    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
862    size_t bufSize = 100;
863
864    char buf[bufSize];
865    memset(buf, 0, bufSize);
866    // TODO check return codes
867    if (pSynthData->mNativeSynthInterface) {
868        pSynthData->mNativeSynthInterface->getProperty("rate", buf, &bufSize);
869    }
870    return atoi(buf);
871}
872
// Dalvik VM type signatures
// Table of the native methods registered on the SynthProxy class by
// JNI_OnLoad. Each entry is { Java method name, JNI type signature,
// native implementation }. In the signatures, the leading "I" parameter of
// most methods is the jniData value (the native storage pointer).
static JNINativeMethod gMethods[] = {
    {   "native_stop",
        "(I)I",
        (void*)android_tts_SynthProxy_stop
    },
    {   "native_stopSync",
        "(I)I",
        (void*)android_tts_SynthProxy_stopSync
    },
    {   "native_speak",
        "(ILjava/lang/String;I)I",
        (void*)android_tts_SynthProxy_speak
    },
    {   "native_synthesizeToFile",
        "(ILjava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_synthesizeToFile
    },
    {   "native_isLanguageAvailable",
        "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_isLanguageAvailable
    },
    {   "native_setConfig",
            "(ILjava/lang/String;)I",
            (void*)android_tts_SynthProxy_setConfig
    },
    {   "native_setLanguage",
        "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_setLanguage
    },
    {   "native_loadLanguage",
        "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_loadLanguage
    },
    {   "native_setSpeechRate",
        "(II)I",
        (void*)android_tts_SynthProxy_setSpeechRate
    },
    {   "native_setPitch",
        "(II)I",
        (void*)android_tts_SynthProxy_setPitch
    },
    {   "native_getLanguage",
        "(I)[Ljava/lang/String;",
        (void*)android_tts_SynthProxy_getLanguage
    },
    {   "native_getRate",
        "(I)I",
        (void*)android_tts_SynthProxy_getRate
    },
    {   "native_shutdown",
        "(I)V",
        (void*)android_tts_SynthProxy_shutdown
    },
    {   "native_setup",
        "(Ljava/lang/Object;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_native_setup
    },
    {   "native_setLowShelf",
        "(ZFFFF)I",
        (void*)android_tts_SynthProxy_setLowShelf
    },
    {   "native_finalize",
        "(I)V",
        (void*)android_tts_SynthProxy_native_finalize
    }
};
940
// Names of the Java members looked up in JNI_OnLoad: the int field that holds
// the native storage pointer and the static method used to post events to Java.
#define SP_JNIDATA_FIELD_NAME                "mJniData"
#define SP_POSTSPEECHSYNTHESIZED_METHOD_NAME "postNativeSpeechSynthesizedInJava"

// JNI path of the Java class whose native methods are implemented in this file.
static const char* const kClassPathName = "android/tts/SynthProxy";
945
946jint JNI_OnLoad(JavaVM* vm, void* reserved)
947{
948    JNIEnv* env = NULL;
949    jint result = -1;
950    jclass clazz;
951
952    if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
953        LOGE("ERROR: GetEnv failed\n");
954        goto bail;
955    }
956    assert(env != NULL);
957
958    clazz = env->FindClass(kClassPathName);
959    if (clazz == NULL) {
960        LOGE("Can't find %s", kClassPathName);
961        goto bail;
962    }
963
964    javaTTSFields.synthProxyClass = clazz;
965    javaTTSFields.synthProxyFieldJniData = NULL;
966    javaTTSFields.synthProxyMethodPost = NULL;
967
968    javaTTSFields.synthProxyFieldJniData = env->GetFieldID(clazz,
969            SP_JNIDATA_FIELD_NAME, "I");
970    if (javaTTSFields.synthProxyFieldJniData == NULL) {
971        LOGE("Can't find %s.%s field", kClassPathName, SP_JNIDATA_FIELD_NAME);
972        goto bail;
973    }
974
975    javaTTSFields.synthProxyMethodPost = env->GetStaticMethodID(clazz,
976            SP_POSTSPEECHSYNTHESIZED_METHOD_NAME, "(Ljava/lang/Object;II)V");
977    if (javaTTSFields.synthProxyMethodPost == NULL) {
978        LOGE("Can't find %s.%s method", kClassPathName, SP_POSTSPEECHSYNTHESIZED_METHOD_NAME);
979        goto bail;
980    }
981
982    if (jniRegisterNativeMethods(
983            env, kClassPathName, gMethods, NELEM(gMethods)) < 0)
984        goto bail;
985
986    /* success -- return valid version number */
987    result = JNI_VERSION_1_4;
988
989 bail:
990    return result;
991}
992