TtsService.java revision f85aa5a4d4e6f1ef7e07638568e27d709b8085c6
1/*
2 * Copyright (C) 2009 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.tts;
17
18import android.speech.tts.ITts.Stub;
19import android.speech.tts.ITtsCallback;
20
21import android.app.Service;
22import android.content.Context;
23import android.content.Intent;
24import android.content.SharedPreferences;
25import android.content.pm.PackageManager;
26import android.content.pm.PackageManager.NameNotFoundException;
27import android.media.MediaPlayer;
28import android.media.MediaPlayer.OnCompletionListener;
29import android.net.Uri;
30import android.os.IBinder;
31import android.os.RemoteCallbackList;
32import android.os.RemoteException;
33import android.preference.PreferenceManager;
34import android.util.Log;
35import java.util.ArrayList;
36import java.util.Arrays;
37import java.util.HashMap;
38import java.util.concurrent.locks.ReentrantLock;
39
/**
 * Synthesizes speech from text. This is implemented as a service so that
 * other applications can call the TTS without needing to bundle the TTS in
 * the build.
 *
 * @hide
 */
46public class TtsService extends Service implements OnCompletionListener {
47
48    private static class SpeechItem {
49        public static final int SPEECH = 0;
50        public static final int EARCON = 1;
51        public static final int SILENCE = 2;
52        public String mText = null;
53        public ArrayList<String> mParams = null;
54        public int mType = SPEECH;
55        public long mDuration = 0;
56
57        public SpeechItem(String text, ArrayList<String> params, int itemType) {
58            mText = text;
59            mParams = params;
60            mType = itemType;
61        }
62
63        public SpeechItem(long silenceTime) {
64            mDuration = silenceTime;
65        }
66    }
67
68    /**
69     * Contains the information needed to access a sound resource; the name of
70     * the package that contains the resource and the resID of the resource
71     * within that package.
72     */
73    private static class SoundResource {
74        public String mSourcePackageName = null;
75        public int mResId = -1;
76        public String mFilename = null;
77
78        public SoundResource(String packageName, int id) {
79            mSourcePackageName = packageName;
80            mResId = id;
81            mFilename = null;
82        }
83
84        public SoundResource(String file) {
85            mSourcePackageName = null;
86            mResId = -1;
87            mFilename = file;
88        }
89    }
90
91    private static final String ACTION = "android.intent.action.USE_TTS";
92    private static final String CATEGORY = "android.intent.category.TTS";
93    private static final String PKGNAME = "android.tts";
94
95    final RemoteCallbackList<android.speech.tts.ITtsCallback> mCallbacks = new RemoteCallbackList<ITtsCallback>();
96
97    private Boolean mIsSpeaking;
98    private ArrayList<SpeechItem> mSpeechQueue;
99    private HashMap<String, SoundResource> mEarcons;
100    private HashMap<String, SoundResource> mUtterances;
101    private MediaPlayer mPlayer;
102    private TtsService mSelf;
103
104    private SharedPreferences prefs;
105
106    private final ReentrantLock speechQueueLock = new ReentrantLock();
107    private final ReentrantLock synthesizerLock = new ReentrantLock();
108
109    private SynthProxy nativeSynth;
110
111    @Override
112    public void onCreate() {
113        super.onCreate();
114        Log.i("TTS", "TTS starting");
115
116        // TODO: Make this work when the settings are done in the main Settings
117        // app.
118        prefs = PreferenceManager.getDefaultSharedPreferences(this);
119
120        PackageManager pm = this.getPackageManager();
121        String soLibPath = "";
122        try {
123            soLibPath = pm.getApplicationInfo("com.svox.pico", 0).dataDir;
124        } catch (NameNotFoundException e) {
125            // This exception cannot actually happen as com.svox.pico is
126            // included with the system image.
127            e.printStackTrace();
128        }
129        soLibPath = soLibPath + "/lib/libttspico.so";
130        nativeSynth = new SynthProxy(soLibPath);
131
132        mSelf = this;
133        mIsSpeaking = false;
134
135        mEarcons = new HashMap<String, SoundResource>();
136        mUtterances = new HashMap<String, SoundResource>();
137
138        mSpeechQueue = new ArrayList<SpeechItem>();
139        mPlayer = null;
140
141        setLanguage(prefs.getString("lang_pref", "en-rUS"));
142        setSpeechRate(Integer.parseInt(prefs.getString("rate_pref", "140")));
143    }
144
145    @Override
146    public void onDestroy() {
147        super.onDestroy();
148        // Don't hog the media player
149        cleanUpPlayer();
150
151        nativeSynth.shutdown();
152
153        // Unregister all callbacks.
154        mCallbacks.kill();
155    }
156
157    private void setSpeechRate(int rate) {
158        if (prefs.getBoolean("override_pref", false)) {
159            // This is set to the default here so that the preview in the prefs
160            // activity will show the change without a restart, even if apps are
161            // not allowed to change the defaults.
162            rate = Integer.parseInt(prefs.getString("rate_pref", "140"));
163        }
164        nativeSynth.setSpeechRate(rate);
165    }
166
167    private void setLanguage(String lang) {
168        if (prefs.getBoolean("override_pref", false)) {
169            // This is set to the default here so that the preview in the prefs
170            // activity will show the change without a restart, even if apps are
171            // not
172            // allowed to change the defaults.
173            lang = prefs.getString("lang_pref", "en-rUS");
174        }
175        nativeSynth.setLanguage(lang);
176    }
177
178    /**
179     * Adds a sound resource to the TTS.
180     *
181     * @param text
182     *            The text that should be associated with the sound resource
183     * @param packageName
184     *            The name of the package which has the sound resource
185     * @param resId
186     *            The resource ID of the sound within its package
187     */
188    private void addSpeech(String text, String packageName, int resId) {
189        mUtterances.put(text, new SoundResource(packageName, resId));
190    }
191
192    /**
193     * Adds a sound resource to the TTS.
194     *
195     * @param text
196     *            The text that should be associated with the sound resource
197     * @param filename
198     *            The filename of the sound resource. This must be a complete
199     *            path like: (/sdcard/mysounds/mysoundbite.mp3).
200     */
201    private void addSpeech(String text, String filename) {
202        mUtterances.put(text, new SoundResource(filename));
203    }
204
205    /**
206     * Adds a sound resource to the TTS as an earcon.
207     *
208     * @param earcon
209     *            The text that should be associated with the sound resource
210     * @param packageName
211     *            The name of the package which has the sound resource
212     * @param resId
213     *            The resource ID of the sound within its package
214     */
215    private void addEarcon(String earcon, String packageName, int resId) {
216        mEarcons.put(earcon, new SoundResource(packageName, resId));
217    }
218
219    /**
220     * Adds a sound resource to the TTS as an earcon.
221     *
222     * @param earcon
223     *            The text that should be associated with the sound resource
224     * @param filename
225     *            The filename of the sound resource. This must be a complete
226     *            path like: (/sdcard/mysounds/mysoundbite.mp3).
227     */
228    private void addEarcon(String earcon, String filename) {
229        mEarcons.put(earcon, new SoundResource(filename));
230    }
231
232    /**
233     * Speaks the given text using the specified queueing mode and parameters.
234     *
235     * @param text
236     *            The text that should be spoken
237     * @param queueMode
238     *            0 for no queue (interrupts all previous utterances), 1 for
239     *            queued
240     * @param params
241     *            An ArrayList of parameters. This is not implemented for all
242     *            engines.
243     */
244    private void speak(String text, int queueMode, ArrayList<String> params) {
245        if (queueMode == 0) {
246            stop();
247        }
248        mSpeechQueue.add(new SpeechItem(text, params, SpeechItem.SPEECH));
249        if (!mIsSpeaking) {
250            processSpeechQueue();
251        }
252    }
253
254    /**
255     * Plays the earcon using the specified queueing mode and parameters.
256     *
257     * @param earcon
258     *            The earcon that should be played
259     * @param queueMode
260     *            0 for no queue (interrupts all previous utterances), 1 for
261     *            queued
262     * @param params
263     *            An ArrayList of parameters. This is not implemented for all
264     *            engines.
265     */
266    private void playEarcon(String earcon, int queueMode,
267            ArrayList<String> params) {
268        if (queueMode == 0) {
269            stop();
270        }
271        mSpeechQueue.add(new SpeechItem(earcon, params, SpeechItem.EARCON));
272        if (!mIsSpeaking) {
273            processSpeechQueue();
274        }
275    }
276
277    /**
278     * Stops all speech output and removes any utterances still in the queue.
279     */
280    private void stop() {
281        Log.i("TTS", "Stopping");
282        mSpeechQueue.clear();
283
284        nativeSynth.stop();
285        mIsSpeaking = false;
286        if (mPlayer != null) {
287            try {
288                mPlayer.stop();
289            } catch (IllegalStateException e) {
290                // Do nothing, the player is already stopped.
291            }
292        }
293        Log.i("TTS", "Stopped");
294    }
295
296    public void onCompletion(MediaPlayer arg0) {
297        processSpeechQueue();
298    }
299
300    private void playSilence(long duration, int queueMode,
301            ArrayList<String> params) {
302        if (queueMode == 0) {
303            stop();
304        }
305        mSpeechQueue.add(new SpeechItem(duration));
306        if (!mIsSpeaking) {
307            processSpeechQueue();
308        }
309    }
310
311    private void silence(final long duration) {
312        class SilenceThread implements Runnable {
313            public void run() {
314                try {
315                    Thread.sleep(duration);
316                } catch (InterruptedException e) {
317                    e.printStackTrace();
318                } finally {
319                    processSpeechQueue();
320                }
321            }
322        }
323        Thread slnc = (new Thread(new SilenceThread()));
324        slnc.setPriority(Thread.MIN_PRIORITY);
325        slnc.start();
326    }
327
328    private void speakInternalOnly(final String text,
329            final ArrayList<String> params) {
330        class SynthThread implements Runnable {
331            public void run() {
332                boolean synthAvailable = false;
333                try {
334                    synthAvailable = synthesizerLock.tryLock();
335                    if (!synthAvailable) {
336                        Thread.sleep(100);
337                        Thread synth = (new Thread(new SynthThread()));
338                        synth.setPriority(Thread.MIN_PRIORITY);
339                        synth.start();
340                        return;
341                    }
342                    nativeSynth.speak(text);
343                } catch (InterruptedException e) {
344                    e.printStackTrace();
345                } finally {
346                    // This check is needed because finally will always run;
347                    // even if the
348                    // method returns somewhere in the try block.
349                    if (synthAvailable) {
350                        synthesizerLock.unlock();
351                    }
352                }
353            }
354        }
355        Thread synth = (new Thread(new SynthThread()));
356        synth.setPriority(Thread.MIN_PRIORITY);
357        synth.start();
358    }
359
360    private SoundResource getSoundResource(SpeechItem speechItem) {
361        SoundResource sr = null;
362        String text = speechItem.mText;
363        if (speechItem.mType == SpeechItem.SILENCE) {
364            // Do nothing if this is just silence
365        } else if (speechItem.mType == SpeechItem.EARCON) {
366            sr = mEarcons.get(text);
367        } else {
368            sr = mUtterances.get(text);
369        }
370        return sr;
371    }
372
373    private void dispatchSpeechCompletedCallbacks(String mark) {
374        Log.i("TTS callback", "dispatch started");
375        // Broadcast to all clients the new value.
376        final int N = mCallbacks.beginBroadcast();
377        for (int i = 0; i < N; i++) {
378            try {
379                mCallbacks.getBroadcastItem(i).markReached(mark);
380            } catch (RemoteException e) {
381                // The RemoteCallbackList will take care of removing
382                // the dead object for us.
383            }
384        }
385        mCallbacks.finishBroadcast();
386        Log.i("TTS callback", "dispatch completed to " + N);
387    }
388
389    private void processSpeechQueue() {
390        boolean speechQueueAvailable = false;
391        try {
392            speechQueueAvailable = speechQueueLock.tryLock();
393            if (!speechQueueAvailable) {
394                return;
395            }
396            if (mSpeechQueue.size() < 1) {
397                mIsSpeaking = false;
398                // Dispatch a completion here as this is the
399                // only place where speech completes normally.
400                // Nothing left to say in the queue is a special case
401                // that is always a "mark" - associated text is null.
402                dispatchSpeechCompletedCallbacks("");
403                return;
404            }
405
406            SpeechItem currentSpeechItem = mSpeechQueue.get(0);
407            mIsSpeaking = true;
408            SoundResource sr = getSoundResource(currentSpeechItem);
409            // Synth speech as needed - synthesizer should call
410            // processSpeechQueue to continue running the queue
411            Log.i("TTS processing: ", currentSpeechItem.mText);
412            if (sr == null) {
413                if (currentSpeechItem.mType == SpeechItem.SPEECH) {
414                    // TODO: Split text up into smaller chunks before accepting
415                    // them for processing.
416                    speakInternalOnly(currentSpeechItem.mText,
417                            currentSpeechItem.mParams);
418                } else {
419                    // This is either silence or an earcon that was missing
420                    silence(currentSpeechItem.mDuration);
421                }
422            } else {
423                cleanUpPlayer();
424                if (sr.mSourcePackageName == PKGNAME) {
425                    // Utterance is part of the TTS library
426                    mPlayer = MediaPlayer.create(this, sr.mResId);
427                } else if (sr.mSourcePackageName != null) {
428                    // Utterance is part of the app calling the library
429                    Context ctx;
430                    try {
431                        ctx = this.createPackageContext(sr.mSourcePackageName,
432                                0);
433                    } catch (NameNotFoundException e) {
434                        e.printStackTrace();
435                        mSpeechQueue.remove(0); // Remove it from the queue and
436                        // move on
437                        mIsSpeaking = false;
438                        return;
439                    }
440                    mPlayer = MediaPlayer.create(ctx, sr.mResId);
441                } else {
442                    // Utterance is coming from a file
443                    mPlayer = MediaPlayer.create(this, Uri.parse(sr.mFilename));
444                }
445
446                // Check if Media Server is dead; if it is, clear the queue and
447                // give up for now - hopefully, it will recover itself.
448                if (mPlayer == null) {
449                    mSpeechQueue.clear();
450                    mIsSpeaking = false;
451                    return;
452                }
453                mPlayer.setOnCompletionListener(this);
454                try {
455                    mPlayer.start();
456                } catch (IllegalStateException e) {
457                    mSpeechQueue.clear();
458                    mIsSpeaking = false;
459                    cleanUpPlayer();
460                    return;
461                }
462            }
463            if (mSpeechQueue.size() > 0) {
464                mSpeechQueue.remove(0);
465            }
466        } finally {
467            // This check is needed because finally will always run; even if the
468            // method returns somewhere in the try block.
469            if (speechQueueAvailable) {
470                speechQueueLock.unlock();
471            }
472        }
473    }
474
475    private void cleanUpPlayer() {
476        if (mPlayer != null) {
477            mPlayer.release();
478            mPlayer = null;
479        }
480    }
481
482    /**
483     * Synthesizes the given text using the specified queuing mode and
484     * parameters.
485     *
486     * @param text
487     *            The String of text that should be synthesized
488     * @param params
489     *            An ArrayList of parameters. The first element of this array
490     *            controls the type of voice to use.
491     * @param filename
492     *            The string that gives the full output filename; it should be
493     *            something like "/sdcard/myappsounds/mysound.wav".
494     * @return A boolean that indicates if the synthesis succeeded
495     */
496    private boolean synthesizeToFile(String text, ArrayList<String> params,
497            String filename, boolean calledFromApi) {
498        // Only stop everything if this is a call made by an outside app trying
499        // to
500        // use the API. Do NOT stop if this is a call from within the service as
501        // clearing the speech queue here would be a mistake.
502        if (calledFromApi) {
503            stop();
504        }
505        Log.i("TTS", "Synthesizing to " + filename);
506        boolean synthAvailable = false;
507        try {
508            synthAvailable = synthesizerLock.tryLock();
509            if (!synthAvailable) {
510                return false;
511            }
512            // Don't allow a filename that is too long
513            // TODO use platform constant
514            if (filename.length() > 250) {
515                return false;
516            }
517            nativeSynth.synthesizeToFile(text, filename);
518        } finally {
519            // This check is needed because finally will always run; even if the
520            // method returns somewhere in the try block.
521            if (synthAvailable) {
522                synthesizerLock.unlock();
523            }
524        }
525        Log.i("TTS", "Completed synthesis for " + filename);
526        return true;
527    }
528
529    @Override
530    public IBinder onBind(Intent intent) {
531        if (ACTION.equals(intent.getAction())) {
532            for (String category : intent.getCategories()) {
533                if (category.equals(CATEGORY)) {
534                    return mBinder;
535                }
536            }
537        }
538        return null;
539    }
540
541    private final android.speech.tts.ITts.Stub mBinder = new Stub() {
542
543        public void registerCallback(ITtsCallback cb) {
544            if (cb != null)
545                mCallbacks.register(cb);
546        }
547
548        public void unregisterCallback(ITtsCallback cb) {
549            if (cb != null)
550                mCallbacks.unregister(cb);
551        }
552
553        /**
554         * Speaks the given text using the specified queueing mode and
555         * parameters.
556         *
557         * @param text
558         *            The text that should be spoken
559         * @param queueMode
560         *            0 for no queue (interrupts all previous utterances), 1 for
561         *            queued
562         * @param params
563         *            An ArrayList of parameters. The first element of this
564         *            array controls the type of voice to use.
565         */
566        public void speak(String text, int queueMode, String[] params) {
567            ArrayList<String> speakingParams = new ArrayList<String>();
568            if (params != null) {
569                speakingParams = new ArrayList<String>(Arrays.asList(params));
570            }
571            mSelf.speak(text, queueMode, speakingParams);
572        }
573
574        /**
575         * Plays the earcon using the specified queueing mode and parameters.
576         *
577         * @param earcon
578         *            The earcon that should be played
579         * @param queueMode
580         *            0 for no queue (interrupts all previous utterances), 1 for
581         *            queued
582         * @param params
583         *            An ArrayList of parameters.
584         */
585        public void playEarcon(String earcon, int queueMode, String[] params) {
586            ArrayList<String> speakingParams = new ArrayList<String>();
587            if (params != null) {
588                speakingParams = new ArrayList<String>(Arrays.asList(params));
589            }
590            mSelf.playEarcon(earcon, queueMode, speakingParams);
591        }
592
593        /**
594         * Plays the silence using the specified queueing mode and parameters.
595         *
596         * @param duration
597         *            The duration of the silence that should be played
598         * @param queueMode
599         *            0 for no queue (interrupts all previous utterances), 1 for
600         *            queued
601         * @param params
602         *            An ArrayList of parameters.
603         */
604        public void playSilence(long duration, int queueMode, String[] params) {
605            ArrayList<String> speakingParams = new ArrayList<String>();
606            if (params != null) {
607                speakingParams = new ArrayList<String>(Arrays.asList(params));
608            }
609            mSelf.playSilence(duration, queueMode, speakingParams);
610        }
611
612        /**
613         * Stops all speech output and removes any utterances still in the
614         * queue.
615         */
616        public void stop() {
617            mSelf.stop();
618        }
619
620        /**
621         * Returns whether or not the TTS is speaking.
622         *
623         * @return Boolean to indicate whether or not the TTS is speaking
624         */
625        public boolean isSpeaking() {
626            return (mSelf.mIsSpeaking && (mSpeechQueue.size() < 1));
627        }
628
629        /**
630         * Adds a sound resource to the TTS.
631         *
632         * @param text
633         *            The text that should be associated with the sound resource
634         * @param packageName
635         *            The name of the package which has the sound resource
636         * @param resId
637         *            The resource ID of the sound within its package
638         */
639        public void addSpeech(String text, String packageName, int resId) {
640            mSelf.addSpeech(text, packageName, resId);
641        }
642
643        /**
644         * Adds a sound resource to the TTS.
645         *
646         * @param text
647         *            The text that should be associated with the sound resource
648         * @param filename
649         *            The filename of the sound resource. This must be a
650         *            complete path like: (/sdcard/mysounds/mysoundbite.mp3).
651         */
652        public void addSpeechFile(String text, String filename) {
653            mSelf.addSpeech(text, filename);
654        }
655
656        /**
657         * Adds a sound resource to the TTS as an earcon.
658         *
659         * @param earcon
660         *            The text that should be associated with the sound resource
661         * @param packageName
662         *            The name of the package which has the sound resource
663         * @param resId
664         *            The resource ID of the sound within its package
665         */
666        public void addEarcon(String earcon, String packageName, int resId) {
667            mSelf.addEarcon(earcon, packageName, resId);
668        }
669
670        /**
671         * Adds a sound resource to the TTS as an earcon.
672         *
673         * @param earcon
674         *            The text that should be associated with the sound resource
675         * @param filename
676         *            The filename of the sound resource. This must be a
677         *            complete path like: (/sdcard/mysounds/mysoundbite.mp3).
678         */
679        public void addEarconFile(String earcon, String filename) {
680            mSelf.addEarcon(earcon, filename);
681        }
682
683        /**
684         * Sets the speech rate for the TTS. Note that this will only have an
685         * effect on synthesized speech; it will not affect pre-recorded speech.
686         *
687         * @param speechRate
688         *            The speech rate that should be used
689         */
690        public void setSpeechRate(int speechRate) {
691            mSelf.setSpeechRate(speechRate);
692        }
693
694        /**
695         * Sets the speech rate for the TTS. Note that this will only have an
696         * effect on synthesized speech; it will not affect pre-recorded speech.
697         *
698         * @param language
699         *            Language values are based on the Android conventions for
700         *            localization as described in the Android platform
701         *            documentation on internationalization. This implies that
702         *            language data is specified in the format xx-rYY, where xx
703         *            is a two letter ISO 639-1 language code in lowercase and
704         *            rYY is a two letter ISO 3166-1-alpha-2 language code in
705         *            uppercase preceded by a lowercase "r".
706         */
707        public void setLanguage(String language) {
708            mSelf.setLanguage(language);
709        }
710
711        /**
712         * Speaks the given text using the specified queueing mode and
713         * parameters.
714         *
715         * @param text
716         *            The String of text that should be synthesized
717         * @param params
718         *            An ArrayList of parameters. The first element of this
719         *            array controls the type of voice to use.
720         * @param filename
721         *            The string that gives the full output filename; it should
722         *            be something like "/sdcard/myappsounds/mysound.wav".
723         * @return A boolean that indicates if the synthesis succeeded
724         */
725        public boolean synthesizeToFile(String text, String[] params,
726                String filename) {
727            ArrayList<String> speakingParams = new ArrayList<String>();
728            if (params != null) {
729                speakingParams = new ArrayList<String>(Arrays.asList(params));
730            }
731            return mSelf.synthesizeToFile(text, speakingParams, filename, true);
732        }
733    };
734
735}
736