TextToSpeech.java revision 748efcc3fb1b369690ab4617a04f452b1832edf4
1/*
2 * Copyright (C) 2009 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.speech.tts.ITts;
19import android.speech.tts.ITtsCallback;
20
21import android.content.ComponentName;
22import android.content.Context;
23import android.content.Intent;
24import android.content.ServiceConnection;
25import android.os.IBinder;
26import android.os.RemoteException;
27import android.util.Log;
28
29import java.util.HashMap;
30import java.util.Locale;
31
32/**
33 *
34 * Synthesizes speech from text for immediate playback or to create a sound file.
35 *
36 */
37//TODO complete javadoc + add links to constants
38public class TextToSpeech {
39
40    /**
41     * Denotes a successful operation.
42     */
43    public static final int TTS_SUCCESS                = 0;
44    /**
45     * Denotes a generic operation failure.
46     */
47    public static final int TTS_ERROR                  = -1;
48
49    /**
50     * Queue mode where all entries in the playback queue (media to be played
51     * and text to be synthesized) are dropped and replaced by the new entry.
52     */
53    public static final int TTS_QUEUE_FLUSH = 0;
54    /**
55     * Queue mode where the new entry is added at the end of the playback queue.
56     */
57    public static final int TTS_QUEUE_ADD = 1;
58
59
60    /**
61     * Denotes the language is available exactly as specified by the locale
62     */
63    public static final int TTS_LANG_COUNTRY_VAR_AVAILABLE = 2;
64
65
66    /**
67     * Denotes the language is available for the language and country specified
68     * by the locale, but not the variant.
69     */
70    public static final int TTS_LANG_COUNTRY_AVAILABLE = 1;
71
72
73    /**
74     * Denotes the language is available for the language by the locale,
75     * but not the country and variant.
76     */
77    public static final int TTS_LANG_AVAILABLE = 0;
78
79    /**
80     * Denotes the language data is missing.
81     */
82    public static final int TTS_LANG_MISSING_DATA = -1;
83
84    /**
85     * Denotes the language is not supported by the current TTS engine.
86     */
87    public static final int TTS_LANG_NOT_SUPPORTED = -2;
88
89
90    /**
91     * Called when the TTS has initialized.
92     *
93     * The InitListener must implement the onInit function. onInit is passed a
94     * status code indicating the result of the TTS initialization.
95     */
96    public interface OnInitListener {
97        public void onInit(int status);
98    }
99
100    /**
101     * Internal constants for the TTS functionality
102     *
103     * {@hide}
104     */
105    public class Engine {
106        // default values for a TTS engine when settings are not found in the provider
107        public static final int FALLBACK_TTS_DEFAULT_RATE = 100; // 1x
108        public static final int FALLBACK_TTS_DEFAULT_PITCH = 100;// 1x
109        public static final int FALLBACK_TTS_USE_DEFAULTS = 0; // false
110        public static final String FALLBACK_TTS_DEFAULT_SYNTH = "com.svox.pico";
111
112        // return codes for a TTS engine's check data activity
113        public static final int CHECK_VOICE_DATA_PASS = 1;
114        public static final int CHECK_VOICE_DATA_FAIL = 0;
115        public static final int CHECK_VOICE_DATA_BAD_DATA = -1;
116        public static final int CHECK_VOICE_DATA_MISSING_DATA = -2;
117        public static final int CHECK_VOICE_DATA_MISSING_DATA_NO_SDCARD = -3;
118
119        // return codes for a TTS engine's check data activity
120        public static final String VOICE_DATA_ROOT_DIRECTORY = "dataRoot";
121        public static final String VOICE_DATA_FILES = "dataFiles";
122        public static final String VOICE_DATA_FILES_INFO = "dataFilesInfo";
123
124        // keys for the parameters passed with speak commands
125        public static final String TTS_KEY_PARAM_RATE = "rate";
126        public static final String TTS_KEY_PARAM_LANGUAGE = "language";
127        public static final String TTS_KEY_PARAM_COUNTRY = "country";
128        public static final String TTS_KEY_PARAM_VARIANT = "variant";
129        public static final int TTS_PARAM_POSITION_RATE = 0;
130        public static final int TTS_PARAM_POSITION_LANGUAGE = 2;
131        public static final int TTS_PARAM_POSITION_COUNTRY = 4;
132        public static final int TTS_PARAM_POSITION_VARIANT = 6;
133    }
134
135    /**
136     * Connection needed for the TTS.
137     */
138    private ServiceConnection mServiceConnection;
139
140    private ITts mITts = null;
141    private Context mContext = null;
142    private OnInitListener mInitListener = null;
143    private boolean mStarted = false;
144    private final Object mStartLock = new Object();
145    /**
146     * Used to store the cached parameters sent along with each synthesis request to the
147     * TTS service.
148     */
149    private String[] mCachedParams;
150
151    /**
152     * The constructor for the TTS.
153     *
154     * @param context
155     *            The context
156     * @param listener
157     *            The InitListener that will be called when the TTS has
158     *            initialized successfully.
159     */
160    public TextToSpeech(Context context, OnInitListener listener) {
161        mContext = context;
162        mInitListener = listener;
163
164        mCachedParams = new String[2*4]; // 4 parameters, store key and value
165        mCachedParams[Engine.TTS_PARAM_POSITION_RATE] = Engine.TTS_KEY_PARAM_RATE;
166        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE] = Engine.TTS_KEY_PARAM_LANGUAGE;
167        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY] = Engine.TTS_KEY_PARAM_COUNTRY;
168        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT] = Engine.TTS_KEY_PARAM_VARIANT;
169
170        mCachedParams[Engine.TTS_PARAM_POSITION_RATE + 1] =
171                String.valueOf(Engine.FALLBACK_TTS_DEFAULT_RATE);
172        // initialize the language cached parameters with the current Locale
173        Locale defaultLoc = Locale.getDefault();
174        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE + 1] = defaultLoc.getISO3Language();
175        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY + 1] = defaultLoc.getISO3Country();
176        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT + 1] = defaultLoc.getVariant();
177
178        initTts();
179    }
180
181
182    private void initTts() {
183        mStarted = false;
184
185        // Initialize the TTS, run the callback after the binding is successful
186        mServiceConnection = new ServiceConnection() {
187            public void onServiceConnected(ComponentName name, IBinder service) {
188                synchronized(mStartLock) {
189                    mITts = ITts.Stub.asInterface(service);
190                    mStarted = true;
191                    if (mInitListener != null) {
192                        // TODO manage failures and missing resources
193                        mInitListener.onInit(TTS_SUCCESS);
194                    }
195                }
196            }
197
198            public void onServiceDisconnected(ComponentName name) {
199                synchronized(mStartLock) {
200                    mITts = null;
201                    mInitListener = null;
202                    mStarted = false;
203                }
204            }
205        };
206
207        Intent intent = new Intent("android.intent.action.START_TTS_SERVICE");
208        intent.addCategory("android.intent.category.TTS");
209        mContext.bindService(intent, mServiceConnection,
210                Context.BIND_AUTO_CREATE);
211        // TODO handle case where the binding works (should always work) but
212        //      the plugin fails
213    }
214
215
216    /**
217     * Shuts down the TTS. It is good practice to call this in the onDestroy
218     * method of the Activity that is using the TTS so that the TTS is stopped
219     * cleanly.
220     */
221    public void shutdown() {
222        try {
223            mContext.unbindService(mServiceConnection);
224        } catch (IllegalArgumentException e) {
225            // Do nothing and fail silently since an error here indicates that
226            // binding never succeeded in the first place.
227        }
228    }
229
230
231    /**
232     * Adds a mapping between a string of text and a sound resource in a
233     * package.
234     *
235     * @see #TTS.speak(String text, int queueMode, String[] params)
236     *
237     * @param text
238     *            Example: <b><code>"south_south_east"</code></b><br/>
239     *
240     * @param packagename
241     *            Pass the packagename of the application that contains the
242     *            resource. If the resource is in your own application (this is
243     *            the most common case), then put the packagename of your
244     *            application here.<br/>
245     *            Example: <b>"com.google.marvin.compass"</b><br/>
246     *            The packagename can be found in the AndroidManifest.xml of
247     *            your application.
248     *            <p>
249     *            <code>&lt;manifest xmlns:android=&quot;...&quot;
250     *      package=&quot;<b>com.google.marvin.compass</b>&quot;&gt;</code>
251     *            </p>
252     *
253     * @param resourceId
254     *            Example: <b><code>R.raw.south_south_east</code></b>
255     *
256     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
257     */
258    public int addSpeech(String text, String packagename, int resourceId) {
259        synchronized(mStartLock) {
260            if (!mStarted) {
261                return TTS_ERROR;
262            }
263            try {
264                mITts.addSpeech(text, packagename, resourceId);
265                return TTS_SUCCESS;
266            } catch (RemoteException e) {
267                // TTS died; restart it.
268                mStarted = false;
269                initTts();
270            } catch (NullPointerException e) {
271                // TTS died; restart it.
272                mStarted = false;
273                initTts();
274            } catch (IllegalStateException e) {
275                // TTS died; restart it.
276                mStarted = false;
277                initTts();
278            }
279            return TTS_ERROR;
280        }
281    }
282
283
284    /**
285     * Adds a mapping between a string of text and a sound file. Using this, it
286     * is possible to add custom pronounciations for text.
287     *
288     * @param text
289     *            The string of text
290     * @param filename
291     *            The full path to the sound file (for example:
292     *            "/sdcard/mysounds/hello.wav")
293     *
294     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
295     */
296    public int addSpeech(String text, String filename) {
297        synchronized (mStartLock) {
298            if (!mStarted) {
299                return TTS_ERROR;
300            }
301            try {
302                mITts.addSpeechFile(text, filename);
303                return TTS_SUCCESS;
304            } catch (RemoteException e) {
305                // TTS died; restart it.
306                mStarted = false;
307                initTts();
308            } catch (NullPointerException e) {
309                // TTS died; restart it.
310                mStarted = false;
311                initTts();
312            } catch (IllegalStateException e) {
313                // TTS died; restart it.
314                mStarted = false;
315                initTts();
316            }
317            return TTS_ERROR;
318        }
319    }
320
321
322    /**
323     * Speaks the string using the specified queuing strategy and speech
324     * parameters. Note that the speech parameters are not universally supported
325     * by all engines and will be treated as a hint. The TTS library will try to
326     * fulfill these parameters as much as possible, but there is no guarantee
327     * that the voice used will have the properties specified.
328     *
329     * @param text
330     *            The string of text to be spoken.
331     * @param queueMode
332     *            The queuing strategy to use.
333     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
334     * @param params
335     *            The hashmap of speech parameters to be used.
336     *
337     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
338     */
339    public int speak(String text, int queueMode, HashMap<String,String> params)
340    {
341        synchronized (mStartLock) {
342            int result = TTS_ERROR;
343            Log.i("TTS received: ", text);
344            if (!mStarted) {
345                return result;
346            }
347            try {
348                // TODO support extra parameters, passing cache of current parameters for the moment
349                result = mITts.speak(text, queueMode, mCachedParams);
350            } catch (RemoteException e) {
351                // TTS died; restart it.
352                mStarted = false;
353                initTts();
354            } catch (NullPointerException e) {
355                // TTS died; restart it.
356                mStarted = false;
357                initTts();
358            } catch (IllegalStateException e) {
359                // TTS died; restart it.
360                mStarted = false;
361                initTts();
362            } finally {
363              return result;
364            }
365        }
366    }
367
368
369    /**
370     * Plays the earcon using the specified queueing mode and parameters.
371     *
372     * @param earcon
373     *            The earcon that should be played
374     * @param queueMode
375     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
376     * @param params
377     *            The hashmap of parameters to be used.
378     *
379     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
380     */
381    public int playEarcon(String earcon, int queueMode,
382            HashMap<String,String> params) {
383        synchronized (mStartLock) {
384            int result = TTS_ERROR;
385            if (!mStarted) {
386                return result;
387            }
388            try {
389                // TODO support extra parameters, passing null for the moment
390                result = mITts.playEarcon(earcon, queueMode, null);
391            } catch (RemoteException e) {
392                // TTS died; restart it.
393                mStarted = false;
394                initTts();
395            } catch (NullPointerException e) {
396                // TTS died; restart it.
397                mStarted = false;
398                initTts();
399            } catch (IllegalStateException e) {
400                // TTS died; restart it.
401                mStarted = false;
402                initTts();
403            } finally {
404              return result;
405            }
406        }
407    }
408
409    /**
410     * Plays silence for the specified amount of time using the specified
411     * queue mode.
412     *
413     * @param durationInMs
414     *            A long that indicates how long the silence should last.
415     * @param queueMode
416     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
417     *
418     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
419     */
420    public int playSilence(long durationInMs, int queueMode) {
421        synchronized (mStartLock) {
422            int result = TTS_ERROR;
423            if (!mStarted) {
424                return result;
425            }
426            try {
427                // TODO support extra parameters, passing cache of current parameters for the moment
428                result = mITts.playSilence(durationInMs, queueMode, mCachedParams);
429            } catch (RemoteException e) {
430                // TTS died; restart it.
431                mStarted = false;
432                initTts();
433            } catch (NullPointerException e) {
434                // TTS died; restart it.
435                mStarted = false;
436                initTts();
437            } catch (IllegalStateException e) {
438                // TTS died; restart it.
439                mStarted = false;
440                initTts();
441            } finally {
442              return result;
443            }
444        }
445    }
446
447
448    /**
449     * Returns whether or not the TTS is busy speaking.
450     *
451     * @return Whether or not the TTS is busy speaking.
452     */
453    public boolean isSpeaking() {
454        synchronized (mStartLock) {
455            if (!mStarted) {
456                return false;
457            }
458            try {
459                return mITts.isSpeaking();
460            } catch (RemoteException e) {
461                // TTS died; restart it.
462                mStarted = false;
463                initTts();
464            } catch (NullPointerException e) {
465                // TTS died; restart it.
466                mStarted = false;
467                initTts();
468            } catch (IllegalStateException e) {
469                // TTS died; restart it.
470                mStarted = false;
471                initTts();
472            }
473            return false;
474        }
475    }
476
477
478    /**
479     * Stops speech from the TTS.
480     *
481     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
482     */
483    public int stop() {
484        synchronized (mStartLock) {
485            int result = TTS_ERROR;
486            if (!mStarted) {
487                return result;
488            }
489            try {
490                result = mITts.stop();
491            } catch (RemoteException e) {
492                // TTS died; restart it.
493                mStarted = false;
494                initTts();
495            } catch (NullPointerException e) {
496                // TTS died; restart it.
497                mStarted = false;
498                initTts();
499            } catch (IllegalStateException e) {
500                // TTS died; restart it.
501                mStarted = false;
502                initTts();
503            } finally {
504              return result;
505            }
506        }
507    }
508
509
510    /**
511     * Sets the speech rate for the TTS engine.
512     *
513     * Note that the speech rate is not universally supported by all engines and
514     * will be treated as a hint. The TTS library will try to use the specified
515     * speech rate, but there is no guarantee.
516     * This has no effect on any pre-recorded speech.
517     *
518     * @param speechRate
519     *            The speech rate for the TTS engine. 1 is the normal speed,
520     *            lower values slow down the speech (0.5 is half the normal speech rate),
521     *            greater values accelerate it (2 is twice the normal speech rate).
522     *
523     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
524     */
525    public int setSpeechRate(float speechRate) {
526        synchronized (mStartLock) {
527            int result = TTS_ERROR;
528            if (!mStarted) {
529                return result;
530            }
531            try {
532                if (speechRate > 0) {
533                    int rate = (int)(speechRate*100);
534                    mCachedParams[Engine.TTS_PARAM_POSITION_RATE + 1] = String.valueOf(rate);
535                    result = mITts.setSpeechRate(rate);
536                }
537            } catch (RemoteException e) {
538                // TTS died; restart it.
539                mStarted = false;
540                initTts();
541            } finally {
542              return result;
543            }
544        }
545    }
546
547
548    /**
549     * Sets the speech pitch for the TTS engine.
550     *
551     * Note that the pitch is not universally supported by all engines and
552     * will be treated as a hint. The TTS library will try to use the specified
553     * pitch, but there is no guarantee.
554     * This has no effect on any pre-recorded speech.
555     *
556     * @param pitch
557     *            The pitch for the TTS engine. 1 is the normal pitch,
558     *            lower values lower the tone of the synthesized voice,
559     *            greater values increase it.
560     *
561     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
562     */
563    public int setPitch(float pitch) {
564        synchronized (mStartLock) {
565            int result = TTS_ERROR;
566            if (!mStarted) {
567                return result;
568            }
569            try {
570                if (pitch > 0) {
571                    result = mITts.setPitch((int)(pitch*100));
572                }
573            } catch (RemoteException e) {
574                // TTS died; restart it.
575                mStarted = false;
576                initTts();
577            } finally {
578              return result;
579            }
580        }
581    }
582
583
584    /**
585     * Sets the language for the TTS engine.
586     *
587     * Note that the language is not universally supported by all engines and
588     * will be treated as a hint. The TTS library will try to use the specified
589     * language as represented by the Locale, but there is no guarantee.
590     *
591     * @param loc
592     *            The locale describing the language to be used.
593     *
594     * @return Code indicating the support status for the locale. See the TTS_LANG_ codes.
595     */
596    public int setLanguage(Locale loc) {
597        synchronized (mStartLock) {
598            int result = TTS_LANG_NOT_SUPPORTED;
599            if (!mStarted) {
600                return result;
601            }
602            try {
603                mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE + 1] = loc.getISO3Language();
604                mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY + 1] = loc.getISO3Country();
605                mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT + 1] = loc.getVariant();
606                result = mITts.setLanguage(mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE + 1],
607                        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY + 1],
608                        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT + 1] );
609            } catch (RemoteException e) {
610                // TTS died; restart it.
611                mStarted = false;
612                initTts();
613            } finally {
614              return result;
615            }
616        }
617    }
618
619
620    /**
621     * Returns a Locale instance describing the language currently being used by the TTS engine.
622     * @return language, country (if any) and variant (if any) used by the engine stored in a Locale
623     *     instance, or null is the TTS engine has failed.
624     */
625    public Locale getLanguage() {
626        synchronized (mStartLock) {
627            if (!mStarted) {
628                return null;
629            }
630            try {
631                String[] locStrings =  mITts.getLanguage();
632                if (locStrings.length == 3) {
633                    return new Locale(locStrings[0], locStrings[1], locStrings[2]);
634                } else {
635                    return null;
636                }
637            } catch (RemoteException e) {
638                // TTS died; restart it.
639                mStarted = false;
640                initTts();
641            }
642            return null;
643        }
644    }
645
646    /**
647     * Checks if the specified language as represented by the Locale is available.
648     *
649     * @param loc
650     *            The Locale describing the language to be used.
651     *
652     * @return one of TTS_LANG_NOT_SUPPORTED, TTS_LANG_MISSING_DATA, TTS_LANG_AVAILABLE,
653     *         TTS_LANG_COUNTRY_AVAILABLE, TTS_LANG_COUNTRY_VAR_AVAILABLE.
654     */
655    public int isLanguageAvailable(Locale loc) {
656        synchronized (mStartLock) {
657            int result = TTS_LANG_NOT_SUPPORTED;
658            if (!mStarted) {
659                return result;
660            }
661            try {
662                result = mITts.isLanguageAvailable(loc.getISO3Language(),
663                        loc.getISO3Country(), loc.getVariant());
664            } catch (RemoteException e) {
665                // TTS died; restart it.
666                mStarted = false;
667                initTts();
668            } finally {
669              return result;
670            }
671        }
672    }
673
674
675    /**
676     * Synthesizes the given text to a file using the specified parameters.
677     *
678     * @param text
679     *            The String of text that should be synthesized
680     * @param params
681     *            A hashmap of parameters.
682     * @param filename
683     *            The string that gives the full output filename; it should be
684     *            something like "/sdcard/myappsounds/mysound.wav".
685     *
686     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
687     */
688    public int synthesizeToFile(String text, HashMap<String,String> params,
689            String filename) {
690        synchronized (mStartLock) {
691            int result = TTS_ERROR;
692            if (!mStarted) {
693                return result;
694            }
695            try {
696                // TODO support extra parameters, passing null for the moment
697                if (mITts.synthesizeToFile(text, null, filename)){
698                    result = TTS_SUCCESS;
699                }
700            } catch (RemoteException e) {
701                // TTS died; restart it.
702                mStarted = false;
703                initTts();
704            } catch (NullPointerException e) {
705                // TTS died; restart it.
706                mStarted = false;
707                initTts();
708            } catch (IllegalStateException e) {
709                // TTS died; restart it.
710                mStarted = false;
711                initTts();
712            } finally {
713              return result;
714            }
715        }
716    }
717
718}
719