TextToSpeech.java revision 62788e9b48f884a35b89c88911b581daa6a14e08
1/*
2 * Copyright (C) 2009 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.speech.tts.ITts;
19import android.speech.tts.ITtsCallback;
20
21import android.content.ComponentName;
22import android.content.Context;
23import android.content.Intent;
24import android.content.ServiceConnection;
25import android.os.IBinder;
26import android.os.RemoteException;
27import android.util.Log;
28
29import java.util.HashMap;
30import java.util.Locale;
31
32/**
33 *
34 * Synthesizes speech from text for immediate playback or to create a sound file.
35 *
36 */
37//TODO complete javadoc + add links to constants
38public class TextToSpeech {
39
40    /**
41     * Denotes a successful operation.
42     */
43    public static final int TTS_SUCCESS                = 0;
44    /**
45     * Denotes a generic operation failure.
46     */
47    public static final int TTS_ERROR                  = -1;
48
49    /**
50     * Queue mode where all entries in the playback queue (media to be played
51     * and text to be synthesized) are dropped and replaced by the new entry.
52     */
53    public static final int TTS_QUEUE_FLUSH = 0;
54    /**
55     * Queue mode where the new entry is added at the end of the playback queue.
56     */
57    public static final int TTS_QUEUE_ADD = 1;
58
59
60    /**
61     * Denotes the language is available exactly as specified by the locale
62     */
63    public static final int TTS_LANG_COUNTRY_VAR_AVAILABLE = 2;
64
65
66    /**
67     * Denotes the language is available for the language and country specified
68     * by the locale, but not the variant.
69     */
70    public static final int TTS_LANG_COUNTRY_AVAILABLE = 1;
71
72
73    /**
74     * Denotes the language is available for the language by the locale,
75     * but not the country and variant.
76     */
77    public static final int TTS_LANG_AVAILABLE = 0;
78
79    /**
80     * Denotes the language data is missing.
81     */
82    public static final int TTS_LANG_MISSING_DATA = -1;
83
84    /**
85     * Denotes the language is not supported by the current TTS engine.
86     */
87    public static final int TTS_LANG_NOT_SUPPORTED = -2;
88
89
90    /**
91     * Called when the TTS has initialized.
92     *
93     * The InitListener must implement the onInit function. onInit is passed a
94     * status code indicating the result of the TTS initialization.
95     */
96    public interface OnInitListener {
97        public void onInit(int status);
98    }
99
100    /**
101     * Internal constants for the TTS functionality
102     *
103     * {@hide}
104     */
105    public class Engine {
106        // default values for a TTS engine when settings are not found in the provider
107        public static final int FALLBACK_TTS_DEFAULT_RATE = 100; // 1x
108        public static final int FALLBACK_TTS_DEFAULT_PITCH = 100;// 1x
109        public static final int FALLBACK_TTS_USE_DEFAULTS = 0; // false
110        public static final String FALLBACK_TTS_DEFAULT_LANG = "eng";
111        public static final String FALLBACK_TTS_DEFAULT_COUNTRY = "";
112        public static final String FALLBACK_TTS_DEFAULT_VARIANT = "";
113
114        // return codes for a TTS engine's check data activity
115        public static final int CHECK_VOICE_DATA_PASS = 1;
116        public static final int CHECK_VOICE_DATA_FAIL = 0;
117        public static final int CHECK_VOICE_DATA_BAD_DATA = -1;
118        public static final int CHECK_VOICE_DATA_MISSING_DATA = -2;
119        public static final int CHECK_VOICE_DATA_MISSING_DATA_NO_SDCARD = -3;
120
121        // return codes for a TTS engine's check data activity
122        public static final String VOICE_DATA_ROOT_DIRECTORY = "dataRoot";
123        public static final String VOICE_DATA_FILES = "dataFiles";
124        public static final String VOICE_DATA_FILES_INFO = "dataFilesInfo";
125
126        // keys for the parameters passed with speak commands
127        public static final String TTS_KEY_PARAM_RATE = "rate";
128        public static final String TTS_KEY_PARAM_LANGUAGE = "language";
129        public static final String TTS_KEY_PARAM_COUNTRY = "country";
130        public static final String TTS_KEY_PARAM_VARIANT = "variant";
131        public static final int TTS_PARAM_POSITION_RATE = 0;
132        public static final int TTS_PARAM_POSITION_LANGUAGE = 2;
133        public static final int TTS_PARAM_POSITION_COUNTRY = 4;
134        public static final int TTS_PARAM_POSITION_VARIANT = 6;
135    }
136
137    /**
138     * Connection needed for the TTS.
139     */
140    private ServiceConnection mServiceConnection;
141
142    private ITts mITts = null;
143    private Context mContext = null;
144    private OnInitListener mInitListener = null;
145    private boolean mStarted = false;
146    private final Object mStartLock = new Object();
147    private int mCachedRate = Engine.FALLBACK_TTS_DEFAULT_RATE;
148    private String mCachedLang = Engine.FALLBACK_TTS_DEFAULT_LANG;
149    private String mCachedCountry = Engine.FALLBACK_TTS_DEFAULT_COUNTRY;
150    private String mCachedVariant = Engine.FALLBACK_TTS_DEFAULT_VARIANT;
151    private String[] mCachedParams;
152
153    /**
154     * The constructor for the TTS.
155     *
156     * @param context
157     *            The context
158     * @param listener
159     *            The InitListener that will be called when the TTS has
160     *            initialized successfully.
161     */
162    public TextToSpeech(Context context, OnInitListener listener) {
163        mContext = context;
164        mInitListener = listener;
165
166        mCachedParams = new String[2*4]; //4 parameters, store key and value
167        mCachedParams[Engine.TTS_PARAM_POSITION_RATE] = Engine.TTS_KEY_PARAM_RATE;
168        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE] = Engine.TTS_KEY_PARAM_LANGUAGE;
169        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY] = Engine.TTS_KEY_PARAM_COUNTRY;
170        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT] = Engine.TTS_KEY_PARAM_VARIANT;
171        updateCachedParamArray();
172
173        initTts();
174    }
175
176
177    private void updateCachedParamArray() {
178        mCachedParams[Engine.TTS_PARAM_POSITION_RATE+1] = String.valueOf(mCachedRate);
179        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE+1] = mCachedLang;
180        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY+1] = mCachedCountry;
181        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT+1] = mCachedVariant;
182    }
183
184
185    private void initTts() {
186        mStarted = false;
187
188        // Initialize the TTS, run the callback after the binding is successful
189        mServiceConnection = new ServiceConnection() {
190            public void onServiceConnected(ComponentName name, IBinder service) {
191                synchronized(mStartLock) {
192                    mITts = ITts.Stub.asInterface(service);
193                    mStarted = true;
194                    if (mInitListener != null) {
195                        // TODO manage failures and missing resources
196                        mInitListener.onInit(TTS_SUCCESS);
197                    }
198                }
199            }
200
201            public void onServiceDisconnected(ComponentName name) {
202                synchronized(mStartLock) {
203                    mITts = null;
204                    mInitListener = null;
205                    mStarted = false;
206                }
207            }
208        };
209
210        Intent intent = new Intent("android.intent.action.USE_TTS");
211        intent.addCategory("android.intent.category.TTS");
212        mContext.bindService(intent, mServiceConnection,
213                Context.BIND_AUTO_CREATE);
214        // TODO handle case where the binding works (should always work) but
215        //      the plugin fails
216    }
217
218
219    /**
220     * Shuts down the TTS. It is good practice to call this in the onDestroy
221     * method of the Activity that is using the TTS so that the TTS is stopped
222     * cleanly.
223     */
224    public void shutdown() {
225        try {
226            mContext.unbindService(mServiceConnection);
227        } catch (IllegalArgumentException e) {
228            // Do nothing and fail silently since an error here indicates that
229            // binding never succeeded in the first place.
230        }
231    }
232
233
234    /**
235     * Adds a mapping between a string of text and a sound resource in a
236     * package.
237     *
238     * @see #TTS.speak(String text, int queueMode, String[] params)
239     *
240     * @param text
241     *            Example: <b><code>"south_south_east"</code></b><br/>
242     *
243     * @param packagename
244     *            Pass the packagename of the application that contains the
245     *            resource. If the resource is in your own application (this is
246     *            the most common case), then put the packagename of your
247     *            application here.<br/>
248     *            Example: <b>"com.google.marvin.compass"</b><br/>
249     *            The packagename can be found in the AndroidManifest.xml of
250     *            your application.
251     *            <p>
252     *            <code>&lt;manifest xmlns:android=&quot;...&quot;
253     *      package=&quot;<b>com.google.marvin.compass</b>&quot;&gt;</code>
254     *            </p>
255     *
256     * @param resourceId
257     *            Example: <b><code>R.raw.south_south_east</code></b>
258     *
259     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
260     */
261    public int addSpeech(String text, String packagename, int resourceId) {
262        synchronized(mStartLock) {
263            if (!mStarted) {
264                return TTS_ERROR;
265            }
266            try {
267                mITts.addSpeech(text, packagename, resourceId);
268                return TTS_SUCCESS;
269            } catch (RemoteException e) {
270                // TTS died; restart it.
271                mStarted = false;
272                initTts();
273            } catch (NullPointerException e) {
274                // TTS died; restart it.
275                mStarted = false;
276                initTts();
277            } catch (IllegalStateException e) {
278                // TTS died; restart it.
279                mStarted = false;
280                initTts();
281            }
282            return TTS_ERROR;
283        }
284    }
285
286
287    /**
288     * Adds a mapping between a string of text and a sound file. Using this, it
289     * is possible to add custom pronounciations for text.
290     *
291     * @param text
292     *            The string of text
293     * @param filename
294     *            The full path to the sound file (for example:
295     *            "/sdcard/mysounds/hello.wav")
296     *
297     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
298     */
299    public int addSpeech(String text, String filename) {
300        synchronized (mStartLock) {
301            if (!mStarted) {
302                return TTS_ERROR;
303            }
304            try {
305                mITts.addSpeechFile(text, filename);
306                return TTS_SUCCESS;
307            } catch (RemoteException e) {
308                // TTS died; restart it.
309                mStarted = false;
310                initTts();
311            } catch (NullPointerException e) {
312                // TTS died; restart it.
313                mStarted = false;
314                initTts();
315            } catch (IllegalStateException e) {
316                // TTS died; restart it.
317                mStarted = false;
318                initTts();
319            }
320            return TTS_ERROR;
321        }
322    }
323
324
325    /**
326     * Speaks the string using the specified queuing strategy and speech
327     * parameters. Note that the speech parameters are not universally supported
328     * by all engines and will be treated as a hint. The TTS library will try to
329     * fulfill these parameters as much as possible, but there is no guarantee
330     * that the voice used will have the properties specified.
331     *
332     * @param text
333     *            The string of text to be spoken.
334     * @param queueMode
335     *            The queuing strategy to use.
336     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
337     * @param params
338     *            The hashmap of speech parameters to be used.
339     *
340     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
341     */
342    public int speak(String text, int queueMode, HashMap<String,String> params)
343    {
344        synchronized (mStartLock) {
345            Log.i("TTS received: ", text);
346            if (!mStarted) {
347                return TTS_ERROR;
348            }
349            try {
350                // TODO support extra parameters, passing cache of current parameters for the moment
351                mITts.speak(text, queueMode, mCachedParams);
352                return TTS_SUCCESS;
353            } catch (RemoteException e) {
354                // TTS died; restart it.
355                mStarted = false;
356                initTts();
357            } catch (NullPointerException e) {
358                // TTS died; restart it.
359                mStarted = false;
360                initTts();
361            } catch (IllegalStateException e) {
362                // TTS died; restart it.
363                mStarted = false;
364                initTts();
365            }
366            return TTS_ERROR;
367        }
368    }
369
370
371    /**
372     * Plays the earcon using the specified queueing mode and parameters.
373     *
374     * @param earcon
375     *            The earcon that should be played
376     * @param queueMode
377     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
378     * @param params
379     *            The hashmap of parameters to be used.
380     *
381     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
382     */
383    public int playEarcon(String earcon, int queueMode,
384            HashMap<String,String> params) {
385        synchronized (mStartLock) {
386            if (!mStarted) {
387                return TTS_ERROR;
388            }
389            try {
390                // TODO support extra parameters, passing null for the moment
391                mITts.playEarcon(earcon, queueMode, null);
392                return TTS_SUCCESS;
393            } catch (RemoteException e) {
394                // TTS died; restart it.
395                mStarted = false;
396                initTts();
397            } catch (NullPointerException e) {
398                // TTS died; restart it.
399                mStarted = false;
400                initTts();
401            } catch (IllegalStateException e) {
402                // TTS died; restart it.
403                mStarted = false;
404                initTts();
405            }
406            return TTS_ERROR;
407        }
408    }
409
410    /**
411     * Plays silence for the specified amount of time using the specified
412     * queue mode.
413     *
414     * @param durationInMs
415     *            A long that indicates how long the silence should last.
416     * @param queueMode
417     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
418     *
419     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
420     */
421    public int playSilence(long durationInMs, int queueMode) {
422        synchronized (mStartLock) {
423            if (!mStarted) {
424                return TTS_ERROR;
425            }
426            try {
427                // TODO support extra parameters, passing cache of current parameters for the moment
428                mITts.playSilence(durationInMs, queueMode, mCachedParams);
429                return TTS_SUCCESS;
430            } catch (RemoteException e) {
431                // TTS died; restart it.
432                mStarted = false;
433                initTts();
434            } catch (NullPointerException e) {
435                // TTS died; restart it.
436                mStarted = false;
437                initTts();
438            } catch (IllegalStateException e) {
439                // TTS died; restart it.
440                mStarted = false;
441                initTts();
442            }
443            return TTS_ERROR;
444        }
445    }
446
447
448    /**
449     * Returns whether or not the TTS is busy speaking.
450     *
451     * @return Whether or not the TTS is busy speaking.
452     */
453    public boolean isSpeaking() {
454        synchronized (mStartLock) {
455            if (!mStarted) {
456                return false;
457            }
458            try {
459                return mITts.isSpeaking();
460            } catch (RemoteException e) {
461                // TTS died; restart it.
462                mStarted = false;
463                initTts();
464            } catch (NullPointerException e) {
465                // TTS died; restart it.
466                mStarted = false;
467                initTts();
468            } catch (IllegalStateException e) {
469                // TTS died; restart it.
470                mStarted = false;
471                initTts();
472            }
473            return false;
474        }
475    }
476
477
478    /**
479     * Stops speech from the TTS.
480     *
481     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
482     */
483    public int stop() {
484        synchronized (mStartLock) {
485            if (!mStarted) {
486                return TTS_ERROR;
487            }
488            try {
489                mITts.stop();
490                return TTS_SUCCESS;
491            } catch (RemoteException e) {
492                // TTS died; restart it.
493                mStarted = false;
494                initTts();
495            } catch (NullPointerException e) {
496                // TTS died; restart it.
497                mStarted = false;
498                initTts();
499            } catch (IllegalStateException e) {
500                // TTS died; restart it.
501                mStarted = false;
502                initTts();
503            }
504            return TTS_ERROR;
505        }
506    }
507
508
509    /**
510     * Sets the speech rate for the TTS engine.
511     *
512     * Note that the speech rate is not universally supported by all engines and
513     * will be treated as a hint. The TTS library will try to use the specified
514     * speech rate, but there is no guarantee.
515     * This has no effect on any pre-recorded speech.
516     *
517     * @param speechRate
518     *            The speech rate for the TTS engine. 1 is the normal speed,
519     *            lower values slow down the speech (0.5 is half the normal speech rate),
520     *            greater values accelerate it (2 is twice the normal speech rate).
521     *
522     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
523     */
524    public int setSpeechRate(float speechRate) {
525        synchronized (mStartLock) {
526            if (!mStarted) {
527                return TTS_SUCCESS;
528            }
529            try {
530                if (speechRate > 0) {
531                    mCachedRate = (int)(speechRate*100);
532                    updateCachedParamArray();
533                    mITts.setSpeechRate(mCachedRate);
534                    return TTS_SUCCESS;
535                }
536            } catch (RemoteException e) {
537                // TTS died; restart it.
538                mStarted = false;
539                initTts();
540            }
541            return TTS_ERROR;
542        }
543    }
544
545
546    /**
547     * Sets the speech pitch for the TTS engine.
548     *
549     * Note that the pitch is not universally supported by all engines and
550     * will be treated as a hint. The TTS library will try to use the specified
551     * pitch, but there is no guarantee.
552     * This has no effect on any pre-recorded speech.
553     *
554     * @param pitch
555     *            The pitch for the TTS engine. 1 is the normal pitch,
556     *            lower values lower the tone of the synthesized voice,
557     *            greater values increase it.
558     *
559     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
560     */
561    public int setPitch(float pitch) {
562        synchronized (mStartLock) {
563            if (!mStarted) {
564                return TTS_ERROR;
565            }
566            try {
567                if (pitch > 0) {
568                    mITts.setPitch((int)(pitch*100));
569                    return TTS_SUCCESS;
570                }
571            } catch (RemoteException e) {
572                // TTS died; restart it.
573                mStarted = false;
574                initTts();
575            }
576            return TTS_ERROR;
577        }
578    }
579
580
581    /**
582     * Sets the language for the TTS engine.
583     *
584     * Note that the language is not universally supported by all engines and
585     * will be treated as a hint. The TTS library will try to use the specified
586     * language as represented by the Locale, but there is no guarantee.
587     *
588     * @param loc
589     *            The locale describing the language to be used.
590     *
591     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
592     */
593    public int setLanguage(Locale loc) {
594        synchronized (mStartLock) {
595            if (!mStarted) {
596                return TTS_ERROR;
597            }
598            try {
599                mCachedLang = loc.getISO3Language();
600                mCachedCountry = loc.getISO3Country();
601                mCachedVariant = loc.getVariant();
602                updateCachedParamArray();
603                mITts.setLanguage(mCachedLang, mCachedCountry, mCachedVariant);
604                return TTS_SUCCESS;
605            } catch (RemoteException e) {
606                // TTS died; restart it.
607                mStarted = false;
608                initTts();
609            }
610            return TTS_ERROR;
611        }
612    }
613
614
615    /**
616     * Returns a Locale instance describing the language currently being used by the TTS engine.
617     * @return language, country (if any) and variant (if any) used by the engine stored in a Locale
618     *     instance, or null is the TTS engine has failed.
619     */
620    public Locale getLanguage() {
621        synchronized (mStartLock) {
622            if (!mStarted) {
623                return null;
624            }
625            try {
626                String[] locStrings =  mITts.getLanguage();
627                if (locStrings.length == 3) {
628                    return new Locale(locStrings[0], locStrings[1], locStrings[2]);
629                } else {
630                    return null;
631                }
632            } catch (RemoteException e) {
633                // TTS died; restart it.
634                mStarted = false;
635                initTts();
636            }
637            return null;
638        }
639    }
640
641    /**
642     * Checks if the specified language as represented by the Locale is available.
643     *
644     * @param loc
645     *            The Locale describing the language to be used.
646     *
647     * @return one of TTS_LANG_NOT_SUPPORTED, TTS_LANG_MISSING_DATA, TTS_LANG_AVAILABLE,
648     *         TTS_LANG_COUNTRY_AVAILABLE, TTS_LANG_COUNTRY_VAR_AVAILABLE.
649     */
650    public int isLanguageAvailable(Locale loc) {
651        synchronized (mStartLock) {
652            if (!mStarted) {
653                return TTS_LANG_NOT_SUPPORTED;
654            }
655            try {
656                return mITts.isLanguageAvailable(loc.getISO3Language(), loc.getISO3Country(),
657                        loc.getVariant());
658            } catch (RemoteException e) {
659                // TTS died; restart it.
660                mStarted = false;
661                initTts();
662            }
663            return TTS_LANG_NOT_SUPPORTED;
664        }
665    }
666
667
668    /**
669     * Synthesizes the given text to a file using the specified parameters.
670     *
671     * @param text
672     *            The String of text that should be synthesized
673     * @param params
674     *            A hashmap of parameters.
675     * @param filename
676     *            The string that gives the full output filename; it should be
677     *            something like "/sdcard/myappsounds/mysound.wav".
678     *
679     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
680     */
681    public int synthesizeToFile(String text, HashMap<String,String> params,
682            String filename) {
683        synchronized (mStartLock) {
684            if (!mStarted) {
685                return TTS_ERROR;
686            }
687            try {
688                // TODO support extra parameters, passing null for the moment
689                if (mITts.synthesizeToFile(text, null, filename)){
690                    return TTS_SUCCESS;
691                }
692            } catch (RemoteException e) {
693                // TTS died; restart it.
694                mStarted = false;
695                initTts();
696            } catch (NullPointerException e) {
697                // TTS died; restart it.
698                mStarted = false;
699                initTts();
700            } catch (IllegalStateException e) {
701                // TTS died; restart it.
702                mStarted = false;
703                initTts();
704            }
705            return TTS_ERROR;
706        }
707    }
708
709}
710