TextToSpeech.java revision da7681e7b61dd450be72f2b4a80e6d1c86342e05
1/*
2 * Copyright (C) 2009 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.speech.tts.ITts;
19import android.speech.tts.ITtsCallback;
20
21import android.content.ComponentName;
22import android.content.Context;
23import android.content.Intent;
24import android.content.ServiceConnection;
25import android.os.IBinder;
26import android.os.RemoteException;
27import android.util.Log;
28
29import java.util.HashMap;
30import java.util.Locale;
31
32/**
33 *
34 * Synthesizes speech from text for immediate playback or to create a sound file.
35 *
36 */
37//TODO complete javadoc + add links to constants
38public class TextToSpeech {
39
    /**
     * Denotes a successful operation.
     */
    public static final int TTS_SUCCESS                = 0;
    /**
     * Denotes a generic operation failure.
     */
    public static final int TTS_ERROR                  = -1;

    /**
     * Queue mode where all entries in the playback queue (media to be played
     * and text to be synthesized) are dropped and replaced by the new entry.
     */
    public static final int TTS_QUEUE_FLUSH = 0;
    /**
     * Queue mode where the new entry is added at the end of the playback queue.
     */
    public static final int TTS_QUEUE_ADD = 1;


    /**
     * Denotes that the language is available exactly as specified by the
     * locale (language, country and variant).
     */
    public static final int TTS_LANG_COUNTRY_VAR_AVAILABLE = 2;


    /**
     * Denotes that the language is available for the language and country
     * specified by the locale, but not for the variant.
     */
    public static final int TTS_LANG_COUNTRY_AVAILABLE = 1;


    /**
     * Denotes that the language is available for the language specified by
     * the locale, but not for the country and variant.
     */
    public static final int TTS_LANG_AVAILABLE = 0;

    /**
     * Denotes that the language data is missing.
     */
    public static final int TTS_LANG_MISSING_DATA = -1;

    /**
     * Denotes that the language is not supported by the current TTS engine.
     * This is also the value returned by the language methods of this class
     * when the TTS service is not started.
     */
    public static final int TTS_LANG_NOT_SUPPORTED = -2;
88
89
90    /**
91     * Called when the TTS has initialized.
92     *
93     * The InitListener must implement the onInit function. onInit is passed a
94     * status code indicating the result of the TTS initialization.
95     */
96    public interface OnInitListener {
97        public void onInit(int status);
98    }
99
100    /**
101     * Internal constants for the TTS functionality
102     *
103     * {@hide}
104     */
105    public class Engine {
106        // default values for a TTS engine when settings are not found in the provider
107        public static final int FALLBACK_TTS_DEFAULT_RATE = 100; // 1x
108        public static final int FALLBACK_TTS_DEFAULT_PITCH = 100;// 1x
109        public static final int FALLBACK_TTS_USE_DEFAULTS = 0; // false
110        public static final String FALLBACK_TTS_DEFAULT_LANG = "eng";
111        public static final String FALLBACK_TTS_DEFAULT_COUNTRY = "";
112        public static final String FALLBACK_TTS_DEFAULT_VARIANT = "";
113
114        // return codes for a TTS engine's check data activity
115        public static final int CHECK_VOICE_DATA_PASS = 1;
116        public static final int CHECK_VOICE_DATA_FAIL = 0;
117        public static final int CHECK_VOICE_DATA_BAD_DATA = -1;
118        public static final int CHECK_VOICE_DATA_MISSING_DATA = -2;
119        public static final int CHECK_VOICE_DATA_MISSING_DATA_NO_SDCARD = -3;
120
121        // return codes for a TTS engine's check data activity
122        public static final String VOICE_DATA_ROOT_DIRECTORY = "dataRoot";
123        public static final String VOICE_DATA_FILES = "dataFiles";
124        public static final String VOICE_DATA_FILES_INFO = "dataFilesInfo";
125
126        // keys for the parameters passed with speak commands
127        public static final String TTS_KEY_PARAM_RATE = "rate";
128        public static final String TTS_KEY_PARAM_LANGUAGE = "language";
129        public static final String TTS_KEY_PARAM_COUNTRY = "country";
130        public static final String TTS_KEY_PARAM_VARIANT = "variant";
131        public static final int TTS_PARAM_POSITION_RATE = 0;
132        public static final int TTS_PARAM_POSITION_LANGUAGE = 2;
133        public static final int TTS_PARAM_POSITION_COUNTRY = 4;
134        public static final int TTS_PARAM_POSITION_VARIANT = 6;
135    }
136
    /**
     * Connection needed for the TTS. Created by initTts() and passed to
     * unbindService() by shutdown().
     */
    private ServiceConnection mServiceConnection;

    // Remote interface of the TTS service; assigned when the service connects
    // and reset to null when it disconnects.
    private ITts mITts = null;
    // Context used to bind to and unbind from the TTS service.
    private Context mContext = null;
    // Listener notified when the service connects; reset to null when the
    // service disconnects.
    private OnInitListener mInitListener = null;
    // True once the service is connected; the public methods return an error
    // value without calling the service while this is false.
    private boolean mStarted = false;
    // Monitor held by the public methods and the connection callbacks while
    // they read or update mStarted and mITts.
    private final Object mStartLock = new Object();
    /**
     * Used to store the cached parameters sent along with each synthesis request to the
     * TTS service. Entries alternate between a key and its value: the key sits at the
     * Engine.TTS_PARAM_POSITION_* index and the value at that index + 1.
     */
    private String[] mCachedParams;
152
153    /**
154     * The constructor for the TTS.
155     *
156     * @param context
157     *            The context
158     * @param listener
159     *            The InitListener that will be called when the TTS has
160     *            initialized successfully.
161     */
162    public TextToSpeech(Context context, OnInitListener listener) {
163        mContext = context;
164        mInitListener = listener;
165
166        mCachedParams = new String[2*4]; // 4 parameters, store key and value
167        mCachedParams[Engine.TTS_PARAM_POSITION_RATE] = Engine.TTS_KEY_PARAM_RATE;
168        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE] = Engine.TTS_KEY_PARAM_LANGUAGE;
169        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY] = Engine.TTS_KEY_PARAM_COUNTRY;
170        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT] = Engine.TTS_KEY_PARAM_VARIANT;
171
172        mCachedParams[Engine.TTS_PARAM_POSITION_RATE + 1] =
173                String.valueOf(Engine.FALLBACK_TTS_DEFAULT_RATE);
174        // initialize the language cached parameters with the current Locale
175        Locale defaultLoc = Locale.getDefault();
176        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE + 1] = defaultLoc.getISO3Language();
177        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY + 1] = defaultLoc.getISO3Country();
178        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT + 1] = defaultLoc.getVariant();
179
180        initTts();
181    }
182
183
184    private void initTts() {
185        mStarted = false;
186
187        // Initialize the TTS, run the callback after the binding is successful
188        mServiceConnection = new ServiceConnection() {
189            public void onServiceConnected(ComponentName name, IBinder service) {
190                synchronized(mStartLock) {
191                    mITts = ITts.Stub.asInterface(service);
192                    mStarted = true;
193                    if (mInitListener != null) {
194                        // TODO manage failures and missing resources
195                        mInitListener.onInit(TTS_SUCCESS);
196                    }
197                }
198            }
199
200            public void onServiceDisconnected(ComponentName name) {
201                synchronized(mStartLock) {
202                    mITts = null;
203                    mInitListener = null;
204                    mStarted = false;
205                }
206            }
207        };
208
209        Intent intent = new Intent("android.intent.action.START_TTS_SERVICE");
210        intent.addCategory("android.intent.category.TTS");
211        mContext.bindService(intent, mServiceConnection,
212                Context.BIND_AUTO_CREATE);
213        // TODO handle case where the binding works (should always work) but
214        //      the plugin fails
215    }
216
217
218    /**
219     * Shuts down the TTS. It is good practice to call this in the onDestroy
220     * method of the Activity that is using the TTS so that the TTS is stopped
221     * cleanly.
222     */
223    public void shutdown() {
224        try {
225            mContext.unbindService(mServiceConnection);
226        } catch (IllegalArgumentException e) {
227            // Do nothing and fail silently since an error here indicates that
228            // binding never succeeded in the first place.
229        }
230    }
231
232
233    /**
234     * Adds a mapping between a string of text and a sound resource in a
235     * package.
236     *
237     * @see #TTS.speak(String text, int queueMode, String[] params)
238     *
239     * @param text
240     *            Example: <b><code>"south_south_east"</code></b><br/>
241     *
242     * @param packagename
243     *            Pass the packagename of the application that contains the
244     *            resource. If the resource is in your own application (this is
245     *            the most common case), then put the packagename of your
246     *            application here.<br/>
247     *            Example: <b>"com.google.marvin.compass"</b><br/>
248     *            The packagename can be found in the AndroidManifest.xml of
249     *            your application.
250     *            <p>
251     *            <code>&lt;manifest xmlns:android=&quot;...&quot;
252     *      package=&quot;<b>com.google.marvin.compass</b>&quot;&gt;</code>
253     *            </p>
254     *
255     * @param resourceId
256     *            Example: <b><code>R.raw.south_south_east</code></b>
257     *
258     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
259     */
260    public int addSpeech(String text, String packagename, int resourceId) {
261        synchronized(mStartLock) {
262            if (!mStarted) {
263                return TTS_ERROR;
264            }
265            try {
266                mITts.addSpeech(text, packagename, resourceId);
267                return TTS_SUCCESS;
268            } catch (RemoteException e) {
269                // TTS died; restart it.
270                mStarted = false;
271                initTts();
272            } catch (NullPointerException e) {
273                // TTS died; restart it.
274                mStarted = false;
275                initTts();
276            } catch (IllegalStateException e) {
277                // TTS died; restart it.
278                mStarted = false;
279                initTts();
280            }
281            return TTS_ERROR;
282        }
283    }
284
285
286    /**
287     * Adds a mapping between a string of text and a sound file. Using this, it
288     * is possible to add custom pronounciations for text.
289     *
290     * @param text
291     *            The string of text
292     * @param filename
293     *            The full path to the sound file (for example:
294     *            "/sdcard/mysounds/hello.wav")
295     *
296     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
297     */
298    public int addSpeech(String text, String filename) {
299        synchronized (mStartLock) {
300            if (!mStarted) {
301                return TTS_ERROR;
302            }
303            try {
304                mITts.addSpeechFile(text, filename);
305                return TTS_SUCCESS;
306            } catch (RemoteException e) {
307                // TTS died; restart it.
308                mStarted = false;
309                initTts();
310            } catch (NullPointerException e) {
311                // TTS died; restart it.
312                mStarted = false;
313                initTts();
314            } catch (IllegalStateException e) {
315                // TTS died; restart it.
316                mStarted = false;
317                initTts();
318            }
319            return TTS_ERROR;
320        }
321    }
322
323
324    /**
325     * Speaks the string using the specified queuing strategy and speech
326     * parameters. Note that the speech parameters are not universally supported
327     * by all engines and will be treated as a hint. The TTS library will try to
328     * fulfill these parameters as much as possible, but there is no guarantee
329     * that the voice used will have the properties specified.
330     *
331     * @param text
332     *            The string of text to be spoken.
333     * @param queueMode
334     *            The queuing strategy to use.
335     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
336     * @param params
337     *            The hashmap of speech parameters to be used.
338     *
339     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
340     */
341    public int speak(String text, int queueMode, HashMap<String,String> params)
342    {
343        synchronized (mStartLock) {
344            int result = TTS_ERROR;
345            Log.i("TTS received: ", text);
346            if (!mStarted) {
347                return result;
348            }
349            try {
350                // TODO support extra parameters, passing cache of current parameters for the moment
351                result = mITts.speak(text, queueMode, mCachedParams);
352            } catch (RemoteException e) {
353                // TTS died; restart it.
354                mStarted = false;
355                initTts();
356            } catch (NullPointerException e) {
357                // TTS died; restart it.
358                mStarted = false;
359                initTts();
360            } catch (IllegalStateException e) {
361                // TTS died; restart it.
362                mStarted = false;
363                initTts();
364            } finally {
365              return result;
366            }
367        }
368    }
369
370
371    /**
372     * Plays the earcon using the specified queueing mode and parameters.
373     *
374     * @param earcon
375     *            The earcon that should be played
376     * @param queueMode
377     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
378     * @param params
379     *            The hashmap of parameters to be used.
380     *
381     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
382     */
383    public int playEarcon(String earcon, int queueMode,
384            HashMap<String,String> params) {
385        synchronized (mStartLock) {
386            int result = TTS_ERROR;
387            if (!mStarted) {
388                return result;
389            }
390            try {
391                // TODO support extra parameters, passing null for the moment
392                result = mITts.playEarcon(earcon, queueMode, null);
393            } catch (RemoteException e) {
394                // TTS died; restart it.
395                mStarted = false;
396                initTts();
397            } catch (NullPointerException e) {
398                // TTS died; restart it.
399                mStarted = false;
400                initTts();
401            } catch (IllegalStateException e) {
402                // TTS died; restart it.
403                mStarted = false;
404                initTts();
405            } finally {
406              return result;
407            }
408        }
409    }
410
411    /**
412     * Plays silence for the specified amount of time using the specified
413     * queue mode.
414     *
415     * @param durationInMs
416     *            A long that indicates how long the silence should last.
417     * @param queueMode
418     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
419     *
420     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
421     */
422    public int playSilence(long durationInMs, int queueMode) {
423        synchronized (mStartLock) {
424            int result = TTS_ERROR;
425            if (!mStarted) {
426                return result;
427            }
428            try {
429                // TODO support extra parameters, passing cache of current parameters for the moment
430                result = mITts.playSilence(durationInMs, queueMode, mCachedParams);
431            } catch (RemoteException e) {
432                // TTS died; restart it.
433                mStarted = false;
434                initTts();
435            } catch (NullPointerException e) {
436                // TTS died; restart it.
437                mStarted = false;
438                initTts();
439            } catch (IllegalStateException e) {
440                // TTS died; restart it.
441                mStarted = false;
442                initTts();
443            } finally {
444              return result;
445            }
446        }
447    }
448
449
450    /**
451     * Returns whether or not the TTS is busy speaking.
452     *
453     * @return Whether or not the TTS is busy speaking.
454     */
455    public boolean isSpeaking() {
456        synchronized (mStartLock) {
457            if (!mStarted) {
458                return false;
459            }
460            try {
461                return mITts.isSpeaking();
462            } catch (RemoteException e) {
463                // TTS died; restart it.
464                mStarted = false;
465                initTts();
466            } catch (NullPointerException e) {
467                // TTS died; restart it.
468                mStarted = false;
469                initTts();
470            } catch (IllegalStateException e) {
471                // TTS died; restart it.
472                mStarted = false;
473                initTts();
474            }
475            return false;
476        }
477    }
478
479
480    /**
481     * Stops speech from the TTS.
482     *
483     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
484     */
485    public int stop() {
486        synchronized (mStartLock) {
487            int result = TTS_ERROR;
488            if (!mStarted) {
489                return result;
490            }
491            try {
492                result = mITts.stop();
493            } catch (RemoteException e) {
494                // TTS died; restart it.
495                mStarted = false;
496                initTts();
497            } catch (NullPointerException e) {
498                // TTS died; restart it.
499                mStarted = false;
500                initTts();
501            } catch (IllegalStateException e) {
502                // TTS died; restart it.
503                mStarted = false;
504                initTts();
505            } finally {
506              return result;
507            }
508        }
509    }
510
511
512    /**
513     * Sets the speech rate for the TTS engine.
514     *
515     * Note that the speech rate is not universally supported by all engines and
516     * will be treated as a hint. The TTS library will try to use the specified
517     * speech rate, but there is no guarantee.
518     * This has no effect on any pre-recorded speech.
519     *
520     * @param speechRate
521     *            The speech rate for the TTS engine. 1 is the normal speed,
522     *            lower values slow down the speech (0.5 is half the normal speech rate),
523     *            greater values accelerate it (2 is twice the normal speech rate).
524     *
525     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
526     */
527    public int setSpeechRate(float speechRate) {
528        synchronized (mStartLock) {
529            int result = TTS_ERROR;
530            if (!mStarted) {
531                return result;
532            }
533            try {
534                if (speechRate > 0) {
535                    int rate = (int)(speechRate*100);
536                    mCachedParams[Engine.TTS_PARAM_POSITION_RATE + 1] = String.valueOf(rate);
537                    result = mITts.setSpeechRate(rate);
538                }
539            } catch (RemoteException e) {
540                // TTS died; restart it.
541                mStarted = false;
542                initTts();
543            } finally {
544              return result;
545            }
546        }
547    }
548
549
550    /**
551     * Sets the speech pitch for the TTS engine.
552     *
553     * Note that the pitch is not universally supported by all engines and
554     * will be treated as a hint. The TTS library will try to use the specified
555     * pitch, but there is no guarantee.
556     * This has no effect on any pre-recorded speech.
557     *
558     * @param pitch
559     *            The pitch for the TTS engine. 1 is the normal pitch,
560     *            lower values lower the tone of the synthesized voice,
561     *            greater values increase it.
562     *
563     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
564     */
565    public int setPitch(float pitch) {
566        synchronized (mStartLock) {
567            int result = TTS_ERROR;
568            if (!mStarted) {
569                return result;
570            }
571            try {
572                if (pitch > 0) {
573                    result = mITts.setPitch((int)(pitch*100));
574                }
575            } catch (RemoteException e) {
576                // TTS died; restart it.
577                mStarted = false;
578                initTts();
579            } finally {
580              return result;
581            }
582        }
583    }
584
585
586    /**
587     * Sets the language for the TTS engine.
588     *
589     * Note that the language is not universally supported by all engines and
590     * will be treated as a hint. The TTS library will try to use the specified
591     * language as represented by the Locale, but there is no guarantee.
592     *
593     * @param loc
594     *            The locale describing the language to be used.
595     *
596     * @return Code indicating the support status for the locale. See the TTS_LANG_ codes.
597     */
598    public int setLanguage(Locale loc) {
599        synchronized (mStartLock) {
600            int result = TTS_LANG_NOT_SUPPORTED;
601            if (!mStarted) {
602                return result;
603            }
604            try {
605                mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE + 1] = loc.getISO3Language();
606                mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY + 1] = loc.getISO3Country();
607                mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT + 1] = loc.getVariant();
608                result = mITts.setLanguage(mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE + 1],
609                        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY + 1],
610                        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT + 1] );
611                // TTS died; restart it.
612                mStarted = false;
613                initTts();
614            } finally {
615              return result;
616            }
617        }
618    }
619
620
621    /**
622     * Returns a Locale instance describing the language currently being used by the TTS engine.
623     * @return language, country (if any) and variant (if any) used by the engine stored in a Locale
624     *     instance, or null is the TTS engine has failed.
625     */
626    public Locale getLanguage() {
627        synchronized (mStartLock) {
628            if (!mStarted) {
629                return null;
630            }
631            try {
632                String[] locStrings =  mITts.getLanguage();
633                if (locStrings.length == 3) {
634                    return new Locale(locStrings[0], locStrings[1], locStrings[2]);
635                } else {
636                    return null;
637                }
638            } catch (RemoteException e) {
639                // TTS died; restart it.
640                mStarted = false;
641                initTts();
642            }
643            return null;
644        }
645    }
646
647    /**
648     * Checks if the specified language as represented by the Locale is available.
649     *
650     * @param loc
651     *            The Locale describing the language to be used.
652     *
653     * @return one of TTS_LANG_NOT_SUPPORTED, TTS_LANG_MISSING_DATA, TTS_LANG_AVAILABLE,
654     *         TTS_LANG_COUNTRY_AVAILABLE, TTS_LANG_COUNTRY_VAR_AVAILABLE.
655     */
656    public int isLanguageAvailable(Locale loc) {
657        synchronized (mStartLock) {
658            int result = TTS_LANG_NOT_SUPPORTED;
659            if (!mStarted) {
660                return result;
661            }
662            try {
663                result = mITts.isLanguageAvailable(loc.getISO3Language(),
664                        loc.getISO3Country(), loc.getVariant());
665            } catch (RemoteException e) {
666                // TTS died; restart it.
667                mStarted = false;
668                initTts();
669            } finally {
670              return result;
671            }
672        }
673    }
674
675
676    /**
677     * Synthesizes the given text to a file using the specified parameters.
678     *
679     * @param text
680     *            The String of text that should be synthesized
681     * @param params
682     *            A hashmap of parameters.
683     * @param filename
684     *            The string that gives the full output filename; it should be
685     *            something like "/sdcard/myappsounds/mysound.wav".
686     *
687     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
688     */
689    public int synthesizeToFile(String text, HashMap<String,String> params,
690            String filename) {
691        synchronized (mStartLock) {
692            int result = TTS_ERROR;
693            if (!mStarted) {
694                return result;
695            }
696            try {
697                // TODO support extra parameters, passing null for the moment
698                if (mITts.synthesizeToFile(text, null, filename)){
699                    result = TTS_SUCCESS;
700                }
701            } catch (RemoteException e) {
702                // TTS died; restart it.
703                mStarted = false;
704                initTts();
705            } catch (NullPointerException e) {
706                // TTS died; restart it.
707                mStarted = false;
708                initTts();
709            } catch (IllegalStateException e) {
710                // TTS died; restart it.
711                mStarted = false;
712                initTts();
713            } finally {
714              return result;
715            }
716        }
717    }
718
719}
720