TextToSpeech.java revision c8ba3b560ce0ce5944939a50d61f639fdcaa0015
1/*
2 * Copyright (C) 2009 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.speech.tts.ITts;
19import android.speech.tts.ITtsCallback;
20
21import android.content.ComponentName;
22import android.content.Context;
23import android.content.Intent;
24import android.content.ServiceConnection;
25import android.os.IBinder;
26import android.os.RemoteException;
27import android.util.Log;
28
29import java.util.HashMap;
30import java.util.Locale;
31
32/**
33 *
34 * Synthesizes speech from text for immediate playback or to create a sound file.
35 *
36 */
37//TODO complete javadoc + add links to constants
38public class TextToSpeech {
39
40    /**
41     * Denotes a successful operation.
42     */
43    public static final int TTS_SUCCESS                = 0;
44    /**
45     * Denotes a generic operation failure.
46     */
47    public static final int TTS_ERROR                  = -1;
48
49    /**
50     * Queue mode where all entries in the playback queue (media to be played
51     * and text to be synthesized) are dropped and replaced by the new entry.
52     */
53    public static final int TTS_QUEUE_FLUSH = 0;
54    /**
55     * Queue mode where the new entry is added at the end of the playback queue.
56     */
57    public static final int TTS_QUEUE_ADD = 1;
58
59
60    /**
61     * Denotes the language is available exactly as specified by the locale
62     */
63    public static final int TTS_LANG_COUNTRY_VAR_AVAILABLE = 2;
64
65
66    /**
67     * Denotes the language is available for the language and country specified
68     * by the locale, but not the variant.
69     */
70    public static final int TTS_LANG_COUNTRY_AVAILABLE = 1;
71
72
73    /**
74     * Denotes the language is available for the language by the locale,
75     * but not the country and variant.
76     */
77    public static final int TTS_LANG_AVAILABLE = 0;
78
79    /**
80     * Denotes the language data is missing.
81     */
82    public static final int TTS_LANG_MISSING_DATA = -1;
83
84    /**
85     * Denotes the language is not supported by the current TTS engine.
86     */
87    public static final int TTS_LANG_NOT_SUPPORTED = -2;
88
89
90    /**
91     * Called when the TTS has initialized.
92     *
93     * The InitListener must implement the onInit function. onInit is passed a
94     * status code indicating the result of the TTS initialization.
95     */
96    public interface OnInitListener {
97        public void onInit(int status);
98    }
99
100    /**
101     * Internal constants for the TTS functionality
102     *
103     * {@hide}
104     */
105    public class Engine {
106        // default values for a TTS engine when settings are not found in the provider
107        public static final int FALLBACK_TTS_DEFAULT_RATE = 100; // 1x
108        public static final int FALLBACK_TTS_DEFAULT_PITCH = 100;// 1x
109        public static final int FALLBACK_TTS_USE_DEFAULTS = 0; // false
110        public static final String FALLBACK_TTS_DEFAULT_LANG = "eng";
111        public static final String FALLBACK_TTS_DEFAULT_COUNTRY = "";
112        public static final String FALLBACK_TTS_DEFAULT_VARIANT = "";
113
114        // return codes for a TTS engine's check data activity
115        public static final int CHECK_VOICE_DATA_PASS = 1;
116        public static final int CHECK_VOICE_DATA_FAIL = 0;
117        public static final int CHECK_VOICE_DATA_BAD_DATA = -1;
118        public static final int CHECK_VOICE_DATA_MISSING_DATA = -2;
119        public static final int CHECK_VOICE_DATA_MISSING_DATA_NO_SDCARD = -3;
120
121        // return codes for a TTS engine's check data activity
122        public static final String VOICE_DATA_ROOT_DIRECTORY = "dataRoot";
123        public static final String VOICE_DATA_FILES = "dataFiles";
124        public static final String VOICE_DATA_FILES_INFO = "dataFilesInfo";
125
126        // keys for the parameters passed with speak commands
127        public static final String TTS_KEY_PARAM_RATE = "rate";
128        public static final String TTS_KEY_PARAM_LANGUAGE = "language";
129        public static final String TTS_KEY_PARAM_COUNTRY = "country";
130        public static final String TTS_KEY_PARAM_VARIANT = "variant";
131        public static final int TTS_PARAM_POSITION_RATE = 0;
132        public static final int TTS_PARAM_POSITION_LANGUAGE = 2;
133        public static final int TTS_PARAM_POSITION_COUNTRY = 4;
134        public static final int TTS_PARAM_POSITION_VARIANT = 6;
135    }
136
137    /**
138     * Connection needed for the TTS.
139     */
140    private ServiceConnection mServiceConnection;
141
142    private ITts mITts = null;
143    private Context mContext = null;
144    private OnInitListener mInitListener = null;
145    private boolean mStarted = false;
146    private final Object mStartLock = new Object();
147    private int mCachedRate = Engine.FALLBACK_TTS_DEFAULT_RATE;
148    private String mCachedLang = Engine.FALLBACK_TTS_DEFAULT_LANG;
149    private String mCachedCountry = Engine.FALLBACK_TTS_DEFAULT_COUNTRY;
150    private String mCachedVariant = Engine.FALLBACK_TTS_DEFAULT_VARIANT;
151    private String[] mCachedParams;
152
153    /**
154     * The constructor for the TTS.
155     *
156     * @param context
157     *            The context
158     * @param listener
159     *            The InitListener that will be called when the TTS has
160     *            initialized successfully.
161     */
162    public TextToSpeech(Context context, OnInitListener listener) {
163        mContext = context;
164        mInitListener = listener;
165
166        mCachedParams = new String[2*4]; //4 parameters, store key and value
167        mCachedParams[Engine.TTS_PARAM_POSITION_RATE] = Engine.TTS_KEY_PARAM_RATE;
168        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE] = Engine.TTS_KEY_PARAM_LANGUAGE;
169        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY] = Engine.TTS_KEY_PARAM_COUNTRY;
170        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT] = Engine.TTS_KEY_PARAM_VARIANT;
171        updateCachedParamArray();
172
173        initTts();
174    }
175
176
177    private void updateCachedParamArray() {
178        mCachedParams[Engine.TTS_PARAM_POSITION_RATE+1] = String.valueOf(mCachedRate);
179        mCachedParams[Engine.TTS_PARAM_POSITION_LANGUAGE+1] = mCachedLang;
180        mCachedParams[Engine.TTS_PARAM_POSITION_COUNTRY+1] = mCachedCountry;
181        mCachedParams[Engine.TTS_PARAM_POSITION_VARIANT+1] = mCachedVariant;
182    }
183
184
185    private void initTts() {
186        mStarted = false;
187
188        // Initialize the TTS, run the callback after the binding is successful
189        mServiceConnection = new ServiceConnection() {
190            public void onServiceConnected(ComponentName name, IBinder service) {
191                synchronized(mStartLock) {
192                    mITts = ITts.Stub.asInterface(service);
193                    mStarted = true;
194                    if (mInitListener != null) {
195                        // TODO manage failures and missing resources
196                        mInitListener.onInit(TTS_SUCCESS);
197                    }
198                }
199            }
200
201            public void onServiceDisconnected(ComponentName name) {
202                synchronized(mStartLock) {
203                    mITts = null;
204                    mInitListener = null;
205                    mStarted = false;
206                }
207            }
208        };
209
210        Intent intent = new Intent("android.intent.action.START_TTS_SERVICE");
211        intent.addCategory("android.intent.category.TTS");
212        mContext.bindService(intent, mServiceConnection,
213                Context.BIND_AUTO_CREATE);
214        // TODO handle case where the binding works (should always work) but
215        //      the plugin fails
216    }
217
218
219    /**
220     * Shuts down the TTS. It is good practice to call this in the onDestroy
221     * method of the Activity that is using the TTS so that the TTS is stopped
222     * cleanly.
223     */
224    public void shutdown() {
225        try {
226            mContext.unbindService(mServiceConnection);
227        } catch (IllegalArgumentException e) {
228            // Do nothing and fail silently since an error here indicates that
229            // binding never succeeded in the first place.
230        }
231    }
232
233
234    /**
235     * Adds a mapping between a string of text and a sound resource in a
236     * package.
237     *
238     * @see #TTS.speak(String text, int queueMode, String[] params)
239     *
240     * @param text
241     *            Example: <b><code>"south_south_east"</code></b><br/>
242     *
243     * @param packagename
244     *            Pass the packagename of the application that contains the
245     *            resource. If the resource is in your own application (this is
246     *            the most common case), then put the packagename of your
247     *            application here.<br/>
248     *            Example: <b>"com.google.marvin.compass"</b><br/>
249     *            The packagename can be found in the AndroidManifest.xml of
250     *            your application.
251     *            <p>
252     *            <code>&lt;manifest xmlns:android=&quot;...&quot;
253     *      package=&quot;<b>com.google.marvin.compass</b>&quot;&gt;</code>
254     *            </p>
255     *
256     * @param resourceId
257     *            Example: <b><code>R.raw.south_south_east</code></b>
258     *
259     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
260     */
261    public int addSpeech(String text, String packagename, int resourceId) {
262        synchronized(mStartLock) {
263            if (!mStarted) {
264                return TTS_ERROR;
265            }
266            try {
267                mITts.addSpeech(text, packagename, resourceId);
268                return TTS_SUCCESS;
269            } catch (RemoteException e) {
270                // TTS died; restart it.
271                mStarted = false;
272                initTts();
273            } catch (NullPointerException e) {
274                // TTS died; restart it.
275                mStarted = false;
276                initTts();
277            } catch (IllegalStateException e) {
278                // TTS died; restart it.
279                mStarted = false;
280                initTts();
281            }
282            return TTS_ERROR;
283        }
284    }
285
286
287    /**
288     * Adds a mapping between a string of text and a sound file. Using this, it
289     * is possible to add custom pronounciations for text.
290     *
291     * @param text
292     *            The string of text
293     * @param filename
294     *            The full path to the sound file (for example:
295     *            "/sdcard/mysounds/hello.wav")
296     *
297     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
298     */
299    public int addSpeech(String text, String filename) {
300        synchronized (mStartLock) {
301            if (!mStarted) {
302                return TTS_ERROR;
303            }
304            try {
305                mITts.addSpeechFile(text, filename);
306                return TTS_SUCCESS;
307            } catch (RemoteException e) {
308                // TTS died; restart it.
309                mStarted = false;
310                initTts();
311            } catch (NullPointerException e) {
312                // TTS died; restart it.
313                mStarted = false;
314                initTts();
315            } catch (IllegalStateException e) {
316                // TTS died; restart it.
317                mStarted = false;
318                initTts();
319            }
320            return TTS_ERROR;
321        }
322    }
323
324
325    /**
326     * Speaks the string using the specified queuing strategy and speech
327     * parameters. Note that the speech parameters are not universally supported
328     * by all engines and will be treated as a hint. The TTS library will try to
329     * fulfill these parameters as much as possible, but there is no guarantee
330     * that the voice used will have the properties specified.
331     *
332     * @param text
333     *            The string of text to be spoken.
334     * @param queueMode
335     *            The queuing strategy to use.
336     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
337     * @param params
338     *            The hashmap of speech parameters to be used.
339     *
340     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
341     */
342    public int speak(String text, int queueMode, HashMap<String,String> params)
343    {
344        synchronized (mStartLock) {
345            int result = TTS_ERROR;
346            Log.i("TTS received: ", text);
347            if (!mStarted) {
348                return result;
349            }
350            try {
351                // TODO support extra parameters, passing cache of current parameters for the moment
352                result = mITts.speak(text, queueMode, mCachedParams);
353            } catch (RemoteException e) {
354                // TTS died; restart it.
355                mStarted = false;
356                initTts();
357            } catch (NullPointerException e) {
358                // TTS died; restart it.
359                mStarted = false;
360                initTts();
361            } catch (IllegalStateException e) {
362                // TTS died; restart it.
363                mStarted = false;
364                initTts();
365            } finally {
366              return result;
367            }
368        }
369    }
370
371
372    /**
373     * Plays the earcon using the specified queueing mode and parameters.
374     *
375     * @param earcon
376     *            The earcon that should be played
377     * @param queueMode
378     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
379     * @param params
380     *            The hashmap of parameters to be used.
381     *
382     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
383     */
384    public int playEarcon(String earcon, int queueMode,
385            HashMap<String,String> params) {
386        synchronized (mStartLock) {
387            int result = TTS_ERROR;
388            if (!mStarted) {
389                return result;
390            }
391            try {
392                // TODO support extra parameters, passing null for the moment
393                result = mITts.playEarcon(earcon, queueMode, null);
394            } catch (RemoteException e) {
395                // TTS died; restart it.
396                mStarted = false;
397                initTts();
398            } catch (NullPointerException e) {
399                // TTS died; restart it.
400                mStarted = false;
401                initTts();
402            } catch (IllegalStateException e) {
403                // TTS died; restart it.
404                mStarted = false;
405                initTts();
406            } finally {
407              return result;
408            }
409        }
410    }
411
412    /**
413     * Plays silence for the specified amount of time using the specified
414     * queue mode.
415     *
416     * @param durationInMs
417     *            A long that indicates how long the silence should last.
418     * @param queueMode
419     *            See TTS_QUEUE_ADD and TTS_QUEUE_FLUSH.
420     *
421     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
422     */
423    public int playSilence(long durationInMs, int queueMode) {
424        synchronized (mStartLock) {
425            int result = TTS_ERROR;
426            if (!mStarted) {
427                return result;
428            }
429            try {
430                // TODO support extra parameters, passing cache of current parameters for the moment
431                result = mITts.playSilence(durationInMs, queueMode, mCachedParams);
432            } catch (RemoteException e) {
433                // TTS died; restart it.
434                mStarted = false;
435                initTts();
436            } catch (NullPointerException e) {
437                // TTS died; restart it.
438                mStarted = false;
439                initTts();
440            } catch (IllegalStateException e) {
441                // TTS died; restart it.
442                mStarted = false;
443                initTts();
444            } finally {
445              return result;
446            }
447        }
448    }
449
450
451    /**
452     * Returns whether or not the TTS is busy speaking.
453     *
454     * @return Whether or not the TTS is busy speaking.
455     */
456    public boolean isSpeaking() {
457        synchronized (mStartLock) {
458            if (!mStarted) {
459                return false;
460            }
461            try {
462                return mITts.isSpeaking();
463            } catch (RemoteException e) {
464                // TTS died; restart it.
465                mStarted = false;
466                initTts();
467            } catch (NullPointerException e) {
468                // TTS died; restart it.
469                mStarted = false;
470                initTts();
471            } catch (IllegalStateException e) {
472                // TTS died; restart it.
473                mStarted = false;
474                initTts();
475            }
476            return false;
477        }
478    }
479
480
481    /**
482     * Stops speech from the TTS.
483     *
484     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
485     */
486    public int stop() {
487        synchronized (mStartLock) {
488            int result = TTS_ERROR;
489            if (!mStarted) {
490                return result;
491            }
492            try {
493                result = mITts.stop();
494            } catch (RemoteException e) {
495                // TTS died; restart it.
496                mStarted = false;
497                initTts();
498            } catch (NullPointerException e) {
499                // TTS died; restart it.
500                mStarted = false;
501                initTts();
502            } catch (IllegalStateException e) {
503                // TTS died; restart it.
504                mStarted = false;
505                initTts();
506            } finally {
507              return result;
508            }
509        }
510    }
511
512
513    /**
514     * Sets the speech rate for the TTS engine.
515     *
516     * Note that the speech rate is not universally supported by all engines and
517     * will be treated as a hint. The TTS library will try to use the specified
518     * speech rate, but there is no guarantee.
519     * This has no effect on any pre-recorded speech.
520     *
521     * @param speechRate
522     *            The speech rate for the TTS engine. 1 is the normal speed,
523     *            lower values slow down the speech (0.5 is half the normal speech rate),
524     *            greater values accelerate it (2 is twice the normal speech rate).
525     *
526     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
527     */
528    public int setSpeechRate(float speechRate) {
529        synchronized (mStartLock) {
530            int result = TTS_ERROR;
531            if (!mStarted) {
532                return result;
533            }
534            try {
535                if (speechRate > 0) {
536                    mCachedRate = (int)(speechRate*100);
537                    updateCachedParamArray();
538                    result = mITts.setSpeechRate(mCachedRate);
539                }
540            } catch (RemoteException e) {
541                // TTS died; restart it.
542                mStarted = false;
543                initTts();
544            } finally {
545              return result;
546            }
547        }
548    }
549
550
551    /**
552     * Sets the speech pitch for the TTS engine.
553     *
554     * Note that the pitch is not universally supported by all engines and
555     * will be treated as a hint. The TTS library will try to use the specified
556     * pitch, but there is no guarantee.
557     * This has no effect on any pre-recorded speech.
558     *
559     * @param pitch
560     *            The pitch for the TTS engine. 1 is the normal pitch,
561     *            lower values lower the tone of the synthesized voice,
562     *            greater values increase it.
563     *
564     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
565     */
566    public int setPitch(float pitch) {
567        synchronized (mStartLock) {
568            int result = TTS_ERROR;
569            if (!mStarted) {
570                return result;
571            }
572            try {
573                if (pitch > 0) {
574                    result = mITts.setPitch((int)(pitch*100));
575                }
576            } catch (RemoteException e) {
577                // TTS died; restart it.
578                mStarted = false;
579                initTts();
580            } finally {
581              return result;
582            }
583        }
584    }
585
586
587    /**
588     * Sets the language for the TTS engine.
589     *
590     * Note that the language is not universally supported by all engines and
591     * will be treated as a hint. The TTS library will try to use the specified
592     * language as represented by the Locale, but there is no guarantee.
593     *
594     * @param loc
595     *            The locale describing the language to be used.
596     *
597     * @return Code indicating the support status for the locale. See the TTS_LANG_ codes.
598     */
599    public int setLanguage(Locale loc) {
600        synchronized (mStartLock) {
601            int result = TTS_LANG_NOT_SUPPORTED;
602            if (!mStarted) {
603                return result;
604            }
605            try {
606                mCachedLang = loc.getISO3Language();
607                mCachedCountry = loc.getISO3Country();
608                mCachedVariant = loc.getVariant();
609                updateCachedParamArray();
610                result = mITts.setLanguage(mCachedLang, mCachedCountry, mCachedVariant);
611            } catch (RemoteException e) {
612                // TTS died; restart it.
613                mStarted = false;
614                initTts();
615            } finally {
616              return result;
617            }
618        }
619    }
620
621
622    /**
623     * Returns a Locale instance describing the language currently being used by the TTS engine.
624     * @return language, country (if any) and variant (if any) used by the engine stored in a Locale
625     *     instance, or null is the TTS engine has failed.
626     */
627    public Locale getLanguage() {
628        synchronized (mStartLock) {
629            if (!mStarted) {
630                return null;
631            }
632            try {
633                String[] locStrings =  mITts.getLanguage();
634                if (locStrings.length == 3) {
635                    return new Locale(locStrings[0], locStrings[1], locStrings[2]);
636                } else {
637                    return null;
638                }
639            } catch (RemoteException e) {
640                // TTS died; restart it.
641                mStarted = false;
642                initTts();
643            }
644            return null;
645        }
646    }
647
648    /**
649     * Checks if the specified language as represented by the Locale is available.
650     *
651     * @param loc
652     *            The Locale describing the language to be used.
653     *
654     * @return one of TTS_LANG_NOT_SUPPORTED, TTS_LANG_MISSING_DATA, TTS_LANG_AVAILABLE,
655     *         TTS_LANG_COUNTRY_AVAILABLE, TTS_LANG_COUNTRY_VAR_AVAILABLE.
656     */
657    public int isLanguageAvailable(Locale loc) {
658        synchronized (mStartLock) {
659            int result = TTS_LANG_NOT_SUPPORTED;
660            if (!mStarted) {
661                return result;
662            }
663            try {
664                result = mITts.isLanguageAvailable(loc.getISO3Language(),
665                        loc.getISO3Country(), loc.getVariant());
666            } catch (RemoteException e) {
667                // TTS died; restart it.
668                mStarted = false;
669                initTts();
670            } finally {
671              return result;
672            }
673        }
674    }
675
676
677    /**
678     * Synthesizes the given text to a file using the specified parameters.
679     *
680     * @param text
681     *            The String of text that should be synthesized
682     * @param params
683     *            A hashmap of parameters.
684     * @param filename
685     *            The string that gives the full output filename; it should be
686     *            something like "/sdcard/myappsounds/mysound.wav".
687     *
688     * @return Code indicating success or failure. See TTS_ERROR and TTS_SUCCESS.
689     */
690    public int synthesizeToFile(String text, HashMap<String,String> params,
691            String filename) {
692        synchronized (mStartLock) {
693            int result = TTS_ERROR;
694            if (!mStarted) {
695                return result;
696            }
697            try {
698                // TODO support extra parameters, passing null for the moment
699                if (mITts.synthesizeToFile(text, null, filename)){
700                    result = TTS_SUCCESS;
701                }
702            } catch (RemoteException e) {
703                // TTS died; restart it.
704                mStarted = false;
705                initTts();
706            } catch (NullPointerException e) {
707                // TTS died; restart it.
708                mStarted = false;
709                initTts();
710            } catch (IllegalStateException e) {
711                // TTS died; restart it.
712                mStarted = false;
713                initTts();
714            } finally {
715              return result;
716            }
717        }
718    }
719
720}
721