AudioPlaybackHandler.java revision 8d1fc2403b8277e68d7816b2bbf05464a4c7a58a
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.os.Handler;
21import android.os.Looper;
22import android.os.Message;
23import android.speech.tts.SynthesisMessageParams.ListEntry;
24import android.util.Log;
25
26class AudioPlaybackHandler extends Handler {
27    private static final String TAG = "TTS.AudioPlaybackHandler";
28    private static final boolean DBG = false;
29
30    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32    private static final int SYNTHESIS_START = 1;
33    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
34    private static final int SYNTHESIS_COMPLETE_DATA_AVAILABLE = 3;
35    private static final int SYNTHESIS_DONE = 4;
36
37    private static final int PLAY_AUDIO = 5;
38    private static final int PLAY_SILENCE = 6;
39
40    // Accessed by multiple threads, synchronized by "this".
41    private MessageParams mCurrentParams;
42    // Used only for book keeping and error detection.
43    private SynthesisMessageParams mLastSynthesisRequest;
44
45    AudioPlaybackHandler(Looper looper) {
46        super(looper);
47    }
48
49    @Override
50    public synchronized void handleMessage(Message msg) {
51        if (msg.what == SYNTHESIS_START) {
52            mCurrentParams = (SynthesisMessageParams) msg.obj;
53            handleSynthesisStart(msg);
54        } else if (msg.what == SYNTHESIS_DATA_AVAILABLE) {
55            handleSynthesisDataAvailable(msg);
56        } else if (msg.what == SYNTHESIS_DONE) {
57            handleSynthesisDone(msg);
58        } else if (msg.what == SYNTHESIS_COMPLETE_DATA_AVAILABLE) {
59            handleSynthesisCompleteDataAvailable(msg);
60        } else if (msg.what == PLAY_AUDIO) {
61            handleAudio(msg);
62        } else if (msg.what == PLAY_SILENCE) {
63            handleSilence(msg);
64        }
65
66        mCurrentParams = null;
67    }
68
69    /**
70     * Stops all synthesis for a given {@code token}. If the current token
71     * is currently being processed, an effort will be made to stop it but
72     * that is not guaranteed.
73     */
74    synchronized public void stop(MessageParams token) {
75        removeCallbacksAndMessages(token);
76
77        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
78            sendMessageAtFrontOfQueue(obtainMessage(SYNTHESIS_DONE, token));
79        } else if (token == mCurrentParams) {
80            if (token.getType() == MessageParams.TYPE_AUDIO) {
81                ((AudioMessageParams) mCurrentParams).getPlayer().stop();
82            } else if (token.getType() == MessageParams.TYPE_SILENCE) {
83                ((SilenceMessageParams) mCurrentParams).getConditionVariable().open();
84            }
85        }
86    }
87
88    /**
89     * Shut down the audio playback thread.
90     */
91    synchronized public void quit() {
92        if (mCurrentParams != null) {
93            stop(mCurrentParams);
94        }
95        getLooper().quit();
96    }
97
98    void enqueueSynthesisStart(SynthesisMessageParams token) {
99        sendMessage(obtainMessage(SYNTHESIS_START, token));
100    }
101
102    void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
103        sendMessage(obtainMessage(SYNTHESIS_DATA_AVAILABLE, token));
104    }
105
106    void enqueueSynthesisCompleteDataAvailable(SynthesisMessageParams token) {
107        sendMessage(obtainMessage(SYNTHESIS_COMPLETE_DATA_AVAILABLE, token));
108    }
109
110    void enqueueSynthesisDone(SynthesisMessageParams token) {
111        sendMessage(obtainMessage(SYNTHESIS_DONE, token));
112    }
113
114    void enqueueAudio(AudioMessageParams token) {
115        sendMessage(obtainMessage(PLAY_AUDIO, token));
116    }
117
118    void enqueueSilence(SilenceMessageParams token) {
119        sendMessage(obtainMessage(PLAY_SILENCE, token));
120    }
121
122    // -----------------------------------------
123    // End of public API methods.
124    // -----------------------------------------
125
126    // Currently implemented as blocking the audio playback thread for the
127    // specified duration. If a call to stop() is made, the thread
128    // unblocks.
129    private void handleSilence(Message msg) {
130        if (DBG) Log.d(TAG, "handleSilence()");
131        SilenceMessageParams params = (SilenceMessageParams) msg.obj;
132        if (params.getSilenceDurationMs() > 0) {
133            params.getConditionVariable().block(params.getSilenceDurationMs());
134        }
135        params.getDispatcher().dispatchUtteranceCompleted();
136        if (DBG) Log.d(TAG, "handleSilence() done.");
137    }
138
139    // Plays back audio from a given URI. No TTS engine involvement here.
140    private void handleAudio(Message msg) {
141        if (DBG) Log.d(TAG, "handleAudio()");
142        AudioMessageParams params = (AudioMessageParams) msg.obj;
143        // Note that the BlockingMediaPlayer spawns a separate thread.
144        //
145        // TODO: This can be avoided.
146        params.getPlayer().startAndWait();
147        params.getDispatcher().dispatchUtteranceCompleted();
148        if (DBG) Log.d(TAG, "handleAudio() done.");
149    }
150
151    // Denotes the start of a new synthesis request. We create a new
152    // audio track, and prepare it for incoming data.
153    //
154    // Note that since all TTS synthesis happens on a single thread, we
155    // should ALWAYS see the following order :
156    //
157    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
158    // OR
159    // handleSynthesisCompleteDataAvailable.
160    private void handleSynthesisStart(Message msg) {
161        if (DBG) Log.d(TAG, "handleSynthesisStart()");
162        final SynthesisMessageParams param = (SynthesisMessageParams) msg.obj;
163
164        // Oops, looks like the engine forgot to call done(). We go through
165        // extra trouble to clean the data to prevent the AudioTrack resources
166        // from being leaked.
167        if (mLastSynthesisRequest != null) {
168            Log.w(TAG, "Error : Missing call to done() for request : " +
169                    mLastSynthesisRequest);
170            handleSynthesisDone(mLastSynthesisRequest);
171        }
172
173        mLastSynthesisRequest = param;
174
175        // Create the audio track.
176        final AudioTrack audioTrack = createStreamingAudioTrack(
177                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
178                param.mChannelCount, param.mVolume, param.mPan);
179
180        param.setAudioTrack(audioTrack);
181    }
182
183    // More data available to be flushed to the audio track.
184    private void handleSynthesisDataAvailable(Message msg) {
185        final SynthesisMessageParams param = (SynthesisMessageParams) msg.obj;
186        if (param.getAudioTrack() == null) {
187            Log.w(TAG, "Error : null audio track in handleDataAvailable.");
188            return;
189        }
190
191        if (param != mLastSynthesisRequest) {
192            Log.e(TAG, "Call to dataAvailable without done() / start()");
193            return;
194        }
195
196        final AudioTrack audioTrack = param.getAudioTrack();
197        final ListEntry bufferCopy = param.getNextBuffer();
198
199        if (bufferCopy == null) {
200            Log.e(TAG, "No buffers available to play.");
201            return;
202        }
203
204        int playState = audioTrack.getPlayState();
205        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
206            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
207            audioTrack.play();
208        }
209        int count = 0;
210        while (count < bufferCopy.mLength) {
211            // Note that we don't take bufferCopy.mOffset into account because
212            // it is guaranteed to be 0.
213            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
214            if (written <= 0) {
215                break;
216            }
217            count += written;
218        }
219    }
220
221    private void handleSynthesisDone(Message msg) {
222        final SynthesisMessageParams params = (SynthesisMessageParams) msg.obj;
223        handleSynthesisDone(params);
224    }
225
226    // Flush all remaining data to the audio track, stop it and release
227    // all it's resources.
228    private void handleSynthesisDone(SynthesisMessageParams params) {
229        if (DBG) Log.d(TAG, "handleSynthesisDone()");
230        final AudioTrack audioTrack = params.getAudioTrack();
231
232        try {
233            if (audioTrack != null) {
234                audioTrack.flush();
235                audioTrack.stop();
236                audioTrack.release();
237            }
238        } finally {
239            params.setAudioTrack(null);
240            params.getDispatcher().dispatchUtteranceCompleted();
241            mLastSynthesisRequest = null;
242        }
243    }
244
245    private void handleSynthesisCompleteDataAvailable(Message msg) {
246        final SynthesisMessageParams params = (SynthesisMessageParams) msg.obj;
247        if (DBG) Log.d(TAG, "completeAudioAvailable(" + params + ")");
248
249        // Channel config and bytes per frame are checked before
250        // this message is sent.
251        int channelConfig = AudioPlaybackHandler.getChannelConfig(params.mChannelCount);
252        int bytesPerFrame = AudioPlaybackHandler.getBytesPerFrame(params.mAudioFormat);
253
254        ListEntry entry = params.getNextBuffer();
255
256        if (entry == null) {
257            Log.w(TAG, "completeDataAvailable : No buffers available to play.");
258            return;
259        }
260
261        final AudioTrack audioTrack = new AudioTrack(params.mStreamType, params.mSampleRateInHz,
262                channelConfig, params.mAudioFormat, entry.mLength, AudioTrack.MODE_STATIC);
263
264        // So that handleDone can access this correctly.
265        params.mAudioTrack = audioTrack;
266
267        try {
268            audioTrack.write(entry.mBytes, entry.mOffset, entry.mLength);
269            setupVolume(audioTrack, params.mVolume, params.mPan);
270            audioTrack.play();
271            blockUntilDone(audioTrack, bytesPerFrame, entry.mLength);
272            if (DBG) Log.d(TAG, "Wrote data to audio track successfully : " + entry.mLength);
273        } catch (IllegalStateException ex) {
274            Log.e(TAG, "Playback error", ex);
275        } finally {
276            handleSynthesisDone(msg);
277        }
278    }
279
280
281    private static void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) {
282        int lengthInFrames = length / bytesPerFrame;
283        int currentPosition = 0;
284        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
285            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
286                    audioTrack.getSampleRate();
287            audioTrack.getPlayState();
288            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
289                    " Playback position : " + currentPosition);
290            try {
291                Thread.sleep(estimatedTimeMs);
292            } catch (InterruptedException ie) {
293                break;
294            }
295        }
296    }
297
298    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
299            int audioFormat, int channelCount, float volume, float pan) {
300        int channelConfig = getChannelConfig(channelCount);
301
302        int minBufferSizeInBytes
303                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
304        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
305
306        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
307                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
308        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
309            Log.w(TAG, "Unable to create audio track.");
310            audioTrack.release();
311            return null;
312        }
313
314        setupVolume(audioTrack, volume, pan);
315        return audioTrack;
316    }
317
318    static int getChannelConfig(int channelCount) {
319        if (channelCount == 1) {
320            return AudioFormat.CHANNEL_OUT_MONO;
321        } else if (channelCount == 2){
322            return AudioFormat.CHANNEL_OUT_STEREO;
323        }
324
325        return 0;
326    }
327
328    static int getBytesPerFrame(int audioFormat) {
329        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
330            return 1;
331        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
332            return 2;
333        }
334
335        return -1;
336    }
337
338    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
339        float vol = clip(volume, 0.0f, 1.0f);
340        float panning = clip(pan, -1.0f, 1.0f);
341        float volLeft = vol;
342        float volRight = vol;
343        if (panning > 0.0f) {
344            volLeft *= (1.0f - panning);
345        } else if (panning < 0.0f) {
346            volRight *= (1.0f + panning);
347        }
348        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
349        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
350            Log.e(TAG, "Failed to set volume");
351        }
352    }
353
354    private static float clip(float value, float min, float max) {
355        return value > max ? max : (value < min ? min : value);
356    }
357
358}
359