AudioPlaybackHandler.java revision 47d6288541324b27c80b9949670f7b6b18d3ae4c
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.util.Log;
21
22import java.util.Iterator;
23import java.util.concurrent.PriorityBlockingQueue;
24import java.util.concurrent.atomic.AtomicLong;
25
26class AudioPlaybackHandler {
27    private static final String TAG = "TTS.AudioPlaybackHandler";
28    private static final boolean DBG = false;
29
30    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32    private static final int SYNTHESIS_START = 1;
33    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
34    private static final int SYNTHESIS_COMPLETE_DATA_AVAILABLE = 3;
35    private static final int SYNTHESIS_DONE = 4;
36
37    private static final int PLAY_AUDIO = 5;
38    private static final int PLAY_SILENCE = 6;
39
40    private static final int SHUTDOWN = -1;
41
42    private static final int DEFAULT_PRIORITY = 1;
43    private static final int HIGH_PRIORITY = 0;
44
45    private final PriorityBlockingQueue<ListEntry> mQueue =
46            new PriorityBlockingQueue<ListEntry>();
47    private final Thread mHandlerThread;
48
49    private volatile MessageParams mCurrentParams = null;
50    // Used only for book keeping and error detection.
51    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
52    // Used to order incoming messages in our priority queue.
53    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
54
55
56    AudioPlaybackHandler() {
57        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
58    }
59
60    public void start() {
61        mHandlerThread.start();
62    }
63
64    /**
65     * Stops all synthesis for a given {@code token}. If the current token
66     * is currently being processed, an effort will be made to stop it but
67     * that is not guaranteed.
68     */
69    synchronized public void stop(MessageParams token) {
70        if (token == null) {
71            return;
72        }
73
74        removeMessages(token);
75
76        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
77            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
78            if (current != null) {
79                // Stop the current audio track if it's still playing.
80                // The audio track is thread safe in this regard.
81                current.stop();
82            }
83            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
84        } else  {
85            final MessageParams current = getCurrentParams();
86
87            if (current != null) {
88                if (token.getType() == MessageParams.TYPE_AUDIO) {
89                    ((AudioMessageParams) current).getPlayer().stop();
90                } else if (token.getType() == MessageParams.TYPE_SILENCE) {
91                    ((SilenceMessageParams) current).getConditionVariable().open();
92                }
93            }
94        }
95    }
96
97    synchronized public void removePlaybackItems(String callingApp) {
98        removeMessages(callingApp);
99        stop(getCurrentParams());
100    }
101
102    synchronized public void removeAllItems() {
103        removeAllMessages();
104        stop(getCurrentParams());
105    }
106
107    /**
108     * Shut down the audio playback thread.
109     */
110    synchronized public void quit() {
111        stop(getCurrentParams());
112        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
113    }
114
115    void enqueueSynthesisStart(SynthesisMessageParams token) {
116        mQueue.add(new ListEntry(SYNTHESIS_START, token));
117    }
118
119    void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
120        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
121    }
122
123    void enqueueSynthesisCompleteDataAvailable(SynthesisMessageParams token) {
124        mQueue.add(new ListEntry(SYNTHESIS_COMPLETE_DATA_AVAILABLE, token));
125    }
126
127    void enqueueSynthesisDone(SynthesisMessageParams token) {
128        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
129    }
130
131    void enqueueAudio(AudioMessageParams token) {
132        mQueue.add(new ListEntry(PLAY_AUDIO, token));
133    }
134
135    void enqueueSilence(SilenceMessageParams token) {
136        mQueue.add(new ListEntry(PLAY_SILENCE, token));
137    }
138
139    // -----------------------------------------
140    // End of public API methods.
141    // -----------------------------------------
142
143    // -----------------------------------------
144    // Methods for managing the message queue.
145    // -----------------------------------------
146
147    /*
148     * The MessageLoop is a handler like implementation that
149     * processes messages from a priority queue.
150     */
151    private final class MessageLoop implements Runnable {
152        @Override
153        public void run() {
154            while (true) {
155                ListEntry entry = null;
156                try {
157                    entry = mQueue.take();
158                } catch (InterruptedException ie) {
159                    return;
160                }
161
162                if (entry.mWhat == SHUTDOWN) {
163                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
164                    return;
165                }
166
167                if (DBG) {
168                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
169                            + " ,seqId : " + entry.mSequenceId);
170                }
171
172                setCurrentParams(entry.mMessage);
173                handleMessage(entry);
174                setCurrentParams(null);
175            }
176        }
177    }
178
179    /*
180     * Remove all messages from the queue that contain the supplied token.
181     * Note that the Iterator is thread safe, and other methods can safely
182     * continue adding to the queue at this point.
183     */
184    synchronized private void removeMessages(MessageParams token) {
185        if (token == null) {
186            return;
187        }
188
189        Iterator<ListEntry> it = mQueue.iterator();
190
191        while (it.hasNext()) {
192            final ListEntry current = it.next();
193            if (current.mMessage == token) {
194                it.remove();
195            }
196        }
197    }
198
199    /*
200     * Atomically clear the queue of all messages.
201     */
202    synchronized private void removeAllMessages() {
203        mQueue.clear();
204    }
205
206    /*
207     * Remove all messages that originate from a given calling app.
208     */
209    synchronized private void removeMessages(String callingApp) {
210        Iterator<ListEntry> it = mQueue.iterator();
211
212        while (it.hasNext()) {
213            final ListEntry current = it.next();
214            // The null check is to prevent us from removing control messages,
215            // such as a shutdown message.
216            if (current.mMessage != null &&
217                    callingApp.equals(current.mMessage.getCallingApp())) {
218                it.remove();
219            }
220        }
221    }
222
223    /*
224     * An element of our priority queue of messages. Each message has a priority,
225     * and a sequence id (defined by the order of enqueue calls). Among messages
226     * with the same priority, messages that were received earlier win out.
227     */
228    private final class ListEntry implements Comparable<ListEntry> {
229        final int mWhat;
230        final MessageParams mMessage;
231        final int mPriority;
232        final long mSequenceId;
233
234        private ListEntry(int what, MessageParams message) {
235            this(what, message, DEFAULT_PRIORITY);
236        }
237
238        private ListEntry(int what, MessageParams message, int priority) {
239            mWhat = what;
240            mMessage = message;
241            mPriority = priority;
242            mSequenceId = mSequenceIdCtr.incrementAndGet();
243        }
244
245        @Override
246        public int compareTo(ListEntry that) {
247            if (that == this) {
248                return 0;
249            }
250
251            // Note that this is always 0, 1 or -1.
252            int priorityDiff = mPriority - that.mPriority;
253            if (priorityDiff == 0) {
254                // The == case cannot occur.
255                return (mSequenceId < that.mSequenceId) ? -1 : 1;
256            }
257
258            return priorityDiff;
259        }
260    }
261
262    private void setCurrentParams(MessageParams p) {
263        mCurrentParams = p;
264    }
265
266    private MessageParams getCurrentParams() {
267        return mCurrentParams;
268    }
269
270    // -----------------------------------------
271    // Methods for dealing with individual messages, the methods
272    // below do the actual work.
273    // -----------------------------------------
274
275    private void handleMessage(ListEntry entry) {
276        final MessageParams msg = entry.mMessage;
277        if (entry.mWhat == SYNTHESIS_START) {
278            handleSynthesisStart(msg);
279        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
280            handleSynthesisDataAvailable(msg);
281        } else if (entry.mWhat == SYNTHESIS_DONE) {
282            handleSynthesisDone(msg);
283        } else if (entry.mWhat == SYNTHESIS_COMPLETE_DATA_AVAILABLE) {
284            handleSynthesisCompleteDataAvailable(msg);
285        } else if (entry.mWhat == PLAY_AUDIO) {
286            handleAudio(msg);
287        } else if (entry.mWhat == PLAY_SILENCE) {
288            handleSilence(msg);
289        }
290    }
291
292    // Currently implemented as blocking the audio playback thread for the
293    // specified duration. If a call to stop() is made, the thread
294    // unblocks.
295    private void handleSilence(MessageParams msg) {
296        if (DBG) Log.d(TAG, "handleSilence()");
297        SilenceMessageParams params = (SilenceMessageParams) msg;
298        if (params.getSilenceDurationMs() > 0) {
299            params.getConditionVariable().block(params.getSilenceDurationMs());
300        }
301        params.getDispatcher().dispatchUtteranceCompleted();
302        if (DBG) Log.d(TAG, "handleSilence() done.");
303    }
304
305    // Plays back audio from a given URI. No TTS engine involvement here.
306    private void handleAudio(MessageParams msg) {
307        if (DBG) Log.d(TAG, "handleAudio()");
308        AudioMessageParams params = (AudioMessageParams) msg;
309        // Note that the BlockingMediaPlayer spawns a separate thread.
310        //
311        // TODO: This can be avoided.
312        params.getPlayer().startAndWait();
313        params.getDispatcher().dispatchUtteranceCompleted();
314        if (DBG) Log.d(TAG, "handleAudio() done.");
315    }
316
317    // Denotes the start of a new synthesis request. We create a new
318    // audio track, and prepare it for incoming data.
319    //
320    // Note that since all TTS synthesis happens on a single thread, we
321    // should ALWAYS see the following order :
322    //
323    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
324    // OR
325    // handleSynthesisCompleteDataAvailable.
326    private void handleSynthesisStart(MessageParams msg) {
327        if (DBG) Log.d(TAG, "handleSynthesisStart()");
328        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
329
330        // Oops, looks like the engine forgot to call done(). We go through
331        // extra trouble to clean the data to prevent the AudioTrack resources
332        // from being leaked.
333        if (mLastSynthesisRequest != null) {
334            Log.w(TAG, "Error : Missing call to done() for request : " +
335                    mLastSynthesisRequest);
336            handleSynthesisDone(mLastSynthesisRequest);
337        }
338
339        mLastSynthesisRequest = param;
340
341        // Create the audio track.
342        final AudioTrack audioTrack = createStreamingAudioTrack(
343                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
344                param.mChannelCount, param.mVolume, param.mPan);
345
346        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
347
348        param.setAudioTrack(audioTrack);
349    }
350
351    // More data available to be flushed to the audio track.
352    private void handleSynthesisDataAvailable(MessageParams msg) {
353        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
354        if (param.getAudioTrack() == null) {
355            Log.w(TAG, "Error : null audio track in handleDataAvailable.");
356            return;
357        }
358
359        if (param != mLastSynthesisRequest) {
360            Log.e(TAG, "Call to dataAvailable without done() / start()");
361            return;
362        }
363
364        final AudioTrack audioTrack = param.getAudioTrack();
365        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
366
367        if (bufferCopy == null) {
368            Log.e(TAG, "No buffers available to play.");
369            return;
370        }
371
372        int playState = audioTrack.getPlayState();
373        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
374            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
375            audioTrack.play();
376        }
377        int count = 0;
378        while (count < bufferCopy.mLength) {
379            // Note that we don't take bufferCopy.mOffset into account because
380            // it is guaranteed to be 0.
381            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
382            if (written <= 0) {
383                break;
384            }
385            count += written;
386        }
387        param.mBytesWritten += count;
388        param.mLogger.onPlaybackStart();
389    }
390
391    private void handleSynthesisDone(MessageParams msg) {
392        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
393        handleSynthesisDone(params);
394        // This call is delayed more than it should be, but we are
395        // certain at this point that we have all the data we want.
396        params.mLogger.onWriteData();
397    }
398
399    // Wait for the audio track to stop playing, and then release it's resources.
400    private void handleSynthesisDone(SynthesisMessageParams params) {
401        if (DBG) Log.d(TAG, "handleSynthesisDone()");
402        final AudioTrack audioTrack = params.getAudioTrack();
403
404        try {
405            if (audioTrack != null) {
406                if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
407                        audioTrack.hashCode());
408                blockUntilDone(params);
409                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
410                // The last call to AudioTrack.write( ) will return only after
411                // all data from the audioTrack has been sent to the mixer, so
412                // it's safe to release at this point.
413                audioTrack.release();
414            }
415        } finally {
416            params.setAudioTrack(null);
417            params.getDispatcher().dispatchUtteranceCompleted();
418            mLastSynthesisRequest = null;
419        }
420    }
421
422    private static void blockUntilDone(SynthesisMessageParams params) {
423        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
424            return;
425        }
426
427        final AudioTrack track = params.mAudioTrack;
428        final int bytesPerFrame = getBytesPerFrame(params.mAudioFormat);
429        final int lengthInBytes = params.mBytesWritten;
430
431        blockUntilDone(track, bytesPerFrame, lengthInBytes);
432    }
433
434    private void handleSynthesisCompleteDataAvailable(MessageParams msg) {
435        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
436        if (DBG) Log.d(TAG, "completeAudioAvailable(" + params + ")");
437
438        params.mLogger.onPlaybackStart();
439
440        // Channel config and bytes per frame are checked before
441        // this message is sent.
442        int channelConfig = AudioPlaybackHandler.getChannelConfig(params.mChannelCount);
443        int bytesPerFrame = AudioPlaybackHandler.getBytesPerFrame(params.mAudioFormat);
444
445        SynthesisMessageParams.ListEntry entry = params.getNextBuffer();
446
447        if (entry == null) {
448            Log.w(TAG, "completeDataAvailable : No buffers available to play.");
449            return;
450        }
451
452        final AudioTrack audioTrack = new AudioTrack(params.mStreamType, params.mSampleRateInHz,
453                channelConfig, params.mAudioFormat, entry.mLength, AudioTrack.MODE_STATIC);
454
455        // So that handleDone can access this correctly.
456        params.mAudioTrack = audioTrack;
457
458        try {
459            audioTrack.write(entry.mBytes, entry.mOffset, entry.mLength);
460            setupVolume(audioTrack, params.mVolume, params.mPan);
461            audioTrack.play();
462            blockUntilDone(audioTrack, bytesPerFrame, entry.mLength);
463            if (DBG) Log.d(TAG, "Wrote data to audio track successfully : " + entry.mLength);
464        } catch (IllegalStateException ex) {
465            Log.e(TAG, "Playback error", ex);
466        } finally {
467            handleSynthesisDone(msg);
468        }
469    }
470
471
472    private static void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame,
473            int lengthInBytes) {
474        int lengthInFrames = lengthInBytes / bytesPerFrame;
475        int currentPosition = 0;
476        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
477            if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
478                break;
479            }
480
481            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
482                    audioTrack.getSampleRate();
483
484            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
485                    " Playback position : " + currentPosition);
486            try {
487                Thread.sleep(estimatedTimeMs);
488            } catch (InterruptedException ie) {
489                break;
490            }
491        }
492    }
493
494    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
495            int audioFormat, int channelCount, float volume, float pan) {
496        int channelConfig = getChannelConfig(channelCount);
497
498        int minBufferSizeInBytes
499                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
500        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
501
502        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
503                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
504        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
505            Log.w(TAG, "Unable to create audio track.");
506            audioTrack.release();
507            return null;
508        }
509
510        setupVolume(audioTrack, volume, pan);
511        return audioTrack;
512    }
513
514    static int getChannelConfig(int channelCount) {
515        if (channelCount == 1) {
516            return AudioFormat.CHANNEL_OUT_MONO;
517        } else if (channelCount == 2){
518            return AudioFormat.CHANNEL_OUT_STEREO;
519        }
520
521        return 0;
522    }
523
524    static int getBytesPerFrame(int audioFormat) {
525        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
526            return 1;
527        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
528            return 2;
529        }
530
531        return -1;
532    }
533
534    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
535        float vol = clip(volume, 0.0f, 1.0f);
536        float panning = clip(pan, -1.0f, 1.0f);
537        float volLeft = vol;
538        float volRight = vol;
539        if (panning > 0.0f) {
540            volLeft *= (1.0f - panning);
541        } else if (panning < 0.0f) {
542            volRight *= (1.0f + panning);
543        }
544        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
545        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
546            Log.e(TAG, "Failed to set volume");
547        }
548    }
549
550    private static float clip(float value, float min, float max) {
551        return value > max ? max : (value < min ? min : value);
552    }
553
554}
555