AudioPlaybackHandler.java revision 6dabb63307a0b63f9386d61e8444aed29db2081e
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.util.Log;
21
22import java.util.Iterator;
23import java.util.concurrent.PriorityBlockingQueue;
24import java.util.concurrent.atomic.AtomicLong;
25
26class AudioPlaybackHandler {
27    private static final String TAG = "TTS.AudioPlaybackHandler";
28    private static final boolean DBG = false;
29
30    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32    private static final int SYNTHESIS_START = 1;
33    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
34    private static final int SYNTHESIS_COMPLETE_DATA_AVAILABLE = 3;
35    private static final int SYNTHESIS_DONE = 4;
36
37    private static final int PLAY_AUDIO = 5;
38    private static final int PLAY_SILENCE = 6;
39
40    private static final int SHUTDOWN = -1;
41
42    private static final int DEFAULT_PRIORITY = 1;
43    private static final int HIGH_PRIORITY = 0;
44
45    private final PriorityBlockingQueue<ListEntry> mQueue =
46            new PriorityBlockingQueue<ListEntry>();
47    private final Thread mHandlerThread;
48
49    private volatile MessageParams mCurrentParams = null;
50    // Used only for book keeping and error detection.
51    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
52    // Used to order incoming messages in our priority queue.
53    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
54
55
56    AudioPlaybackHandler() {
57        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
58    }
59
60    public void start() {
61        mHandlerThread.start();
62    }
63
64    /**
65     * Stops all synthesis for a given {@code token}. If the current token
66     * is currently being processed, an effort will be made to stop it but
67     * that is not guaranteed.
68     */
69    synchronized public void stop(MessageParams token) {
70        if (token == null) {
71            return;
72        }
73
74        removeMessages(token);
75
76        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
77            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
78            if (current != null) {
79                // Stop the current audio track if it's still playing.
80                // The audio track is thread safe in this regard.
81                current.stop();
82            }
83            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
84        } else  {
85            final MessageParams current = getCurrentParams();
86
87            if (current != null) {
88                if (token.getType() == MessageParams.TYPE_AUDIO) {
89                    ((AudioMessageParams) current).getPlayer().stop();
90                } else if (token.getType() == MessageParams.TYPE_SILENCE) {
91                    ((SilenceMessageParams) current).getConditionVariable().open();
92                }
93            }
94        }
95    }
96
97    synchronized public void removePlaybackItems(String callingApp) {
98        removeMessages(callingApp);
99        stop(getCurrentParams());
100    }
101
102    synchronized public void removeAllItems() {
103        removeAllMessages();
104        stop(getCurrentParams());
105    }
106
107    /**
108     * Shut down the audio playback thread.
109     */
110    synchronized public void quit() {
111        stop(getCurrentParams());
112        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
113    }
114
115    void enqueueSynthesisStart(SynthesisMessageParams token) {
116        mQueue.add(new ListEntry(SYNTHESIS_START, token));
117    }
118
119    void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
120        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
121    }
122
123    void enqueueSynthesisCompleteDataAvailable(SynthesisMessageParams token) {
124        mQueue.add(new ListEntry(SYNTHESIS_COMPLETE_DATA_AVAILABLE, token));
125    }
126
127    void enqueueSynthesisDone(SynthesisMessageParams token) {
128        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
129    }
130
131    void enqueueAudio(AudioMessageParams token) {
132        mQueue.add(new ListEntry(PLAY_AUDIO, token));
133    }
134
135    void enqueueSilence(SilenceMessageParams token) {
136        mQueue.add(new ListEntry(PLAY_SILENCE, token));
137    }
138
139    // -----------------------------------------
140    // End of public API methods.
141    // -----------------------------------------
142
143    // -----------------------------------------
144    // Methods for managing the message queue.
145    // -----------------------------------------
146
147    /*
148     * The MessageLoop is a handler like implementation that
149     * processes messages from a priority queue.
150     */
151    private final class MessageLoop implements Runnable {
152        @Override
153        public void run() {
154            while (true) {
155                ListEntry entry = null;
156                try {
157                    entry = mQueue.take();
158                } catch (InterruptedException ie) {
159                    return;
160                }
161
162                if (entry.mWhat == SHUTDOWN) {
163                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
164                    return;
165                }
166
167                if (DBG) {
168                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
169                            + " ,seqId : " + entry.mSequenceId);
170                }
171
172                setCurrentParams(entry.mMessage);
173                handleMessage(entry);
174                setCurrentParams(null);
175            }
176        }
177    }
178
179    /*
180     * Remove all messages from the queue that contain the supplied token.
181     * Note that the Iterator is thread safe, and other methods can safely
182     * continue adding to the queue at this point.
183     */
184    synchronized private void removeMessages(MessageParams token) {
185        if (token == null) {
186            return;
187        }
188
189        Iterator<ListEntry> it = mQueue.iterator();
190
191        while (it.hasNext()) {
192            final ListEntry current = it.next();
193            if (current.mMessage == token) {
194                it.remove();
195            }
196        }
197    }
198
199    /*
200     * Atomically clear the queue of all messages.
201     */
202    synchronized private void removeAllMessages() {
203        mQueue.clear();
204    }
205
206    /*
207     * Remove all messages that originate from a given calling app.
208     */
209    synchronized private void removeMessages(String callingApp) {
210        Iterator<ListEntry> it = mQueue.iterator();
211
212        while (it.hasNext()) {
213            final ListEntry current = it.next();
214            // The null check is to prevent us from removing control messages,
215            // such as a shutdown message.
216            if (current.mMessage != null &&
217                    callingApp.equals(current.mMessage.getCallingApp())) {
218                it.remove();
219            }
220        }
221    }
222
223    /*
224     * An element of our priority queue of messages. Each message has a priority,
225     * and a sequence id (defined by the order of enqueue calls). Among messages
226     * with the same priority, messages that were received earlier win out.
227     */
228    private final class ListEntry implements Comparable<ListEntry> {
229        final int mWhat;
230        final MessageParams mMessage;
231        final int mPriority;
232        final long mSequenceId;
233
234        private ListEntry(int what, MessageParams message) {
235            this(what, message, DEFAULT_PRIORITY);
236        }
237
238        private ListEntry(int what, MessageParams message, int priority) {
239            mWhat = what;
240            mMessage = message;
241            mPriority = priority;
242            mSequenceId = mSequenceIdCtr.incrementAndGet();
243        }
244
245        @Override
246        public int compareTo(ListEntry that) {
247            if (that == this) {
248                return 0;
249            }
250
251            // Note that this is always 0, 1 or -1.
252            int priorityDiff = mPriority - that.mPriority;
253            if (priorityDiff == 0) {
254                // The == case cannot occur.
255                return (mSequenceId < that.mSequenceId) ? -1 : 1;
256            }
257
258            return priorityDiff;
259        }
260    }
261
262    private void setCurrentParams(MessageParams p) {
263        mCurrentParams = p;
264    }
265
266    private MessageParams getCurrentParams() {
267        return mCurrentParams;
268    }
269
270    // -----------------------------------------
271    // Methods for dealing with individual messages, the methods
272    // below do the actual work.
273    // -----------------------------------------
274
275    private void handleMessage(ListEntry entry) {
276        final MessageParams msg = entry.mMessage;
277        if (entry.mWhat == SYNTHESIS_START) {
278            handleSynthesisStart(msg);
279        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
280            handleSynthesisDataAvailable(msg);
281        } else if (entry.mWhat == SYNTHESIS_DONE) {
282            handleSynthesisDone(msg);
283        } else if (entry.mWhat == SYNTHESIS_COMPLETE_DATA_AVAILABLE) {
284            handleSynthesisCompleteDataAvailable(msg);
285        } else if (entry.mWhat == PLAY_AUDIO) {
286            handleAudio(msg);
287        } else if (entry.mWhat == PLAY_SILENCE) {
288            handleSilence(msg);
289        }
290    }
291
292    // Currently implemented as blocking the audio playback thread for the
293    // specified duration. If a call to stop() is made, the thread
294    // unblocks.
295    private void handleSilence(MessageParams msg) {
296        if (DBG) Log.d(TAG, "handleSilence()");
297        SilenceMessageParams params = (SilenceMessageParams) msg;
298        if (params.getSilenceDurationMs() > 0) {
299            params.getConditionVariable().block(params.getSilenceDurationMs());
300        }
301        params.getDispatcher().dispatchUtteranceCompleted();
302        if (DBG) Log.d(TAG, "handleSilence() done.");
303    }
304
305    // Plays back audio from a given URI. No TTS engine involvement here.
306    private void handleAudio(MessageParams msg) {
307        if (DBG) Log.d(TAG, "handleAudio()");
308        AudioMessageParams params = (AudioMessageParams) msg;
309        // Note that the BlockingMediaPlayer spawns a separate thread.
310        //
311        // TODO: This can be avoided.
312        params.getPlayer().startAndWait();
313        params.getDispatcher().dispatchUtteranceCompleted();
314        if (DBG) Log.d(TAG, "handleAudio() done.");
315    }
316
317    // Denotes the start of a new synthesis request. We create a new
318    // audio track, and prepare it for incoming data.
319    //
320    // Note that since all TTS synthesis happens on a single thread, we
321    // should ALWAYS see the following order :
322    //
323    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
324    // OR
325    // handleSynthesisCompleteDataAvailable.
326    private void handleSynthesisStart(MessageParams msg) {
327        if (DBG) Log.d(TAG, "handleSynthesisStart()");
328        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
329
330        // Oops, looks like the engine forgot to call done(). We go through
331        // extra trouble to clean the data to prevent the AudioTrack resources
332        // from being leaked.
333        if (mLastSynthesisRequest != null) {
334            Log.w(TAG, "Error : Missing call to done() for request : " +
335                    mLastSynthesisRequest);
336            handleSynthesisDone(mLastSynthesisRequest);
337        }
338
339        mLastSynthesisRequest = param;
340
341        // Create the audio track.
342        final AudioTrack audioTrack = createStreamingAudioTrack(
343                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
344                param.mChannelCount, param.mVolume, param.mPan);
345
346        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
347
348        param.setAudioTrack(audioTrack);
349    }
350
351    // More data available to be flushed to the audio track.
352    private void handleSynthesisDataAvailable(MessageParams msg) {
353        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
354        if (param.getAudioTrack() == null) {
355            Log.w(TAG, "Error : null audio track in handleDataAvailable.");
356            return;
357        }
358
359        if (param != mLastSynthesisRequest) {
360            Log.e(TAG, "Call to dataAvailable without done() / start()");
361            return;
362        }
363
364        final AudioTrack audioTrack = param.getAudioTrack();
365        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
366
367        if (bufferCopy == null) {
368            Log.e(TAG, "No buffers available to play.");
369            return;
370        }
371
372        int playState = audioTrack.getPlayState();
373        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
374            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
375            audioTrack.play();
376        }
377        int count = 0;
378        while (count < bufferCopy.mLength) {
379            // Note that we don't take bufferCopy.mOffset into account because
380            // it is guaranteed to be 0.
381            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
382            if (written <= 0) {
383                break;
384            }
385            count += written;
386        }
387
388        param.mLogger.onPlaybackStart();
389    }
390
391    private void handleSynthesisDone(MessageParams msg) {
392        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
393        handleSynthesisDone(params);
394        // This call is delayed more than it should be, but we are
395        // certain at this point that we have all the data we want.
396        params.mLogger.onWriteData();
397    }
398
399    // Flush all remaining data to the audio track, stop it and release
400    // all it's resources.
401    private void handleSynthesisDone(SynthesisMessageParams params) {
402        if (DBG) Log.d(TAG, "handleSynthesisDone()");
403        final AudioTrack audioTrack = params.getAudioTrack();
404
405        try {
406            if (audioTrack != null) {
407                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
408                // The last call to AudioTrack.write( ) will return only after
409                // all data from the audioTrack has been sent to the mixer, so
410                // it's safe to release at this point.
411                audioTrack.release();
412            }
413        } finally {
414            params.setAudioTrack(null);
415            params.getDispatcher().dispatchUtteranceCompleted();
416            mLastSynthesisRequest = null;
417        }
418    }
419
420    private void handleSynthesisCompleteDataAvailable(MessageParams msg) {
421        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
422        if (DBG) Log.d(TAG, "completeAudioAvailable(" + params + ")");
423
424        params.mLogger.onPlaybackStart();
425
426        // Channel config and bytes per frame are checked before
427        // this message is sent.
428        int channelConfig = AudioPlaybackHandler.getChannelConfig(params.mChannelCount);
429        int bytesPerFrame = AudioPlaybackHandler.getBytesPerFrame(params.mAudioFormat);
430
431        SynthesisMessageParams.ListEntry entry = params.getNextBuffer();
432
433        if (entry == null) {
434            Log.w(TAG, "completeDataAvailable : No buffers available to play.");
435            return;
436        }
437
438        final AudioTrack audioTrack = new AudioTrack(params.mStreamType, params.mSampleRateInHz,
439                channelConfig, params.mAudioFormat, entry.mLength, AudioTrack.MODE_STATIC);
440
441        // So that handleDone can access this correctly.
442        params.mAudioTrack = audioTrack;
443
444        try {
445            audioTrack.write(entry.mBytes, entry.mOffset, entry.mLength);
446            setupVolume(audioTrack, params.mVolume, params.mPan);
447            audioTrack.play();
448            blockUntilDone(audioTrack, bytesPerFrame, entry.mLength);
449            if (DBG) Log.d(TAG, "Wrote data to audio track successfully : " + entry.mLength);
450        } catch (IllegalStateException ex) {
451            Log.e(TAG, "Playback error", ex);
452        } finally {
453            handleSynthesisDone(msg);
454        }
455    }
456
457
458    private static void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) {
459        int lengthInFrames = length / bytesPerFrame;
460        int currentPosition = 0;
461        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
462            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
463                    audioTrack.getSampleRate();
464            audioTrack.getPlayState();
465            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
466                    " Playback position : " + currentPosition);
467            try {
468                Thread.sleep(estimatedTimeMs);
469            } catch (InterruptedException ie) {
470                break;
471            }
472        }
473    }
474
475    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
476            int audioFormat, int channelCount, float volume, float pan) {
477        int channelConfig = getChannelConfig(channelCount);
478
479        int minBufferSizeInBytes
480                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
481        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
482
483        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
484                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
485        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
486            Log.w(TAG, "Unable to create audio track.");
487            audioTrack.release();
488            return null;
489        }
490
491        setupVolume(audioTrack, volume, pan);
492        return audioTrack;
493    }
494
495    static int getChannelConfig(int channelCount) {
496        if (channelCount == 1) {
497            return AudioFormat.CHANNEL_OUT_MONO;
498        } else if (channelCount == 2){
499            return AudioFormat.CHANNEL_OUT_STEREO;
500        }
501
502        return 0;
503    }
504
505    static int getBytesPerFrame(int audioFormat) {
506        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
507            return 1;
508        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
509            return 2;
510        }
511
512        return -1;
513    }
514
515    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
516        float vol = clip(volume, 0.0f, 1.0f);
517        float panning = clip(pan, -1.0f, 1.0f);
518        float volLeft = vol;
519        float volRight = vol;
520        if (panning > 0.0f) {
521            volLeft *= (1.0f - panning);
522        } else if (panning < 0.0f) {
523            volRight *= (1.0f + panning);
524        }
525        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
526        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
527            Log.e(TAG, "Failed to set volume");
528        }
529    }
530
531    private static float clip(float value, float min, float max) {
532        return value > max ? max : (value < min ? min : value);
533    }
534
535}
536