AudioPlaybackHandler.java revision 963719869967cc257e666809aeb9bff3f25117ed
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.util.Log;
21
22import java.util.Iterator;
23import java.util.concurrent.PriorityBlockingQueue;
24import java.util.concurrent.atomic.AtomicLong;
25
26class AudioPlaybackHandler {
27    private static final String TAG = "TTS.AudioPlaybackHandler";
28    private static final boolean DBG = false;
29
30    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32    private static final int SYNTHESIS_START = 1;
33    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
34    private static final int SYNTHESIS_COMPLETE_DATA_AVAILABLE = 3;
35    private static final int SYNTHESIS_DONE = 4;
36
37    private static final int PLAY_AUDIO = 5;
38    private static final int PLAY_SILENCE = 6;
39
40    private static final int SHUTDOWN = -1;
41
42    private static final int DEFAULT_PRIORITY = 1;
43    private static final int HIGH_PRIORITY = 0;
44
45    private final PriorityBlockingQueue<ListEntry> mQueue =
46            new PriorityBlockingQueue<ListEntry>();
47    private final Thread mHandlerThread;
48
49    private volatile MessageParams mCurrentParams = null;
50    // Used only for book keeping and error detection.
51    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
52    // Used to order incoming messages in our priority queue.
53    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
54
55
56    AudioPlaybackHandler() {
57        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
58    }
59
60    public void start() {
61        mHandlerThread.start();
62    }
63
64    /**
65     * Stops all synthesis for a given {@code token}. If the current token
66     * is currently being processed, an effort will be made to stop it but
67     * that is not guaranteed.
68     */
69    synchronized public void stop(MessageParams token) {
70        if (token == null) {
71            return;
72        }
73
74        removeMessages(token);
75
76        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
77            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
78            if (current != null) {
79                // Stop the current audio track if it's still playing.
80                // The audio track is thread safe in this regard.
81                current.stop();
82            }
83            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
84        } else  {
85            final MessageParams current = getCurrentParams();
86
87            if (current != null) {
88                if (token.getType() == MessageParams.TYPE_AUDIO) {
89                    ((AudioMessageParams) current).getPlayer().stop();
90                } else if (token.getType() == MessageParams.TYPE_SILENCE) {
91                    ((SilenceMessageParams) current).getConditionVariable().open();
92                }
93            }
94        }
95    }
96
97    synchronized public void removePlaybackItems(String callingApp) {
98        removeMessages(callingApp);
99        stop(getCurrentParams());
100    }
101
102    synchronized public void removeAllItems() {
103        removeAllMessages();
104        stop(getCurrentParams());
105    }
106
107    /**
108     * Shut down the audio playback thread.
109     */
110    synchronized public void quit() {
111        stop(getCurrentParams());
112        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
113    }
114
115    void enqueueSynthesisStart(SynthesisMessageParams token) {
116        mQueue.add(new ListEntry(SYNTHESIS_START, token));
117    }
118
119    void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
120        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
121    }
122
123    void enqueueSynthesisCompleteDataAvailable(SynthesisMessageParams token) {
124        mQueue.add(new ListEntry(SYNTHESIS_COMPLETE_DATA_AVAILABLE, token));
125    }
126
127    void enqueueSynthesisDone(SynthesisMessageParams token) {
128        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
129    }
130
131    void enqueueAudio(AudioMessageParams token) {
132        mQueue.add(new ListEntry(PLAY_AUDIO, token));
133    }
134
135    void enqueueSilence(SilenceMessageParams token) {
136        mQueue.add(new ListEntry(PLAY_SILENCE, token));
137    }
138
139    // -----------------------------------------
140    // End of public API methods.
141    // -----------------------------------------
142
143    // -----------------------------------------
144    // Methods for managing the message queue.
145    // -----------------------------------------
146
147    /*
148     * The MessageLoop is a handler like implementation that
149     * processes messages from a priority queue.
150     */
151    private final class MessageLoop implements Runnable {
152        @Override
153        public void run() {
154            while (true) {
155                ListEntry entry = null;
156                try {
157                    entry = mQueue.take();
158                } catch (InterruptedException ie) {
159                    return;
160                }
161
162                if (entry.mWhat == SHUTDOWN) {
163                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
164                    return;
165                }
166
167                if (DBG) {
168                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
169                            + " ,seqId : " + entry.mSequenceId);
170                }
171
172                setCurrentParams(entry.mMessage);
173                handleMessage(entry);
174                setCurrentParams(null);
175            }
176        }
177    }
178
179    /*
180     * Remove all messages from the queue that contain the supplied token.
181     * Note that the Iterator is thread safe, and other methods can safely
182     * continue adding to the queue at this point.
183     */
184    synchronized private void removeMessages(MessageParams token) {
185        if (token == null) {
186            return;
187        }
188
189        Iterator<ListEntry> it = mQueue.iterator();
190
191        while (it.hasNext()) {
192            final ListEntry current = it.next();
193            if (current.mMessage == token) {
194                it.remove();
195            }
196        }
197    }
198
199    /*
200     * Atomically clear the queue of all messages.
201     */
202    synchronized private void removeAllMessages() {
203        mQueue.clear();
204    }
205
206    /*
207     * Remove all messages that originate from a given calling app.
208     */
209    synchronized private void removeMessages(String callingApp) {
210        Iterator<ListEntry> it = mQueue.iterator();
211
212        while (it.hasNext()) {
213            final ListEntry current = it.next();
214            // The null check is to prevent us from removing control messages,
215            // such as a shutdown message.
216            if (current.mMessage != null &&
217                    callingApp.equals(current.mMessage.getCallingApp())) {
218                it.remove();
219            }
220        }
221    }
222
223    /*
224     * An element of our priority queue of messages. Each message has a priority,
225     * and a sequence id (defined by the order of enqueue calls). Among messages
226     * with the same priority, messages that were received earlier win out.
227     */
228    private final class ListEntry implements Comparable<ListEntry> {
229        final int mWhat;
230        final MessageParams mMessage;
231        final int mPriority;
232        final long mSequenceId;
233
234        private ListEntry(int what, MessageParams message) {
235            this(what, message, DEFAULT_PRIORITY);
236        }
237
238        private ListEntry(int what, MessageParams message, int priority) {
239            mWhat = what;
240            mMessage = message;
241            mPriority = priority;
242            mSequenceId = mSequenceIdCtr.incrementAndGet();
243        }
244
245        @Override
246        public int compareTo(ListEntry that) {
247            if (that == this) {
248                return 0;
249            }
250
251            // Note that this is always 0, 1 or -1.
252            int priorityDiff = mPriority - that.mPriority;
253            if (priorityDiff == 0) {
254                // The == case cannot occur.
255                return (mSequenceId < that.mSequenceId) ? -1 : 1;
256            }
257
258            return priorityDiff;
259        }
260    }
261
262    private void setCurrentParams(MessageParams p) {
263        mCurrentParams = p;
264    }
265
266    private MessageParams getCurrentParams() {
267        return mCurrentParams;
268    }
269
270    // -----------------------------------------
271    // Methods for dealing with individual messages, the methods
272    // below do the actual work.
273    // -----------------------------------------
274
275    private void handleMessage(ListEntry entry) {
276        final MessageParams msg = entry.mMessage;
277        if (entry.mWhat == SYNTHESIS_START) {
278            handleSynthesisStart(msg);
279        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
280            handleSynthesisDataAvailable(msg);
281        } else if (entry.mWhat == SYNTHESIS_DONE) {
282            handleSynthesisDone(msg);
283        } else if (entry.mWhat == SYNTHESIS_COMPLETE_DATA_AVAILABLE) {
284            handleSynthesisCompleteDataAvailable(msg);
285        } else if (entry.mWhat == PLAY_AUDIO) {
286            handleAudio(msg);
287        } else if (entry.mWhat == PLAY_SILENCE) {
288            handleSilence(msg);
289        }
290    }
291
292    // Currently implemented as blocking the audio playback thread for the
293    // specified duration. If a call to stop() is made, the thread
294    // unblocks.
295    private void handleSilence(MessageParams msg) {
296        if (DBG) Log.d(TAG, "handleSilence()");
297        SilenceMessageParams params = (SilenceMessageParams) msg;
298        if (params.getSilenceDurationMs() > 0) {
299            params.getConditionVariable().block(params.getSilenceDurationMs());
300        }
301        params.getDispatcher().dispatchUtteranceCompleted();
302        if (DBG) Log.d(TAG, "handleSilence() done.");
303    }
304
305    // Plays back audio from a given URI. No TTS engine involvement here.
306    private void handleAudio(MessageParams msg) {
307        if (DBG) Log.d(TAG, "handleAudio()");
308        AudioMessageParams params = (AudioMessageParams) msg;
309        // Note that the BlockingMediaPlayer spawns a separate thread.
310        //
311        // TODO: This can be avoided.
312        params.getPlayer().startAndWait();
313        params.getDispatcher().dispatchUtteranceCompleted();
314        if (DBG) Log.d(TAG, "handleAudio() done.");
315    }
316
317    // Denotes the start of a new synthesis request. We create a new
318    // audio track, and prepare it for incoming data.
319    //
320    // Note that since all TTS synthesis happens on a single thread, we
321    // should ALWAYS see the following order :
322    //
323    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
324    // OR
325    // handleSynthesisCompleteDataAvailable.
326    private void handleSynthesisStart(MessageParams msg) {
327        if (DBG) Log.d(TAG, "handleSynthesisStart()");
328        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
329
330        // Oops, looks like the engine forgot to call done(). We go through
331        // extra trouble to clean the data to prevent the AudioTrack resources
332        // from being leaked.
333        if (mLastSynthesisRequest != null) {
334            Log.w(TAG, "Error : Missing call to done() for request : " +
335                    mLastSynthesisRequest);
336            handleSynthesisDone(mLastSynthesisRequest);
337        }
338
339        mLastSynthesisRequest = param;
340
341        // Create the audio track.
342        final AudioTrack audioTrack = createStreamingAudioTrack(
343                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
344                param.mChannelCount, param.mVolume, param.mPan);
345
346        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
347
348        param.setAudioTrack(audioTrack);
349    }
350
351    // More data available to be flushed to the audio track.
352    private void handleSynthesisDataAvailable(MessageParams msg) {
353        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
354        if (param.getAudioTrack() == null) {
355            Log.w(TAG, "Error : null audio track in handleDataAvailable.");
356            return;
357        }
358
359        if (param != mLastSynthesisRequest) {
360            Log.e(TAG, "Call to dataAvailable without done() / start()");
361            return;
362        }
363
364        final AudioTrack audioTrack = param.getAudioTrack();
365        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
366
367        if (bufferCopy == null) {
368            Log.e(TAG, "No buffers available to play.");
369            return;
370        }
371
372        int playState = audioTrack.getPlayState();
373        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
374            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
375            audioTrack.play();
376        }
377        int count = 0;
378        while (count < bufferCopy.mLength) {
379            // Note that we don't take bufferCopy.mOffset into account because
380            // it is guaranteed to be 0.
381            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
382            if (written <= 0) {
383                break;
384            }
385            count += written;
386        }
387    }
388
389    private void handleSynthesisDone(MessageParams msg) {
390        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
391        handleSynthesisDone(params);
392    }
393
394    // Flush all remaining data to the audio track, stop it and release
395    // all it's resources.
396    private void handleSynthesisDone(SynthesisMessageParams params) {
397        if (DBG) Log.d(TAG, "handleSynthesisDone()");
398        final AudioTrack audioTrack = params.getAudioTrack();
399
400        try {
401            if (audioTrack != null) {
402                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
403                // The last call to AudioTrack.write( ) will return only after
404                // all data from the audioTrack has been sent to the mixer, so
405                // it's safe to release at this point.
406                audioTrack.release();
407            }
408        } finally {
409            params.setAudioTrack(null);
410            params.getDispatcher().dispatchUtteranceCompleted();
411            mLastSynthesisRequest = null;
412        }
413    }
414
415    private void handleSynthesisCompleteDataAvailable(MessageParams msg) {
416        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
417        if (DBG) Log.d(TAG, "completeAudioAvailable(" + params + ")");
418
419        // Channel config and bytes per frame are checked before
420        // this message is sent.
421        int channelConfig = AudioPlaybackHandler.getChannelConfig(params.mChannelCount);
422        int bytesPerFrame = AudioPlaybackHandler.getBytesPerFrame(params.mAudioFormat);
423
424        SynthesisMessageParams.ListEntry entry = params.getNextBuffer();
425
426        if (entry == null) {
427            Log.w(TAG, "completeDataAvailable : No buffers available to play.");
428            return;
429        }
430
431        final AudioTrack audioTrack = new AudioTrack(params.mStreamType, params.mSampleRateInHz,
432                channelConfig, params.mAudioFormat, entry.mLength, AudioTrack.MODE_STATIC);
433
434        // So that handleDone can access this correctly.
435        params.mAudioTrack = audioTrack;
436
437        try {
438            audioTrack.write(entry.mBytes, entry.mOffset, entry.mLength);
439            setupVolume(audioTrack, params.mVolume, params.mPan);
440            audioTrack.play();
441            blockUntilDone(audioTrack, bytesPerFrame, entry.mLength);
442            if (DBG) Log.d(TAG, "Wrote data to audio track successfully : " + entry.mLength);
443        } catch (IllegalStateException ex) {
444            Log.e(TAG, "Playback error", ex);
445        } finally {
446            handleSynthesisDone(msg);
447        }
448    }
449
450
451    private static void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) {
452        int lengthInFrames = length / bytesPerFrame;
453        int currentPosition = 0;
454        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
455            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
456                    audioTrack.getSampleRate();
457            audioTrack.getPlayState();
458            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
459                    " Playback position : " + currentPosition);
460            try {
461                Thread.sleep(estimatedTimeMs);
462            } catch (InterruptedException ie) {
463                break;
464            }
465        }
466    }
467
468    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
469            int audioFormat, int channelCount, float volume, float pan) {
470        int channelConfig = getChannelConfig(channelCount);
471
472        int minBufferSizeInBytes
473                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
474        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
475
476        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
477                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
478        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
479            Log.w(TAG, "Unable to create audio track.");
480            audioTrack.release();
481            return null;
482        }
483
484        setupVolume(audioTrack, volume, pan);
485        return audioTrack;
486    }
487
488    static int getChannelConfig(int channelCount) {
489        if (channelCount == 1) {
490            return AudioFormat.CHANNEL_OUT_MONO;
491        } else if (channelCount == 2){
492            return AudioFormat.CHANNEL_OUT_STEREO;
493        }
494
495        return 0;
496    }
497
498    static int getBytesPerFrame(int audioFormat) {
499        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
500            return 1;
501        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
502            return 2;
503        }
504
505        return -1;
506    }
507
508    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
509        float vol = clip(volume, 0.0f, 1.0f);
510        float panning = clip(pan, -1.0f, 1.0f);
511        float volLeft = vol;
512        float volRight = vol;
513        if (panning > 0.0f) {
514            volLeft *= (1.0f - panning);
515        } else if (panning < 0.0f) {
516            volRight *= (1.0f + panning);
517        }
518        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
519        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
520            Log.e(TAG, "Failed to set volume");
521        }
522    }
523
524    private static float clip(float value, float min, float max) {
525        return value > max ? max : (value < min ? min : value);
526    }
527
528}
529