AudioPlaybackHandler.java revision 40f71f0be3cefabde9dc066d7707a1e5ebaec820
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.text.TextUtils;
21import android.util.Log;
22
23import java.util.Iterator;
24import java.util.concurrent.PriorityBlockingQueue;
25import java.util.concurrent.atomic.AtomicLong;
26
class AudioPlaybackHandler {
    private static final String TAG = "TTS.AudioPlaybackHandler";
    // Extra logging of cross-thread enqueue/remove/stop operations.
    private static final boolean DBG_THREADING = false;
    private static final boolean DBG = false;

    // Floor for the AudioTrack buffer size, in bytes (see createStreamingAudioTrack).
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    // Message types consumed by the playback thread (see handleMessage).
    private static final int SYNTHESIS_START = 1;
    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
    private static final int SYNTHESIS_DONE = 3;

    private static final int PLAY_AUDIO = 5;
    private static final int PLAY_SILENCE = 6;

    // Control message that makes the message loop exit (see MessageLoop#run).
    private static final int SHUTDOWN = -1;

    // Queue priorities: lower values are dequeued first (see ListEntry#compareTo).
    private static final int DEFAULT_PRIORITY = 1;
    private static final int HIGH_PRIORITY = 0;

    // Pending playback work, ordered by (priority, enqueue sequence id).
    private final PriorityBlockingQueue<ListEntry> mQueue =
            new PriorityBlockingQueue<ListEntry>();
    // Background thread that runs MessageLoop and performs all playback.
    private final Thread mHandlerThread;

    // Message currently being processed by the playback thread, if any.
    private volatile MessageParams mCurrentParams = null;
    // Used only for book keeping and error detection.
    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
    // Used to order incoming messages in our priority queue.
    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);

56
57    AudioPlaybackHandler() {
58        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
59    }
60
61    public void start() {
62        mHandlerThread.start();
63    }
64
65    /**
66     * Stops all synthesis for a given {@code token}. If the current token
67     * is currently being processed, an effort will be made to stop it but
68     * that is not guaranteed.
69     *
70     * NOTE: This assumes that all other messages in the queue with {@code token}
71     * have been removed already.
72     *
73     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
74     */
75    private void stop(MessageParams token) {
76        if (token == null) {
77            return;
78        }
79
80        if (DBG) Log.d(TAG, "Stopping token : " + token);
81
82        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
83            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
84            if (current != null) {
85                // Stop the current audio track if it's still playing.
86                // The audio track is thread safe in this regard. The current
87                // handleSynthesisDataAvailable call will return soon after this
88                // call.
89                current.stop();
90            }
91            // This is safe because PlaybackSynthesisCallback#stop would have
92            // been called before this method, and will no longer enqueue any
93            // audio for this token.
94            //
95            // (Even if it did, all it would result in is a warning message).
96            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
97        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
98            ((AudioMessageParams) token).getPlayer().stop();
99            // No cleanup required for audio messages.
100        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
101            ((SilenceMessageParams) token).getConditionVariable().open();
102            // No cleanup required for silence messages.
103        }
104    }
105
106    // -----------------------------------------------------
107    // Methods that add and remove elements from the queue. These do not
108    // need to be synchronized strictly speaking, but they make the behaviour
109    // a lot more predictable. (though it would still be correct without
110    // synchronization).
111    // -----------------------------------------------------
112
113    synchronized public void removePlaybackItems(String callingApp) {
114        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
115        removeMessages(callingApp);
116
117        final MessageParams current = getCurrentParams();
118        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
119            stop(current);
120        }
121
122        final MessageParams lastSynthesis = mLastSynthesisRequest;
123
124        if (lastSynthesis != null && lastSynthesis != current &&
125                TextUtils.equals(callingApp, lastSynthesis.getCallingApp())) {
126            stop(lastSynthesis);
127        }
128    }
129
130    synchronized public void removeAllItems() {
131        if (DBG_THREADING) Log.d(TAG, "Removing all items");
132        removeAllMessages();
133
134        final MessageParams current = getCurrentParams();
135        final MessageParams lastSynthesis = mLastSynthesisRequest;
136        stop(current);
137
138        if (lastSynthesis != null && lastSynthesis != current) {
139            stop(lastSynthesis);
140        }
141    }
142
143    /**
144     * @return false iff the queue is empty and no queue item is currently
145     *        being handled, true otherwise.
146     */
147    public boolean isSpeaking() {
148        return (mQueue.peek() != null) || (mCurrentParams != null);
149    }
150
151    /**
152     * Shut down the audio playback thread.
153     */
154    synchronized public void quit() {
155        removeAllMessages();
156        stop(getCurrentParams());
157        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
158    }
159
160    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
161        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
162        mQueue.add(new ListEntry(SYNTHESIS_START, token));
163    }
164
165    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
166        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
167        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
168    }
169
170    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
171        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
172        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
173    }
174
175    synchronized void enqueueAudio(AudioMessageParams token) {
176        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
177        mQueue.add(new ListEntry(PLAY_AUDIO, token));
178    }
179
180    synchronized void enqueueSilence(SilenceMessageParams token) {
181        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
182        mQueue.add(new ListEntry(PLAY_SILENCE, token));
183    }
184
185    // -----------------------------------------
186    // End of public API methods.
187    // -----------------------------------------
188
189    // -----------------------------------------
190    // Methods for managing the message queue.
191    // -----------------------------------------
192
193    /*
194     * The MessageLoop is a handler like implementation that
195     * processes messages from a priority queue.
196     */
197    private final class MessageLoop implements Runnable {
198        @Override
199        public void run() {
200            while (true) {
201                ListEntry entry = null;
202                try {
203                    entry = mQueue.take();
204                } catch (InterruptedException ie) {
205                    return;
206                }
207
208                if (entry.mWhat == SHUTDOWN) {
209                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
210                    return;
211                }
212
213                if (DBG) {
214                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
215                            + " ,seqId : " + entry.mSequenceId);
216                }
217
218                setCurrentParams(entry.mMessage);
219                handleMessage(entry);
220                setCurrentParams(null);
221            }
222        }
223    }
224
225    /*
226     * Atomically clear the queue of all messages.
227     */
228    synchronized private void removeAllMessages() {
229        mQueue.clear();
230    }
231
232    /*
233     * Remove all messages that originate from a given calling app.
234     */
235    synchronized private void removeMessages(String callingApp) {
236        Iterator<ListEntry> it = mQueue.iterator();
237
238        while (it.hasNext()) {
239            final ListEntry current = it.next();
240            // The null check is to prevent us from removing control messages,
241            // such as a shutdown message.
242            if (current.mMessage != null &&
243                    callingApp.equals(current.mMessage.getCallingApp())) {
244                it.remove();
245            }
246        }
247    }
248
249    /*
250     * An element of our priority queue of messages. Each message has a priority,
251     * and a sequence id (defined by the order of enqueue calls). Among messages
252     * with the same priority, messages that were received earlier win out.
253     */
254    private final class ListEntry implements Comparable<ListEntry> {
255        final int mWhat;
256        final MessageParams mMessage;
257        final int mPriority;
258        final long mSequenceId;
259
260        private ListEntry(int what, MessageParams message) {
261            this(what, message, DEFAULT_PRIORITY);
262        }
263
264        private ListEntry(int what, MessageParams message, int priority) {
265            mWhat = what;
266            mMessage = message;
267            mPriority = priority;
268            mSequenceId = mSequenceIdCtr.incrementAndGet();
269        }
270
271        @Override
272        public int compareTo(ListEntry that) {
273            if (that == this) {
274                return 0;
275            }
276
277            // Note that this is always 0, 1 or -1.
278            int priorityDiff = mPriority - that.mPriority;
279            if (priorityDiff == 0) {
280                // The == case cannot occur.
281                return (mSequenceId < that.mSequenceId) ? -1 : 1;
282            }
283
284            return priorityDiff;
285        }
286    }
287
288    private void setCurrentParams(MessageParams p) {
289        if (DBG_THREADING) {
290            if (p != null) {
291                Log.d(TAG, "Started handling :" + p);
292            } else {
293                Log.d(TAG, "End handling : " + mCurrentParams);
294            }
295        }
296        mCurrentParams = p;
297    }
298
299    private MessageParams getCurrentParams() {
300        return mCurrentParams;
301    }
302
303    // -----------------------------------------
304    // Methods for dealing with individual messages, the methods
305    // below do the actual work.
306    // -----------------------------------------
307
308    private void handleMessage(ListEntry entry) {
309        final MessageParams msg = entry.mMessage;
310        if (entry.mWhat == SYNTHESIS_START) {
311            handleSynthesisStart(msg);
312        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
313            handleSynthesisDataAvailable(msg);
314        } else if (entry.mWhat == SYNTHESIS_DONE) {
315            handleSynthesisDone(msg);
316        } else if (entry.mWhat == PLAY_AUDIO) {
317            handleAudio(msg);
318        } else if (entry.mWhat == PLAY_SILENCE) {
319            handleSilence(msg);
320        }
321    }
322
323    // Currently implemented as blocking the audio playback thread for the
324    // specified duration. If a call to stop() is made, the thread
325    // unblocks.
326    private void handleSilence(MessageParams msg) {
327        if (DBG) Log.d(TAG, "handleSilence()");
328        SilenceMessageParams params = (SilenceMessageParams) msg;
329        params.getDispatcher().dispatchOnStart();
330        if (params.getSilenceDurationMs() > 0) {
331            params.getConditionVariable().block(params.getSilenceDurationMs());
332        }
333        params.getDispatcher().dispatchOnDone();
334        if (DBG) Log.d(TAG, "handleSilence() done.");
335    }
336
337    // Plays back audio from a given URI. No TTS engine involvement here.
338    private void handleAudio(MessageParams msg) {
339        if (DBG) Log.d(TAG, "handleAudio()");
340        AudioMessageParams params = (AudioMessageParams) msg;
341        params.getDispatcher().dispatchOnStart();
342        // Note that the BlockingMediaPlayer spawns a separate thread.
343        //
344        // TODO: This can be avoided.
345        params.getPlayer().startAndWait();
346        params.getDispatcher().dispatchOnDone();
347        if (DBG) Log.d(TAG, "handleAudio() done.");
348    }
349
350    // Denotes the start of a new synthesis request. We create a new
351    // audio track, and prepare it for incoming data.
352    //
353    // Note that since all TTS synthesis happens on a single thread, we
354    // should ALWAYS see the following order :
355    //
356    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
357    // OR
358    // handleSynthesisCompleteDataAvailable.
359    private void handleSynthesisStart(MessageParams msg) {
360        if (DBG) Log.d(TAG, "handleSynthesisStart()");
361        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
362
363        // Oops, looks like the engine forgot to call done(). We go through
364        // extra trouble to clean the data to prevent the AudioTrack resources
365        // from being leaked.
366        if (mLastSynthesisRequest != null) {
367            Log.e(TAG, "Error : Missing call to done() for request : " +
368                    mLastSynthesisRequest);
369            handleSynthesisDone(mLastSynthesisRequest);
370        }
371
372        mLastSynthesisRequest = param;
373
374        // Create the audio track.
375        final AudioTrack audioTrack = createStreamingAudioTrack(param);
376
377        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
378
379        param.setAudioTrack(audioTrack);
380        msg.getDispatcher().dispatchOnStart();
381    }
382
383    // More data available to be flushed to the audio track.
384    private void handleSynthesisDataAvailable(MessageParams msg) {
385        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
386        if (param.getAudioTrack() == null) {
387            Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
388            return;
389        }
390
391        if (param != mLastSynthesisRequest) {
392            Log.e(TAG, "Call to dataAvailable without done() / start()");
393            return;
394        }
395
396        final AudioTrack audioTrack = param.getAudioTrack();
397        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
398
399        if (bufferCopy == null) {
400            Log.e(TAG, "No buffers available to play.");
401            return;
402        }
403
404        int playState = audioTrack.getPlayState();
405        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
406            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
407            audioTrack.play();
408        }
409        int count = 0;
410        while (count < bufferCopy.mBytes.length) {
411            // Note that we don't take bufferCopy.mOffset into account because
412            // it is guaranteed to be 0.
413            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mBytes.length);
414            if (written <= 0) {
415                break;
416            }
417            count += written;
418        }
419        param.mBytesWritten += count;
420        param.mLogger.onPlaybackStart();
421    }
422
    // Wait for the audio track to stop playing, and then release its resources.
    private void handleSynthesisDone(MessageParams msg) {
        final SynthesisMessageParams params = (SynthesisMessageParams) msg;

        if (DBG) Log.d(TAG, "handleSynthesisDone()");
        final AudioTrack audioTrack = params.getAudioTrack();

        if (audioTrack == null) {
            // No track was ever created (createStreamingAudioTrack can return
            // null), so report an error instead of a successful completion.
            params.getDispatcher().dispatchOnError();
            return;
        }

        if (params.mBytesWritten < params.mAudioBufferSize) {
            // Less than one full buffer was written; a streaming AudioTrack
            // may never begin playback on its own in that case, so stop() is
            // used to flush what we have to the mixer.
            if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                    audioTrack.getPlayState());
            params.mIsShortUtterance = true;
            audioTrack.stop();
        }

        if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
                audioTrack.hashCode());
        blockUntilDone(params);
        if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");

        // The last call to AudioTrack.write( ) will return only after
        // all data from the audioTrack has been sent to the mixer, so
        // it's safe to release at this point. Make sure release() and the call
        // that set the audio track to null are performed atomically.
        synchronized (this) {
            // Never allow the audioTrack to be observed in a state where
            // it is released but non null. The only case this might happen
            // is in the various stopFoo methods that call AudioTrack#stop from
            // different threads, but they are synchronized on AudioPlayBackHandler#this
            // too.
            audioTrack.release();
            params.setAudioTrack(null);
        }
        if (params.isError()) {
            params.getDispatcher().dispatchOnError();
        } else {
            params.getDispatcher().dispatchOnDone();
        }
        // Clear the bookkeeping field so the next handleSynthesisStart does
        // not think a done() call was missed.
        mLastSynthesisRequest = null;
        params.mLogger.onWriteData();
    }
468
    /**
     * The minimum increment of time to wait for an audiotrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an audiotrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
487
488    private static void blockUntilDone(SynthesisMessageParams params) {
489        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
490            return;
491        }
492
493        if (params.mIsShortUtterance) {
494            // In this case we would have called AudioTrack#stop() to flush
495            // buffers to the mixer. This makes the playback head position
496            // unobservable and notification markers do not work reliably. We
497            // have no option but to wait until we think the track would finish
498            // playing and release it after.
499            //
500            // This isn't as bad as it looks because (a) We won't end up waiting
501            // for much longer than we should because even at 4khz mono, a short
502            // utterance weighs in at about 2 seconds, and (b) such short utterances
503            // are expected to be relatively infrequent and in a stream of utterances
504            // this shows up as a slightly longer pause.
505            blockUntilEstimatedCompletion(params);
506        } else {
507            blockUntilCompletion(params);
508        }
509    }
510
511    private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) {
512        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
513        final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz);
514
515        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
516
517        try {
518            Thread.sleep(estimatedTimeMs);
519        } catch (InterruptedException ie) {
520            // Do nothing.
521        }
522    }
523
524    private static void blockUntilCompletion(SynthesisMessageParams params) {
525        final AudioTrack audioTrack = params.mAudioTrack;
526        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
527
528        int previousPosition = -1;
529        int currentPosition = 0;
530        long blockedTimeMs = 0;
531
532        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
533                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {
534
535            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
536                    audioTrack.getSampleRate();
537            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
538
539            // Check if the audio track has made progress since the last loop
540            // iteration. We should then add in the amount of time that was
541            // spent sleeping in the last iteration.
542            if (currentPosition == previousPosition) {
543                // This works only because the sleep time that would have been calculated
544                // would be the same in the previous iteration too.
545                blockedTimeMs += sleepTimeMs;
546                // If we've taken too long to make progress, bail.
547                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
548                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
549                            "for AudioTrack to make progress, Aborting");
550                    break;
551                }
552            } else {
553                blockedTimeMs = 0;
554            }
555            previousPosition = currentPosition;
556
557            if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
558                    " Playback position : " + currentPosition + ", Length in frames : "
559                    + lengthInFrames);
560            try {
561                Thread.sleep(sleepTimeMs);
562            } catch (InterruptedException ie) {
563                break;
564            }
565        }
566    }
567
568    private static final long clip(long value, long min, long max) {
569        if (value < min) {
570            return min;
571        }
572
573        if (value > max) {
574            return max;
575        }
576
577        return value;
578    }
579
580    private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) {
581        final int channelConfig = getChannelConfig(params.mChannelCount);
582        final int sampleRateInHz = params.mSampleRateInHz;
583        final int audioFormat = params.mAudioFormat;
584
585        int minBufferSizeInBytes
586                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
587        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
588
589        AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig,
590                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
591        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
592            Log.w(TAG, "Unable to create audio track.");
593            audioTrack.release();
594            return null;
595        }
596        params.mAudioBufferSize = bufferSizeInBytes;
597
598        setupVolume(audioTrack, params.mVolume, params.mPan);
599        return audioTrack;
600    }
601
602    static int getChannelConfig(int channelCount) {
603        if (channelCount == 1) {
604            return AudioFormat.CHANNEL_OUT_MONO;
605        } else if (channelCount == 2){
606            return AudioFormat.CHANNEL_OUT_STEREO;
607        }
608
609        return 0;
610    }
611
612    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
613        float vol = clip(volume, 0.0f, 1.0f);
614        float panning = clip(pan, -1.0f, 1.0f);
615        float volLeft = vol;
616        float volRight = vol;
617        if (panning > 0.0f) {
618            volLeft *= (1.0f - panning);
619        } else if (panning < 0.0f) {
620            volRight *= (1.0f + panning);
621        }
622        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
623        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
624            Log.e(TAG, "Failed to set volume");
625        }
626    }
627
628    private static float clip(float value, float min, float max) {
629        return value > max ? max : (value < min ? min : value);
630    }
631
632}
633