AudioPlaybackHandler.java revision 69bc1b2696dde849102f0ac8071999843d01b8d1
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.text.TextUtils;
21import android.util.Log;
22
23import java.util.Iterator;
24import java.util.concurrent.PriorityBlockingQueue;
25import java.util.concurrent.atomic.AtomicLong;
26
27class AudioPlaybackHandler {
28    private static final String TAG = "TTS.AudioPlaybackHandler";
29    private static final boolean DBG_THREADING = false;
30    private static final boolean DBG = false;
31
32    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
33
34    private static final int SYNTHESIS_START = 1;
35    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
36    private static final int SYNTHESIS_DONE = 3;
37
38    private static final int PLAY_AUDIO = 5;
39    private static final int PLAY_SILENCE = 6;
40
41    private static final int SHUTDOWN = -1;
42
43    private static final int DEFAULT_PRIORITY = 1;
44    private static final int HIGH_PRIORITY = 0;
45
46    private final PriorityBlockingQueue<ListEntry> mQueue =
47            new PriorityBlockingQueue<ListEntry>();
48    private final Thread mHandlerThread;
49
50    private volatile MessageParams mCurrentParams = null;
51    // Used only for book keeping and error detection.
52    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
53    // Used to order incoming messages in our priority queue.
54    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
55
56
57    AudioPlaybackHandler() {
58        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
59    }
60
61    public void start() {
62        mHandlerThread.start();
63    }
64
65    /**
66     * Stops all synthesis for a given {@code token}. If the current token
67     * is currently being processed, an effort will be made to stop it but
68     * that is not guaranteed.
69     *
70     * NOTE: This assumes that all other messages in the queue with {@code token}
71     * have been removed already.
72     *
73     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
74     */
75    private void stop(MessageParams token) {
76        if (token == null) {
77            return;
78        }
79
80        if (DBG) Log.d(TAG, "Stopping token : " + token);
81
82        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
83            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
84            if (current != null) {
85                // Stop the current audio track if it's still playing.
86                // The audio track is thread safe in this regard. The current
87                // handleSynthesisDataAvailable call will return soon after this
88                // call.
89                current.stop();
90            }
91            // This is safe because PlaybackSynthesisCallback#stop would have
92            // been called before this method, and will no longer enqueue any
93            // audio for this token.
94            //
95            // (Even if it did, all it would result in is a warning message).
96            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
97        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
98            ((AudioMessageParams) token).getPlayer().stop();
99            // No cleanup required for audio messages.
100        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
101            ((SilenceMessageParams) token).getConditionVariable().open();
102            // No cleanup required for silence messages.
103        }
104    }
105
106    // -----------------------------------------------------
107    // Methods that add and remove elements from the queue. These do not
108    // need to be synchronized strictly speaking, but they make the behaviour
109    // a lot more predictable. (though it would still be correct without
110    // synchronization).
111    // -----------------------------------------------------
112
113    synchronized public void removePlaybackItems(String callingApp) {
114        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
115        removeMessages(callingApp);
116
117        final MessageParams current = getCurrentParams();
118        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
119            stop(current);
120        }
121    }
122
123    synchronized public void removeAllItems() {
124        if (DBG_THREADING) Log.d(TAG, "Removing all items");
125        removeAllMessages();
126        stop(getCurrentParams());
127    }
128
129    /**
130     * @return false iff the queue is empty and no queue item is currently
131     *        being handled, true otherwise.
132     */
133    public boolean isSpeaking() {
134        return (mQueue.peek() != null) || (mCurrentParams != null);
135    }
136
137    /**
138     * Shut down the audio playback thread.
139     */
140    synchronized public void quit() {
141        removeAllMessages();
142        stop(getCurrentParams());
143        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
144    }
145
146    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
147        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
148        mQueue.add(new ListEntry(SYNTHESIS_START, token));
149    }
150
151    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
152        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
153        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
154    }
155
156    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
157        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
158        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
159    }
160
161    synchronized void enqueueAudio(AudioMessageParams token) {
162        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
163        mQueue.add(new ListEntry(PLAY_AUDIO, token));
164    }
165
166    synchronized void enqueueSilence(SilenceMessageParams token) {
167        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
168        mQueue.add(new ListEntry(PLAY_SILENCE, token));
169    }
170
171    // -----------------------------------------
172    // End of public API methods.
173    // -----------------------------------------
174
175    // -----------------------------------------
176    // Methods for managing the message queue.
177    // -----------------------------------------
178
179    /*
180     * The MessageLoop is a handler like implementation that
181     * processes messages from a priority queue.
182     */
183    private final class MessageLoop implements Runnable {
184        @Override
185        public void run() {
186            while (true) {
187                ListEntry entry = null;
188                try {
189                    entry = mQueue.take();
190                } catch (InterruptedException ie) {
191                    return;
192                }
193
194                if (entry.mWhat == SHUTDOWN) {
195                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
196                    return;
197                }
198
199                if (DBG) {
200                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
201                            + " ,seqId : " + entry.mSequenceId);
202                }
203
204                setCurrentParams(entry.mMessage);
205                handleMessage(entry);
206                setCurrentParams(null);
207            }
208        }
209    }
210
211    /*
212     * Atomically clear the queue of all messages.
213     */
214    synchronized private void removeAllMessages() {
215        mQueue.clear();
216    }
217
218    /*
219     * Remove all messages that originate from a given calling app.
220     */
221    synchronized private void removeMessages(String callingApp) {
222        Iterator<ListEntry> it = mQueue.iterator();
223
224        while (it.hasNext()) {
225            final ListEntry current = it.next();
226            // The null check is to prevent us from removing control messages,
227            // such as a shutdown message.
228            if (current.mMessage != null &&
229                    callingApp.equals(current.mMessage.getCallingApp())) {
230                it.remove();
231            }
232        }
233    }
234
235    /*
236     * An element of our priority queue of messages. Each message has a priority,
237     * and a sequence id (defined by the order of enqueue calls). Among messages
238     * with the same priority, messages that were received earlier win out.
239     */
240    private final class ListEntry implements Comparable<ListEntry> {
241        final int mWhat;
242        final MessageParams mMessage;
243        final int mPriority;
244        final long mSequenceId;
245
246        private ListEntry(int what, MessageParams message) {
247            this(what, message, DEFAULT_PRIORITY);
248        }
249
250        private ListEntry(int what, MessageParams message, int priority) {
251            mWhat = what;
252            mMessage = message;
253            mPriority = priority;
254            mSequenceId = mSequenceIdCtr.incrementAndGet();
255        }
256
257        @Override
258        public int compareTo(ListEntry that) {
259            if (that == this) {
260                return 0;
261            }
262
263            // Note that this is always 0, 1 or -1.
264            int priorityDiff = mPriority - that.mPriority;
265            if (priorityDiff == 0) {
266                // The == case cannot occur.
267                return (mSequenceId < that.mSequenceId) ? -1 : 1;
268            }
269
270            return priorityDiff;
271        }
272    }
273
274    private void setCurrentParams(MessageParams p) {
275        if (DBG_THREADING) {
276            if (p != null) {
277                Log.d(TAG, "Started handling :" + p);
278            } else {
279                Log.d(TAG, "End handling : " + mCurrentParams);
280            }
281        }
282        mCurrentParams = p;
283    }
284
285    private MessageParams getCurrentParams() {
286        return mCurrentParams;
287    }
288
289    // -----------------------------------------
290    // Methods for dealing with individual messages, the methods
291    // below do the actual work.
292    // -----------------------------------------
293
294    private void handleMessage(ListEntry entry) {
295        final MessageParams msg = entry.mMessage;
296        if (entry.mWhat == SYNTHESIS_START) {
297            handleSynthesisStart(msg);
298        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
299            handleSynthesisDataAvailable(msg);
300        } else if (entry.mWhat == SYNTHESIS_DONE) {
301            handleSynthesisDone(msg);
302        } else if (entry.mWhat == PLAY_AUDIO) {
303            handleAudio(msg);
304        } else if (entry.mWhat == PLAY_SILENCE) {
305            handleSilence(msg);
306        }
307    }
308
309    // Currently implemented as blocking the audio playback thread for the
310    // specified duration. If a call to stop() is made, the thread
311    // unblocks.
312    private void handleSilence(MessageParams msg) {
313        if (DBG) Log.d(TAG, "handleSilence()");
314        SilenceMessageParams params = (SilenceMessageParams) msg;
315        if (params.getSilenceDurationMs() > 0) {
316            params.getConditionVariable().block(params.getSilenceDurationMs());
317        }
318        params.getDispatcher().dispatchUtteranceCompleted();
319        if (DBG) Log.d(TAG, "handleSilence() done.");
320    }
321
322    // Plays back audio from a given URI. No TTS engine involvement here.
323    private void handleAudio(MessageParams msg) {
324        if (DBG) Log.d(TAG, "handleAudio()");
325        AudioMessageParams params = (AudioMessageParams) msg;
326        // Note that the BlockingMediaPlayer spawns a separate thread.
327        //
328        // TODO: This can be avoided.
329        params.getPlayer().startAndWait();
330        params.getDispatcher().dispatchUtteranceCompleted();
331        if (DBG) Log.d(TAG, "handleAudio() done.");
332    }
333
334    // Denotes the start of a new synthesis request. We create a new
335    // audio track, and prepare it for incoming data.
336    //
337    // Note that since all TTS synthesis happens on a single thread, we
338    // should ALWAYS see the following order :
339    //
340    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
341    // OR
342    // handleSynthesisCompleteDataAvailable.
343    private void handleSynthesisStart(MessageParams msg) {
344        if (DBG) Log.d(TAG, "handleSynthesisStart()");
345        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
346
347        // Oops, looks like the engine forgot to call done(). We go through
348        // extra trouble to clean the data to prevent the AudioTrack resources
349        // from being leaked.
350        if (mLastSynthesisRequest != null) {
351            Log.w(TAG, "Error : Missing call to done() for request : " +
352                    mLastSynthesisRequest);
353            handleSynthesisDone(mLastSynthesisRequest);
354        }
355
356        mLastSynthesisRequest = param;
357
358        // Create the audio track.
359        final AudioTrack audioTrack = createStreamingAudioTrack(param);
360
361        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
362
363        param.setAudioTrack(audioTrack);
364    }
365
366    // More data available to be flushed to the audio track.
367    private void handleSynthesisDataAvailable(MessageParams msg) {
368        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
369        if (param.getAudioTrack() == null) {
370            Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
371            return;
372        }
373
374        if (param != mLastSynthesisRequest) {
375            Log.e(TAG, "Call to dataAvailable without done() / start()");
376            return;
377        }
378
379        final AudioTrack audioTrack = param.getAudioTrack();
380        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
381
382        if (bufferCopy == null) {
383            Log.e(TAG, "No buffers available to play.");
384            return;
385        }
386
387        int playState = audioTrack.getPlayState();
388        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
389            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
390            audioTrack.play();
391        }
392        int count = 0;
393        while (count < bufferCopy.mBytes.length) {
394            // Note that we don't take bufferCopy.mOffset into account because
395            // it is guaranteed to be 0.
396            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mBytes.length);
397            if (written <= 0) {
398                break;
399            }
400            count += written;
401        }
402        param.mBytesWritten += count;
403        param.mLogger.onPlaybackStart();
404    }
405
406    // Wait for the audio track to stop playing, and then release its resources.
407    private void handleSynthesisDone(MessageParams msg) {
408        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
409
410        if (DBG) Log.d(TAG, "handleSynthesisDone()");
411        final AudioTrack audioTrack = params.getAudioTrack();
412
413        if (audioTrack == null) {
414            return;
415        }
416
417        if (params.mBytesWritten < params.mAudioBufferSize) {
418            if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " +
419                    audioTrack.getPlayState());
420            params.mIsShortUtterance = true;
421            audioTrack.stop();
422        }
423
424        if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
425                audioTrack.hashCode());
426        blockUntilDone(params);
427        if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
428
429        // The last call to AudioTrack.write( ) will return only after
430        // all data from the audioTrack has been sent to the mixer, so
431        // it's safe to release at this point. Make sure release() and the call
432        // that set the audio track to null are performed atomically.
433        synchronized (this) {
434            // Never allow the audioTrack to be observed in a state where
435            // it is released but non null. The only case this might happen
436            // is in the various stopFoo methods that call AudioTrack#stop from
437            // different threads, but they are synchronized on AudioPlayBackHandler#this
438            // too.
439            audioTrack.release();
440            params.setAudioTrack(null);
441        }
442        params.getDispatcher().dispatchUtteranceCompleted();
443        mLastSynthesisRequest = null;
444        params.mLogger.onWriteData();
445    }
446
447    /**
448     * The minimum increment of time to wait for an audiotrack to finish
449     * playing.
450     */
451    private static final long MIN_SLEEP_TIME_MS = 20;
452
453    /**
454     * The maximum increment of time to sleep while waiting for an audiotrack
455     * to finish playing.
456     */
457    private static final long MAX_SLEEP_TIME_MS = 2500;
458
459    /**
460     * The maximum amount of time to wait for an audio track to make progress while
461     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
462     * could happen in exceptional circumstances like a media_server crash.
463     */
464    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
465
466    private static void blockUntilDone(SynthesisMessageParams params) {
467        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
468            return;
469        }
470
471        if (params.mIsShortUtterance) {
472            // In this case we would have called AudioTrack#stop() to flush
473            // buffers to the mixer. This makes the playback head position
474            // unobservable and notification markers do not work reliably. We
475            // have no option but to wait until we think the track would finish
476            // playing and release it after.
477            //
478            // This isn't as bad as it looks because (a) We won't end up waiting
479            // for much longer than we should because even at 4khz mono, a short
480            // utterance weighs in at about 2 seconds, and (b) such short utterances
481            // are expected to be relatively infrequent and in a stream of utterances
482            // this shows up as a slightly longer pause.
483            blockUntilEstimatedCompletion(params);
484        } else {
485            blockUntilCompletion(params);
486        }
487    }
488
489    private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) {
490        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
491        final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz);
492
493        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
494
495        try {
496            Thread.sleep(estimatedTimeMs);
497        } catch (InterruptedException ie) {
498            // Do nothing.
499        }
500    }
501
502    private static void blockUntilCompletion(SynthesisMessageParams params) {
503        final AudioTrack audioTrack = params.mAudioTrack;
504        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
505
506        int previousPosition = -1;
507        int currentPosition = 0;
508        long blockedTimeMs = 0;
509
510        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
511                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {
512
513            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
514                    audioTrack.getSampleRate();
515            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
516
517            // Check if the audio track has made progress since the last loop
518            // iteration. We should then add in the amount of time that was
519            // spent sleeping in the last iteration.
520            if (currentPosition == previousPosition) {
521                // This works only because the sleep time that would have been calculated
522                // would be the same in the previous iteration too.
523                blockedTimeMs += sleepTimeMs;
524                // If we've taken too long to make progress, bail.
525                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
526                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
527                            "for AudioTrack to make progress, Aborting");
528                    break;
529                }
530            } else {
531                blockedTimeMs = 0;
532            }
533            previousPosition = currentPosition;
534
535            if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
536                    " Playback position : " + currentPosition + ", Length in frames : "
537                    + lengthInFrames);
538            try {
539                Thread.sleep(sleepTimeMs);
540            } catch (InterruptedException ie) {
541                break;
542            }
543        }
544    }
545
546    private static final long clip(long value, long min, long max) {
547        if (value < min) {
548            return min;
549        }
550
551        if (value > max) {
552            return max;
553        }
554
555        return value;
556    }
557
558    private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) {
559        final int channelConfig = getChannelConfig(params.mChannelCount);
560        final int sampleRateInHz = params.mSampleRateInHz;
561        final int audioFormat = params.mAudioFormat;
562
563        int minBufferSizeInBytes
564                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
565        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
566
567        AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig,
568                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
569        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
570            Log.w(TAG, "Unable to create audio track.");
571            audioTrack.release();
572            return null;
573        }
574        params.mAudioBufferSize = bufferSizeInBytes;
575
576        setupVolume(audioTrack, params.mVolume, params.mPan);
577        return audioTrack;
578    }
579
580    static int getChannelConfig(int channelCount) {
581        if (channelCount == 1) {
582            return AudioFormat.CHANNEL_OUT_MONO;
583        } else if (channelCount == 2){
584            return AudioFormat.CHANNEL_OUT_STEREO;
585        }
586
587        return 0;
588    }
589
590    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
591        float vol = clip(volume, 0.0f, 1.0f);
592        float panning = clip(pan, -1.0f, 1.0f);
593        float volLeft = vol;
594        float volRight = vol;
595        if (panning > 0.0f) {
596            volLeft *= (1.0f - panning);
597        } else if (panning < 0.0f) {
598            volRight *= (1.0f + panning);
599        }
600        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
601        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
602            Log.e(TAG, "Failed to set volume");
603        }
604    }
605
606    private static float clip(float value, float min, float max) {
607        return value > max ? max : (value < min ? min : value);
608    }
609
610}
611