AudioPlaybackHandler.java revision 673f360b0e22a8591f515cba7a90d5cfcfad81a7
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.text.TextUtils;
21import android.util.Log;
22
23import java.util.Iterator;
24import java.util.concurrent.PriorityBlockingQueue;
25import java.util.concurrent.atomic.AtomicLong;
26
27class AudioPlaybackHandler {
28    private static final String TAG = "TTS.AudioPlaybackHandler";
29    private static final boolean DBG_THREADING = false;
30    private static final boolean DBG = false;
31
32    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
33
34    private static final int SYNTHESIS_START = 1;
35    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
36    private static final int SYNTHESIS_DONE = 3;
37
38    private static final int PLAY_AUDIO = 5;
39    private static final int PLAY_SILENCE = 6;
40
41    private static final int SHUTDOWN = -1;
42
43    private static final int DEFAULT_PRIORITY = 1;
44    private static final int HIGH_PRIORITY = 0;
45
46    private final PriorityBlockingQueue<ListEntry> mQueue =
47            new PriorityBlockingQueue<ListEntry>();
48    private final Thread mHandlerThread;
49
50    private volatile MessageParams mCurrentParams = null;
51    // Used only for book keeping and error detection.
52    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
53    // Used to order incoming messages in our priority queue.
54    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
55
56
57    AudioPlaybackHandler() {
58        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
59    }
60
61    public void start() {
62        mHandlerThread.start();
63    }
64
65    /**
66     * Stops all synthesis for a given {@code token}. If the current token
67     * is currently being processed, an effort will be made to stop it but
68     * that is not guaranteed.
69     *
70     * NOTE: This assumes that all other messages in the queue with {@code token}
71     * have been removed already.
72     *
73     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
74     */
75    private void stop(MessageParams token) {
76        if (token == null) {
77            return;
78        }
79
80        if (DBG) Log.d(TAG, "Stopping token : " + token);
81
82        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
83            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
84            if (current != null) {
85                // Stop the current audio track if it's still playing.
86                // The audio track is thread safe in this regard. The current
87                // handleSynthesisDataAvailable call will return soon after this
88                // call.
89                current.stop();
90            }
91            // This is safe because PlaybackSynthesisCallback#stop would have
92            // been called before this method, and will no longer enqueue any
93            // audio for this token.
94            //
95            // (Even if it did, all it would result in is a warning message).
96            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
97        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
98            ((AudioMessageParams) token).getPlayer().stop();
99            // No cleanup required for audio messages.
100        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
101            ((SilenceMessageParams) token).getConditionVariable().open();
102            // No cleanup required for silence messages.
103        }
104    }
105
106    // -----------------------------------------------------
107    // Methods that add and remove elements from the queue. These do not
108    // need to be synchronized strictly speaking, but they make the behaviour
109    // a lot more predictable. (though it would still be correct without
110    // synchronization).
111    // -----------------------------------------------------
112
113    synchronized public void removePlaybackItems(String callingApp) {
114        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
115        removeMessages(callingApp);
116
117        final MessageParams current = getCurrentParams();
118        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
119            stop(current);
120        }
121    }
122
123    synchronized public void removeAllItems() {
124        if (DBG_THREADING) Log.d(TAG, "Removing all items");
125        removeAllMessages();
126        stop(getCurrentParams());
127    }
128
129    /**
130     * @return false iff the queue is empty and no queue item is currently
131     *        being handled, true otherwise.
132     */
133    public boolean isSpeaking() {
134        return (mQueue.peek() != null) || (mCurrentParams != null);
135    }
136
137    /**
138     * Shut down the audio playback thread.
139     */
140    synchronized public void quit() {
141        removeAllMessages();
142        stop(getCurrentParams());
143        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
144    }
145
146    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
147        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
148        mQueue.add(new ListEntry(SYNTHESIS_START, token));
149    }
150
151    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
152        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
153        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
154    }
155
156    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
157        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
158        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
159    }
160
161    synchronized void enqueueAudio(AudioMessageParams token) {
162        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
163        mQueue.add(new ListEntry(PLAY_AUDIO, token));
164    }
165
166    synchronized void enqueueSilence(SilenceMessageParams token) {
167        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
168        mQueue.add(new ListEntry(PLAY_SILENCE, token));
169    }
170
171    // -----------------------------------------
172    // End of public API methods.
173    // -----------------------------------------
174
175    // -----------------------------------------
176    // Methods for managing the message queue.
177    // -----------------------------------------
178
179    /*
180     * The MessageLoop is a handler like implementation that
181     * processes messages from a priority queue.
182     */
183    private final class MessageLoop implements Runnable {
184        @Override
185        public void run() {
186            while (true) {
187                ListEntry entry = null;
188                try {
189                    entry = mQueue.take();
190                } catch (InterruptedException ie) {
191                    return;
192                }
193
194                if (entry.mWhat == SHUTDOWN) {
195                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
196                    return;
197                }
198
199                if (DBG) {
200                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
201                            + " ,seqId : " + entry.mSequenceId);
202                }
203
204                setCurrentParams(entry.mMessage);
205                handleMessage(entry);
206                setCurrentParams(null);
207            }
208        }
209    }
210
211    /*
212     * Atomically clear the queue of all messages.
213     */
214    synchronized private void removeAllMessages() {
215        mQueue.clear();
216    }
217
218    /*
219     * Remove all messages that originate from a given calling app.
220     */
221    synchronized private void removeMessages(String callingApp) {
222        Iterator<ListEntry> it = mQueue.iterator();
223
224        while (it.hasNext()) {
225            final ListEntry current = it.next();
226            // The null check is to prevent us from removing control messages,
227            // such as a shutdown message.
228            if (current.mMessage != null &&
229                    callingApp.equals(current.mMessage.getCallingApp())) {
230                it.remove();
231            }
232        }
233    }
234
235    /*
236     * An element of our priority queue of messages. Each message has a priority,
237     * and a sequence id (defined by the order of enqueue calls). Among messages
238     * with the same priority, messages that were received earlier win out.
239     */
240    private final class ListEntry implements Comparable<ListEntry> {
241        final int mWhat;
242        final MessageParams mMessage;
243        final int mPriority;
244        final long mSequenceId;
245
246        private ListEntry(int what, MessageParams message) {
247            this(what, message, DEFAULT_PRIORITY);
248        }
249
250        private ListEntry(int what, MessageParams message, int priority) {
251            mWhat = what;
252            mMessage = message;
253            mPriority = priority;
254            mSequenceId = mSequenceIdCtr.incrementAndGet();
255        }
256
257        @Override
258        public int compareTo(ListEntry that) {
259            if (that == this) {
260                return 0;
261            }
262
263            // Note that this is always 0, 1 or -1.
264            int priorityDiff = mPriority - that.mPriority;
265            if (priorityDiff == 0) {
266                // The == case cannot occur.
267                return (mSequenceId < that.mSequenceId) ? -1 : 1;
268            }
269
270            return priorityDiff;
271        }
272    }
273
274    private void setCurrentParams(MessageParams p) {
275        if (DBG_THREADING) {
276            if (p != null) {
277                Log.d(TAG, "Started handling :" + p);
278            } else {
279                Log.d(TAG, "End handling : " + mCurrentParams);
280            }
281        }
282        mCurrentParams = p;
283    }
284
285    private MessageParams getCurrentParams() {
286        return mCurrentParams;
287    }
288
289    // -----------------------------------------
290    // Methods for dealing with individual messages, the methods
291    // below do the actual work.
292    // -----------------------------------------
293
294    private void handleMessage(ListEntry entry) {
295        final MessageParams msg = entry.mMessage;
296        if (entry.mWhat == SYNTHESIS_START) {
297            handleSynthesisStart(msg);
298        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
299            handleSynthesisDataAvailable(msg);
300        } else if (entry.mWhat == SYNTHESIS_DONE) {
301            handleSynthesisDone(msg);
302        } else if (entry.mWhat == PLAY_AUDIO) {
303            handleAudio(msg);
304        } else if (entry.mWhat == PLAY_SILENCE) {
305            handleSilence(msg);
306        }
307    }
308
309    // Currently implemented as blocking the audio playback thread for the
310    // specified duration. If a call to stop() is made, the thread
311    // unblocks.
312    private void handleSilence(MessageParams msg) {
313        if (DBG) Log.d(TAG, "handleSilence()");
314        SilenceMessageParams params = (SilenceMessageParams) msg;
315        if (params.getSilenceDurationMs() > 0) {
316            params.getConditionVariable().block(params.getSilenceDurationMs());
317        }
318        params.getDispatcher().dispatchUtteranceCompleted();
319        if (DBG) Log.d(TAG, "handleSilence() done.");
320    }
321
322    // Plays back audio from a given URI. No TTS engine involvement here.
323    private void handleAudio(MessageParams msg) {
324        if (DBG) Log.d(TAG, "handleAudio()");
325        AudioMessageParams params = (AudioMessageParams) msg;
326        // Note that the BlockingMediaPlayer spawns a separate thread.
327        //
328        // TODO: This can be avoided.
329        params.getPlayer().startAndWait();
330        params.getDispatcher().dispatchUtteranceCompleted();
331        if (DBG) Log.d(TAG, "handleAudio() done.");
332    }
333
334    // Denotes the start of a new synthesis request. We create a new
335    // audio track, and prepare it for incoming data.
336    //
337    // Note that since all TTS synthesis happens on a single thread, we
338    // should ALWAYS see the following order :
339    //
340    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
341    // OR
342    // handleSynthesisCompleteDataAvailable.
343    private void handleSynthesisStart(MessageParams msg) {
344        if (DBG) Log.d(TAG, "handleSynthesisStart()");
345        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
346
347        // Oops, looks like the engine forgot to call done(). We go through
348        // extra trouble to clean the data to prevent the AudioTrack resources
349        // from being leaked.
350        if (mLastSynthesisRequest != null) {
351            Log.w(TAG, "Error : Missing call to done() for request : " +
352                    mLastSynthesisRequest);
353            handleSynthesisDone(mLastSynthesisRequest);
354        }
355
356        mLastSynthesisRequest = param;
357
358        // Create the audio track.
359        final AudioTrack audioTrack = createStreamingAudioTrack(param);
360
361        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
362
363        param.setAudioTrack(audioTrack);
364    }
365
366    // More data available to be flushed to the audio track.
367    private void handleSynthesisDataAvailable(MessageParams msg) {
368        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
369        if (param.getAudioTrack() == null) {
370            Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
371            return;
372        }
373
374        if (param != mLastSynthesisRequest) {
375            Log.e(TAG, "Call to dataAvailable without done() / start()");
376            return;
377        }
378
379        final AudioTrack audioTrack = param.getAudioTrack();
380        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
381
382        if (bufferCopy == null) {
383            Log.e(TAG, "No buffers available to play.");
384            return;
385        }
386
387        int playState = audioTrack.getPlayState();
388        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
389            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
390            audioTrack.play();
391        }
392        int count = 0;
393        while (count < bufferCopy.mLength) {
394            // Note that we don't take bufferCopy.mOffset into account because
395            // it is guaranteed to be 0.
396            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
397            if (written <= 0) {
398                break;
399            }
400            count += written;
401        }
402        param.mBytesWritten += count;
403        param.mLogger.onPlaybackStart();
404    }
405
406    // Wait for the audio track to stop playing, and then release its resources.
407    private void handleSynthesisDone(MessageParams msg) {
408        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
409
410        if (DBG) Log.d(TAG, "handleSynthesisDone()");
411        final AudioTrack audioTrack = params.getAudioTrack();
412
413        if (audioTrack == null) {
414            return;
415        }
416
417        if (params.mBytesWritten < params.mAudioBufferSize) {
418            audioTrack.stop();
419        }
420
421        if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
422                audioTrack.hashCode());
423        blockUntilDone(params);
424        if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
425
426        // The last call to AudioTrack.write( ) will return only after
427        // all data from the audioTrack has been sent to the mixer, so
428        // it's safe to release at this point. Make sure release() and the call
429        // that set the audio track to null are performed atomically.
430        synchronized (this) {
431            // Never allow the audioTrack to be observed in a state where
432            // it is released but non null. The only case this might happen
433            // is in the various stopFoo methods that call AudioTrack#stop from
434            // different threads, but they are synchronized on AudioPlayBackHandler#this
435            // too.
436            audioTrack.release();
437            params.setAudioTrack(null);
438        }
439        params.getDispatcher().dispatchUtteranceCompleted();
440        mLastSynthesisRequest = null;
441        params.mLogger.onWriteData();
442    }
443
444    /**
445     * The minimum increment of time to wait for an audiotrack to finish
446     * playing.
447     */
448    private static final long MIN_SLEEP_TIME_MS = 20;
449
450    private static void blockUntilDone(SynthesisMessageParams params) {
451        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
452            return;
453        }
454
455        final AudioTrack audioTrack = params.mAudioTrack;
456        final int bytesPerFrame = getBytesPerFrame(params.mAudioFormat);
457        final int lengthInBytes = params.mBytesWritten;
458        final int lengthInFrames = lengthInBytes / bytesPerFrame;
459
460        int currentPosition = 0;
461        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
462            if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
463                break;
464            }
465
466            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
467                    audioTrack.getSampleRate();
468
469            final long sleepTimeMs = Math.max(estimatedTimeMs, MIN_SLEEP_TIME_MS);
470
471            if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
472                    " Playback position : " + currentPosition + ", Length in frames : "
473                    + lengthInFrames);
474            try {
475                Thread.sleep(sleepTimeMs);
476            } catch (InterruptedException ie) {
477                break;
478            }
479        }
480    }
481
482    private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) {
483        final int channelConfig = getChannelConfig(params.mChannelCount);
484        final int sampleRateInHz = params.mSampleRateInHz;
485        final int audioFormat = params.mAudioFormat;
486
487        int minBufferSizeInBytes
488                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
489        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
490
491        AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig,
492                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
493        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
494            Log.w(TAG, "Unable to create audio track.");
495            audioTrack.release();
496            return null;
497        }
498        params.mAudioBufferSize = bufferSizeInBytes;
499
500        setupVolume(audioTrack, params.mVolume, params.mPan);
501        return audioTrack;
502    }
503
504    static int getChannelConfig(int channelCount) {
505        if (channelCount == 1) {
506            return AudioFormat.CHANNEL_OUT_MONO;
507        } else if (channelCount == 2){
508            return AudioFormat.CHANNEL_OUT_STEREO;
509        }
510
511        return 0;
512    }
513
514    static int getBytesPerFrame(int audioFormat) {
515        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
516            return 1;
517        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
518            return 2;
519        }
520
521        return -1;
522    }
523
524    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
525        float vol = clip(volume, 0.0f, 1.0f);
526        float panning = clip(pan, -1.0f, 1.0f);
527        float volLeft = vol;
528        float volRight = vol;
529        if (panning > 0.0f) {
530            volLeft *= (1.0f - panning);
531        } else if (panning < 0.0f) {
532            volRight *= (1.0f + panning);
533        }
534        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
535        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
536            Log.e(TAG, "Failed to set volume");
537        }
538    }
539
540    private static float clip(float value, float min, float max) {
541        return value > max ? max : (value < min ? min : value);
542    }
543
544}
545