AudioPlaybackHandler.java revision 90e5650f96dabadaaf141beae20a646855073ae1
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.text.TextUtils;
21import android.util.Log;
22
23import java.util.Iterator;
24import java.util.concurrent.PriorityBlockingQueue;
25import java.util.concurrent.atomic.AtomicLong;
26
27class AudioPlaybackHandler {
28    private static final String TAG = "TTS.AudioPlaybackHandler";
29    private static final boolean DBG_THREADING = false;
30    private static final boolean DBG = false;
31
32    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
33
34    private static final int SYNTHESIS_START = 1;
35    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
36    private static final int SYNTHESIS_DONE = 3;
37
38    private static final int PLAY_AUDIO = 5;
39    private static final int PLAY_SILENCE = 6;
40
41    private static final int SHUTDOWN = -1;
42
43    private static final int DEFAULT_PRIORITY = 1;
44    private static final int HIGH_PRIORITY = 0;
45
46    private final PriorityBlockingQueue<ListEntry> mQueue =
47            new PriorityBlockingQueue<ListEntry>();
48    private final Thread mHandlerThread;
49
50    private volatile MessageParams mCurrentParams = null;
51    // Used only for book keeping and error detection.
52    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
53    // Used to order incoming messages in our priority queue.
54    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
55
56
57    AudioPlaybackHandler() {
58        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
59    }
60
61    public void start() {
62        mHandlerThread.start();
63    }
64
65    /**
66     * Stops all synthesis for a given {@code token}. If the current token
67     * is currently being processed, an effort will be made to stop it but
68     * that is not guaranteed.
69     *
70     * NOTE: This assumes that all other messages in the queue with {@code token}
71     * have been removed already.
72     *
73     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
74     */
75    private void stop(MessageParams token) {
76        if (token == null) {
77            return;
78        }
79
80        if (DBG) Log.d(TAG, "Stopping token : " + token);
81
82        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
83            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
84            if (current != null) {
85                // Stop the current audio track if it's still playing.
86                // The audio track is thread safe in this regard. The current
87                // handleSynthesisDataAvailable call will return soon after this
88                // call.
89                current.stop();
90            }
91            // This is safe because PlaybackSynthesisCallback#stop would have
92            // been called before this method, and will no longer enqueue any
93            // audio for this token.
94            //
95            // (Even if it did, all it would result in is a warning message).
96            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
97        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
98            ((AudioMessageParams) token).getPlayer().stop();
99            // No cleanup required for audio messages.
100        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
101            ((SilenceMessageParams) token).getConditionVariable().open();
102            // No cleanup required for silence messages.
103        }
104    }
105
106    // -----------------------------------------------------
107    // Methods that add and remove elements from the queue. These do not
108    // need to be synchronized strictly speaking, but they make the behaviour
109    // a lot more predictable. (though it would still be correct without
110    // synchronization).
111    // -----------------------------------------------------
112
113    synchronized public void removePlaybackItems(String callingApp) {
114        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
115        removeMessages(callingApp);
116
117        final MessageParams current = getCurrentParams();
118        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
119            stop(current);
120        }
121    }
122
123    synchronized public void removeAllItems() {
124        if (DBG_THREADING) Log.d(TAG, "Removing all items");
125        removeAllMessages();
126        stop(getCurrentParams());
127    }
128
129    /**
130     * @return false iff the queue is empty and no queue item is currently
131     *        being handled, true otherwise.
132     */
133    public boolean isSpeaking() {
134        return (mQueue.peek() != null) || (mCurrentParams != null);
135    }
136
137    /**
138     * Shut down the audio playback thread.
139     */
140    synchronized public void quit() {
141        removeAllMessages();
142        stop(getCurrentParams());
143        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
144    }
145
146    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
147        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
148        mQueue.add(new ListEntry(SYNTHESIS_START, token));
149    }
150
151    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
152        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
153        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
154    }
155
156    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
157        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
158        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
159    }
160
161    synchronized void enqueueAudio(AudioMessageParams token) {
162        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
163        mQueue.add(new ListEntry(PLAY_AUDIO, token));
164    }
165
166    synchronized void enqueueSilence(SilenceMessageParams token) {
167        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
168        mQueue.add(new ListEntry(PLAY_SILENCE, token));
169    }
170
171    // -----------------------------------------
172    // End of public API methods.
173    // -----------------------------------------
174
175    // -----------------------------------------
176    // Methods for managing the message queue.
177    // -----------------------------------------
178
179    /*
180     * The MessageLoop is a handler like implementation that
181     * processes messages from a priority queue.
182     */
183    private final class MessageLoop implements Runnable {
184        @Override
185        public void run() {
186            while (true) {
187                ListEntry entry = null;
188                try {
189                    entry = mQueue.take();
190                } catch (InterruptedException ie) {
191                    return;
192                }
193
194                if (entry.mWhat == SHUTDOWN) {
195                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
196                    return;
197                }
198
199                if (DBG) {
200                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
201                            + " ,seqId : " + entry.mSequenceId);
202                }
203
204                setCurrentParams(entry.mMessage);
205                handleMessage(entry);
206                setCurrentParams(null);
207            }
208        }
209    }
210
211    /*
212     * Atomically clear the queue of all messages.
213     */
214    synchronized private void removeAllMessages() {
215        mQueue.clear();
216    }
217
218    /*
219     * Remove all messages that originate from a given calling app.
220     */
221    synchronized private void removeMessages(String callingApp) {
222        Iterator<ListEntry> it = mQueue.iterator();
223
224        while (it.hasNext()) {
225            final ListEntry current = it.next();
226            // The null check is to prevent us from removing control messages,
227            // such as a shutdown message.
228            if (current.mMessage != null &&
229                    callingApp.equals(current.mMessage.getCallingApp())) {
230                it.remove();
231            }
232        }
233    }
234
235    /*
236     * An element of our priority queue of messages. Each message has a priority,
237     * and a sequence id (defined by the order of enqueue calls). Among messages
238     * with the same priority, messages that were received earlier win out.
239     */
240    private final class ListEntry implements Comparable<ListEntry> {
241        final int mWhat;
242        final MessageParams mMessage;
243        final int mPriority;
244        final long mSequenceId;
245
246        private ListEntry(int what, MessageParams message) {
247            this(what, message, DEFAULT_PRIORITY);
248        }
249
250        private ListEntry(int what, MessageParams message, int priority) {
251            mWhat = what;
252            mMessage = message;
253            mPriority = priority;
254            mSequenceId = mSequenceIdCtr.incrementAndGet();
255        }
256
257        @Override
258        public int compareTo(ListEntry that) {
259            if (that == this) {
260                return 0;
261            }
262
263            // Note that this is always 0, 1 or -1.
264            int priorityDiff = mPriority - that.mPriority;
265            if (priorityDiff == 0) {
266                // The == case cannot occur.
267                return (mSequenceId < that.mSequenceId) ? -1 : 1;
268            }
269
270            return priorityDiff;
271        }
272    }
273
274    private void setCurrentParams(MessageParams p) {
275        if (DBG_THREADING) {
276            if (p != null) {
277                Log.d(TAG, "Started handling :" + p);
278            } else {
279                Log.d(TAG, "End handling : " + mCurrentParams);
280            }
281        }
282        mCurrentParams = p;
283    }
284
285    private MessageParams getCurrentParams() {
286        return mCurrentParams;
287    }
288
289    // -----------------------------------------
290    // Methods for dealing with individual messages, the methods
291    // below do the actual work.
292    // -----------------------------------------
293
294    private void handleMessage(ListEntry entry) {
295        final MessageParams msg = entry.mMessage;
296        if (entry.mWhat == SYNTHESIS_START) {
297            handleSynthesisStart(msg);
298        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
299            handleSynthesisDataAvailable(msg);
300        } else if (entry.mWhat == SYNTHESIS_DONE) {
301            handleSynthesisDone(msg);
302        } else if (entry.mWhat == PLAY_AUDIO) {
303            handleAudio(msg);
304        } else if (entry.mWhat == PLAY_SILENCE) {
305            handleSilence(msg);
306        }
307    }
308
309    // Currently implemented as blocking the audio playback thread for the
310    // specified duration. If a call to stop() is made, the thread
311    // unblocks.
312    private void handleSilence(MessageParams msg) {
313        if (DBG) Log.d(TAG, "handleSilence()");
314        SilenceMessageParams params = (SilenceMessageParams) msg;
315        if (params.getSilenceDurationMs() > 0) {
316            params.getConditionVariable().block(params.getSilenceDurationMs());
317        }
318        params.getDispatcher().dispatchUtteranceCompleted();
319        if (DBG) Log.d(TAG, "handleSilence() done.");
320    }
321
322    // Plays back audio from a given URI. No TTS engine involvement here.
323    private void handleAudio(MessageParams msg) {
324        if (DBG) Log.d(TAG, "handleAudio()");
325        AudioMessageParams params = (AudioMessageParams) msg;
326        // Note that the BlockingMediaPlayer spawns a separate thread.
327        //
328        // TODO: This can be avoided.
329        params.getPlayer().startAndWait();
330        params.getDispatcher().dispatchUtteranceCompleted();
331        if (DBG) Log.d(TAG, "handleAudio() done.");
332    }
333
334    // Denotes the start of a new synthesis request. We create a new
335    // audio track, and prepare it for incoming data.
336    //
337    // Note that since all TTS synthesis happens on a single thread, we
338    // should ALWAYS see the following order :
339    //
340    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
341    // OR
342    // handleSynthesisCompleteDataAvailable.
343    private void handleSynthesisStart(MessageParams msg) {
344        if (DBG) Log.d(TAG, "handleSynthesisStart()");
345        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
346
347        // Oops, looks like the engine forgot to call done(). We go through
348        // extra trouble to clean the data to prevent the AudioTrack resources
349        // from being leaked.
350        if (mLastSynthesisRequest != null) {
351            Log.w(TAG, "Error : Missing call to done() for request : " +
352                    mLastSynthesisRequest);
353            handleSynthesisDone(mLastSynthesisRequest);
354        }
355
356        mLastSynthesisRequest = param;
357
358        // Create the audio track.
359        final AudioTrack audioTrack = createStreamingAudioTrack(
360                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
361                param.mChannelCount, param.mVolume, param.mPan);
362
363        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
364
365        param.setAudioTrack(audioTrack);
366    }
367
368    // More data available to be flushed to the audio track.
369    private void handleSynthesisDataAvailable(MessageParams msg) {
370        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
371        if (param.getAudioTrack() == null) {
372            Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
373            return;
374        }
375
376        if (param != mLastSynthesisRequest) {
377            Log.e(TAG, "Call to dataAvailable without done() / start()");
378            return;
379        }
380
381        final AudioTrack audioTrack = param.getAudioTrack();
382        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
383
384        if (bufferCopy == null) {
385            Log.e(TAG, "No buffers available to play.");
386            return;
387        }
388
389        int playState = audioTrack.getPlayState();
390        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
391            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
392            audioTrack.play();
393        }
394        int count = 0;
395        while (count < bufferCopy.mLength) {
396            // Note that we don't take bufferCopy.mOffset into account because
397            // it is guaranteed to be 0.
398            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
399            if (written <= 0) {
400                break;
401            }
402            count += written;
403        }
404        param.mBytesWritten += count;
405        param.mLogger.onPlaybackStart();
406    }
407
408    private void handleSynthesisDone(MessageParams msg) {
409        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
410        handleSynthesisDone(params);
411        // This call is delayed more than it should be, but we are
412        // certain at this point that we have all the data we want.
413        params.mLogger.onWriteData();
414    }
415
416    // Wait for the audio track to stop playing, and then release it's resources.
417    private void handleSynthesisDone(SynthesisMessageParams params) {
418        if (DBG) Log.d(TAG, "handleSynthesisDone()");
419        final AudioTrack audioTrack = params.getAudioTrack();
420
421        try {
422            if (audioTrack != null) {
423                if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
424                        audioTrack.hashCode());
425                blockUntilDone(params);
426                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
427                // The last call to AudioTrack.write( ) will return only after
428                // all data from the audioTrack has been sent to the mixer, so
429                // it's safe to release at this point.
430                audioTrack.release();
431            }
432        } finally {
433            params.setAudioTrack(null);
434            params.getDispatcher().dispatchUtteranceCompleted();
435            mLastSynthesisRequest = null;
436        }
437    }
438
439    private static void blockUntilDone(SynthesisMessageParams params) {
440        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
441            return;
442        }
443
444        final AudioTrack audioTrack = params.mAudioTrack;
445        final int bytesPerFrame = getBytesPerFrame(params.mAudioFormat);
446        final int lengthInBytes = params.mBytesWritten;
447        final int lengthInFrames = lengthInBytes / bytesPerFrame;
448
449        int currentPosition = 0;
450        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
451            if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
452                break;
453            }
454
455            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
456                    audioTrack.getSampleRate();
457
458            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
459                    " Playback position : " + currentPosition);
460            try {
461                Thread.sleep(estimatedTimeMs);
462            } catch (InterruptedException ie) {
463                break;
464            }
465        }
466    }
467
468    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
469            int audioFormat, int channelCount, float volume, float pan) {
470        int channelConfig = getChannelConfig(channelCount);
471
472        int minBufferSizeInBytes
473                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
474        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
475
476        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
477                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
478        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
479            Log.w(TAG, "Unable to create audio track.");
480            audioTrack.release();
481            return null;
482        }
483
484        setupVolume(audioTrack, volume, pan);
485        return audioTrack;
486    }
487
488    static int getChannelConfig(int channelCount) {
489        if (channelCount == 1) {
490            return AudioFormat.CHANNEL_OUT_MONO;
491        } else if (channelCount == 2){
492            return AudioFormat.CHANNEL_OUT_STEREO;
493        }
494
495        return 0;
496    }
497
498    static int getBytesPerFrame(int audioFormat) {
499        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
500            return 1;
501        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
502            return 2;
503        }
504
505        return -1;
506    }
507
508    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
509        float vol = clip(volume, 0.0f, 1.0f);
510        float panning = clip(pan, -1.0f, 1.0f);
511        float volLeft = vol;
512        float volRight = vol;
513        if (panning > 0.0f) {
514            volLeft *= (1.0f - panning);
515        } else if (panning < 0.0f) {
516            volRight *= (1.0f + panning);
517        }
518        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
519        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
520            Log.e(TAG, "Failed to set volume");
521        }
522    }
523
524    private static float clip(float value, float min, float max) {
525        return value > max ? max : (value < min ? min : value);
526    }
527
528}
529