AudioPlaybackHandler.java revision c34f76fe89b5a31d01d63067c2f24b9a6a76df18
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.util.Log;
21
22import java.util.Iterator;
23import java.util.concurrent.PriorityBlockingQueue;
24import java.util.concurrent.atomic.AtomicLong;
25
26class AudioPlaybackHandler {
27    private static final String TAG = "TTS.AudioPlaybackHandler";
28    private static final boolean DBG = false;
29
30    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32    private static final int SYNTHESIS_START = 1;
33    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
34    private static final int SYNTHESIS_DONE = 3;
35
36    private static final int PLAY_AUDIO = 5;
37    private static final int PLAY_SILENCE = 6;
38
39    private static final int SHUTDOWN = -1;
40
41    private static final int DEFAULT_PRIORITY = 1;
42    private static final int HIGH_PRIORITY = 0;
43
44    private final PriorityBlockingQueue<ListEntry> mQueue =
45            new PriorityBlockingQueue<ListEntry>();
46    private final Thread mHandlerThread;
47
48    private volatile MessageParams mCurrentParams = null;
49    // Used only for book keeping and error detection.
50    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
51    // Used to order incoming messages in our priority queue.
52    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
53
54
55    AudioPlaybackHandler() {
56        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
57    }
58
59    public void start() {
60        mHandlerThread.start();
61    }
62
63    /**
64     * Stops all synthesis for a given {@code token}. If the current token
65     * is currently being processed, an effort will be made to stop it but
66     * that is not guaranteed.
67     */
68    synchronized public void stop(MessageParams token) {
69        if (token == null) {
70            return;
71        }
72
73        removeMessages(token);
74
75        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
76            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
77            if (current != null) {
78                // Stop the current audio track if it's still playing.
79                // The audio track is thread safe in this regard.
80                current.stop();
81            }
82            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
83        } else  {
84            final MessageParams current = getCurrentParams();
85
86            if (current != null) {
87                if (token.getType() == MessageParams.TYPE_AUDIO) {
88                    ((AudioMessageParams) current).getPlayer().stop();
89                } else if (token.getType() == MessageParams.TYPE_SILENCE) {
90                    ((SilenceMessageParams) current).getConditionVariable().open();
91                }
92            }
93        }
94    }
95
96    synchronized public void removePlaybackItems(String callingApp) {
97        removeMessages(callingApp);
98        stop(getCurrentParams());
99    }
100
101    synchronized public void removeAllItems() {
102        removeAllMessages();
103        stop(getCurrentParams());
104    }
105
106    /**
107     * @return false iff the queue is empty and no queue item is currently
108     *        being handled, true otherwise.
109     */
110    public boolean isSpeaking() {
111        return (mQueue.peek() != null) || (mCurrentParams != null);
112    }
113
114    /**
115     * Shut down the audio playback thread.
116     */
117    synchronized public void quit() {
118        stop(getCurrentParams());
119        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
120    }
121
122    void enqueueSynthesisStart(SynthesisMessageParams token) {
123        mQueue.add(new ListEntry(SYNTHESIS_START, token));
124    }
125
126    void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
127        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
128    }
129
130    void enqueueSynthesisDone(SynthesisMessageParams token) {
131        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
132    }
133
134    void enqueueAudio(AudioMessageParams token) {
135        mQueue.add(new ListEntry(PLAY_AUDIO, token));
136    }
137
138    void enqueueSilence(SilenceMessageParams token) {
139        mQueue.add(new ListEntry(PLAY_SILENCE, token));
140    }
141
142    // -----------------------------------------
143    // End of public API methods.
144    // -----------------------------------------
145
146    // -----------------------------------------
147    // Methods for managing the message queue.
148    // -----------------------------------------
149
150    /*
151     * The MessageLoop is a handler like implementation that
152     * processes messages from a priority queue.
153     */
154    private final class MessageLoop implements Runnable {
155        @Override
156        public void run() {
157            while (true) {
158                ListEntry entry = null;
159                try {
160                    entry = mQueue.take();
161                } catch (InterruptedException ie) {
162                    return;
163                }
164
165                if (entry.mWhat == SHUTDOWN) {
166                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
167                    return;
168                }
169
170                if (DBG) {
171                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
172                            + " ,seqId : " + entry.mSequenceId);
173                }
174
175                setCurrentParams(entry.mMessage);
176                handleMessage(entry);
177                setCurrentParams(null);
178            }
179        }
180    }
181
182    /*
183     * Remove all messages from the queue that contain the supplied token.
184     * Note that the Iterator is thread safe, and other methods can safely
185     * continue adding to the queue at this point.
186     */
187    synchronized private void removeMessages(MessageParams token) {
188        if (token == null) {
189            return;
190        }
191
192        Iterator<ListEntry> it = mQueue.iterator();
193
194        while (it.hasNext()) {
195            final ListEntry current = it.next();
196            if (current.mMessage == token) {
197                it.remove();
198            }
199        }
200    }
201
202    /*
203     * Atomically clear the queue of all messages.
204     */
205    synchronized private void removeAllMessages() {
206        mQueue.clear();
207    }
208
209    /*
210     * Remove all messages that originate from a given calling app.
211     */
212    synchronized private void removeMessages(String callingApp) {
213        Iterator<ListEntry> it = mQueue.iterator();
214
215        while (it.hasNext()) {
216            final ListEntry current = it.next();
217            // The null check is to prevent us from removing control messages,
218            // such as a shutdown message.
219            if (current.mMessage != null &&
220                    callingApp.equals(current.mMessage.getCallingApp())) {
221                it.remove();
222            }
223        }
224    }
225
226    /*
227     * An element of our priority queue of messages. Each message has a priority,
228     * and a sequence id (defined by the order of enqueue calls). Among messages
229     * with the same priority, messages that were received earlier win out.
230     */
231    private final class ListEntry implements Comparable<ListEntry> {
232        final int mWhat;
233        final MessageParams mMessage;
234        final int mPriority;
235        final long mSequenceId;
236
237        private ListEntry(int what, MessageParams message) {
238            this(what, message, DEFAULT_PRIORITY);
239        }
240
241        private ListEntry(int what, MessageParams message, int priority) {
242            mWhat = what;
243            mMessage = message;
244            mPriority = priority;
245            mSequenceId = mSequenceIdCtr.incrementAndGet();
246        }
247
248        @Override
249        public int compareTo(ListEntry that) {
250            if (that == this) {
251                return 0;
252            }
253
254            // Note that this is always 0, 1 or -1.
255            int priorityDiff = mPriority - that.mPriority;
256            if (priorityDiff == 0) {
257                // The == case cannot occur.
258                return (mSequenceId < that.mSequenceId) ? -1 : 1;
259            }
260
261            return priorityDiff;
262        }
263    }
264
265    private void setCurrentParams(MessageParams p) {
266        mCurrentParams = p;
267    }
268
269    private MessageParams getCurrentParams() {
270        return mCurrentParams;
271    }
272
273    // -----------------------------------------
274    // Methods for dealing with individual messages, the methods
275    // below do the actual work.
276    // -----------------------------------------
277
278    private void handleMessage(ListEntry entry) {
279        final MessageParams msg = entry.mMessage;
280        if (entry.mWhat == SYNTHESIS_START) {
281            handleSynthesisStart(msg);
282        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
283            handleSynthesisDataAvailable(msg);
284        } else if (entry.mWhat == SYNTHESIS_DONE) {
285            handleSynthesisDone(msg);
286        } else if (entry.mWhat == PLAY_AUDIO) {
287            handleAudio(msg);
288        } else if (entry.mWhat == PLAY_SILENCE) {
289            handleSilence(msg);
290        }
291    }
292
293    // Currently implemented as blocking the audio playback thread for the
294    // specified duration. If a call to stop() is made, the thread
295    // unblocks.
296    private void handleSilence(MessageParams msg) {
297        if (DBG) Log.d(TAG, "handleSilence()");
298        SilenceMessageParams params = (SilenceMessageParams) msg;
299        if (params.getSilenceDurationMs() > 0) {
300            params.getConditionVariable().block(params.getSilenceDurationMs());
301        }
302        params.getDispatcher().dispatchUtteranceCompleted();
303        if (DBG) Log.d(TAG, "handleSilence() done.");
304    }
305
306    // Plays back audio from a given URI. No TTS engine involvement here.
307    private void handleAudio(MessageParams msg) {
308        if (DBG) Log.d(TAG, "handleAudio()");
309        AudioMessageParams params = (AudioMessageParams) msg;
310        // Note that the BlockingMediaPlayer spawns a separate thread.
311        //
312        // TODO: This can be avoided.
313        params.getPlayer().startAndWait();
314        params.getDispatcher().dispatchUtteranceCompleted();
315        if (DBG) Log.d(TAG, "handleAudio() done.");
316    }
317
318    // Denotes the start of a new synthesis request. We create a new
319    // audio track, and prepare it for incoming data.
320    //
321    // Note that since all TTS synthesis happens on a single thread, we
322    // should ALWAYS see the following order :
323    //
324    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
325    // OR
326    // handleSynthesisCompleteDataAvailable.
327    private void handleSynthesisStart(MessageParams msg) {
328        if (DBG) Log.d(TAG, "handleSynthesisStart()");
329        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
330
331        // Oops, looks like the engine forgot to call done(). We go through
332        // extra trouble to clean the data to prevent the AudioTrack resources
333        // from being leaked.
334        if (mLastSynthesisRequest != null) {
335            Log.w(TAG, "Error : Missing call to done() for request : " +
336                    mLastSynthesisRequest);
337            handleSynthesisDone(mLastSynthesisRequest);
338        }
339
340        mLastSynthesisRequest = param;
341
342        // Create the audio track.
343        final AudioTrack audioTrack = createStreamingAudioTrack(
344                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
345                param.mChannelCount, param.mVolume, param.mPan);
346
347        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
348
349        param.setAudioTrack(audioTrack);
350    }
351
352    // More data available to be flushed to the audio track.
353    private void handleSynthesisDataAvailable(MessageParams msg) {
354        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
355        if (param.getAudioTrack() == null) {
356            Log.w(TAG, "Error : null audio track in handleDataAvailable.");
357            return;
358        }
359
360        if (param != mLastSynthesisRequest) {
361            Log.e(TAG, "Call to dataAvailable without done() / start()");
362            return;
363        }
364
365        final AudioTrack audioTrack = param.getAudioTrack();
366        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
367
368        if (bufferCopy == null) {
369            Log.e(TAG, "No buffers available to play.");
370            return;
371        }
372
373        int playState = audioTrack.getPlayState();
374        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
375            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
376            audioTrack.play();
377        }
378        int count = 0;
379        while (count < bufferCopy.mLength) {
380            // Note that we don't take bufferCopy.mOffset into account because
381            // it is guaranteed to be 0.
382            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
383            if (written <= 0) {
384                break;
385            }
386            count += written;
387        }
388        param.mBytesWritten += count;
389        param.mLogger.onPlaybackStart();
390    }
391
392    private void handleSynthesisDone(MessageParams msg) {
393        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
394        handleSynthesisDone(params);
395        // This call is delayed more than it should be, but we are
396        // certain at this point that we have all the data we want.
397        params.mLogger.onWriteData();
398    }
399
400    // Wait for the audio track to stop playing, and then release it's resources.
401    private void handleSynthesisDone(SynthesisMessageParams params) {
402        if (DBG) Log.d(TAG, "handleSynthesisDone()");
403        final AudioTrack audioTrack = params.getAudioTrack();
404
405        try {
406            if (audioTrack != null) {
407                if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
408                        audioTrack.hashCode());
409                blockUntilDone(params);
410                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
411                // The last call to AudioTrack.write( ) will return only after
412                // all data from the audioTrack has been sent to the mixer, so
413                // it's safe to release at this point.
414                audioTrack.release();
415            }
416        } finally {
417            params.setAudioTrack(null);
418            params.getDispatcher().dispatchUtteranceCompleted();
419            mLastSynthesisRequest = null;
420        }
421    }
422
423    private static void blockUntilDone(SynthesisMessageParams params) {
424        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
425            return;
426        }
427
428        final AudioTrack audioTrack = params.mAudioTrack;
429        final int bytesPerFrame = getBytesPerFrame(params.mAudioFormat);
430        final int lengthInBytes = params.mBytesWritten;
431        final int lengthInFrames = lengthInBytes / bytesPerFrame;
432
433        int currentPosition = 0;
434        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
435            if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
436                break;
437            }
438
439            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
440                    audioTrack.getSampleRate();
441
442            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
443                    " Playback position : " + currentPosition);
444            try {
445                Thread.sleep(estimatedTimeMs);
446            } catch (InterruptedException ie) {
447                break;
448            }
449        }
450    }
451
452    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
453            int audioFormat, int channelCount, float volume, float pan) {
454        int channelConfig = getChannelConfig(channelCount);
455
456        int minBufferSizeInBytes
457                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
458        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
459
460        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
461                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
462        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
463            Log.w(TAG, "Unable to create audio track.");
464            audioTrack.release();
465            return null;
466        }
467
468        setupVolume(audioTrack, volume, pan);
469        return audioTrack;
470    }
471
472    static int getChannelConfig(int channelCount) {
473        if (channelCount == 1) {
474            return AudioFormat.CHANNEL_OUT_MONO;
475        } else if (channelCount == 2){
476            return AudioFormat.CHANNEL_OUT_STEREO;
477        }
478
479        return 0;
480    }
481
482    static int getBytesPerFrame(int audioFormat) {
483        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
484            return 1;
485        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
486            return 2;
487        }
488
489        return -1;
490    }
491
492    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
493        float vol = clip(volume, 0.0f, 1.0f);
494        float panning = clip(pan, -1.0f, 1.0f);
495        float volLeft = vol;
496        float volRight = vol;
497        if (panning > 0.0f) {
498            volLeft *= (1.0f - panning);
499        } else if (panning < 0.0f) {
500            volRight *= (1.0f + panning);
501        }
502        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
503        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
504            Log.e(TAG, "Failed to set volume");
505        }
506    }
507
508    private static float clip(float value, float min, float max) {
509        return value > max ? max : (value < min ? min : value);
510    }
511
512}
513