AudioPlaybackHandler.java revision abc63fbddab2477a2954bc804aba2826e1f11084
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.util.Log;
21
22import java.util.Iterator;
23import java.util.concurrent.PriorityBlockingQueue;
24import java.util.concurrent.atomic.AtomicLong;
25
26class AudioPlaybackHandler {
27    private static final String TAG = "TTS.AudioPlaybackHandler";
28    private static final boolean DBG = false;
29
30    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32    private static final int SYNTHESIS_START = 1;
33    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
34    private static final int SYNTHESIS_COMPLETE_DATA_AVAILABLE = 3;
35    private static final int SYNTHESIS_DONE = 4;
36
37    private static final int PLAY_AUDIO = 5;
38    private static final int PLAY_SILENCE = 6;
39
40    private static final int SHUTDOWN = -1;
41
42    private static final int DEFAULT_PRIORITY = 1;
43    private static final int HIGH_PRIORITY = 0;
44
45    private final PriorityBlockingQueue<ListEntry> mQueue =
46            new PriorityBlockingQueue<ListEntry>();
47    private final Thread mHandlerThread;
48
49    private volatile MessageParams mCurrentParams = null;
50    // Used only for book keeping and error detection.
51    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
52    // Used to order incoming messages in our priority queue.
53    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);
54
55
56    AudioPlaybackHandler() {
57        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
58    }
59
60    public void start() {
61        mHandlerThread.start();
62    }
63
64    /**
65     * Stops all synthesis for a given {@code token}. If the current token
66     * is currently being processed, an effort will be made to stop it but
67     * that is not guaranteed.
68     */
69    synchronized public void stop(MessageParams token) {
70        if (token == null) {
71            return;
72        }
73
74        removeMessages(token);
75
76        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
77            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
78        } else  {
79            final MessageParams current = getCurrentParams();
80
81            if (current != null) {
82                if (token.getType() == MessageParams.TYPE_AUDIO) {
83                    ((AudioMessageParams) current).getPlayer().stop();
84                } else if (token.getType() == MessageParams.TYPE_SILENCE) {
85                    ((SilenceMessageParams) current).getConditionVariable().open();
86                }
87            }
88        }
89    }
90
91    synchronized public void removePlaybackItems(String callingApp) {
92        removeMessages(callingApp);
93        stop(getCurrentParams());
94    }
95
96    synchronized public void removeAllItems() {
97        removeAllMessages();
98        stop(getCurrentParams());
99    }
100
101    /**
102     * Shut down the audio playback thread.
103     */
104    synchronized public void quit() {
105        stop(getCurrentParams());
106        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
107    }
108
109    void enqueueSynthesisStart(SynthesisMessageParams token) {
110        mQueue.add(new ListEntry(SYNTHESIS_START, token));
111    }
112
113    void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
114        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
115    }
116
117    void enqueueSynthesisCompleteDataAvailable(SynthesisMessageParams token) {
118        mQueue.add(new ListEntry(SYNTHESIS_COMPLETE_DATA_AVAILABLE, token));
119    }
120
121    void enqueueSynthesisDone(SynthesisMessageParams token) {
122        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
123    }
124
125    void enqueueAudio(AudioMessageParams token) {
126        mQueue.add(new ListEntry(PLAY_AUDIO, token));
127    }
128
129    void enqueueSilence(SilenceMessageParams token) {
130        mQueue.add(new ListEntry(PLAY_SILENCE, token));
131    }
132
133    // -----------------------------------------
134    // End of public API methods.
135    // -----------------------------------------
136
137    // -----------------------------------------
138    // Methods for managing the message queue.
139    // -----------------------------------------
140
141    /*
142     * The MessageLoop is a handler like implementation that
143     * processes messages from a priority queue.
144     */
145    private final class MessageLoop implements Runnable {
146        @Override
147        public void run() {
148            while (true) {
149                ListEntry entry = null;
150                try {
151                    entry = mQueue.take();
152                } catch (InterruptedException ie) {
153                    return;
154                }
155
156                if (entry.mWhat == SHUTDOWN) {
157                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
158                    return;
159                }
160
161                if (DBG) {
162                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
163                            + " ,seqId : " + entry.mSequenceId);
164                }
165
166                setCurrentParams(entry.mMessage);
167                handleMessage(entry);
168                setCurrentParams(null);
169            }
170        }
171    }
172
173    /*
174     * Remove all messages from the queue that contain the supplied token.
175     * Note that the Iterator is thread safe, and other methods can safely
176     * continue adding to the queue at this point.
177     */
178    synchronized private void removeMessages(MessageParams token) {
179        if (token == null) {
180            return;
181        }
182
183        Iterator<ListEntry> it = mQueue.iterator();
184
185        while (it.hasNext()) {
186            final ListEntry current = it.next();
187            if (current.mMessage == token) {
188                it.remove();
189            }
190        }
191    }
192
193    /*
194     * Atomically clear the queue of all messages.
195     */
196    synchronized private void removeAllMessages() {
197        mQueue.clear();
198    }
199
200    /*
201     * Remove all messages that originate from a given calling app.
202     */
203    synchronized private void removeMessages(String callingApp) {
204        Iterator<ListEntry> it = mQueue.iterator();
205
206        while (it.hasNext()) {
207            final ListEntry current = it.next();
208            // The null check is to prevent us from removing control messages,
209            // such as a shutdown message.
210            if (current.mMessage != null &&
211                    callingApp.equals(current.mMessage.getCallingApp())) {
212                it.remove();
213            }
214        }
215    }
216
217    /*
218     * An element of our priority queue of messages. Each message has a priority,
219     * and a sequence id (defined by the order of enqueue calls). Among messages
220     * with the same priority, messages that were received earlier win out.
221     */
222    private final class ListEntry implements Comparable<ListEntry> {
223        final int mWhat;
224        final MessageParams mMessage;
225        final int mPriority;
226        final long mSequenceId;
227
228        private ListEntry(int what, MessageParams message) {
229            this(what, message, DEFAULT_PRIORITY);
230        }
231
232        private ListEntry(int what, MessageParams message, int priority) {
233            mWhat = what;
234            mMessage = message;
235            mPriority = priority;
236            mSequenceId = mSequenceIdCtr.incrementAndGet();
237        }
238
239        @Override
240        public int compareTo(ListEntry that) {
241            if (that == this) {
242                return 0;
243            }
244
245            // Note that this is always 0, 1 or -1.
246            int priorityDiff = mPriority - that.mPriority;
247            if (priorityDiff == 0) {
248                // The == case cannot occur.
249                return (mSequenceId < that.mSequenceId) ? -1 : 1;
250            }
251
252            return priorityDiff;
253        }
254    }
255
256    private void setCurrentParams(MessageParams p) {
257        mCurrentParams = p;
258    }
259
260    private MessageParams getCurrentParams() {
261        return mCurrentParams;
262    }
263
264    // -----------------------------------------
265    // Methods for dealing with individual messages, the methods
266    // below do the actual work.
267    // -----------------------------------------
268
269    private void handleMessage(ListEntry entry) {
270        final MessageParams msg = entry.mMessage;
271        if (entry.mWhat == SYNTHESIS_START) {
272            handleSynthesisStart(msg);
273        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
274            handleSynthesisDataAvailable(msg);
275        } else if (entry.mWhat == SYNTHESIS_DONE) {
276            handleSynthesisDone(msg);
277        } else if (entry.mWhat == SYNTHESIS_COMPLETE_DATA_AVAILABLE) {
278            handleSynthesisCompleteDataAvailable(msg);
279        } else if (entry.mWhat == PLAY_AUDIO) {
280            handleAudio(msg);
281        } else if (entry.mWhat == PLAY_SILENCE) {
282            handleSilence(msg);
283        }
284    }
285
286    // Currently implemented as blocking the audio playback thread for the
287    // specified duration. If a call to stop() is made, the thread
288    // unblocks.
289    private void handleSilence(MessageParams msg) {
290        if (DBG) Log.d(TAG, "handleSilence()");
291        SilenceMessageParams params = (SilenceMessageParams) msg;
292        if (params.getSilenceDurationMs() > 0) {
293            params.getConditionVariable().block(params.getSilenceDurationMs());
294        }
295        params.getDispatcher().dispatchUtteranceCompleted();
296        if (DBG) Log.d(TAG, "handleSilence() done.");
297    }
298
299    // Plays back audio from a given URI. No TTS engine involvement here.
300    private void handleAudio(MessageParams msg) {
301        if (DBG) Log.d(TAG, "handleAudio()");
302        AudioMessageParams params = (AudioMessageParams) msg;
303        // Note that the BlockingMediaPlayer spawns a separate thread.
304        //
305        // TODO: This can be avoided.
306        params.getPlayer().startAndWait();
307        params.getDispatcher().dispatchUtteranceCompleted();
308        if (DBG) Log.d(TAG, "handleAudio() done.");
309    }
310
311    // Denotes the start of a new synthesis request. We create a new
312    // audio track, and prepare it for incoming data.
313    //
314    // Note that since all TTS synthesis happens on a single thread, we
315    // should ALWAYS see the following order :
316    //
317    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
318    // OR
319    // handleSynthesisCompleteDataAvailable.
320    private void handleSynthesisStart(MessageParams msg) {
321        if (DBG) Log.d(TAG, "handleSynthesisStart()");
322        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
323
324        // Oops, looks like the engine forgot to call done(). We go through
325        // extra trouble to clean the data to prevent the AudioTrack resources
326        // from being leaked.
327        if (mLastSynthesisRequest != null) {
328            Log.w(TAG, "Error : Missing call to done() for request : " +
329                    mLastSynthesisRequest);
330            handleSynthesisDone(mLastSynthesisRequest);
331        }
332
333        mLastSynthesisRequest = param;
334
335        // Create the audio track.
336        final AudioTrack audioTrack = createStreamingAudioTrack(
337                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
338                param.mChannelCount, param.mVolume, param.mPan);
339
340        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
341
342        param.setAudioTrack(audioTrack);
343    }
344
345    // More data available to be flushed to the audio track.
346    private void handleSynthesisDataAvailable(MessageParams msg) {
347        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
348        if (param.getAudioTrack() == null) {
349            Log.w(TAG, "Error : null audio track in handleDataAvailable.");
350            return;
351        }
352
353        if (param != mLastSynthesisRequest) {
354            Log.e(TAG, "Call to dataAvailable without done() / start()");
355            return;
356        }
357
358        final AudioTrack audioTrack = param.getAudioTrack();
359        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
360
361        if (bufferCopy == null) {
362            Log.e(TAG, "No buffers available to play.");
363            return;
364        }
365
366        int playState = audioTrack.getPlayState();
367        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
368            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
369            audioTrack.play();
370        }
371        int count = 0;
372        while (count < bufferCopy.mLength) {
373            // Note that we don't take bufferCopy.mOffset into account because
374            // it is guaranteed to be 0.
375            int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength);
376            if (written <= 0) {
377                break;
378            }
379            count += written;
380        }
381    }
382
383    private void handleSynthesisDone(MessageParams msg) {
384        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
385        handleSynthesisDone(params);
386    }
387
388    // Flush all remaining data to the audio track, stop it and release
389    // all it's resources.
390    private void handleSynthesisDone(SynthesisMessageParams params) {
391        if (DBG) Log.d(TAG, "handleSynthesisDone()");
392        final AudioTrack audioTrack = params.getAudioTrack();
393
394        try {
395            if (audioTrack != null) {
396                audioTrack.flush();
397                audioTrack.stop();
398                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
399                audioTrack.release();
400            }
401        } finally {
402            params.setAudioTrack(null);
403            params.getDispatcher().dispatchUtteranceCompleted();
404            mLastSynthesisRequest = null;
405        }
406    }
407
408    private void handleSynthesisCompleteDataAvailable(MessageParams msg) {
409        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
410        if (DBG) Log.d(TAG, "completeAudioAvailable(" + params + ")");
411
412        // Channel config and bytes per frame are checked before
413        // this message is sent.
414        int channelConfig = AudioPlaybackHandler.getChannelConfig(params.mChannelCount);
415        int bytesPerFrame = AudioPlaybackHandler.getBytesPerFrame(params.mAudioFormat);
416
417        SynthesisMessageParams.ListEntry entry = params.getNextBuffer();
418
419        if (entry == null) {
420            Log.w(TAG, "completeDataAvailable : No buffers available to play.");
421            return;
422        }
423
424        final AudioTrack audioTrack = new AudioTrack(params.mStreamType, params.mSampleRateInHz,
425                channelConfig, params.mAudioFormat, entry.mLength, AudioTrack.MODE_STATIC);
426
427        // So that handleDone can access this correctly.
428        params.mAudioTrack = audioTrack;
429
430        try {
431            audioTrack.write(entry.mBytes, entry.mOffset, entry.mLength);
432            setupVolume(audioTrack, params.mVolume, params.mPan);
433            audioTrack.play();
434            blockUntilDone(audioTrack, bytesPerFrame, entry.mLength);
435            if (DBG) Log.d(TAG, "Wrote data to audio track successfully : " + entry.mLength);
436        } catch (IllegalStateException ex) {
437            Log.e(TAG, "Playback error", ex);
438        } finally {
439            handleSynthesisDone(msg);
440        }
441    }
442
443
444    private static void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) {
445        int lengthInFrames = length / bytesPerFrame;
446        int currentPosition = 0;
447        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
448            long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
449                    audioTrack.getSampleRate();
450            audioTrack.getPlayState();
451            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
452                    " Playback position : " + currentPosition);
453            try {
454                Thread.sleep(estimatedTimeMs);
455            } catch (InterruptedException ie) {
456                break;
457            }
458        }
459    }
460
461    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
462            int audioFormat, int channelCount, float volume, float pan) {
463        int channelConfig = getChannelConfig(channelCount);
464
465        int minBufferSizeInBytes
466                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
467        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
468
469        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
470                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
471        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
472            Log.w(TAG, "Unable to create audio track.");
473            audioTrack.release();
474            return null;
475        }
476
477        setupVolume(audioTrack, volume, pan);
478        return audioTrack;
479    }
480
481    static int getChannelConfig(int channelCount) {
482        if (channelCount == 1) {
483            return AudioFormat.CHANNEL_OUT_MONO;
484        } else if (channelCount == 2){
485            return AudioFormat.CHANNEL_OUT_STEREO;
486        }
487
488        return 0;
489    }
490
491    static int getBytesPerFrame(int audioFormat) {
492        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
493            return 1;
494        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
495            return 2;
496        }
497
498        return -1;
499    }
500
501    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
502        float vol = clip(volume, 0.0f, 1.0f);
503        float panning = clip(pan, -1.0f, 1.0f);
504        float volLeft = vol;
505        float volRight = vol;
506        if (panning > 0.0f) {
507            volLeft *= (1.0f - panning);
508        } else if (panning < 0.0f) {
509            volRight *= (1.0f + panning);
510        }
511        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
512        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
513            Log.e(TAG, "Failed to set volume");
514        }
515    }
516
517    private static float clip(float value, float min, float max) {
518        return value > max ? max : (value < min ? min : value);
519    }
520
521}
522