// BlockingAudioTrack.java revision 65c50784564d0bae9276fde5472dd8898a781bcd
// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.speech.tts.TextToSpeechService.AudioOutputParams;
import android.util.Log;

10/**
11 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
12 * underlying {@link AudioTrack}. Additionally, provides methods like
13 * {@link #waitAndRelease()} that will block until all audiotrack
14 * data has been flushed to the mixer, and is estimated to have completed
15 * playback.
16 */
17class BlockingAudioTrack {
18    private static final String TAG = "TTS.BlockingAudioTrack";
19    private static final boolean DBG = false;
20
21
22    /**
23     * The minimum increment of time to wait for an AudioTrack to finish
24     * playing.
25     */
26    private static final long MIN_SLEEP_TIME_MS = 20;
27
28    /**
29     * The maximum increment of time to sleep while waiting for an AudioTrack
30     * to finish playing.
31     */
32    private static final long MAX_SLEEP_TIME_MS = 2500;
33
34    /**
35     * The maximum amount of time to wait for an audio track to make progress while
36     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
37     * could happen in exceptional circumstances like a media_server crash.
38     */
39    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
40
41    /**
42     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
43     * we create.
44     */
45    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
46
47
48    private final AudioOutputParams mAudioParams;
49    private final int mSampleRateInHz;
50    private final int mAudioFormat;
51    private final int mChannelCount;
52
53
54    private final int mBytesPerFrame;
55    /**
56     * A "short utterance" is one that uses less bytes than the audio
57     * track buffer size (mAudioBufferSize). In this case, we need to call
58     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
59     * different logic is required to wait for the track to finish.
60     *
61     * Not volatile, accessed only from the audio playback thread.
62     */
63    private boolean mIsShortUtterance;
64    /**
65     * Will be valid after a call to {@link #init()}.
66     */
67    private int mAudioBufferSize;
68    private int mBytesWritten = 0;
69
70    // Need to be seen by stop() which can be called from another thread. mAudioTrack will be
71    // set to null only after waitAndRelease().
72    private Object mAudioTrackLock = new Object();
73    private AudioTrack mAudioTrack;
74    private volatile boolean mStopped;
75
76    private int mSessionId;
77
78    BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
79            int audioFormat, int channelCount) {
80        mAudioParams = audioParams;
81        mSampleRateInHz = sampleRate;
82        mAudioFormat = audioFormat;
83        mChannelCount = channelCount;
84
85        mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;
86        mIsShortUtterance = false;
87        mAudioBufferSize = 0;
88        mBytesWritten = 0;
89
90        mAudioTrack = null;
91        mStopped = false;
92    }
93
94    public boolean init() {
95        AudioTrack track = createStreamingAudioTrack();
96        synchronized (mAudioTrackLock) {
97            mAudioTrack = track;
98        }
99
100        if (track == null) {
101            return false;
102        } else {
103            return true;
104        }
105    }
106
107    public void stop() {
108        synchronized (mAudioTrackLock) {
109            if (mAudioTrack != null) {
110                mAudioTrack.stop();
111            }
112            mStopped = true;
113        }
114    }
115
116    public int write(byte[] data) {
117        AudioTrack track = null;
118        synchronized (mAudioTrackLock) {
119            track = mAudioTrack;
120        }
121
122        if (track == null || mStopped) {
123            return -1;
124        }
125        final int bytesWritten = writeToAudioTrack(track, data);
126
127        mBytesWritten += bytesWritten;
128        return bytesWritten;
129    }
130
131    public void waitAndRelease() {
132        AudioTrack track = null;
133        synchronized (mAudioTrackLock) {
134            track = mAudioTrack;
135        }
136        if (track == null) {
137            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
138            return;
139        }
140
141        // For "small" audio tracks, we have to stop() them to make them mixable,
142        // else the audio subsystem will wait indefinitely for us to fill the buffer
143        // before rendering the track mixable.
144        //
145        // If mStopped is true, the track would already have been stopped, so not
146        // much point not doing that again.
147        if (mBytesWritten < mAudioBufferSize && !mStopped) {
148            if (DBG) {
149                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
150                        track.getPlayState() + ",stopped= " + mStopped);
151            }
152
153            mIsShortUtterance = true;
154            track.stop();
155        }
156
157        // Block until the audio track is done only if we haven't stopped yet.
158        if (!mStopped) {
159            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
160            blockUntilDone(mAudioTrack);
161        }
162
163        // The last call to AudioTrack.write( ) will return only after
164        // all data from the audioTrack has been sent to the mixer, so
165        // it's safe to release at this point.
166        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
167        synchronized (mAudioTrackLock) {
168            mAudioTrack = null;
169        }
170        track.release();
171    }
172
173
174    static int getChannelConfig(int channelCount) {
175        if (channelCount == 1) {
176            return AudioFormat.CHANNEL_OUT_MONO;
177        } else if (channelCount == 2){
178            return AudioFormat.CHANNEL_OUT_STEREO;
179        }
180
181        return 0;
182    }
183
184    long getAudioLengthMs(int numBytes) {
185        final int unconsumedFrames = numBytes / mBytesPerFrame;
186        final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;
187
188        return estimatedTimeMs;
189    }
190
191    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
192        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
193            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
194            audioTrack.play();
195        }
196
197        int count = 0;
198        while (count < bytes.length) {
199            // Note that we don't take bufferCopy.mOffset into account because
200            // it is guaranteed to be 0.
201            int written = audioTrack.write(bytes, count, bytes.length);
202            if (written <= 0) {
203                break;
204            }
205            count += written;
206        }
207        return count;
208    }
209
210    private AudioTrack createStreamingAudioTrack() {
211        final int channelConfig = getChannelConfig(mChannelCount);
212
213        int minBufferSizeInBytes
214                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
215        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
216
217        AudioFormat audioFormat = (new AudioFormat.Builder())
218                .setChannelMask(channelConfig)
219                .setEncoding(mAudioFormat)
220                .setSampleRate(mSampleRateInHz).build();
221        AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
222                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
223                mAudioParams.mSessionId);
224
225        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
226            Log.w(TAG, "Unable to create audio track.");
227            audioTrack.release();
228            return null;
229        }
230
231        mAudioBufferSize = bufferSizeInBytes;
232
233        setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
234        return audioTrack;
235    }
236
237    private void blockUntilDone(AudioTrack audioTrack) {
238        if (mBytesWritten <= 0) {
239            return;
240        }
241
242        if (mIsShortUtterance) {
243            // In this case we would have called AudioTrack#stop() to flush
244            // buffers to the mixer. This makes the playback head position
245            // unobservable and notification markers do not work reliably. We
246            // have no option but to wait until we think the track would finish
247            // playing and release it after.
248            //
249            // This isn't as bad as it looks because (a) We won't end up waiting
250            // for much longer than we should because even at 4khz mono, a short
251            // utterance weighs in at about 2 seconds, and (b) such short utterances
252            // are expected to be relatively infrequent and in a stream of utterances
253            // this shows up as a slightly longer pause.
254            blockUntilEstimatedCompletion();
255        } else {
256            blockUntilCompletion(audioTrack);
257        }
258    }
259
260    private void blockUntilEstimatedCompletion() {
261        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
262        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);
263
264        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
265
266        try {
267            Thread.sleep(estimatedTimeMs);
268        } catch (InterruptedException ie) {
269            // Do nothing.
270        }
271    }
272
273    private void blockUntilCompletion(AudioTrack audioTrack) {
274        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
275
276        int previousPosition = -1;
277        int currentPosition = 0;
278        long blockedTimeMs = 0;
279
280        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
281                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {
282
283            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
284                    audioTrack.getSampleRate();
285            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
286
287            // Check if the audio track has made progress since the last loop
288            // iteration. We should then add in the amount of time that was
289            // spent sleeping in the last iteration.
290            if (currentPosition == previousPosition) {
291                // This works only because the sleep time that would have been calculated
292                // would be the same in the previous iteration too.
293                blockedTimeMs += sleepTimeMs;
294                // If we've taken too long to make progress, bail.
295                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
296                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
297                            "for AudioTrack to make progress, Aborting");
298                    break;
299                }
300            } else {
301                blockedTimeMs = 0;
302            }
303            previousPosition = currentPosition;
304
305            if (DBG) {
306                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
307                        " Playback position : " + currentPosition + ", Length in frames : "
308                        + lengthInFrames);
309            }
310            try {
311                Thread.sleep(sleepTimeMs);
312            } catch (InterruptedException ie) {
313                break;
314            }
315        }
316    }
317
318    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
319        final float vol = clip(volume, 0.0f, 1.0f);
320        final float panning = clip(pan, -1.0f, 1.0f);
321
322        float volLeft = vol;
323        float volRight = vol;
324        if (panning > 0.0f) {
325            volLeft *= (1.0f - panning);
326        } else if (panning < 0.0f) {
327            volRight *= (1.0f + panning);
328        }
329        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
330        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
331            Log.e(TAG, "Failed to set volume");
332        }
333    }
334
335    private static final long clip(long value, long min, long max) {
336        return value < min ? min : (value < max ? value : max);
337    }
338
339    private static final float clip(float value, float min, float max) {
340        return value < min ? min : (value < max ? value : max);
341    }
342
343    /**
344     * @see
345     *     AudioTrack#setPlaybackPositionUpdateListener(AudioTrack.OnPlaybackPositionUpdateListener).
346     */
347    public void setPlaybackPositionUpdateListener(
348            AudioTrack.OnPlaybackPositionUpdateListener listener) {
349        synchronized (mAudioTrackLock) {
350            if (mAudioTrack != null) {
351                mAudioTrack.setPlaybackPositionUpdateListener(listener);
352            }
353        }
354    }
355
356    /** @see AudioTrack#setNotificationMarkerPosition(int). */
357    public void setNotificationMarkerPosition(int frames) {
358        synchronized (mAudioTrackLock) {
359            if (mAudioTrack != null) {
360                mAudioTrack.setNotificationMarkerPosition(frames);
361            }
362        }
363    }
364}