BlockingAudioTrack.java revision 67ae6bc83cf2b30b0c61b9ebba5fed7a0038549c
// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all audio track
 * data has been flushed to the mixer and playback is estimated to have
 * completed.
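 *
 * <p>A minimal usage sketch (hypothetical caller, not taken from this file;
 * the stream type, sample rate and buffer contents are illustrative only):
 * <pre>
 *   BlockingAudioTrack track = new BlockingAudioTrack(
 *           AudioManager.STREAM_MUSIC, 16000,
 *           AudioFormat.ENCODING_PCM_16BIT, 1, 1.0f, 0.0f);
 *   track.init();
 *   track.write(pcmBytes);   // may be called repeatedly as audio is synthesized
 *   track.waitAndRelease();  // blocks until playback is estimated to be complete
 * </pre>
 * {@link #stop()} may be called from a different thread to interrupt playback.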
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;


    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final int mStreamType;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final float mVolume;
    private final float mPan;

    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;
    // Locks the initialization / uninitialization of the audio track.
    // This is required because stop() will throw an illegal state exception
    // if called before init() or after mAudioTrack.release().
    private final Object mAudioTrackLock = new Object();

    BlockingAudioTrack(int streamType, int sampleRate,
            int audioFormat, int channelCount,
            float volume, float pan) {
        mStreamType = streamType;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mVolume = volume;
        mPan = pan;

        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

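    /**
     * Creates the underlying {@link AudioTrack} and publishes it under
     * {@link #mAudioTrackLock}, so that {@link #stop()} can safely observe it.
     */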
    public void init() {
        AudioTrack track = createStreamingAudioTrack();

        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }
    }

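    /**
     * Stops the underlying {@link AudioTrack} (if one exists) and marks this
     * instance as stopped. May be called from a thread other than the audio
     * playback thread; {@link #mAudioTrackLock} guards against racing with
     * {@link #init()} and {@link #waitAndRelease()}.
     */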
    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
        }
        mStopped = true;
    }

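    /**
     * Writes PCM data to the underlying {@link AudioTrack}, blocking until it
     * has been consumed.
     *
     * @return the number of bytes written, or -1 if the track has not been
     *         initialized or has already been stopped.
     */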
    public int write(byte[] data) {
        if (mAudioTrack == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(mAudioTrack, data);
        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

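    /**
     * Blocks until the underlying {@link AudioTrack} has finished playing (or,
     * for short utterances, is estimated to have finished) and then releases
     * it. Intended to be called on the audio playback thread after the last
     * call to {@link #write(byte[])}.
     */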
    public void waitAndRelease() {
        // For "small" audio tracks, we have to stop() them to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before it considers the track mixable.
        //
        // If mStopped is true, the track has already been stopped, so there is
        // no point in stopping it again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        mAudioTrack.getPlayState() + ", stopped = " + mStopped);
            }

            mIsShortUtterance = true;
            mAudioTrack.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
            blockUntilDone(mAudioTrack);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audio track has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + mAudioTrack.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack.release();
            mAudioTrack = null;
        }
    }


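    /**
     * Maps a channel count to the corresponding {@link AudioFormat} channel
     * configuration constant, or returns 0 for unsupported counts (only mono
     * and stereo are handled here).
     */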
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

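    /**
     * Estimates the playback duration, in milliseconds, of {@code numBytes} of
     * audio in this track's format. For example (illustrative numbers only),
     * 32000 bytes of 16-bit mono PCM at 16000 Hz is 16000 frames, or about
     * 1000 ms.
     */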
    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

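    /**
     * Writes {@code bytes} to the given {@link AudioTrack}, starting playback
     * first if necessary, and looping until all bytes have been written or a
     * write fails.
     *
     * @return the number of bytes actually written.
     */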
    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // The data always starts at offset 0 of the array, so we only need to
            // track how much of it has been written so far and request the bytes
            // that remain.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

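    /**
     * Creates a streaming {@link AudioTrack} with a buffer of at least
     * {@link #MIN_AUDIO_BUFFER_SIZE} bytes, records the buffer size in
     * {@link #mAudioBufferSize}, and applies the requested volume and pan.
     *
     * @return the new track, or {@code null} if it could not be initialized.
     */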
    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }

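    /**
     * Returns the number of bytes per sample for the given encoding (1 for
     * 8-bit PCM, 2 for 16-bit PCM), or -1 for unsupported formats. Callers
     * multiply by the channel count to obtain bytes per frame.
     */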
    private static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }


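    /**
     * Blocks until the track is estimated to have finished playing, choosing
     * between the two waiting strategies below depending on whether this was a
     * "short utterance" (see {@link #mIsShortUtterance}).
     */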
    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would have
            // finished playing, and release it after that.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // much longer than we should, since even at 4 kHz mono a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent, so in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

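    /**
     * Sleeps for the estimated duration of the data written so far (frames
     * written divided by the sample rate), since the playback head position
     * cannot be observed after {@link AudioTrack#stop()} has been called.
     */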
    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

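    /**
     * Polls {@link AudioTrack#getPlaybackHeadPosition()} until all written
     * frames have been played, sleeping for an estimate of the remaining time
     * between polls and bailing out if the track stops making progress for
     * longer than {@link #MAX_PROGRESS_WAIT_MS}.
     */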
    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. If it hasn't, add the time spent sleeping in the last
            // iteration to the total time spent waiting without progress.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting.");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

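    /**
     * Applies the requested volume and pan to the track using
     * {@link AudioTrack#setStereoVolume}. The volume is clipped to [0, 1] and
     * the pan to [-1, 1]; a positive pan attenuates the left channel (shifting
     * the sound to the right) and a negative pan attenuates the right channel.
     */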
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    private static long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}