BlockingAudioTrack.java revision ed4e541a20cc662b8399844684d18ad0060bd1cb
// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods such as
 * {@link #waitAndRelease()} that block until all {@link AudioTrack} data
 * has been flushed to the mixer and playback is estimated to have
 * completed.
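 *
 * <p>A typical call sequence on the audio playback thread looks roughly like
 * the sketch below; {@link #stop()} may additionally be called from another
 * thread to interrupt playback early.
 *
 * <pre>
 * BlockingAudioTrack track = new BlockingAudioTrack(streamType, sampleRateInHz,
 *         audioFormat, channelCount, volume, pan);
 * if (track.init()) {
 *     track.write(audioBytes);     // possibly called multiple times
 *     track.waitAndRelease();      // blocks until playback is done
 * }
 * </pre>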
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;


    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final int mStreamType;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final float mVolume;
    private final float mPan;

    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // These fields need to be visible to stop(), which can be called from another
    // thread. mAudioTrack is set to null only after waitAndRelease().
    private volatile AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    BlockingAudioTrack(int streamType, int sampleRate,
            int audioFormat, int channelCount,
            float volume, float pan) {
        mStreamType = streamType;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mVolume = volume;
        mPan = pan;

        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

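    /**
     * Creates the underlying streaming {@link AudioTrack}.
     *
     * @return {@code true} if the track was successfully initialized,
     *         {@code false} otherwise.
     */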
    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        mAudioTrack = track;

        return track != null;
    }

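    /**
     * Stops the underlying {@link AudioTrack} and marks this object as stopped.
     * Unlike the other methods, this may be called from a thread other than the
     * audio playback thread.
     */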
    public void stop() {
        AudioTrack track = mAudioTrack;
        if (track != null) {
            track.stop();
        }
        mStopped = true;
    }

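    /**
     * Writes audio data to the underlying {@link AudioTrack}, blocking until
     * all of it has been queued to the track (or an error occurs).
     *
     * @return the number of bytes written, or -1 if the track was never
     *         initialized or has already been stopped.
     */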
    public int write(byte[] data) {
        if (mAudioTrack == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(mAudioTrack, data);
        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

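    /**
     * Blocks until the track has played (or is estimated to have played) all the
     * data written so far, then releases the underlying {@link AudioTrack}.
     * Returns immediately if the track was never initialized.
     */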
    public void waitAndRelease() {
        AudioTrack track = mAudioTrack;
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "short" utterances, we have to call stop() to make the track mixable;
        // otherwise the audio subsystem will wait indefinitely for us to fill the
        // rest of the buffer before it mixes the track.
        //
        // If mStopped is true, the track has already been stopped, so there is
        // no point in doing it again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped = " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
            blockUntilDone(mAudioTrack);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the AudioTrack has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        track.release();
        mAudioTrack = null;
    }


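    /**
     * Maps a channel count (1 or 2) to the matching {@link AudioFormat}
     * CHANNEL_OUT_* constant, or returns 0 if the count is unsupported.
     */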
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

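    /**
     * Returns the estimated playback duration, in milliseconds, of
     * {@code numBytes} of audio at this track's frame size and sample rate.
     */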
    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = unconsumedFrames * 1000L / mSampleRateInHz;

        return estimatedTimeMs;
    }

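    /**
     * Starts playback on {@code audioTrack} if necessary and writes
     * {@code bytes} to it, looping until everything has been written or the
     * track reports an error.
     *
     * @return the number of bytes actually written.
     */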
    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // The incoming buffer always starts at offset 0 (the caller guarantees
            // this), so we only need to track how much of it has been written.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

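    /**
     * Creates a streaming {@link AudioTrack} with a buffer of at least
     * MIN_AUDIO_BUFFER_SIZE bytes, applies the requested volume and pan, and
     * records the chosen buffer size in mAudioBufferSize.
     *
     * @return the new track, or {@code null} if it failed to initialize.
     */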
    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }

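    /**
     * Returns the size in bytes of a single sample for the given PCM encoding
     * (1 for 8 bit, 2 for 16 bit), or -1 for unsupported encodings. Callers
     * multiply this by the channel count to get the frame size.
     */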
    private static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }


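    /**
     * Blocks until the track is believed to have played everything written so
     * far: by polling the playback head for normal utterances, or by sleeping
     * for the estimated duration for short utterances.
     */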
    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would have
            // finished playing, and release it after.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // much longer than we should, since even at 4 kHz mono a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent, so in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

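    /**
     * Sleeps for the estimated duration of the data written so far. Used for
     * short utterances, whose playback head position is unobservable after
     * {@link AudioTrack#stop()} has been called.
     */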
    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

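    /**
     * Polls the playback head position, sleeping for the estimated remaining
     * playback time on each iteration, until the track has played all frames
     * written, leaves PLAYSTATE_PLAYING, is stopped, or fails to make progress
     * for longer than MAX_PROGRESS_WAIT_MS.
     */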
    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000L) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. If it hasn't, add in the amount of time that was
            // spent sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time calculated here would
                // have been the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

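    /**
     * Applies the requested volume and pan as per-channel stereo volumes.
     * A positive pan attenuates the left channel, a negative pan attenuates
     * the right channel.
     */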
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

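    /** Clamps {@code value} to the inclusive range [{@code min}, {@code max}]. */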
    private static final long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}