// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

9/**
10 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
11 * underlying {@link AudioTrack}. Additionally, provides methods like
12 * {@link #waitAndRelease()} that will block until all audiotrack
13 * data has been flushed to the mixer, and is estimated to have completed
14 * playback.
15 */
16class BlockingAudioTrack {
17    private static final String TAG = "TTS.BlockingAudioTrack";
18    private static final boolean DBG = false;
19
20
21    /**
22     * The minimum increment of time to wait for an AudioTrack to finish
23     * playing.
24     */
25    private static final long MIN_SLEEP_TIME_MS = 20;
26
27    /**
28     * The maximum increment of time to sleep while waiting for an AudioTrack
29     * to finish playing.
30     */
31    private static final long MAX_SLEEP_TIME_MS = 2500;
32
33    /**
34     * The maximum amount of time to wait for an audio track to make progress while
35     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
36     * could happen in exceptional circumstances like a media_server crash.
37     */
38    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
39
40    /**
41     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
42     * we create.
43     */
44    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
45
46
47    private final int mStreamType;
48    private final int mSampleRateInHz;
49    private final int mAudioFormat;
50    private final int mChannelCount;
51    private final float mVolume;
52    private final float mPan;
53
54    private final int mBytesPerFrame;
55    /**
56     * A "short utterance" is one that uses less bytes than the audio
57     * track buffer size (mAudioBufferSize). In this case, we need to call
58     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
59     * different logic is required to wait for the track to finish.
60     *
61     * Not volatile, accessed only from the audio playback thread.
62     */
63    private boolean mIsShortUtterance;
64    /**
65     * Will be valid after a call to {@link #init()}.
66     */
67    private int mAudioBufferSize;
68    private int mBytesWritten = 0;
69
70    // Need to be seen by stop() which can be called from another thread. mAudioTrack will be
71    // set to null only after waitAndRelease().
72    private Object mAudioTrackLock = new Object();
73    private AudioTrack mAudioTrack;
74    private volatile boolean mStopped;
75
76    BlockingAudioTrack(int streamType, int sampleRate,
77            int audioFormat, int channelCount,
78            float volume, float pan) {
79        mStreamType = streamType;
80        mSampleRateInHz = sampleRate;
81        mAudioFormat = audioFormat;
82        mChannelCount = channelCount;
83        mVolume = volume;
84        mPan = pan;
85
86        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
87        mIsShortUtterance = false;
88        mAudioBufferSize = 0;
89        mBytesWritten = 0;
90
91        mAudioTrack = null;
92        mStopped = false;
93    }
94
95    public boolean init() {
96        AudioTrack track = createStreamingAudioTrack();
97        synchronized (mAudioTrackLock) {
98            mAudioTrack = track;
99        }
100
101        if (track == null) {
102            return false;
103        } else {
104            return true;
105        }
106    }
107
108    public void stop() {
109        synchronized (mAudioTrackLock) {
110            if (mAudioTrack != null) {
111                mAudioTrack.stop();
112            }
113            mStopped = true;
114        }
115    }
116
117    public int write(byte[] data) {
118        AudioTrack track = null;
119        synchronized (mAudioTrackLock) {
120            track = mAudioTrack;
121        }
122
123        if (track == null || mStopped) {
124            return -1;
125        }
126        final int bytesWritten = writeToAudioTrack(track, data);
127
128        mBytesWritten += bytesWritten;
129        return bytesWritten;
130    }
131
132    public void waitAndRelease() {
133        AudioTrack track = null;
134        synchronized (mAudioTrackLock) {
135            track = mAudioTrack;
136        }
137        if (track == null) {
138            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
139            return;
140        }
141
142        // For "small" audio tracks, we have to stop() them to make them mixable,
143        // else the audio subsystem will wait indefinitely for us to fill the buffer
144        // before rendering the track mixable.
145        //
146        // If mStopped is true, the track would already have been stopped, so not
147        // much point not doing that again.
148        if (mBytesWritten < mAudioBufferSize && !mStopped) {
149            if (DBG) {
150                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
151                        track.getPlayState() + ",stopped= " + mStopped);
152            }
153
154            mIsShortUtterance = true;
155            track.stop();
156        }
157
158        // Block until the audio track is done only if we haven't stopped yet.
159        if (!mStopped) {
160            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
161            blockUntilDone(mAudioTrack);
162        }
163
164        // The last call to AudioTrack.write( ) will return only after
165        // all data from the audioTrack has been sent to the mixer, so
166        // it's safe to release at this point.
167        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
168        synchronized(mAudioTrackLock) {
169            mAudioTrack = null;
170        }
171        track.release();
172    }
173
174
175    static int getChannelConfig(int channelCount) {
176        if (channelCount == 1) {
177            return AudioFormat.CHANNEL_OUT_MONO;
178        } else if (channelCount == 2){
179            return AudioFormat.CHANNEL_OUT_STEREO;
180        }
181
182        return 0;
183    }
184
185    long getAudioLengthMs(int numBytes) {
186        final int unconsumedFrames = numBytes / mBytesPerFrame;
187        final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;
188
189        return estimatedTimeMs;
190    }
191
192    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
193        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
194            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
195            audioTrack.play();
196        }
197
198        int count = 0;
199        while (count < bytes.length) {
200            // Note that we don't take bufferCopy.mOffset into account because
201            // it is guaranteed to be 0.
202            int written = audioTrack.write(bytes, count, bytes.length);
203            if (written <= 0) {
204                break;
205            }
206            count += written;
207        }
208        return count;
209    }
210
211    private AudioTrack createStreamingAudioTrack() {
212        final int channelConfig = getChannelConfig(mChannelCount);
213
214        int minBufferSizeInBytes
215                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
216        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
217
218        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
219                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
220        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
221            Log.w(TAG, "Unable to create audio track.");
222            audioTrack.release();
223            return null;
224        }
225
226        mAudioBufferSize = bufferSizeInBytes;
227
228        setupVolume(audioTrack, mVolume, mPan);
229        return audioTrack;
230    }
231
232    private static int getBytesPerFrame(int audioFormat) {
233        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
234            return 1;
235        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
236            return 2;
237        }
238
239        return -1;
240    }
241
242
243    private void blockUntilDone(AudioTrack audioTrack) {
244        if (mBytesWritten <= 0) {
245            return;
246        }
247
248        if (mIsShortUtterance) {
249            // In this case we would have called AudioTrack#stop() to flush
250            // buffers to the mixer. This makes the playback head position
251            // unobservable and notification markers do not work reliably. We
252            // have no option but to wait until we think the track would finish
253            // playing and release it after.
254            //
255            // This isn't as bad as it looks because (a) We won't end up waiting
256            // for much longer than we should because even at 4khz mono, a short
257            // utterance weighs in at about 2 seconds, and (b) such short utterances
258            // are expected to be relatively infrequent and in a stream of utterances
259            // this shows up as a slightly longer pause.
260            blockUntilEstimatedCompletion();
261        } else {
262            blockUntilCompletion(audioTrack);
263        }
264    }
265
266    private void blockUntilEstimatedCompletion() {
267        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
268        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);
269
270        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
271
272        try {
273            Thread.sleep(estimatedTimeMs);
274        } catch (InterruptedException ie) {
275            // Do nothing.
276        }
277    }
278
279    private void blockUntilCompletion(AudioTrack audioTrack) {
280        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
281
282        int previousPosition = -1;
283        int currentPosition = 0;
284        long blockedTimeMs = 0;
285
286        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
287                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {
288
289            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
290                    audioTrack.getSampleRate();
291            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
292
293            // Check if the audio track has made progress since the last loop
294            // iteration. We should then add in the amount of time that was
295            // spent sleeping in the last iteration.
296            if (currentPosition == previousPosition) {
297                // This works only because the sleep time that would have been calculated
298                // would be the same in the previous iteration too.
299                blockedTimeMs += sleepTimeMs;
300                // If we've taken too long to make progress, bail.
301                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
302                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
303                            "for AudioTrack to make progress, Aborting");
304                    break;
305                }
306            } else {
307                blockedTimeMs = 0;
308            }
309            previousPosition = currentPosition;
310
311            if (DBG) {
312                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
313                        " Playback position : " + currentPosition + ", Length in frames : "
314                        + lengthInFrames);
315            }
316            try {
317                Thread.sleep(sleepTimeMs);
318            } catch (InterruptedException ie) {
319                break;
320            }
321        }
322    }
323
324    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
325        final float vol = clip(volume, 0.0f, 1.0f);
326        final float panning = clip(pan, -1.0f, 1.0f);
327
328        float volLeft = vol;
329        float volRight = vol;
330        if (panning > 0.0f) {
331            volLeft *= (1.0f - panning);
332        } else if (panning < 0.0f) {
333            volRight *= (1.0f + panning);
334        }
335        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
336        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
337            Log.e(TAG, "Failed to set volume");
338        }
339    }
340
341    private static final long clip(long value, long min, long max) {
342        if (value < min) {
343            return min;
344        }
345
346        if (value > max) {
347            return max;
348        }
349
350        return value;
351    }
352
353    private static float clip(float value, float min, float max) {
354        return value > max ? max : (value < min ? min : value);
355    }
356
357}
358