// Copyright 2011 Google Inc. All Rights Reserved.
28abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
38abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)package android.speech.tts;
48abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
58abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.media.AudioAttributes;
68abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.media.AudioFormat;
78abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.media.AudioTrack;
88abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.speech.tts.TextToSpeechService.AudioOutputParams;
98abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.util.Log;
108abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
118abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)/**
128abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * Exposes parts of the {@link AudioTrack} API by delegating calls to an
138abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * underlying {@link AudioTrack}. Additionally, provides methods like
148abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * {@link #waitAndRelease()} that will block until all audiotrack
158abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * data has been flushed to the mixer, and is estimated to have completed
168abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * playback.
178abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) */
188abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)class BlockingAudioTrack {
198abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    private static final String TAG = "TTS.BlockingAudioTrack";
208abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    private static final boolean DBG = false;
218abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
228abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
238abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    /**
248abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)     * The minimum increment of time to wait for an AudioTrack to finish
258abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)     * playing.
268abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)     */
278abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    private static final long MIN_SLEEP_TIME_MS = 20;
288abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
298abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    /**
308abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)     * The maximum increment of time to sleep while waiting for an AudioTrack
318abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)     * to finish playing.
328abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)     */
338abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    private static final long MAX_SLEEP_TIME_MS = 2500;
3409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
351e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    /**
367242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci     * The maximum amount of time to wait for an audio track to make progress while
377242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
387242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci     * could happen in exceptional circumstances like a media_server crash.
397242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci     */
40f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles)    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
4109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
428abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    /**
43d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
44197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch     * we create.
4519cde67944066db31e633d9e386f2aa9bf9fadb3Torne (Richard Coles)     */
468abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
47c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)
4809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
4919cde67944066db31e633d9e386f2aa9bf9fadb3Torne (Richard Coles)    private final AudioOutputParams mAudioParams;
5009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    private final int mSampleRateInHz;
518abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    private final int mAudioFormat;
52c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    private final int mChannelCount;
538abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
54f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles)
55f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles)    private final int mBytesPerFrame;
569bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    /**
579bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     * A "short utterance" is one that uses less bytes than the audio
589bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     * track buffer size (mAudioBufferSize). In this case, we need to call
599bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
609bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     * different logic is required to wait for the track to finish.
619bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     *
629bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     * Not volatile, accessed only from the audio playback thread.
639bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     */
649bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    private boolean mIsShortUtterance;
659bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    /**
669bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     * Will be valid after a call to {@link #init()}.
679bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)     */
6809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    private int mAudioBufferSize;
699bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    private int mBytesWritten = 0;
709bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
719bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    // Need to be seen by stop() which can be called from another thread. mAudioTrack will be
729bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    // set to null only after waitAndRelease().
739bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    private Object mAudioTrackLock = new Object();
749bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    private AudioTrack mAudioTrack;
759bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    private volatile boolean mStopped;
769bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
779bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    private int mSessionId;
789bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
799bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
809bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            int audioFormat, int channelCount) {
819bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mAudioParams = audioParams;
829bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mSampleRateInHz = sampleRate;
839bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mAudioFormat = audioFormat;
849bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mChannelCount = channelCount;
859bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
869bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;
879bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mIsShortUtterance = false;
889bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mAudioBufferSize = 0;
899bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mBytesWritten = 0;
909bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
919bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mAudioTrack = null;
929bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mStopped = false;
939bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    }
949bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
959bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    public boolean init() {
969bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        AudioTrack track = createStreamingAudioTrack();
979bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        synchronized (mAudioTrackLock) {
989bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            mAudioTrack = track;
999bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1009bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1019bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        if (track == null) {
1029bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            return false;
1039bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        } else {
1049bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            return true;
1059bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1069bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    }
1079bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1089bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    public void stop() {
1099bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        synchronized (mAudioTrackLock) {
1109bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            if (mAudioTrack != null) {
1119bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)                mAudioTrack.stop();
1129bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            }
1139bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            mStopped = true;
1149bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1159bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    }
1169bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1179bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    public int write(byte[] data) {
1189bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        AudioTrack track = null;
1199bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        synchronized (mAudioTrackLock) {
1209bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            track = mAudioTrack;
1219bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1229bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1239bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        if (track == null || mStopped) {
1249bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            return -1;
1259bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
12609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        final int bytesWritten = writeToAudioTrack(track, data);
1279bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1289bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        mBytesWritten += bytesWritten;
1299bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        return bytesWritten;
1309bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    }
1319bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1329bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    public void waitAndRelease() {
1339bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        AudioTrack track = null;
13409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        synchronized (mAudioTrackLock) {
1359bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            track = mAudioTrack;
1369bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1379bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        if (track == null) {
1389bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
1399bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            return;
1409bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1419bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1429bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        // For "small" audio tracks, we have to stop() them to make them mixable,
1439bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        // else the audio subsystem will wait indefinitely for us to fill the buffer
1449bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        // before rendering the track mixable.
1459bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        //
1469bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        // If mStopped is true, the track would already have been stopped, so not
1479bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        // much point not doing that again.
1489bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        if (mBytesWritten < mAudioBufferSize && !mStopped) {
1499bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            if (DBG) {
1509bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
1519bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)                        track.getPlayState() + ",stopped= " + mStopped);
1529bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            }
1539bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
1549bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            mIsShortUtterance = true;
1559bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)            track.stop();
1569bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        }
1579bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)
158e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)        // Block until the audio track is done only if we haven't stopped yet.
159e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)        if (!mStopped) {
160e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
161e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)            blockUntilDone(mAudioTrack);
162e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)        }
1638abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
1648abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)        // The last call to AudioTrack.write( ) will return only after
1658abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)        // all data from the audioTrack has been sent to the mixer, so
1669bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        // it's safe to release at this point.
167197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
1688abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)        synchronized(mAudioTrackLock) {
1698abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)            mAudioTrack = null;
1708abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)        }
1718abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)        track.release();
1728abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    }
1738abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)
174e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)
175e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)    static int getChannelConfig(int channelCount) {
176e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)        if (channelCount == 1) {
177e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)            return AudioFormat.CHANNEL_OUT_MONO;
178e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)        } else if (channelCount == 2){
179e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)            return AudioFormat.CHANNEL_OUT_STEREO;
180a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        }
181a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)
182a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        return 0;
183a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    }
184a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)
185a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    long getAudioLengthMs(int numBytes) {
186a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        final int unconsumedFrames = numBytes / mBytesPerFrame;
187d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;
188d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)
189d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        return estimatedTimeMs;
190d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    }
191d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)
192d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
193d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
194d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
195d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            audioTrack.play();
196d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        }
197d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)
198d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        int count = 0;
199d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        while (count < bytes.length) {
200d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            // Note that we don't take bufferCopy.mOffset into account because
201d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            // it is guaranteed to be 0.
202d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles)            int written = audioTrack.write(bytes, count, bytes.length);
203d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            if (written <= 0) {
204d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)                break;
205d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            }
206d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            count += written;
207d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        }
20809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        return count;
20951b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles)    }
210d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)
21110f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch    private AudioTrack createStreamingAudioTrack() {
21210f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch        final int channelConfig = getChannelConfig(mChannelCount);
21310f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch
21410f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch        int minBufferSizeInBytes
21510f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
216d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
217d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)
218d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        AudioFormat audioFormat = (new AudioFormat.Builder())
21909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                .setChannelMask(channelConfig)
22009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                .setEncoding(mAudioFormat)
22109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                .setSampleRate(mSampleRateInHz).build();
22209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
22309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
22409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                mAudioParams.mSessionId);
22509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
22609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
22709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            Log.w(TAG, "Unable to create audio track.");
22809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            audioTrack.release();
22909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            return null;
23009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        }
23109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
23209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        mAudioBufferSize = bufferSizeInBytes;
23309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
23409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
23509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        return audioTrack;
23651b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles)    }
23709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
23809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    private void blockUntilDone(AudioTrack audioTrack) {
23909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        if (mBytesWritten <= 0) {
24007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            return;
24107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        }
24207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch
24307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        if (mIsShortUtterance) {
24407a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // In this case we would have called AudioTrack#stop() to flush
24507a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // buffers to the mixer. This makes the playback head position
24607a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // unobservable and notification markers do not work reliably. We
24707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // have no option but to wait until we think the track would finish
24807a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // playing and release it after.
24907a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            //
25007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // This isn't as bad as it looks because (a) We won't end up waiting
25107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // for much longer than we should because even at 4khz mono, a short
25207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // utterance weighs in at about 2 seconds, and (b) such short utterances
25307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            // are expected to be relatively infrequent and in a stream of utterances
25409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            // this shows up as a slightly longer pause.
25551b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles)            blockUntilEstimatedCompletion();
25651b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles)        } else {
25709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            blockUntilCompletion(audioTrack);
258a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        }
259a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    }
260a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)
261a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    private void blockUntilEstimatedCompletion() {
26209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
26309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);
26409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
26509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
26609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
26709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        try {
26809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            Thread.sleep(estimatedTimeMs);
269197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch        } catch (InterruptedException ie) {
270d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            // Do nothing.
271197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch        }
272a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    }
27309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
27409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    private void blockUntilCompletion(AudioTrack audioTrack) {
275197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
27609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
277a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        int previousPosition = -1;
27809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        int currentPosition = 0;
27909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        long blockedTimeMs = 0;
28009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
281a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
282a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {
28309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
28409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
28509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                    audioTrack.getSampleRate();
28609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
28709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
28809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            // Check if the audio track has made progress since the last loop
289a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)            // iteration. We should then add in the amount of time that was
29009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            // spent sleeping in the last iteration.
291a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)            if (currentPosition == previousPosition) {
292a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)                // This works only because the sleep time that would have been calculated
293a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)                // would be the same in the previous iteration too.
294a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)                blockedTimeMs += sleepTimeMs;
295a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)                // If we've taken too long to make progress, bail.
29609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
297a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
298c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                            "for AudioTrack to make progress, Aborting");
299c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    break;
300c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                }
301c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            } else {
302c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                blockedTimeMs = 0;
303c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            }
304c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            previousPosition = currentPosition;
305c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)
306c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            if (DBG) {
307c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
308c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                        " Playback position : " + currentPosition + ", Length in frames : "
309c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                        + lengthInFrames);
310c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            }
311c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            try {
312c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                Thread.sleep(sleepTimeMs);
313c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            } catch (InterruptedException ie) {
314c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                break;
315c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            }
316c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        }
317c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    }
318c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)
319c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
320c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        final float vol = clip(volume, 0.0f, 1.0f);
321c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        final float panning = clip(pan, -1.0f, 1.0f);
322c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)
323c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        float volLeft = vol;
324c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        float volRight = vol;
325c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        if (panning > 0.0f) {
326c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            volLeft *= (1.0f - panning);
327c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        } else if (panning < 0.0f) {
328c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            volRight *= (1.0f + panning);
329c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        }
330c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
331c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
332c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            Log.e(TAG, "Failed to set volume");
333c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        }
33409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    }
33509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
33609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    private static final long clip(long value, long min, long max) {
33709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        return value < min ? min : (value < max ? value : max);
338a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    }
339a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)
34009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    private static final float clip(float value, float min, float max) {
3418abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)        return value < min ? min : (value < max ? value : max);
3428abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)    }
34309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
3448abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)}
34509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)