18abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)// Copyright 2011 Google Inc. All Rights Reserved. 28abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 38abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)package android.speech.tts; 48abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 58abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.media.AudioAttributes; 68abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.media.AudioFormat; 78abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.media.AudioTrack; 88abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.speech.tts.TextToSpeechService.AudioOutputParams; 98abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)import android.util.Log; 108abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 118abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)/** 128abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * Exposes parts of the {@link AudioTrack} API by delegating calls to an 138abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * underlying {@link AudioTrack}. Additionally, provides methods like 148abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * {@link #waitAndRelease()} that will block until all audiotrack 158abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * data has been flushed to the mixer, and is estimated to have completed 168abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * playback. 178abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) */ 188abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)class BlockingAudioTrack { 198abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) private static final String TAG = "TTS.BlockingAudioTrack"; 208abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) private static final boolean DBG = false; 218abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 228abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 238abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) /** 248abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * The minimum increment of time to wait for an AudioTrack to finish 258abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * playing. 268abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) */ 278abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) private static final long MIN_SLEEP_TIME_MS = 20; 288abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 298abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) /** 308abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * The maximum increment of time to sleep while waiting for an AudioTrack 318abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) * to finish playing. 328abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) */ 338abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) private static final long MAX_SLEEP_TIME_MS = 2500; 3409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 351e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles) /** 367242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci * The maximum amount of time to wait for an audio track to make progress while 377242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 387242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci * could happen in exceptional circumstances like a media_server crash. 397242dc3dbeb210b5e876a3c42d1ec1a667fc621aPrimiano Tucci */ 40f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles) private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 4109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 428abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) /** 43d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) * Minimum size of the buffer of the underlying {@link android.media.AudioTrack} 44197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch * we create. 4519cde67944066db31e633d9e386f2aa9bf9fadb3Torne (Richard Coles) */ 468abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 47c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) 4809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 4919cde67944066db31e633d9e386f2aa9bf9fadb3Torne (Richard Coles) private final AudioOutputParams mAudioParams; 5009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) private final int mSampleRateInHz; 518abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) private final int mAudioFormat; 52c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) private final int mChannelCount; 538abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 54f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles) 55f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles) private final int mBytesPerFrame; 569bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) /** 579bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * A "short utterance" is one that uses less bytes than the audio 589bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * track buffer size (mAudioBufferSize). In this case, we need to call 599bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly 609bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * different logic is required to wait for the track to finish. 619bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * 629bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * Not volatile, accessed only from the audio playback thread. 639bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) */ 649bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) private boolean mIsShortUtterance; 659bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) /** 669bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) * Will be valid after a call to {@link #init()}. 679bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) */ 6809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) private int mAudioBufferSize; 699bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) private int mBytesWritten = 0; 709bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 719bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // Need to be seen by stop() which can be called from another thread. mAudioTrack will be 729bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // set to null only after waitAndRelease(). 739bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) private Object mAudioTrackLock = new Object(); 749bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) private AudioTrack mAudioTrack; 759bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) private volatile boolean mStopped; 769bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 779bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) private int mSessionId; 789bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 799bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate, 809bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) int audioFormat, int channelCount) { 819bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mAudioParams = audioParams; 829bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mSampleRateInHz = sampleRate; 839bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mAudioFormat = audioFormat; 849bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mChannelCount = channelCount; 859bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 869bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount; 879bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mIsShortUtterance = false; 889bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mAudioBufferSize = 0; 899bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mBytesWritten = 0; 909bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 919bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mAudioTrack = null; 929bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mStopped = false; 939bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 949bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 959bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) public boolean init() { 969bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) AudioTrack track = createStreamingAudioTrack(); 979bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) synchronized (mAudioTrackLock) { 989bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mAudioTrack = track; 999bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1009bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1019bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (track == null) { 1029bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) return false; 1039bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } else { 1049bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) return true; 1059bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1069bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1079bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1089bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) public void stop() { 1099bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) synchronized (mAudioTrackLock) { 1109bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (mAudioTrack != null) { 1119bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mAudioTrack.stop(); 1129bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1139bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mStopped = true; 1149bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1159bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1169bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1179bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) public int write(byte[] data) { 1189bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) AudioTrack track = null; 1199bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) synchronized (mAudioTrackLock) { 1209bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) track = mAudioTrack; 1219bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1229bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1239bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (track == null || mStopped) { 1249bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) return -1; 1259bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 12609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) final int bytesWritten = writeToAudioTrack(track, data); 1279bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1289bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mBytesWritten += bytesWritten; 1299bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) return bytesWritten; 1309bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1319bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1329bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) public void waitAndRelease() { 1339bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) AudioTrack track = null; 13409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) synchronized (mAudioTrackLock) { 1359bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) track = mAudioTrack; 1369bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1379bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (track == null) { 1389bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]"); 1399bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) return; 1409bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1419bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1429bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // For "small" audio tracks, we have to stop() them to make them mixable, 1439bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // else the audio subsystem will wait indefinitely for us to fill the buffer 1449bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // before rendering the track mixable. 1459bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // 1469bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // If mStopped is true, the track would already have been stopped, so not 1479bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // much point not doing that again. 1489bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (mBytesWritten < mAudioBufferSize && !mStopped) { 1499bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) if (DBG) { 1509bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) Log.d(TAG, "Stopping audio track to flush audio, state was : " + 1519bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) track.getPlayState() + ",stopped= " + mStopped); 1529bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1539bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 1549bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) mIsShortUtterance = true; 1559bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) track.stop(); 1569bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) } 1579bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) 158e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) // Block until the audio track is done only if we haven't stopped yet. 159e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) if (!mStopped) { 160e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode()); 161e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) blockUntilDone(mAudioTrack); 162e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) } 1638abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 1648abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) // The last call to AudioTrack.write( ) will return only after 1658abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) // all data from the audioTrack has been sent to the mixer, so 1669bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles) // it's safe to release at this point. 167197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]"); 1688abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) synchronized(mAudioTrackLock) { 1698abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) mAudioTrack = null; 1708abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) } 1718abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) track.release(); 1728abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) } 1738abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) 174e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) 175e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) static int getChannelConfig(int channelCount) { 176e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) if (channelCount == 1) { 177e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) return AudioFormat.CHANNEL_OUT_MONO; 178e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) } else if (channelCount == 2){ 179e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles) return AudioFormat.CHANNEL_OUT_STEREO; 180a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) } 181a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) 182a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) return 0; 183a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) } 184a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) 185a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) long getAudioLengthMs(int numBytes) { 186a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) final int unconsumedFrames = numBytes / mBytesPerFrame; 187d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz; 188d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) 189d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) return estimatedTimeMs; 190d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) } 191d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) 192d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) { 193d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) { 194d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode()); 195d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) audioTrack.play(); 196d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) } 197d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) 198d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) int count = 0; 199d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) while (count < bytes.length) { 200d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) // Note that we don't take bufferCopy.mOffset into account because 201d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) // it is guaranteed to be 0. 202d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles) int written = audioTrack.write(bytes, count, bytes.length); 203d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) if (written <= 0) { 204d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) break; 205d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) } 206d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) count += written; 207d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) } 20809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) return count; 20951b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles) } 210d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) 21110f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch private AudioTrack createStreamingAudioTrack() { 21210f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch final int channelConfig = getChannelConfig(mChannelCount); 21310f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch 21410f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch int minBufferSizeInBytes 21510f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat); 216d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 217d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) 218d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) AudioFormat audioFormat = (new AudioFormat.Builder()) 21909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) .setChannelMask(channelConfig) 22009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) .setEncoding(mAudioFormat) 22109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) .setSampleRate(mSampleRateInHz).build(); 22209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes, 22309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM, 22409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) mAudioParams.mSessionId); 22509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 22609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 22709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) Log.w(TAG, "Unable to create audio track."); 22809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) audioTrack.release(); 22909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) return null; 23009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) } 23109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 23209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) mAudioBufferSize = bufferSizeInBytes; 23309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 23409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan); 23509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) return audioTrack; 23651b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles) } 23709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 23809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) private void blockUntilDone(AudioTrack audioTrack) { 23909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) if (mBytesWritten <= 0) { 24007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch return; 24107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch } 24207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch 24307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch if (mIsShortUtterance) { 24407a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // In this case we would have called AudioTrack#stop() to flush 24507a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // buffers to the mixer. This makes the playback head position 24607a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // unobservable and notification markers do not work reliably. We 24707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // have no option but to wait until we think the track would finish 24807a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // playing and release it after. 24907a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // 25007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // This isn't as bad as it looks because (a) We won't end up waiting 25107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // for much longer than we should because even at 4khz mono, a short 25207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // utterance weighs in at about 2 seconds, and (b) such short utterances 25307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch // are expected to be relatively infrequent and in a stream of utterances 25409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) // this shows up as a slightly longer pause. 25551b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles) blockUntilEstimatedCompletion(); 25651b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles) } else { 25709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) blockUntilCompletion(audioTrack); 258a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) } 259a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) } 260a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) 261a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) private void blockUntilEstimatedCompletion() { 26209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) final int lengthInFrames = mBytesWritten / mBytesPerFrame; 26309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz); 26409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 26509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 26609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 26709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) try { 26809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) Thread.sleep(estimatedTimeMs); 269197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch } catch (InterruptedException ie) { 270d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) // Do nothing. 271197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch } 272a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) } 27309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 27409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) private void blockUntilCompletion(AudioTrack audioTrack) { 275197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch final int lengthInFrames = mBytesWritten / mBytesPerFrame; 27609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 277a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) int previousPosition = -1; 27809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) int currentPosition = 0; 27909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) long blockedTimeMs = 0; 28009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 281a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 282a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) { 28309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 28409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 28509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) audioTrack.getSampleRate(); 28609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 28709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 28809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) // Check if the audio track has made progress since the last loop 289a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) // iteration. We should then add in the amount of time that was 29009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) // spent sleeping in the last iteration. 291a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) if (currentPosition == previousPosition) { 292a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) // This works only because the sleep time that would have been calculated 293a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) // would be the same in the previous iteration too. 294a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) blockedTimeMs += sleepTimeMs; 295a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) // If we've taken too long to make progress, bail. 29609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 297a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 298c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) "for AudioTrack to make progress, Aborting"); 299c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) break; 300c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 301c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } else { 302c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) blockedTimeMs = 0; 303c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 304c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) previousPosition = currentPosition; 305c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) 306c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) if (DBG) { 307c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 308c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) " Playback position : " + currentPosition + ", Length in frames : " 309c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) + lengthInFrames); 310c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 311c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) try { 312c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) Thread.sleep(sleepTimeMs); 313c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } catch (InterruptedException ie) { 314c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) break; 315c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 316c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 317c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 318c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) 319c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 320c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) final float vol = clip(volume, 0.0f, 1.0f); 321c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) final float panning = clip(pan, -1.0f, 1.0f); 322c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) 323c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) float volLeft = vol; 324c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) float volRight = vol; 325c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) if (panning > 0.0f) { 326c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) volLeft *= (1.0f - panning); 327c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } else if (panning < 0.0f) { 328c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) volRight *= (1.0f + panning); 329c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 330c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 331c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 332c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) Log.e(TAG, "Failed to set volume"); 333c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles) } 33409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) } 33509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 33609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) private static final long clip(long value, long min, long max) { 33709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) return value < min ? min : (value < max ? value : max); 338a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) } 339a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles) 34009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) private static final float clip(float value, float min, float max) { 3418abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) return value < min ? min : (value < max ? value : max); 3428abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles) } 34309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) 3448abfc5808a4e34d6e03867af8bc440dee641886fTorne (Richard Coles)} 34509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)