BlockingAudioTrack.java revision ed4e541a20cc662b8399844684d18ad0060bd1cb
1// Copyright 2011 Google Inc. All Rights Reserved. 2 3package android.speech.tts; 4 5import android.media.AudioFormat; 6import android.media.AudioTrack; 7import android.util.Log; 8 9/** 10 * Exposes parts of the {@link AudioTrack} API by delegating calls to an 11 * underlying {@link AudioTrack}. Additionally, provides methods like 12 * {@link #waitAndRelease()} that will block until all audiotrack 13 * data has been flushed to the mixer, and is estimated to have completed 14 * playback. 15 */ 16class BlockingAudioTrack { 17 private static final String TAG = "TTS.BlockingAudioTrack"; 18 private static final boolean DBG = false; 19 20 21 /** 22 * The minimum increment of time to wait for an AudioTrack to finish 23 * playing. 24 */ 25 private static final long MIN_SLEEP_TIME_MS = 20; 26 27 /** 28 * The maximum increment of time to sleep while waiting for an AudioTrack 29 * to finish playing. 30 */ 31 private static final long MAX_SLEEP_TIME_MS = 2500; 32 33 /** 34 * The maximum amount of time to wait for an audio track to make progress while 35 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 36 * could happen in exceptional circumstances like a media_server crash. 37 */ 38 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 39 40 /** 41 * Minimum size of the buffer of the underlying {@link android.media.AudioTrack} 42 * we create. 43 */ 44 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 45 46 47 private final int mStreamType; 48 private final int mSampleRateInHz; 49 private final int mAudioFormat; 50 private final int mChannelCount; 51 private final float mVolume; 52 private final float mPan; 53 54 private final int mBytesPerFrame; 55 /** 56 * A "short utterance" is one that uses less bytes than the audio 57 * track buffer size (mAudioBufferSize). In this case, we need to call 58 * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly 59 * different logic is required to wait for the track to finish. 60 * 61 * Not volatile, accessed only from the audio playback thread. 62 */ 63 private boolean mIsShortUtterance; 64 /** 65 * Will be valid after a call to {@link #init()}. 66 */ 67 private int mAudioBufferSize; 68 private int mBytesWritten = 0; 69 70 // Need to be seen by stop() which can be called from another thread. mAudioTrack will be 71 // set to null only after waitAndRelease(). 72 private volatile AudioTrack mAudioTrack; 73 private volatile boolean mStopped; 74 75 BlockingAudioTrack(int streamType, int sampleRate, 76 int audioFormat, int channelCount, 77 float volume, float pan) { 78 mStreamType = streamType; 79 mSampleRateInHz = sampleRate; 80 mAudioFormat = audioFormat; 81 mChannelCount = channelCount; 82 mVolume = volume; 83 mPan = pan; 84 85 mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount; 86 mIsShortUtterance = false; 87 mAudioBufferSize = 0; 88 mBytesWritten = 0; 89 90 mAudioTrack = null; 91 mStopped = false; 92 } 93 94 public boolean init() { 95 AudioTrack track = createStreamingAudioTrack(); 96 mAudioTrack = track; 97 98 if (track == null) { 99 return false; 100 } else { 101 return true; 102 } 103 } 104 105 public void stop() { 106 AudioTrack track = mAudioTrack; 107 if (track != null) { 108 track.stop(); 109 } 110 mStopped = true; 111 } 112 113 public int write(byte[] data) { 114 if (mAudioTrack == null || mStopped) { 115 return -1; 116 } 117 final int bytesWritten = writeToAudioTrack(mAudioTrack, data); 118 mBytesWritten += bytesWritten; 119 return bytesWritten; 120 } 121 122 public void waitAndRelease() { 123 AudioTrack track = mAudioTrack; 124 if (track == null) { 125 if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]"); 126 return; 127 } 128 129 // For "small" audio tracks, we have to stop() them to make them mixable, 130 // else the audio subsystem will wait indefinitely for us to fill the buffer 131 // before rendering the track mixable. 132 // 133 // If mStopped is true, the track would already have been stopped, so not 134 // much point not doing that again. 135 if (mBytesWritten < mAudioBufferSize && !mStopped) { 136 if (DBG) { 137 Log.d(TAG, "Stopping audio track to flush audio, state was : " + 138 track.getPlayState() + ",stopped= " + mStopped); 139 } 140 141 mIsShortUtterance = true; 142 track.stop(); 143 } 144 145 // Block until the audio track is done only if we haven't stopped yet. 146 if (!mStopped) { 147 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode()); 148 blockUntilDone(mAudioTrack); 149 } 150 151 // The last call to AudioTrack.write( ) will return only after 152 // all data from the audioTrack has been sent to the mixer, so 153 // it's safe to release at this point. 154 if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]"); 155 track.release(); 156 mAudioTrack = null; 157 } 158 159 160 static int getChannelConfig(int channelCount) { 161 if (channelCount == 1) { 162 return AudioFormat.CHANNEL_OUT_MONO; 163 } else if (channelCount == 2){ 164 return AudioFormat.CHANNEL_OUT_STEREO; 165 } 166 167 return 0; 168 } 169 170 long getAudioLengthMs(int numBytes) { 171 final int unconsumedFrames = numBytes / mBytesPerFrame; 172 final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz; 173 174 return estimatedTimeMs; 175 } 176 177 private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) { 178 if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) { 179 if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode()); 180 audioTrack.play(); 181 } 182 183 int count = 0; 184 while (count < bytes.length) { 185 // Note that we don't take bufferCopy.mOffset into account because 186 // it is guaranteed to be 0. 187 int written = audioTrack.write(bytes, count, bytes.length); 188 if (written <= 0) { 189 break; 190 } 191 count += written; 192 } 193 return count; 194 } 195 196 private AudioTrack createStreamingAudioTrack() { 197 final int channelConfig = getChannelConfig(mChannelCount); 198 199 int minBufferSizeInBytes 200 = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat); 201 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 202 203 AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig, 204 mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); 205 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 206 Log.w(TAG, "Unable to create audio track."); 207 audioTrack.release(); 208 return null; 209 } 210 211 mAudioBufferSize = bufferSizeInBytes; 212 213 setupVolume(audioTrack, mVolume, mPan); 214 return audioTrack; 215 } 216 217 private static int getBytesPerFrame(int audioFormat) { 218 if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) { 219 return 1; 220 } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) { 221 return 2; 222 } 223 224 return -1; 225 } 226 227 228 private void blockUntilDone(AudioTrack audioTrack) { 229 if (mBytesWritten <= 0) { 230 return; 231 } 232 233 if (mIsShortUtterance) { 234 // In this case we would have called AudioTrack#stop() to flush 235 // buffers to the mixer. This makes the playback head position 236 // unobservable and notification markers do not work reliably. We 237 // have no option but to wait until we think the track would finish 238 // playing and release it after. 239 // 240 // This isn't as bad as it looks because (a) We won't end up waiting 241 // for much longer than we should because even at 4khz mono, a short 242 // utterance weighs in at about 2 seconds, and (b) such short utterances 243 // are expected to be relatively infrequent and in a stream of utterances 244 // this shows up as a slightly longer pause. 245 blockUntilEstimatedCompletion(); 246 } else { 247 blockUntilCompletion(audioTrack); 248 } 249 } 250 251 private void blockUntilEstimatedCompletion() { 252 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 253 final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz); 254 255 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 256 257 try { 258 Thread.sleep(estimatedTimeMs); 259 } catch (InterruptedException ie) { 260 // Do nothing. 261 } 262 } 263 264 private void blockUntilCompletion(AudioTrack audioTrack) { 265 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 266 267 int previousPosition = -1; 268 int currentPosition = 0; 269 long blockedTimeMs = 0; 270 271 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 272 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) { 273 274 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 275 audioTrack.getSampleRate(); 276 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 277 278 // Check if the audio track has made progress since the last loop 279 // iteration. We should then add in the amount of time that was 280 // spent sleeping in the last iteration. 281 if (currentPosition == previousPosition) { 282 // This works only because the sleep time that would have been calculated 283 // would be the same in the previous iteration too. 284 blockedTimeMs += sleepTimeMs; 285 // If we've taken too long to make progress, bail. 286 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 287 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 288 "for AudioTrack to make progress, Aborting"); 289 break; 290 } 291 } else { 292 blockedTimeMs = 0; 293 } 294 previousPosition = currentPosition; 295 296 if (DBG) { 297 Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 298 " Playback position : " + currentPosition + ", Length in frames : " 299 + lengthInFrames); 300 } 301 try { 302 Thread.sleep(sleepTimeMs); 303 } catch (InterruptedException ie) { 304 break; 305 } 306 } 307 } 308 309 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 310 final float vol = clip(volume, 0.0f, 1.0f); 311 final float panning = clip(pan, -1.0f, 1.0f); 312 313 float volLeft = vol; 314 float volRight = vol; 315 if (panning > 0.0f) { 316 volLeft *= (1.0f - panning); 317 } else if (panning < 0.0f) { 318 volRight *= (1.0f + panning); 319 } 320 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 321 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 322 Log.e(TAG, "Failed to set volume"); 323 } 324 } 325 326 private static final long clip(long value, long min, long max) { 327 if (value < min) { 328 return min; 329 } 330 331 if (value > max) { 332 return max; 333 } 334 335 return value; 336 } 337 338 private static float clip(float value, float min, float max) { 339 return value > max ? max : (value < min ? min : value); 340 } 341 342} 343