BlockingAudioTrack.java revision 5cbf17ca053b09beadd0b031a46ce193ab27a0f8
1// Copyright 2011 Google Inc. All Rights Reserved. 2 3package android.speech.tts; 4 5import android.media.AudioFormat; 6import android.media.AudioTrack; 7import android.speech.tts.TextToSpeechService.AudioOutputParams; 8import android.util.Log; 9 10/** 11 * Exposes parts of the {@link AudioTrack} API by delegating calls to an 12 * underlying {@link AudioTrack}. Additionally, provides methods like 13 * {@link #waitAndRelease()} that will block until all audiotrack 14 * data has been flushed to the mixer, and is estimated to have completed 15 * playback. 16 */ 17class BlockingAudioTrack { 18 private static final String TAG = "TTS.BlockingAudioTrack"; 19 private static final boolean DBG = false; 20 21 22 /** 23 * The minimum increment of time to wait for an AudioTrack to finish 24 * playing. 25 */ 26 private static final long MIN_SLEEP_TIME_MS = 20; 27 28 /** 29 * The maximum increment of time to sleep while waiting for an AudioTrack 30 * to finish playing. 31 */ 32 private static final long MAX_SLEEP_TIME_MS = 2500; 33 34 /** 35 * The maximum amount of time to wait for an audio track to make progress while 36 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 37 * could happen in exceptional circumstances like a media_server crash. 38 */ 39 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 40 41 /** 42 * Minimum size of the buffer of the underlying {@link android.media.AudioTrack} 43 * we create. 44 */ 45 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 46 47 48 private final AudioOutputParams mAudioParams; 49 private final int mSampleRateInHz; 50 private final int mAudioFormat; 51 private final int mChannelCount; 52 53 54 private final int mBytesPerFrame; 55 /** 56 * A "short utterance" is one that uses less bytes than the audio 57 * track buffer size (mAudioBufferSize). In this case, we need to call 58 * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly 59 * different logic is required to wait for the track to finish. 60 * 61 * Not volatile, accessed only from the audio playback thread. 62 */ 63 private boolean mIsShortUtterance; 64 /** 65 * Will be valid after a call to {@link #init()}. 66 */ 67 private int mAudioBufferSize; 68 private int mBytesWritten = 0; 69 70 // Need to be seen by stop() which can be called from another thread. mAudioTrack will be 71 // set to null only after waitAndRelease(). 72 private Object mAudioTrackLock = new Object(); 73 private AudioTrack mAudioTrack; 74 private volatile boolean mStopped; 75 76 private int mSessionId; 77 78 BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate, 79 int audioFormat, int channelCount) { 80 mAudioParams = audioParams; 81 mSampleRateInHz = sampleRate; 82 mAudioFormat = audioFormat; 83 mChannelCount = channelCount; 84 85 mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount; 86 mIsShortUtterance = false; 87 mAudioBufferSize = 0; 88 mBytesWritten = 0; 89 90 mAudioTrack = null; 91 mStopped = false; 92 } 93 94 public boolean init() { 95 AudioTrack track = createStreamingAudioTrack(); 96 synchronized (mAudioTrackLock) { 97 mAudioTrack = track; 98 } 99 100 if (track == null) { 101 return false; 102 } else { 103 return true; 104 } 105 } 106 107 public void stop() { 108 synchronized (mAudioTrackLock) { 109 if (mAudioTrack != null) { 110 mAudioTrack.stop(); 111 } 112 mStopped = true; 113 } 114 } 115 116 public int write(byte[] data) { 117 AudioTrack track = null; 118 synchronized (mAudioTrackLock) { 119 track = mAudioTrack; 120 } 121 122 if (track == null || mStopped) { 123 return -1; 124 } 125 final int bytesWritten = writeToAudioTrack(track, data); 126 127 mBytesWritten += bytesWritten; 128 return bytesWritten; 129 } 130 131 public void waitAndRelease() { 132 AudioTrack track = null; 133 synchronized (mAudioTrackLock) { 134 track = mAudioTrack; 135 } 136 if (track == null) { 137 if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]"); 138 return; 139 } 140 141 // For "small" audio tracks, we have to stop() them to make them mixable, 142 // else the audio subsystem will wait indefinitely for us to fill the buffer 143 // before rendering the track mixable. 144 // 145 // If mStopped is true, the track would already have been stopped, so not 146 // much point not doing that again. 147 if (mBytesWritten < mAudioBufferSize && !mStopped) { 148 if (DBG) { 149 Log.d(TAG, "Stopping audio track to flush audio, state was : " + 150 track.getPlayState() + ",stopped= " + mStopped); 151 } 152 153 mIsShortUtterance = true; 154 track.stop(); 155 } 156 157 // Block until the audio track is done only if we haven't stopped yet. 158 if (!mStopped) { 159 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode()); 160 blockUntilDone(mAudioTrack); 161 } 162 163 // The last call to AudioTrack.write( ) will return only after 164 // all data from the audioTrack has been sent to the mixer, so 165 // it's safe to release at this point. 166 if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]"); 167 synchronized(mAudioTrackLock) { 168 mAudioTrack = null; 169 } 170 track.release(); 171 } 172 173 174 static int getChannelConfig(int channelCount) { 175 if (channelCount == 1) { 176 return AudioFormat.CHANNEL_OUT_MONO; 177 } else if (channelCount == 2){ 178 return AudioFormat.CHANNEL_OUT_STEREO; 179 } 180 181 return 0; 182 } 183 184 long getAudioLengthMs(int numBytes) { 185 final int unconsumedFrames = numBytes / mBytesPerFrame; 186 final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz; 187 188 return estimatedTimeMs; 189 } 190 191 private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) { 192 if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) { 193 if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode()); 194 audioTrack.play(); 195 } 196 197 int count = 0; 198 while (count < bytes.length) { 199 // Note that we don't take bufferCopy.mOffset into account because 200 // it is guaranteed to be 0. 201 int written = audioTrack.write(bytes, count, bytes.length); 202 if (written <= 0) { 203 break; 204 } 205 count += written; 206 } 207 return count; 208 } 209 210 private AudioTrack createStreamingAudioTrack() { 211 final int channelConfig = getChannelConfig(mChannelCount); 212 213 int minBufferSizeInBytes 214 = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat); 215 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 216 217 AudioTrack audioTrack = new AudioTrack(mAudioParams.mStreamType, mSampleRateInHz, 218 channelConfig, mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM, 219 mAudioParams.mSessionId); 220 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 221 Log.w(TAG, "Unable to create audio track."); 222 audioTrack.release(); 223 return null; 224 } 225 226 mAudioBufferSize = bufferSizeInBytes; 227 228 setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan); 229 return audioTrack; 230 } 231 232 private void blockUntilDone(AudioTrack audioTrack) { 233 if (mBytesWritten <= 0) { 234 return; 235 } 236 237 if (mIsShortUtterance) { 238 // In this case we would have called AudioTrack#stop() to flush 239 // buffers to the mixer. This makes the playback head position 240 // unobservable and notification markers do not work reliably. We 241 // have no option but to wait until we think the track would finish 242 // playing and release it after. 243 // 244 // This isn't as bad as it looks because (a) We won't end up waiting 245 // for much longer than we should because even at 4khz mono, a short 246 // utterance weighs in at about 2 seconds, and (b) such short utterances 247 // are expected to be relatively infrequent and in a stream of utterances 248 // this shows up as a slightly longer pause. 249 blockUntilEstimatedCompletion(); 250 } else { 251 blockUntilCompletion(audioTrack); 252 } 253 } 254 255 private void blockUntilEstimatedCompletion() { 256 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 257 final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz); 258 259 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 260 261 try { 262 Thread.sleep(estimatedTimeMs); 263 } catch (InterruptedException ie) { 264 // Do nothing. 265 } 266 } 267 268 private void blockUntilCompletion(AudioTrack audioTrack) { 269 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 270 271 int previousPosition = -1; 272 int currentPosition = 0; 273 long blockedTimeMs = 0; 274 275 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 276 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) { 277 278 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 279 audioTrack.getSampleRate(); 280 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 281 282 // Check if the audio track has made progress since the last loop 283 // iteration. We should then add in the amount of time that was 284 // spent sleeping in the last iteration. 285 if (currentPosition == previousPosition) { 286 // This works only because the sleep time that would have been calculated 287 // would be the same in the previous iteration too. 288 blockedTimeMs += sleepTimeMs; 289 // If we've taken too long to make progress, bail. 290 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 291 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 292 "for AudioTrack to make progress, Aborting"); 293 break; 294 } 295 } else { 296 blockedTimeMs = 0; 297 } 298 previousPosition = currentPosition; 299 300 if (DBG) { 301 Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 302 " Playback position : " + currentPosition + ", Length in frames : " 303 + lengthInFrames); 304 } 305 try { 306 Thread.sleep(sleepTimeMs); 307 } catch (InterruptedException ie) { 308 break; 309 } 310 } 311 } 312 313 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 314 final float vol = clip(volume, 0.0f, 1.0f); 315 final float panning = clip(pan, -1.0f, 1.0f); 316 317 float volLeft = vol; 318 float volRight = vol; 319 if (panning > 0.0f) { 320 volLeft *= (1.0f - panning); 321 } else if (panning < 0.0f) { 322 volRight *= (1.0f + panning); 323 } 324 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 325 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 326 Log.e(TAG, "Failed to set volume"); 327 } 328 } 329 330 private static final long clip(long value, long min, long max) { 331 return value < min ? min : (value < max ? value : max); 332 } 333 334 private static final float clip(float value, float min, float max) { 335 return value < min ? min : (value < max ? value : max); 336 } 337 338} 339