AudioPlaybackHandler.java revision 754c72ed9e8e83e5a913aa7552fc2e1b1b5277e0
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16package android.speech.tts; 17 18import android.media.AudioFormat; 19import android.media.AudioTrack; 20import android.text.TextUtils; 21import android.util.Log; 22 23import java.util.Iterator; 24import java.util.concurrent.PriorityBlockingQueue; 25import java.util.concurrent.atomic.AtomicLong; 26 27class AudioPlaybackHandler { 28 private static final String TAG = "TTS.AudioPlaybackHandler"; 29 private static final boolean DBG_THREADING = false; 30 private static final boolean DBG = false; 31 32 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 33 34 private static final int SYNTHESIS_START = 1; 35 private static final int SYNTHESIS_DATA_AVAILABLE = 2; 36 private static final int SYNTHESIS_DONE = 3; 37 38 private static final int PLAY_AUDIO = 5; 39 private static final int PLAY_SILENCE = 6; 40 41 private static final int SHUTDOWN = -1; 42 43 private static final int DEFAULT_PRIORITY = 1; 44 private static final int HIGH_PRIORITY = 0; 45 46 private final PriorityBlockingQueue<ListEntry> mQueue = 47 new PriorityBlockingQueue<ListEntry>(); 48 private final Thread mHandlerThread; 49 50 private volatile MessageParams mCurrentParams = null; 51 // Used only for book keeping and error detection. 52 private volatile SynthesisMessageParams mLastSynthesisRequest = null; 53 // Used to order incoming messages in our priority queue. 54 private final AtomicLong mSequenceIdCtr = new AtomicLong(0); 55 56 57 AudioPlaybackHandler() { 58 mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread"); 59 } 60 61 public void start() { 62 mHandlerThread.start(); 63 } 64 65 /** 66 * Stops all synthesis for a given {@code token}. If the current token 67 * is currently being processed, an effort will be made to stop it but 68 * that is not guaranteed. 69 * 70 * NOTE: This assumes that all other messages in the queue with {@code token} 71 * have been removed already. 72 * 73 * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}. 74 */ 75 private void stop(MessageParams token) { 76 if (token == null) { 77 return; 78 } 79 80 if (DBG) Log.d(TAG, "Stopping token : " + token); 81 82 if (token.getType() == MessageParams.TYPE_SYNTHESIS) { 83 AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack(); 84 if (current != null) { 85 // Stop the current audio track if it's still playing. 86 // The audio track is thread safe in this regard. The current 87 // handleSynthesisDataAvailable call will return soon after this 88 // call. 89 current.stop(); 90 } 91 // This is safe because PlaybackSynthesisCallback#stop would have 92 // been called before this method, and will no longer enqueue any 93 // audio for this token. 94 // 95 // (Even if it did, all it would result in is a warning message). 96 mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY)); 97 } else if (token.getType() == MessageParams.TYPE_AUDIO) { 98 ((AudioMessageParams) token).getPlayer().stop(); 99 // No cleanup required for audio messages. 100 } else if (token.getType() == MessageParams.TYPE_SILENCE) { 101 ((SilenceMessageParams) token).getConditionVariable().open(); 102 // No cleanup required for silence messages. 103 } 104 } 105 106 // ----------------------------------------------------- 107 // Methods that add and remove elements from the queue. These do not 108 // need to be synchronized strictly speaking, but they make the behaviour 109 // a lot more predictable. (though it would still be correct without 110 // synchronization). 111 // ----------------------------------------------------- 112 113 synchronized public void removePlaybackItems(String callingApp) { 114 if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp); 115 removeMessages(callingApp); 116 117 final MessageParams current = getCurrentParams(); 118 if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) { 119 stop(current); 120 } 121 } 122 123 synchronized public void removeAllItems() { 124 if (DBG_THREADING) Log.d(TAG, "Removing all items"); 125 removeAllMessages(); 126 stop(getCurrentParams()); 127 } 128 129 /** 130 * @return false iff the queue is empty and no queue item is currently 131 * being handled, true otherwise. 132 */ 133 public boolean isSpeaking() { 134 return (mQueue.peek() != null) || (mCurrentParams != null); 135 } 136 137 /** 138 * Shut down the audio playback thread. 139 */ 140 synchronized public void quit() { 141 removeAllMessages(); 142 stop(getCurrentParams()); 143 mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY)); 144 } 145 146 synchronized void enqueueSynthesisStart(SynthesisMessageParams token) { 147 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token); 148 mQueue.add(new ListEntry(SYNTHESIS_START, token)); 149 } 150 151 synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) { 152 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token); 153 mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token)); 154 } 155 156 synchronized void enqueueSynthesisDone(SynthesisMessageParams token) { 157 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token); 158 mQueue.add(new ListEntry(SYNTHESIS_DONE, token)); 159 } 160 161 synchronized void enqueueAudio(AudioMessageParams token) { 162 if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token); 163 mQueue.add(new ListEntry(PLAY_AUDIO, token)); 164 } 165 166 synchronized void enqueueSilence(SilenceMessageParams token) { 167 if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token); 168 mQueue.add(new ListEntry(PLAY_SILENCE, token)); 169 } 170 171 // ----------------------------------------- 172 // End of public API methods. 173 // ----------------------------------------- 174 175 // ----------------------------------------- 176 // Methods for managing the message queue. 177 // ----------------------------------------- 178 179 /* 180 * The MessageLoop is a handler like implementation that 181 * processes messages from a priority queue. 182 */ 183 private final class MessageLoop implements Runnable { 184 @Override 185 public void run() { 186 while (true) { 187 ListEntry entry = null; 188 try { 189 entry = mQueue.take(); 190 } catch (InterruptedException ie) { 191 return; 192 } 193 194 if (entry.mWhat == SHUTDOWN) { 195 if (DBG) Log.d(TAG, "MessageLoop : Shutting down"); 196 return; 197 } 198 199 if (DBG) { 200 Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat 201 + " ,seqId : " + entry.mSequenceId); 202 } 203 204 setCurrentParams(entry.mMessage); 205 handleMessage(entry); 206 setCurrentParams(null); 207 } 208 } 209 } 210 211 /* 212 * Atomically clear the queue of all messages. 213 */ 214 synchronized private void removeAllMessages() { 215 mQueue.clear(); 216 } 217 218 /* 219 * Remove all messages that originate from a given calling app. 220 */ 221 synchronized private void removeMessages(String callingApp) { 222 Iterator<ListEntry> it = mQueue.iterator(); 223 224 while (it.hasNext()) { 225 final ListEntry current = it.next(); 226 // The null check is to prevent us from removing control messages, 227 // such as a shutdown message. 228 if (current.mMessage != null && 229 callingApp.equals(current.mMessage.getCallingApp())) { 230 it.remove(); 231 } 232 } 233 } 234 235 /* 236 * An element of our priority queue of messages. Each message has a priority, 237 * and a sequence id (defined by the order of enqueue calls). Among messages 238 * with the same priority, messages that were received earlier win out. 239 */ 240 private final class ListEntry implements Comparable<ListEntry> { 241 final int mWhat; 242 final MessageParams mMessage; 243 final int mPriority; 244 final long mSequenceId; 245 246 private ListEntry(int what, MessageParams message) { 247 this(what, message, DEFAULT_PRIORITY); 248 } 249 250 private ListEntry(int what, MessageParams message, int priority) { 251 mWhat = what; 252 mMessage = message; 253 mPriority = priority; 254 mSequenceId = mSequenceIdCtr.incrementAndGet(); 255 } 256 257 @Override 258 public int compareTo(ListEntry that) { 259 if (that == this) { 260 return 0; 261 } 262 263 // Note that this is always 0, 1 or -1. 264 int priorityDiff = mPriority - that.mPriority; 265 if (priorityDiff == 0) { 266 // The == case cannot occur. 267 return (mSequenceId < that.mSequenceId) ? -1 : 1; 268 } 269 270 return priorityDiff; 271 } 272 } 273 274 private void setCurrentParams(MessageParams p) { 275 if (DBG_THREADING) { 276 if (p != null) { 277 Log.d(TAG, "Started handling :" + p); 278 } else { 279 Log.d(TAG, "End handling : " + mCurrentParams); 280 } 281 } 282 mCurrentParams = p; 283 } 284 285 private MessageParams getCurrentParams() { 286 return mCurrentParams; 287 } 288 289 // ----------------------------------------- 290 // Methods for dealing with individual messages, the methods 291 // below do the actual work. 292 // ----------------------------------------- 293 294 private void handleMessage(ListEntry entry) { 295 final MessageParams msg = entry.mMessage; 296 if (entry.mWhat == SYNTHESIS_START) { 297 handleSynthesisStart(msg); 298 } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) { 299 handleSynthesisDataAvailable(msg); 300 } else if (entry.mWhat == SYNTHESIS_DONE) { 301 handleSynthesisDone(msg); 302 } else if (entry.mWhat == PLAY_AUDIO) { 303 handleAudio(msg); 304 } else if (entry.mWhat == PLAY_SILENCE) { 305 handleSilence(msg); 306 } 307 } 308 309 // Currently implemented as blocking the audio playback thread for the 310 // specified duration. If a call to stop() is made, the thread 311 // unblocks. 312 private void handleSilence(MessageParams msg) { 313 if (DBG) Log.d(TAG, "handleSilence()"); 314 SilenceMessageParams params = (SilenceMessageParams) msg; 315 params.getDispatcher().dispatchOnStart(); 316 if (params.getSilenceDurationMs() > 0) { 317 params.getConditionVariable().block(params.getSilenceDurationMs()); 318 } 319 params.getDispatcher().dispatchOnDone(); 320 if (DBG) Log.d(TAG, "handleSilence() done."); 321 } 322 323 // Plays back audio from a given URI. No TTS engine involvement here. 324 private void handleAudio(MessageParams msg) { 325 if (DBG) Log.d(TAG, "handleAudio()"); 326 AudioMessageParams params = (AudioMessageParams) msg; 327 params.getDispatcher().dispatchOnStart(); 328 // Note that the BlockingMediaPlayer spawns a separate thread. 329 // 330 // TODO: This can be avoided. 331 params.getPlayer().startAndWait(); 332 params.getDispatcher().dispatchOnDone(); 333 if (DBG) Log.d(TAG, "handleAudio() done."); 334 } 335 336 // Denotes the start of a new synthesis request. We create a new 337 // audio track, and prepare it for incoming data. 338 // 339 // Note that since all TTS synthesis happens on a single thread, we 340 // should ALWAYS see the following order : 341 // 342 // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone 343 // OR 344 // handleSynthesisCompleteDataAvailable. 345 private void handleSynthesisStart(MessageParams msg) { 346 if (DBG) Log.d(TAG, "handleSynthesisStart()"); 347 final SynthesisMessageParams param = (SynthesisMessageParams) msg; 348 349 // Oops, looks like the engine forgot to call done(). We go through 350 // extra trouble to clean the data to prevent the AudioTrack resources 351 // from being leaked. 352 if (mLastSynthesisRequest != null) { 353 Log.w(TAG, "Error : Missing call to done() for request : " + 354 mLastSynthesisRequest); 355 handleSynthesisDone(mLastSynthesisRequest); 356 } 357 358 mLastSynthesisRequest = param; 359 360 // Create the audio track. 361 final AudioTrack audioTrack = createStreamingAudioTrack(param); 362 363 if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]"); 364 365 param.setAudioTrack(audioTrack); 366 msg.getDispatcher().dispatchOnStart(); 367 } 368 369 // More data available to be flushed to the audio track. 370 private void handleSynthesisDataAvailable(MessageParams msg) { 371 final SynthesisMessageParams param = (SynthesisMessageParams) msg; 372 if (param.getAudioTrack() == null) { 373 Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param); 374 return; 375 } 376 377 if (param != mLastSynthesisRequest) { 378 Log.e(TAG, "Call to dataAvailable without done() / start()"); 379 return; 380 } 381 382 final AudioTrack audioTrack = param.getAudioTrack(); 383 final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer(); 384 385 if (bufferCopy == null) { 386 Log.e(TAG, "No buffers available to play."); 387 return; 388 } 389 390 int playState = audioTrack.getPlayState(); 391 if (playState == AudioTrack.PLAYSTATE_STOPPED) { 392 if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode()); 393 audioTrack.play(); 394 } 395 int count = 0; 396 while (count < bufferCopy.mBytes.length) { 397 // Note that we don't take bufferCopy.mOffset into account because 398 // it is guaranteed to be 0. 399 int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mBytes.length); 400 if (written <= 0) { 401 break; 402 } 403 count += written; 404 } 405 param.mBytesWritten += count; 406 param.mLogger.onPlaybackStart(); 407 } 408 409 // Wait for the audio track to stop playing, and then release its resources. 410 private void handleSynthesisDone(MessageParams msg) { 411 final SynthesisMessageParams params = (SynthesisMessageParams) msg; 412 413 if (DBG) Log.d(TAG, "handleSynthesisDone()"); 414 final AudioTrack audioTrack = params.getAudioTrack(); 415 416 if (audioTrack == null) { 417 params.getDispatcher().dispatchOnError(); 418 return; 419 } 420 421 if (params.mBytesWritten < params.mAudioBufferSize) { 422 if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " + 423 audioTrack.getPlayState()); 424 params.mIsShortUtterance = true; 425 audioTrack.stop(); 426 } 427 428 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + 429 audioTrack.hashCode()); 430 blockUntilDone(params); 431 if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]"); 432 433 // The last call to AudioTrack.write( ) will return only after 434 // all data from the audioTrack has been sent to the mixer, so 435 // it's safe to release at this point. Make sure release() and the call 436 // that set the audio track to null are performed atomically. 437 synchronized (this) { 438 // Never allow the audioTrack to be observed in a state where 439 // it is released but non null. The only case this might happen 440 // is in the various stopFoo methods that call AudioTrack#stop from 441 // different threads, but they are synchronized on AudioPlayBackHandler#this 442 // too. 443 audioTrack.release(); 444 params.setAudioTrack(null); 445 } 446 params.getDispatcher().dispatchOnDone(); 447 mLastSynthesisRequest = null; 448 params.mLogger.onWriteData(); 449 } 450 451 /** 452 * The minimum increment of time to wait for an audiotrack to finish 453 * playing. 454 */ 455 private static final long MIN_SLEEP_TIME_MS = 20; 456 457 /** 458 * The maximum increment of time to sleep while waiting for an audiotrack 459 * to finish playing. 460 */ 461 private static final long MAX_SLEEP_TIME_MS = 2500; 462 463 /** 464 * The maximum amount of time to wait for an audio track to make progress while 465 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 466 * could happen in exceptional circumstances like a media_server crash. 467 */ 468 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 469 470 private static void blockUntilDone(SynthesisMessageParams params) { 471 if (params.mAudioTrack == null || params.mBytesWritten <= 0) { 472 return; 473 } 474 475 if (params.mIsShortUtterance) { 476 // In this case we would have called AudioTrack#stop() to flush 477 // buffers to the mixer. This makes the playback head position 478 // unobservable and notification markers do not work reliably. We 479 // have no option but to wait until we think the track would finish 480 // playing and release it after. 481 // 482 // This isn't as bad as it looks because (a) We won't end up waiting 483 // for much longer than we should because even at 4khz mono, a short 484 // utterance weighs in at about 2 seconds, and (b) such short utterances 485 // are expected to be relatively infrequent and in a stream of utterances 486 // this shows up as a slightly longer pause. 487 blockUntilEstimatedCompletion(params); 488 } else { 489 blockUntilCompletion(params); 490 } 491 } 492 493 private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) { 494 final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; 495 final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz); 496 497 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 498 499 try { 500 Thread.sleep(estimatedTimeMs); 501 } catch (InterruptedException ie) { 502 // Do nothing. 503 } 504 } 505 506 private static void blockUntilCompletion(SynthesisMessageParams params) { 507 final AudioTrack audioTrack = params.mAudioTrack; 508 final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; 509 510 int previousPosition = -1; 511 int currentPosition = 0; 512 long blockedTimeMs = 0; 513 514 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 515 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) { 516 517 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 518 audioTrack.getSampleRate(); 519 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 520 521 // Check if the audio track has made progress since the last loop 522 // iteration. We should then add in the amount of time that was 523 // spent sleeping in the last iteration. 524 if (currentPosition == previousPosition) { 525 // This works only because the sleep time that would have been calculated 526 // would be the same in the previous iteration too. 527 blockedTimeMs += sleepTimeMs; 528 // If we've taken too long to make progress, bail. 529 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 530 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 531 "for AudioTrack to make progress, Aborting"); 532 break; 533 } 534 } else { 535 blockedTimeMs = 0; 536 } 537 previousPosition = currentPosition; 538 539 if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 540 " Playback position : " + currentPosition + ", Length in frames : " 541 + lengthInFrames); 542 try { 543 Thread.sleep(sleepTimeMs); 544 } catch (InterruptedException ie) { 545 break; 546 } 547 } 548 } 549 550 private static final long clip(long value, long min, long max) { 551 if (value < min) { 552 return min; 553 } 554 555 if (value > max) { 556 return max; 557 } 558 559 return value; 560 } 561 562 private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) { 563 final int channelConfig = getChannelConfig(params.mChannelCount); 564 final int sampleRateInHz = params.mSampleRateInHz; 565 final int audioFormat = params.mAudioFormat; 566 567 int minBufferSizeInBytes 568 = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); 569 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 570 571 AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig, 572 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); 573 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 574 Log.w(TAG, "Unable to create audio track."); 575 audioTrack.release(); 576 return null; 577 } 578 params.mAudioBufferSize = bufferSizeInBytes; 579 580 setupVolume(audioTrack, params.mVolume, params.mPan); 581 return audioTrack; 582 } 583 584 static int getChannelConfig(int channelCount) { 585 if (channelCount == 1) { 586 return AudioFormat.CHANNEL_OUT_MONO; 587 } else if (channelCount == 2){ 588 return AudioFormat.CHANNEL_OUT_STEREO; 589 } 590 591 return 0; 592 } 593 594 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 595 float vol = clip(volume, 0.0f, 1.0f); 596 float panning = clip(pan, -1.0f, 1.0f); 597 float volLeft = vol; 598 float volRight = vol; 599 if (panning > 0.0f) { 600 volLeft *= (1.0f - panning); 601 } else if (panning < 0.0f) { 602 volRight *= (1.0f + panning); 603 } 604 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 605 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 606 Log.e(TAG, "Failed to set volume"); 607 } 608 } 609 610 private static float clip(float value, float min, float max) { 611 return value > max ? max : (value < min ? min : value); 612 } 613 614} 615