AudioPlaybackHandler.java revision 492b7f0d51f53164aa6eb974cd7ab6a7889af677
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16package android.speech.tts; 17 18import android.media.AudioFormat; 19import android.media.AudioTrack; 20import android.text.TextUtils; 21import android.util.Log; 22 23import java.util.Iterator; 24import java.util.concurrent.PriorityBlockingQueue; 25import java.util.concurrent.atomic.AtomicLong; 26 27class AudioPlaybackHandler { 28 private static final String TAG = "TTS.AudioPlaybackHandler"; 29 private static final boolean DBG_THREADING = false; 30 private static final boolean DBG = false; 31 32 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 33 34 private static final int SYNTHESIS_START = 1; 35 private static final int SYNTHESIS_DATA_AVAILABLE = 2; 36 private static final int SYNTHESIS_DONE = 3; 37 38 private static final int PLAY_AUDIO = 5; 39 private static final int PLAY_SILENCE = 6; 40 41 private static final int SHUTDOWN = -1; 42 43 private static final int DEFAULT_PRIORITY = 1; 44 private static final int HIGH_PRIORITY = 0; 45 46 private final PriorityBlockingQueue<ListEntry> mQueue = 47 new PriorityBlockingQueue<ListEntry>(); 48 private final Thread mHandlerThread; 49 50 private volatile MessageParams mCurrentParams = null; 51 // Used only for book keeping and error detection. 52 private volatile SynthesisMessageParams mLastSynthesisRequest = null; 53 // Used to order incoming messages in our priority queue. 54 private final AtomicLong mSequenceIdCtr = new AtomicLong(0); 55 56 57 AudioPlaybackHandler() { 58 mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread"); 59 } 60 61 public void start() { 62 mHandlerThread.start(); 63 } 64 65 /** 66 * Stops all synthesis for a given {@code token}. If the current token 67 * is currently being processed, an effort will be made to stop it but 68 * that is not guaranteed. 69 * 70 * NOTE: This assumes that all other messages in the queue with {@code token} 71 * have been removed already. 72 * 73 * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}. 74 */ 75 private void stop(MessageParams token) { 76 if (token == null) { 77 return; 78 } 79 80 if (DBG) Log.d(TAG, "Stopping token : " + token); 81 82 if (token.getType() == MessageParams.TYPE_SYNTHESIS) { 83 AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack(); 84 if (current != null) { 85 // Stop the current audio track if it's still playing. 86 // The audio track is thread safe in this regard. The current 87 // handleSynthesisDataAvailable call will return soon after this 88 // call. 89 current.stop(); 90 } 91 // This is safe because PlaybackSynthesisCallback#stop would have 92 // been called before this method, and will no longer enqueue any 93 // audio for this token. 94 // 95 // (Even if it did, all it would result in is a warning message). 96 mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY)); 97 } else if (token.getType() == MessageParams.TYPE_AUDIO) { 98 ((AudioMessageParams) token).getPlayer().stop(); 99 // No cleanup required for audio messages. 100 } else if (token.getType() == MessageParams.TYPE_SILENCE) { 101 ((SilenceMessageParams) token).getConditionVariable().open(); 102 // No cleanup required for silence messages. 103 } 104 } 105 106 // ----------------------------------------------------- 107 // Methods that add and remove elements from the queue. These do not 108 // need to be synchronized strictly speaking, but they make the behaviour 109 // a lot more predictable. (though it would still be correct without 110 // synchronization). 111 // ----------------------------------------------------- 112 113 synchronized public void removePlaybackItems(Object callerIdentity) { 114 if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callerIdentity); 115 removeMessages(callerIdentity); 116 117 final MessageParams current = getCurrentParams(); 118 if (current != null && (current.getCallerIdentity() == callerIdentity)) { 119 stop(current); 120 } 121 122 final MessageParams lastSynthesis = mLastSynthesisRequest; 123 124 if (lastSynthesis != null && lastSynthesis != current && 125 (lastSynthesis.getCallerIdentity() == callerIdentity)) { 126 stop(lastSynthesis); 127 } 128 } 129 130 synchronized public void removeAllItems() { 131 if (DBG_THREADING) Log.d(TAG, "Removing all items"); 132 removeAllMessages(); 133 134 final MessageParams current = getCurrentParams(); 135 final MessageParams lastSynthesis = mLastSynthesisRequest; 136 stop(current); 137 138 if (lastSynthesis != null && lastSynthesis != current) { 139 stop(lastSynthesis); 140 } 141 } 142 143 /** 144 * @return false iff the queue is empty and no queue item is currently 145 * being handled, true otherwise. 146 */ 147 public boolean isSpeaking() { 148 return (mQueue.peek() != null) || (mCurrentParams != null); 149 } 150 151 /** 152 * Shut down the audio playback thread. 153 */ 154 synchronized public void quit() { 155 removeAllMessages(); 156 stop(getCurrentParams()); 157 mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY)); 158 } 159 160 synchronized void enqueueSynthesisStart(SynthesisMessageParams token) { 161 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token); 162 mQueue.add(new ListEntry(SYNTHESIS_START, token)); 163 } 164 165 synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) { 166 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token); 167 mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token)); 168 } 169 170 synchronized void enqueueSynthesisDone(SynthesisMessageParams token) { 171 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token); 172 mQueue.add(new ListEntry(SYNTHESIS_DONE, token)); 173 } 174 175 synchronized void enqueueAudio(AudioMessageParams token) { 176 if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token); 177 mQueue.add(new ListEntry(PLAY_AUDIO, token)); 178 } 179 180 synchronized void enqueueSilence(SilenceMessageParams token) { 181 if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token); 182 mQueue.add(new ListEntry(PLAY_SILENCE, token)); 183 } 184 185 // ----------------------------------------- 186 // End of public API methods. 187 // ----------------------------------------- 188 189 // ----------------------------------------- 190 // Methods for managing the message queue. 191 // ----------------------------------------- 192 193 /* 194 * The MessageLoop is a handler like implementation that 195 * processes messages from a priority queue. 196 */ 197 private final class MessageLoop implements Runnable { 198 @Override 199 public void run() { 200 while (true) { 201 ListEntry entry = null; 202 try { 203 entry = mQueue.take(); 204 } catch (InterruptedException ie) { 205 return; 206 } 207 208 if (entry.mWhat == SHUTDOWN) { 209 if (DBG) Log.d(TAG, "MessageLoop : Shutting down"); 210 return; 211 } 212 213 if (DBG) { 214 Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat 215 + " ,seqId : " + entry.mSequenceId); 216 } 217 218 setCurrentParams(entry.mMessage); 219 handleMessage(entry); 220 setCurrentParams(null); 221 } 222 } 223 } 224 225 /* 226 * Atomically clear the queue of all messages. 227 */ 228 synchronized private void removeAllMessages() { 229 mQueue.clear(); 230 } 231 232 /* 233 * Remove all messages that originate from a given calling app. 234 */ 235 synchronized private void removeMessages(Object callerIdentity) { 236 Iterator<ListEntry> it = mQueue.iterator(); 237 238 while (it.hasNext()) { 239 final ListEntry current = it.next(); 240 // The null check is to prevent us from removing control messages, 241 // such as a shutdown message. 242 if (current.mMessage != null && 243 current.mMessage.getCallerIdentity() == callerIdentity) { 244 it.remove(); 245 } 246 } 247 } 248 249 /* 250 * An element of our priority queue of messages. Each message has a priority, 251 * and a sequence id (defined by the order of enqueue calls). Among messages 252 * with the same priority, messages that were received earlier win out. 253 */ 254 private final class ListEntry implements Comparable<ListEntry> { 255 final int mWhat; 256 final MessageParams mMessage; 257 final int mPriority; 258 final long mSequenceId; 259 260 private ListEntry(int what, MessageParams message) { 261 this(what, message, DEFAULT_PRIORITY); 262 } 263 264 private ListEntry(int what, MessageParams message, int priority) { 265 mWhat = what; 266 mMessage = message; 267 mPriority = priority; 268 mSequenceId = mSequenceIdCtr.incrementAndGet(); 269 } 270 271 @Override 272 public int compareTo(ListEntry that) { 273 if (that == this) { 274 return 0; 275 } 276 277 // Note that this is always 0, 1 or -1. 278 int priorityDiff = mPriority - that.mPriority; 279 if (priorityDiff == 0) { 280 // The == case cannot occur. 281 return (mSequenceId < that.mSequenceId) ? -1 : 1; 282 } 283 284 return priorityDiff; 285 } 286 } 287 288 private void setCurrentParams(MessageParams p) { 289 if (DBG_THREADING) { 290 if (p != null) { 291 Log.d(TAG, "Started handling :" + p); 292 } else { 293 Log.d(TAG, "End handling : " + mCurrentParams); 294 } 295 } 296 mCurrentParams = p; 297 } 298 299 private MessageParams getCurrentParams() { 300 return mCurrentParams; 301 } 302 303 // ----------------------------------------- 304 // Methods for dealing with individual messages, the methods 305 // below do the actual work. 306 // ----------------------------------------- 307 308 private void handleMessage(ListEntry entry) { 309 final MessageParams msg = entry.mMessage; 310 if (entry.mWhat == SYNTHESIS_START) { 311 handleSynthesisStart(msg); 312 } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) { 313 handleSynthesisDataAvailable(msg); 314 } else if (entry.mWhat == SYNTHESIS_DONE) { 315 handleSynthesisDone(msg); 316 } else if (entry.mWhat == PLAY_AUDIO) { 317 handleAudio(msg); 318 } else if (entry.mWhat == PLAY_SILENCE) { 319 handleSilence(msg); 320 } 321 } 322 323 // Currently implemented as blocking the audio playback thread for the 324 // specified duration. If a call to stop() is made, the thread 325 // unblocks. 326 private void handleSilence(MessageParams msg) { 327 if (DBG) Log.d(TAG, "handleSilence()"); 328 SilenceMessageParams params = (SilenceMessageParams) msg; 329 params.getDispatcher().dispatchOnStart(); 330 if (params.getSilenceDurationMs() > 0) { 331 params.getConditionVariable().block(params.getSilenceDurationMs()); 332 } 333 params.getDispatcher().dispatchOnDone(); 334 if (DBG) Log.d(TAG, "handleSilence() done."); 335 } 336 337 // Plays back audio from a given URI. No TTS engine involvement here. 338 private void handleAudio(MessageParams msg) { 339 if (DBG) Log.d(TAG, "handleAudio()"); 340 AudioMessageParams params = (AudioMessageParams) msg; 341 params.getDispatcher().dispatchOnStart(); 342 // Note that the BlockingMediaPlayer spawns a separate thread. 343 // 344 // TODO: This can be avoided. 345 params.getPlayer().startAndWait(); 346 params.getDispatcher().dispatchOnDone(); 347 if (DBG) Log.d(TAG, "handleAudio() done."); 348 } 349 350 // Denotes the start of a new synthesis request. We create a new 351 // audio track, and prepare it for incoming data. 352 // 353 // Note that since all TTS synthesis happens on a single thread, we 354 // should ALWAYS see the following order : 355 // 356 // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone 357 // OR 358 // handleSynthesisCompleteDataAvailable. 359 private void handleSynthesisStart(MessageParams msg) { 360 if (DBG) Log.d(TAG, "handleSynthesisStart()"); 361 final SynthesisMessageParams param = (SynthesisMessageParams) msg; 362 363 // Oops, looks like the engine forgot to call done(). We go through 364 // extra trouble to clean the data to prevent the AudioTrack resources 365 // from being leaked. 366 if (mLastSynthesisRequest != null) { 367 Log.e(TAG, "Error : Missing call to done() for request : " + 368 mLastSynthesisRequest); 369 handleSynthesisDone(mLastSynthesisRequest); 370 } 371 372 mLastSynthesisRequest = param; 373 374 // Create the audio track. 375 final AudioTrack audioTrack = createStreamingAudioTrack(param); 376 377 if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]"); 378 379 param.setAudioTrack(audioTrack); 380 msg.getDispatcher().dispatchOnStart(); 381 } 382 383 // More data available to be flushed to the audio track. 384 private void handleSynthesisDataAvailable(MessageParams msg) { 385 final SynthesisMessageParams param = (SynthesisMessageParams) msg; 386 if (param.getAudioTrack() == null) { 387 Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param); 388 return; 389 } 390 391 if (param != mLastSynthesisRequest) { 392 Log.e(TAG, "Call to dataAvailable without done() / start()"); 393 return; 394 } 395 396 final AudioTrack audioTrack = param.getAudioTrack(); 397 final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer(); 398 399 if (bufferCopy == null) { 400 Log.e(TAG, "No buffers available to play."); 401 return; 402 } 403 404 int playState = audioTrack.getPlayState(); 405 if (playState == AudioTrack.PLAYSTATE_STOPPED) { 406 if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode()); 407 audioTrack.play(); 408 } 409 int count = 0; 410 while (count < bufferCopy.mBytes.length) { 411 // Note that we don't take bufferCopy.mOffset into account because 412 // it is guaranteed to be 0. 413 int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mBytes.length); 414 if (written <= 0) { 415 break; 416 } 417 count += written; 418 } 419 param.mBytesWritten += count; 420 param.mLogger.onPlaybackStart(); 421 } 422 423 // Wait for the audio track to stop playing, and then release its resources. 424 private void handleSynthesisDone(MessageParams msg) { 425 final SynthesisMessageParams params = (SynthesisMessageParams) msg; 426 427 if (DBG) Log.d(TAG, "handleSynthesisDone()"); 428 final AudioTrack audioTrack = params.getAudioTrack(); 429 430 if (audioTrack == null) { 431 params.getDispatcher().dispatchOnError(); 432 return; 433 } 434 435 if (params.mBytesWritten < params.mAudioBufferSize) { 436 if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " + 437 audioTrack.getPlayState()); 438 params.mIsShortUtterance = true; 439 audioTrack.stop(); 440 } 441 442 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + 443 audioTrack.hashCode()); 444 blockUntilDone(params); 445 if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]"); 446 447 // The last call to AudioTrack.write( ) will return only after 448 // all data from the audioTrack has been sent to the mixer, so 449 // it's safe to release at this point. Make sure release() and the call 450 // that set the audio track to null are performed atomically. 451 synchronized (this) { 452 // Never allow the audioTrack to be observed in a state where 453 // it is released but non null. The only case this might happen 454 // is in the various stopFoo methods that call AudioTrack#stop from 455 // different threads, but they are synchronized on AudioPlayBackHandler#this 456 // too. 457 audioTrack.release(); 458 params.setAudioTrack(null); 459 } 460 if (params.isError()) { 461 params.getDispatcher().dispatchOnError(); 462 } else { 463 params.getDispatcher().dispatchOnDone(); 464 } 465 mLastSynthesisRequest = null; 466 params.mLogger.onWriteData(); 467 } 468 469 /** 470 * The minimum increment of time to wait for an audiotrack to finish 471 * playing. 472 */ 473 private static final long MIN_SLEEP_TIME_MS = 20; 474 475 /** 476 * The maximum increment of time to sleep while waiting for an audiotrack 477 * to finish playing. 478 */ 479 private static final long MAX_SLEEP_TIME_MS = 2500; 480 481 /** 482 * The maximum amount of time to wait for an audio track to make progress while 483 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 484 * could happen in exceptional circumstances like a media_server crash. 485 */ 486 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 487 488 private static void blockUntilDone(SynthesisMessageParams params) { 489 if (params.mAudioTrack == null || params.mBytesWritten <= 0) { 490 return; 491 } 492 493 if (params.mIsShortUtterance) { 494 // In this case we would have called AudioTrack#stop() to flush 495 // buffers to the mixer. This makes the playback head position 496 // unobservable and notification markers do not work reliably. We 497 // have no option but to wait until we think the track would finish 498 // playing and release it after. 499 // 500 // This isn't as bad as it looks because (a) We won't end up waiting 501 // for much longer than we should because even at 4khz mono, a short 502 // utterance weighs in at about 2 seconds, and (b) such short utterances 503 // are expected to be relatively infrequent and in a stream of utterances 504 // this shows up as a slightly longer pause. 505 blockUntilEstimatedCompletion(params); 506 } else { 507 blockUntilCompletion(params); 508 } 509 } 510 511 private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) { 512 final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; 513 final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz); 514 515 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 516 517 try { 518 Thread.sleep(estimatedTimeMs); 519 } catch (InterruptedException ie) { 520 // Do nothing. 521 } 522 } 523 524 private static void blockUntilCompletion(SynthesisMessageParams params) { 525 final AudioTrack audioTrack = params.mAudioTrack; 526 final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; 527 528 int previousPosition = -1; 529 int currentPosition = 0; 530 long blockedTimeMs = 0; 531 532 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 533 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) { 534 535 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 536 audioTrack.getSampleRate(); 537 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 538 539 // Check if the audio track has made progress since the last loop 540 // iteration. We should then add in the amount of time that was 541 // spent sleeping in the last iteration. 542 if (currentPosition == previousPosition) { 543 // This works only because the sleep time that would have been calculated 544 // would be the same in the previous iteration too. 545 blockedTimeMs += sleepTimeMs; 546 // If we've taken too long to make progress, bail. 547 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 548 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 549 "for AudioTrack to make progress, Aborting"); 550 break; 551 } 552 } else { 553 blockedTimeMs = 0; 554 } 555 previousPosition = currentPosition; 556 557 if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 558 " Playback position : " + currentPosition + ", Length in frames : " 559 + lengthInFrames); 560 try { 561 Thread.sleep(sleepTimeMs); 562 } catch (InterruptedException ie) { 563 break; 564 } 565 } 566 } 567 568 private static final long clip(long value, long min, long max) { 569 if (value < min) { 570 return min; 571 } 572 573 if (value > max) { 574 return max; 575 } 576 577 return value; 578 } 579 580 private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) { 581 final int channelConfig = getChannelConfig(params.mChannelCount); 582 final int sampleRateInHz = params.mSampleRateInHz; 583 final int audioFormat = params.mAudioFormat; 584 585 int minBufferSizeInBytes 586 = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); 587 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 588 589 AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig, 590 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); 591 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 592 Log.w(TAG, "Unable to create audio track."); 593 audioTrack.release(); 594 return null; 595 } 596 params.mAudioBufferSize = bufferSizeInBytes; 597 598 setupVolume(audioTrack, params.mVolume, params.mPan); 599 return audioTrack; 600 } 601 602 static int getChannelConfig(int channelCount) { 603 if (channelCount == 1) { 604 return AudioFormat.CHANNEL_OUT_MONO; 605 } else if (channelCount == 2){ 606 return AudioFormat.CHANNEL_OUT_STEREO; 607 } 608 609 return 0; 610 } 611 612 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 613 float vol = clip(volume, 0.0f, 1.0f); 614 float panning = clip(pan, -1.0f, 1.0f); 615 float volLeft = vol; 616 float volRight = vol; 617 if (panning > 0.0f) { 618 volLeft *= (1.0f - panning); 619 } else if (panning < 0.0f) { 620 volRight *= (1.0f + panning); 621 } 622 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 623 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 624 Log.e(TAG, "Failed to set volume"); 625 } 626 } 627 628 private static float clip(float value, float min, float max) { 629 return value > max ? max : (value < min ? min : value); 630 } 631 632} 633