AudioPlaybackHandler.java revision 90e5650f96dabadaaf141beae20a646855073ae1
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.text.TextUtils;
import android.util.Log;

import java.util.Iterator;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Drains a priority queue of TTS playback messages (streamed synthesis data,
 * pre-recorded audio, and timed silence) on a single dedicated thread.
 *
 * Producers enqueue via the {@code enqueue*} methods; consumption happens on
 * {@link MessageLoop}. All enqueue/remove operations are synchronized on this
 * object to keep queue mutations predictable relative to stop/quit requests.
 */
class AudioPlaybackHandler {
    private static final String TAG = "TTS.AudioPlaybackHandler";
    private static final boolean DBG_THREADING = false;
    private static final boolean DBG = false;

    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    // Message types handled by the playback thread.
    private static final int SYNTHESIS_START = 1;
    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
    private static final int SYNTHESIS_DONE = 3;

    private static final int PLAY_AUDIO = 5;
    private static final int PLAY_SILENCE = 6;

    private static final int SHUTDOWN = -1;

    // Queue priorities: lower value wins. Control messages (stop/shutdown)
    // jump ahead of ordinary playback traffic.
    private static final int DEFAULT_PRIORITY = 1;
    private static final int HIGH_PRIORITY = 0;

    private final PriorityBlockingQueue<ListEntry> mQueue =
            new PriorityBlockingQueue<ListEntry>();
    private final Thread mHandlerThread;

    // The message currently being processed on the playback thread, if any.
    private volatile MessageParams mCurrentParams = null;
    // Used only for book keeping and error detection.
    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
    // Used to order incoming messages in our priority queue.
    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);


    AudioPlaybackHandler() {
        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
    }

    public void start() {
        mHandlerThread.start();
    }

    /**
     * Stops all synthesis for a given {@code token}. If the current token
     * is currently being processed, an effort will be made to stop it but
     * that is not guaranteed.
     *
     * NOTE: This assumes that all other messages in the queue with {@code token}
     * have been removed already.
     *
     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
     */
    private void stop(MessageParams token) {
        if (token == null) {
            return;
        }

        if (DBG) Log.d(TAG, "Stopping token : " + token);

        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
            if (current != null) {
                // Stop the current audio track if it's still playing.
                // The audio track is thread safe in this regard. The current
                // handleSynthesisDataAvailable call will return soon after this
                // call.
                current.stop();
            }
            // This is safe because PlaybackSynthesisCallback#stop would have
            // been called before this method, and will no longer enqueue any
            // audio for this token.
            //
            // (Even if it did, all it would result in is a warning message).
            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
            ((AudioMessageParams) token).getPlayer().stop();
            // No cleanup required for audio messages.
        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
            // Unblocks the playback thread if it is sleeping in handleSilence.
            ((SilenceMessageParams) token).getConditionVariable().open();
            // No cleanup required for silence messages.
        }
    }

    // -----------------------------------------------------
    // Methods that add and remove elements from the queue. These do not
    // need to be synchronized strictly speaking, but they make the behaviour
    // a lot more predictable. (though it would still be correct without
    // synchronization).
    // -----------------------------------------------------

    /**
     * Removes all queued items belonging to {@code callingApp} and stops the
     * currently playing item if it belongs to that app.
     */
    synchronized public void removePlaybackItems(String callingApp) {
        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
        removeMessages(callingApp);

        final MessageParams current = getCurrentParams();
        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
            stop(current);
        }
    }

    /** Removes every queued item and stops whatever is currently playing. */
    synchronized public void removeAllItems() {
        if (DBG_THREADING) Log.d(TAG, "Removing all items");
        removeAllMessages();
        stop(getCurrentParams());
    }

    /**
     * @return false iff the queue is empty and no queue item is currently
     *         being handled, true otherwise.
     */
    public boolean isSpeaking() {
        return (mQueue.peek() != null) || (mCurrentParams != null);
    }

    /**
     * Shut down the audio playback thread.
     */
    synchronized public void quit() {
        removeAllMessages();
        stop(getCurrentParams());
        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
    }

    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_START, token));
    }

    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
    }

    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
    }

    synchronized void enqueueAudio(AudioMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
        mQueue.add(new ListEntry(PLAY_AUDIO, token));
    }

    synchronized void enqueueSilence(SilenceMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
        mQueue.add(new ListEntry(PLAY_SILENCE, token));
    }

    // -----------------------------------------
    // End of public API methods.
    // -----------------------------------------

    // -----------------------------------------
    // Methods for managing the message queue.
    // -----------------------------------------

    /*
     * The MessageLoop is a handler like implementation that
     * processes messages from a priority queue.
     */
    private final class MessageLoop implements Runnable {
        @Override
        public void run() {
            while (true) {
                ListEntry entry = null;
                try {
                    entry = mQueue.take();
                } catch (InterruptedException ie) {
                    // Restore the interrupt status so owners of this thread
                    // can still observe the interruption.
                    Thread.currentThread().interrupt();
                    return;
                }

                if (entry.mWhat == SHUTDOWN) {
                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
                    return;
                }

                if (DBG) {
                    Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
                            + " ,seqId : " + entry.mSequenceId);
                }

                setCurrentParams(entry.mMessage);
                handleMessage(entry);
                setCurrentParams(null);
            }
        }
    }

    /*
     * Atomically clear the queue of all messages.
     */
    synchronized private void removeAllMessages() {
        mQueue.clear();
    }

    /*
     * Remove all messages that originate from a given calling app.
     */
    synchronized private void removeMessages(String callingApp) {
        Iterator<ListEntry> it = mQueue.iterator();

        while (it.hasNext()) {
            final ListEntry current = it.next();
            // The null check is to prevent us from removing control messages,
            // such as a shutdown message.
            if (current.mMessage != null &&
                    callingApp.equals(current.mMessage.getCallingApp())) {
                it.remove();
            }
        }
    }

    /*
     * An element of our priority queue of messages. Each message has a priority,
     * and a sequence id (defined by the order of enqueue calls). Among messages
     * with the same priority, messages that were received earlier win out.
     */
    private final class ListEntry implements Comparable<ListEntry> {
        final int mWhat;
        final MessageParams mMessage;
        final int mPriority;
        final long mSequenceId;

        private ListEntry(int what, MessageParams message) {
            this(what, message, DEFAULT_PRIORITY);
        }

        private ListEntry(int what, MessageParams message, int priority) {
            mWhat = what;
            mMessage = message;
            mPriority = priority;
            mSequenceId = mSequenceIdCtr.incrementAndGet();
        }

        @Override
        public int compareTo(ListEntry that) {
            if (that == this) {
                return 0;
            }

            // Note that this is always 0, 1 or -1.
            int priorityDiff = mPriority - that.mPriority;
            if (priorityDiff == 0) {
                // The == case cannot occur (sequence ids are unique).
                return (mSequenceId < that.mSequenceId) ? -1 : 1;
            }

            return priorityDiff;
        }
    }

    private void setCurrentParams(MessageParams p) {
        if (DBG_THREADING) {
            if (p != null) {
                Log.d(TAG, "Started handling :" + p);
            } else {
                Log.d(TAG, "End handling : " + mCurrentParams);
            }
        }
        mCurrentParams = p;
    }

    private MessageParams getCurrentParams() {
        return mCurrentParams;
    }

    // -----------------------------------------
    // Methods for dealing with individual messages, the methods
    // below do the actual work.
    // -----------------------------------------

    private void handleMessage(ListEntry entry) {
        final MessageParams msg = entry.mMessage;
        if (entry.mWhat == SYNTHESIS_START) {
            handleSynthesisStart(msg);
        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
            handleSynthesisDataAvailable(msg);
        } else if (entry.mWhat == SYNTHESIS_DONE) {
            handleSynthesisDone(msg);
        } else if (entry.mWhat == PLAY_AUDIO) {
            handleAudio(msg);
        } else if (entry.mWhat == PLAY_SILENCE) {
            handleSilence(msg);
        }
    }

    // Currently implemented as blocking the audio playback thread for the
    // specified duration. If a call to stop() is made, the thread
    // unblocks.
    private void handleSilence(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSilence()");
        SilenceMessageParams params = (SilenceMessageParams) msg;
        if (params.getSilenceDurationMs() > 0) {
            params.getConditionVariable().block(params.getSilenceDurationMs());
        }
        params.getDispatcher().dispatchUtteranceCompleted();
        if (DBG) Log.d(TAG, "handleSilence() done.");
    }

    // Plays back audio from a given URI. No TTS engine involvement here.
    private void handleAudio(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleAudio()");
        AudioMessageParams params = (AudioMessageParams) msg;
        // Note that the BlockingMediaPlayer spawns a separate thread.
        //
        // TODO: This can be avoided.
        params.getPlayer().startAndWait();
        params.getDispatcher().dispatchUtteranceCompleted();
        if (DBG) Log.d(TAG, "handleAudio() done.");
    }

    // Denotes the start of a new synthesis request. We create a new
    // audio track, and prepare it for incoming data.
    //
    // Note that since all TTS synthesis happens on a single thread, we
    // should ALWAYS see the following order :
    //
    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
    // OR
    // handleSynthesisCompleteDataAvailable.
    private void handleSynthesisStart(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSynthesisStart()");
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;

        // Oops, looks like the engine forgot to call done(). We go through
        // extra trouble to clean the data to prevent the AudioTrack resources
        // from being leaked.
        if (mLastSynthesisRequest != null) {
            Log.w(TAG, "Error : Missing call to done() for request : " +
                    mLastSynthesisRequest);
            handleSynthesisDone(mLastSynthesisRequest);
        }

        mLastSynthesisRequest = param;

        // Create the audio track. May be null if track creation failed;
        // handleSynthesisDataAvailable deals with the null case, so only the
        // debug log needs the guard (calling hashCode() on null would NPE).
        final AudioTrack audioTrack = createStreamingAudioTrack(
                param.mStreamType, param.mSampleRateInHz, param.mAudioFormat,
                param.mChannelCount, param.mVolume, param.mPan);

        if (DBG && audioTrack != null) {
            Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
        }

        param.setAudioTrack(audioTrack);
    }

    // More data available to be flushed to the audio track.
    private void handleSynthesisDataAvailable(MessageParams msg) {
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
        if (param.getAudioTrack() == null) {
            Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
            return;
        }

        if (param != mLastSynthesisRequest) {
            Log.e(TAG, "Call to dataAvailable without done() / start()");
            return;
        }

        final AudioTrack audioTrack = param.getAudioTrack();
        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();

        if (bufferCopy == null) {
            Log.e(TAG, "No buffers available to play.");
            return;
        }

        int playState = audioTrack.getPlayState();
        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }
        int count = 0;
        while (count < bufferCopy.mLength) {
            // Note that we don't take bufferCopy.mOffset into account because
            // it is guaranteed to be 0.
            //
            // Write only the remaining bytes: after a partial write, passing
            // the full mLength with an advanced offset would reference bytes
            // past the end of the buffer.
            int written = audioTrack.write(bufferCopy.mBytes, count,
                    bufferCopy.mLength - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        param.mBytesWritten += count;
        param.mLogger.onPlaybackStart();
    }

    private void handleSynthesisDone(MessageParams msg) {
        final SynthesisMessageParams params = (SynthesisMessageParams) msg;
        handleSynthesisDone(params);
        // This call is delayed more than it should be, but we are
        // certain at this point that we have all the data we want.
        params.mLogger.onWriteData();
    }

    // Wait for the audio track to stop playing, and then release its resources.
    private void handleSynthesisDone(SynthesisMessageParams params) {
        if (DBG) Log.d(TAG, "handleSynthesisDone()");
        final AudioTrack audioTrack = params.getAudioTrack();

        try {
            if (audioTrack != null) {
                if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
                        audioTrack.hashCode());
                blockUntilDone(params);
                if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");
                // The last call to AudioTrack.write( ) will return only after
                // all data from the audioTrack has been sent to the mixer, so
                // it's safe to release at this point.
                audioTrack.release();
            }
        } finally {
            // Always clear the track and notify the dispatcher, even if
            // blockUntilDone / release throw.
            params.setAudioTrack(null);
            params.getDispatcher().dispatchUtteranceCompleted();
            mLastSynthesisRequest = null;
        }
    }

    // Block (by polling + sleeping) until the audio track's playback head has
    // consumed everything we wrote, or playback is stopped externally.
    private static void blockUntilDone(SynthesisMessageParams params) {
        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
            return;
        }

        final AudioTrack audioTrack = params.mAudioTrack;
        final int bytesPerFrame = getBytesPerFrame(params.mAudioFormat);
        if (bytesPerFrame <= 0) {
            // Unknown audio format - we cannot compute a frame count to
            // wait for, so don't block at all.
            return;
        }
        final int lengthInBytes = params.mBytesWritten;
        final int lengthInFrames = lengthInBytes / bytesPerFrame;

        int currentPosition = 0;
        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
            if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
                break;
            }

            // Use long arithmetic - the frame delta times 1000 can overflow
            // an int for long utterances.
            long estimatedTimeMs = ((lengthInFrames - (long) currentPosition) * 1000) /
                    audioTrack.getSampleRate();

            if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
                    " Playback position : " + currentPosition);
            try {
                Thread.sleep(estimatedTimeMs);
            } catch (InterruptedException ie) {
                // Restore the interrupt status before bailing out.
                Thread.currentThread().interrupt();
                break;
            }
        }
    }

    /**
     * Creates a streaming {@link AudioTrack} sized to at least
     * {@link #MIN_AUDIO_BUFFER_SIZE} bytes.
     *
     * @return the initialized track, or {@code null} if initialization failed.
     */
    private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz,
            int audioFormat, int channelCount, float volume, float pan) {
        int channelConfig = getChannelConfig(channelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        setupVolume(audioTrack, volume, pan);
        return audioTrack;
    }

    // Maps a channel count to an AudioFormat channel mask; 0 for unsupported
    // counts (which AudioTrack.getMinBufferSize will reject).
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2){
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

    // Returns the size in bytes of a single frame (per channel sample), or -1
    // for unsupported encodings.
    static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }

    // Applies the requested volume and stereo pan to the track. Pan > 0
    // attenuates the left channel, pan < 0 attenuates the right channel.
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        float vol = clip(volume, 0.0f, 1.0f);
        float panning = clip(pan, -1.0f, 1.0f);
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    // Clamps value to the inclusive range [min, max].
    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}