AudioPlaybackHandler.java revision 8d1fc2403b8277e68d7816b2bbf05464a4c7a58a
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16package android.speech.tts; 17 18import android.media.AudioFormat; 19import android.media.AudioTrack; 20import android.os.Handler; 21import android.os.Looper; 22import android.os.Message; 23import android.speech.tts.SynthesisMessageParams.ListEntry; 24import android.util.Log; 25 26class AudioPlaybackHandler extends Handler { 27 private static final String TAG = "TTS.AudioPlaybackHandler"; 28 private static final boolean DBG = false; 29 30 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 31 32 private static final int SYNTHESIS_START = 1; 33 private static final int SYNTHESIS_DATA_AVAILABLE = 2; 34 private static final int SYNTHESIS_COMPLETE_DATA_AVAILABLE = 3; 35 private static final int SYNTHESIS_DONE = 4; 36 37 private static final int PLAY_AUDIO = 5; 38 private static final int PLAY_SILENCE = 6; 39 40 // Accessed by multiple threads, synchronized by "this". 41 private MessageParams mCurrentParams; 42 // Used only for book keeping and error detection. 43 private SynthesisMessageParams mLastSynthesisRequest; 44 45 AudioPlaybackHandler(Looper looper) { 46 super(looper); 47 } 48 49 @Override 50 public synchronized void handleMessage(Message msg) { 51 if (msg.what == SYNTHESIS_START) { 52 mCurrentParams = (SynthesisMessageParams) msg.obj; 53 handleSynthesisStart(msg); 54 } else if (msg.what == SYNTHESIS_DATA_AVAILABLE) { 55 handleSynthesisDataAvailable(msg); 56 } else if (msg.what == SYNTHESIS_DONE) { 57 handleSynthesisDone(msg); 58 } else if (msg.what == SYNTHESIS_COMPLETE_DATA_AVAILABLE) { 59 handleSynthesisCompleteDataAvailable(msg); 60 } else if (msg.what == PLAY_AUDIO) { 61 handleAudio(msg); 62 } else if (msg.what == PLAY_SILENCE) { 63 handleSilence(msg); 64 } 65 66 mCurrentParams = null; 67 } 68 69 /** 70 * Stops all synthesis for a given {@code token}. If the current token 71 * is currently being processed, an effort will be made to stop it but 72 * that is not guaranteed. 73 */ 74 synchronized public void stop(MessageParams token) { 75 removeCallbacksAndMessages(token); 76 77 if (token.getType() == MessageParams.TYPE_SYNTHESIS) { 78 sendMessageAtFrontOfQueue(obtainMessage(SYNTHESIS_DONE, token)); 79 } else if (token == mCurrentParams) { 80 if (token.getType() == MessageParams.TYPE_AUDIO) { 81 ((AudioMessageParams) mCurrentParams).getPlayer().stop(); 82 } else if (token.getType() == MessageParams.TYPE_SILENCE) { 83 ((SilenceMessageParams) mCurrentParams).getConditionVariable().open(); 84 } 85 } 86 } 87 88 /** 89 * Shut down the audio playback thread. 90 */ 91 synchronized public void quit() { 92 if (mCurrentParams != null) { 93 stop(mCurrentParams); 94 } 95 getLooper().quit(); 96 } 97 98 void enqueueSynthesisStart(SynthesisMessageParams token) { 99 sendMessage(obtainMessage(SYNTHESIS_START, token)); 100 } 101 102 void enqueueSynthesisDataAvailable(SynthesisMessageParams token) { 103 sendMessage(obtainMessage(SYNTHESIS_DATA_AVAILABLE, token)); 104 } 105 106 void enqueueSynthesisCompleteDataAvailable(SynthesisMessageParams token) { 107 sendMessage(obtainMessage(SYNTHESIS_COMPLETE_DATA_AVAILABLE, token)); 108 } 109 110 void enqueueSynthesisDone(SynthesisMessageParams token) { 111 sendMessage(obtainMessage(SYNTHESIS_DONE, token)); 112 } 113 114 void enqueueAudio(AudioMessageParams token) { 115 sendMessage(obtainMessage(PLAY_AUDIO, token)); 116 } 117 118 void enqueueSilence(SilenceMessageParams token) { 119 sendMessage(obtainMessage(PLAY_SILENCE, token)); 120 } 121 122 // ----------------------------------------- 123 // End of public API methods. 124 // ----------------------------------------- 125 126 // Currently implemented as blocking the audio playback thread for the 127 // specified duration. If a call to stop() is made, the thread 128 // unblocks. 129 private void handleSilence(Message msg) { 130 if (DBG) Log.d(TAG, "handleSilence()"); 131 SilenceMessageParams params = (SilenceMessageParams) msg.obj; 132 if (params.getSilenceDurationMs() > 0) { 133 params.getConditionVariable().block(params.getSilenceDurationMs()); 134 } 135 params.getDispatcher().dispatchUtteranceCompleted(); 136 if (DBG) Log.d(TAG, "handleSilence() done."); 137 } 138 139 // Plays back audio from a given URI. No TTS engine involvement here. 140 private void handleAudio(Message msg) { 141 if (DBG) Log.d(TAG, "handleAudio()"); 142 AudioMessageParams params = (AudioMessageParams) msg.obj; 143 // Note that the BlockingMediaPlayer spawns a separate thread. 144 // 145 // TODO: This can be avoided. 146 params.getPlayer().startAndWait(); 147 params.getDispatcher().dispatchUtteranceCompleted(); 148 if (DBG) Log.d(TAG, "handleAudio() done."); 149 } 150 151 // Denotes the start of a new synthesis request. We create a new 152 // audio track, and prepare it for incoming data. 153 // 154 // Note that since all TTS synthesis happens on a single thread, we 155 // should ALWAYS see the following order : 156 // 157 // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone 158 // OR 159 // handleSynthesisCompleteDataAvailable. 160 private void handleSynthesisStart(Message msg) { 161 if (DBG) Log.d(TAG, "handleSynthesisStart()"); 162 final SynthesisMessageParams param = (SynthesisMessageParams) msg.obj; 163 164 // Oops, looks like the engine forgot to call done(). We go through 165 // extra trouble to clean the data to prevent the AudioTrack resources 166 // from being leaked. 167 if (mLastSynthesisRequest != null) { 168 Log.w(TAG, "Error : Missing call to done() for request : " + 169 mLastSynthesisRequest); 170 handleSynthesisDone(mLastSynthesisRequest); 171 } 172 173 mLastSynthesisRequest = param; 174 175 // Create the audio track. 176 final AudioTrack audioTrack = createStreamingAudioTrack( 177 param.mStreamType, param.mSampleRateInHz, param.mAudioFormat, 178 param.mChannelCount, param.mVolume, param.mPan); 179 180 param.setAudioTrack(audioTrack); 181 } 182 183 // More data available to be flushed to the audio track. 184 private void handleSynthesisDataAvailable(Message msg) { 185 final SynthesisMessageParams param = (SynthesisMessageParams) msg.obj; 186 if (param.getAudioTrack() == null) { 187 Log.w(TAG, "Error : null audio track in handleDataAvailable."); 188 return; 189 } 190 191 if (param != mLastSynthesisRequest) { 192 Log.e(TAG, "Call to dataAvailable without done() / start()"); 193 return; 194 } 195 196 final AudioTrack audioTrack = param.getAudioTrack(); 197 final ListEntry bufferCopy = param.getNextBuffer(); 198 199 if (bufferCopy == null) { 200 Log.e(TAG, "No buffers available to play."); 201 return; 202 } 203 204 int playState = audioTrack.getPlayState(); 205 if (playState == AudioTrack.PLAYSTATE_STOPPED) { 206 if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode()); 207 audioTrack.play(); 208 } 209 int count = 0; 210 while (count < bufferCopy.mLength) { 211 // Note that we don't take bufferCopy.mOffset into account because 212 // it is guaranteed to be 0. 213 int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mLength); 214 if (written <= 0) { 215 break; 216 } 217 count += written; 218 } 219 } 220 221 private void handleSynthesisDone(Message msg) { 222 final SynthesisMessageParams params = (SynthesisMessageParams) msg.obj; 223 handleSynthesisDone(params); 224 } 225 226 // Flush all remaining data to the audio track, stop it and release 227 // all it's resources. 228 private void handleSynthesisDone(SynthesisMessageParams params) { 229 if (DBG) Log.d(TAG, "handleSynthesisDone()"); 230 final AudioTrack audioTrack = params.getAudioTrack(); 231 232 try { 233 if (audioTrack != null) { 234 audioTrack.flush(); 235 audioTrack.stop(); 236 audioTrack.release(); 237 } 238 } finally { 239 params.setAudioTrack(null); 240 params.getDispatcher().dispatchUtteranceCompleted(); 241 mLastSynthesisRequest = null; 242 } 243 } 244 245 private void handleSynthesisCompleteDataAvailable(Message msg) { 246 final SynthesisMessageParams params = (SynthesisMessageParams) msg.obj; 247 if (DBG) Log.d(TAG, "completeAudioAvailable(" + params + ")"); 248 249 // Channel config and bytes per frame are checked before 250 // this message is sent. 251 int channelConfig = AudioPlaybackHandler.getChannelConfig(params.mChannelCount); 252 int bytesPerFrame = AudioPlaybackHandler.getBytesPerFrame(params.mAudioFormat); 253 254 ListEntry entry = params.getNextBuffer(); 255 256 if (entry == null) { 257 Log.w(TAG, "completeDataAvailable : No buffers available to play."); 258 return; 259 } 260 261 final AudioTrack audioTrack = new AudioTrack(params.mStreamType, params.mSampleRateInHz, 262 channelConfig, params.mAudioFormat, entry.mLength, AudioTrack.MODE_STATIC); 263 264 // So that handleDone can access this correctly. 265 params.mAudioTrack = audioTrack; 266 267 try { 268 audioTrack.write(entry.mBytes, entry.mOffset, entry.mLength); 269 setupVolume(audioTrack, params.mVolume, params.mPan); 270 audioTrack.play(); 271 blockUntilDone(audioTrack, bytesPerFrame, entry.mLength); 272 if (DBG) Log.d(TAG, "Wrote data to audio track successfully : " + entry.mLength); 273 } catch (IllegalStateException ex) { 274 Log.e(TAG, "Playback error", ex); 275 } finally { 276 handleSynthesisDone(msg); 277 } 278 } 279 280 281 private static void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) { 282 int lengthInFrames = length / bytesPerFrame; 283 int currentPosition = 0; 284 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) { 285 long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 286 audioTrack.getSampleRate(); 287 audioTrack.getPlayState(); 288 if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," + 289 " Playback position : " + currentPosition); 290 try { 291 Thread.sleep(estimatedTimeMs); 292 } catch (InterruptedException ie) { 293 break; 294 } 295 } 296 } 297 298 private static AudioTrack createStreamingAudioTrack(int streamType, int sampleRateInHz, 299 int audioFormat, int channelCount, float volume, float pan) { 300 int channelConfig = getChannelConfig(channelCount); 301 302 int minBufferSizeInBytes 303 = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); 304 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 305 306 AudioTrack audioTrack = new AudioTrack(streamType, sampleRateInHz, channelConfig, 307 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); 308 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 309 Log.w(TAG, "Unable to create audio track."); 310 audioTrack.release(); 311 return null; 312 } 313 314 setupVolume(audioTrack, volume, pan); 315 return audioTrack; 316 } 317 318 static int getChannelConfig(int channelCount) { 319 if (channelCount == 1) { 320 return AudioFormat.CHANNEL_OUT_MONO; 321 } else if (channelCount == 2){ 322 return AudioFormat.CHANNEL_OUT_STEREO; 323 } 324 325 return 0; 326 } 327 328 static int getBytesPerFrame(int audioFormat) { 329 if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) { 330 return 1; 331 } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) { 332 return 2; 333 } 334 335 return -1; 336 } 337 338 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 339 float vol = clip(volume, 0.0f, 1.0f); 340 float panning = clip(pan, -1.0f, 1.0f); 341 float volLeft = vol; 342 float volRight = vol; 343 if (panning > 0.0f) { 344 volLeft *= (1.0f - panning); 345 } else if (panning < 0.0f) { 346 volRight *= (1.0f + panning); 347 } 348 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 349 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 350 Log.e(TAG, "Failed to set volume"); 351 } 352 } 353 354 private static float clip(float value, float min, float max) { 355 return value > max ? max : (value < min ? min : value); 356 } 357 358} 359