VoiceDialerActivity.java revision ed873c21cd8ad85df735ec841b147f5ac0f740a9
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.voicedialer; 18 19import android.app.Activity; 20import android.app.AlertDialog; 21import android.bluetooth.BluetoothHeadset; 22import android.content.BroadcastReceiver; 23import android.content.Context; 24import android.content.DialogInterface; 25import android.content.Intent; 26import android.content.IntentFilter; 27import android.media.AudioManager; 28import android.media.ToneGenerator; 29import android.os.Bundle; 30import android.os.Environment; 31import android.os.Handler; 32import android.os.SystemProperties; 33import android.os.Vibrator; 34import android.speech.tts.TextToSpeech; 35import android.util.Config; 36import android.util.Log; 37import android.view.View; 38import android.view.WindowManager; 39import android.widget.TextView; 40import java.io.File; 41import java.io.InputStream; 42import java.util.HashMap; 43import java.io.IOException; 44 45/** 46 * TODO: get rid of the anonymous classes 47 * 48 * This class is the user interface of the VoiceDialer application. 49 * It begins in the INITIALIZING state. 
50 * 51 * INITIALIZING : 52 * This transitions out on events from TTS and the BluetoothHeadset 53 * once TTS is initialized and the SCO channel is set up: 54 * * prompt the user "speak now" 55 * * transition to the SPEAKING_GREETING state 56 * 57 * SPEAKING_GREETING: 58 * This transitions out only on events from TTS or the fallback runnable 59 * once the greeting utterance completes: 60 * * begin listening for the command using the {@link CommandRecognizerEngine} 61 * * transition to the WAITING_FOR_COMMAND state 62 * 63 * WAITING_FOR_COMMAND : 64 * This transitions out only on events from the recognizer 65 * on RecognitionFailure or RecognitionError: 66 * * begin speaking "try again." 67 * * transition to the SPEAKING_TRY_AGAIN state 68 * on RecognitionSuccess: 69 * single result: 70 * * begin speaking the sentence describing the intent 71 * * transition to the SPEAKING_CHOSEN_ACTION state 72 * multiple results: 73 * * begin speaking each of the choices in order 74 * * transition to the SPEAKING_CHOICES state 75 * 76 * SPEAKING_TRY_AGAIN: 77 * This transitions out only on events from TTS or the fallback runnable 78 * once the try again utterance completes: 79 * * begin listening for the command using the {@link CommandRecognizerEngine} 80 * * transition to the WAITING_FOR_COMMAND state 81 * 82 * SPEAKING_CHOSEN_ACTION: 83 * This transitions out only on events from TTS or the fallback runnable 84 * once the utterance completes: 85 * * dispatch the intent that was chosen 86 * * transition to the EXITING state 87 * * finish the activity 88 * 89 * SPEAKING_CHOICES: 90 * This transitions out only on events from TTS or the fallback runnable 91 * once the utterance completes: 92 * * begin listening for the user's choice using the 93 * {@link PhoneTypeChoiceRecognizerEngine} 94 * * transition to the WAITING_FOR_CHOICE state. 
95 * 96 * WAITING_FOR_CHOICE: 97 * This transitions out only on events from the recognizer 98 * on RecognitionFailure or RecognitionError: 99 * * begin speaking the "invalid choice" message, along with the list 100 * of choices 101 * * transition to the SPEAKING_CHOICES state 102 * on RecognitionSuccess: 103 * if the result is "try again", prompt the user to say a command, begin 104 * listening for the command, and transition back to the WAITING_FOR_COMMAND 105 * state. 106 * if the result is "exit", then begin speaking the "goodbye" message and 107 * transition to the SPEAKING_GOODBYE state. 108 * if the result is a valid choice, begin speaking the action chosen, initiate 109 * the command the user has chosen, and exit. 110 * if not a valid choice, speak the "invalid choice" message, begin 111 * speaking the choices in order again, and transition to the 112 * SPEAKING_CHOICES state. 113 * 114 * SPEAKING_GOODBYE: 115 * This transitions out only on events from TTS or the fallback runnable 116 * once the utterance completes or after a time out: finish the activity. 
117 * 118 */ 119 120public class VoiceDialerActivity extends Activity { 121 122 private static final String TAG = "VoiceDialerActivity"; 123 124 private static final String MICROPHONE_EXTRA = "microphone"; 125 private static final String CONTACTS_EXTRA = "contacts"; 126 127 private static final String SPEAK_NOW_UTTERANCE = "speak_now"; 128 private static final String TRY_AGAIN_UTTERANCE = "try_again"; 129 private static final String CHOSEN_ACTION_UTTERANCE = "chose_action"; 130 private static final String GOODBYE_UTTERANCE = "goodbye"; 131 private static final String CHOICES_UTTERANCE = "choices"; 132 133 private static final int FIRST_UTTERANCE_DELAY = 300; 134 private static final int MAX_TTS_DELAY = 6000; 135 private static final int EXIT_DELAY = 2000; 136 137 private static final int BLUETOOTH_SAMPLE_RATE = 8000; 138 private static final int REGULAR_SAMPLE_RATE = 11025; 139 140 private static final int INITIALIZING = 0; 141 private static final int SPEAKING_GREETING = 1; 142 private static final int WAITING_FOR_COMMAND = 2; 143 private static final int SPEAKING_TRY_AGAIN = 3; 144 private static final int SPEAKING_CHOICES = 4; 145 private static final int WAITING_FOR_CHOICE = 5; 146 private static final int WAITING_FOR_DIALOG_CHOICE = 6; 147 private static final int SPEAKING_CHOSEN_ACTION = 7; 148 private static final int SPEAKING_GOODBYE = 8; 149 private static final int EXITING = 9; 150 151 private static final CommandRecognizerEngine mCommandEngine = 152 new CommandRecognizerEngine(); 153 private static final PhoneTypeChoiceRecognizerEngine mPhoneTypeChoiceEngine = 154 new PhoneTypeChoiceRecognizerEngine(); 155 private CommandRecognizerClient mCommandClient; 156 private ChoiceRecognizerClient mChoiceClient; 157 private ToneGenerator mToneGenerator; 158 private Handler mHandler; 159 private Thread mRecognizerThread = null; 160 private AudioManager mAudioManager; 161 private BluetoothHeadset mBluetoothHeadset; 162 private TextToSpeech mTts; 163 private 
HashMap<String, String> mTtsParams; 164 private VoiceDialerBroadcastReceiver mReceiver; 165 private int mBluetoothAudioState; 166 private boolean mWaitingForTts; 167 private boolean mWaitingForScoConnection; 168 private Intent[] mAvailableChoices; 169 private Intent mChosenAction; 170 private int mBluetoothVoiceVolume; 171 private int mState; 172 private AlertDialog mAlertDialog; 173 private Runnable mFallbackRunnable; 174 private boolean mUsingBluetooth = false; 175 private int mSampleRate; 176 177 @Override 178 protected void onCreate(Bundle icicle) { 179 if (Config.LOGD) Log.d(TAG, "onCreate"); 180 super.onCreate(icicle); 181 mHandler = new Handler(); 182 mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE); 183 mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING, 184 ToneGenerator.MAX_VOLUME); 185 } 186 187 protected void onStart() { 188 if (Config.LOGD) Log.d(TAG, "onStart " + getIntent()); 189 super.onStart(); 190 191 mState = INITIALIZING; 192 mChosenAction = null; 193 mAudioManager.requestAudioFocus( 194 null, AudioManager.STREAM_MUSIC, 195 AudioManager.AUDIOFOCUS_GAIN_TRANSIENT); 196 197 // set this flag so this activity will stay in front of the keyguard 198 int flags = WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED; 199 getWindow().addFlags(flags); 200 201 // open main window 202 setTheme(android.R.style.Theme_Dialog); 203 setTitle(R.string.title); 204 setContentView(R.layout.voice_dialing); 205 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 206 findViewById(R.id.retry_view).setVisibility(View.INVISIBLE); 207 findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE); 208 if (RecognizerLogger.isEnabled(this)) { 209 ((TextView) findViewById(R.id.substate)).setText(R.string.logging_enabled); 210 } 211 212 // Get handle to BluetoothHeadset object 213 IntentFilter audioStateFilter; 214 audioStateFilter = new IntentFilter(); 215 audioStateFilter.addAction(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED); 216 
mReceiver = new VoiceDialerBroadcastReceiver(); 217 registerReceiver(mReceiver, audioStateFilter); 218 219 mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA))); 220 mCommandEngine.setMinimizeResults(true); 221 mCommandEngine.setAllowOpenEntries(false); 222 mCommandClient = new CommandRecognizerClient(); 223 mChoiceClient = new ChoiceRecognizerClient(); 224 225 mBluetoothAudioState = BluetoothHeadset.STATE_ERROR; 226 227 if (BluetoothHeadset.isBluetoothVoiceDialingEnabled(this) && 228 Intent.ACTION_VOICE_COMMAND.equals(getIntent().getAction())) { 229 mUsingBluetooth = true; 230 } else { 231 mUsingBluetooth = false; 232 } 233 234 if (mUsingBluetooth) { 235 if (Config.LOGD) Log.d(TAG, "using bluetooth"); 236 mSampleRate = BLUETOOTH_SAMPLE_RATE; 237 mCommandEngine.setMinimizeResults(true); 238 mCommandEngine.setAllowOpenEntries(false); 239 240 // we can't start recognizing until we get connected to the BluetoothHeadset 241 // and have a connected audio state. We will listen for these 242 // states to change. 243 mWaitingForScoConnection = true; 244 mBluetoothHeadset = new BluetoothHeadset(this, 245 mBluetoothHeadsetServiceListener); 246 247 // initialize the text to speech system 248 mWaitingForTts = true; 249 mTts = new TextToSpeech(this, new TtsInitListener()); 250 mTtsParams = new HashMap<String, String>(); 251 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM, 252 String.valueOf(AudioManager.STREAM_VOICE_CALL)); 253 // we need to wait for the TTS system and the SCO connection 254 // before we can start listening. 255 } else { 256 if (Config.LOGD) Log.d(TAG, "not using bluetooth"); 257 mSampleRate = REGULAR_SAMPLE_RATE; 258 mCommandEngine.setMinimizeResults(false); 259 mCommandEngine.setAllowOpenEntries(true); 260 261 // we're not using bluetooth apparently, just start listening. 
262 listenForCommand(); 263 } 264 } 265 266 class ErrorRunnable implements Runnable { 267 private int mErrorMsg; 268 public ErrorRunnable(int errorMsg) { 269 mErrorMsg = errorMsg; 270 } 271 272 public void run() { 273 // put up an error and exit 274 mHandler.removeCallbacks(mMicFlasher); 275 ((TextView)findViewById(R.id.state)).setText(R.string.failure); 276 ((TextView)findViewById(R.id.substate)).setText(mErrorMsg); 277 ((TextView)findViewById(R.id.substate)).setText( 278 R.string.headset_connection_lost); 279 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 280 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 281 282 283 if (!mUsingBluetooth) { 284 playSound(ToneGenerator.TONE_PROP_NACK); 285 } 286 } 287 } 288 289 class OnTtsCompletionRunnable implements Runnable { 290 private boolean mFallback; 291 292 OnTtsCompletionRunnable(boolean fallback) { 293 mFallback = fallback; 294 } 295 296 public void run() { 297 if (mFallback) { 298 Log.e(TAG, "utterance completion not delivered, using fallback"); 299 } 300 Log.d(TAG, "onTtsCompletionRunnable"); 301 if (mState == SPEAKING_GREETING || mState == SPEAKING_TRY_AGAIN) { 302 listenForCommand(); 303 } else if (mState == SPEAKING_CHOICES) { 304 listenForChoice(); 305 } else if (mState == SPEAKING_GOODBYE) { 306 mState = EXITING; 307 finish(); 308 } else if (mState == SPEAKING_CHOSEN_ACTION) { 309 mState = EXITING; 310 startActivityHelp(mChosenAction); 311 finish(); 312 } 313 } 314 } 315 316 class GreetingRunnable implements Runnable { 317 public void run() { 318 mState = SPEAKING_GREETING; 319 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 320 SPEAK_NOW_UTTERANCE); 321 mTts.speak(getString(R.string.speak_now_tts), 322 TextToSpeech.QUEUE_FLUSH, 323 mTtsParams); 324 // Normally, we will begin listening for the command after the 325 // utterance completes. As a fallback in case the utterance 326 // does not complete, post a delayed runnable to fire 327 // the intent. 
328 mFallbackRunnable = new OnTtsCompletionRunnable(true); 329 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 330 } 331 } 332 333 class TtsInitListener implements TextToSpeech.OnInitListener { 334 public void onInit(int status) { 335 // status can be either TextToSpeech.SUCCESS or TextToSpeech.ERROR. 336 if (Config.LOGD) Log.d(TAG, "onInit for tts"); 337 if (status != TextToSpeech.SUCCESS) { 338 // Initialization failed. 339 Log.e(TAG, "Could not initialize TextToSpeech."); 340 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 341 exitActivity(); 342 return; 343 } 344 345 if (mTts == null) { 346 Log.e(TAG, "null tts"); 347 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 348 exitActivity(); 349 return; 350 } 351 352 mTts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener()); 353 354 // The TTS engine has been successfully initialized. 355 mWaitingForTts = false; 356 357 // TTS over bluetooth is really loud, 358 // Limit volume to -18dB. Stream volume range represents approximately 50dB 359 // (See AudioSystem.cpp linearToLog()) so the number of steps corresponding 360 // to 18dB is 18 / (50 / maxSteps). 361 mBluetoothVoiceVolume = mAudioManager.getStreamVolume( 362 AudioManager.STREAM_BLUETOOTH_SCO); 363 int maxVolume = mAudioManager.getStreamMaxVolume(AudioManager.STREAM_BLUETOOTH_SCO); 364 int volume = maxVolume - ((18 / (50/maxVolume)) + 1); 365 if (mBluetoothVoiceVolume > volume) { 366 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, volume, 0); 367 } 368 369 if (mWaitingForScoConnection) { 370 // the bluetooth connection is not up yet, still waiting. 371 } else { 372 // we now have SCO connection and TTS, so we can start. 
373 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY); 374 } 375 } 376 } 377 378 class OnUtteranceCompletedListener 379 implements TextToSpeech.OnUtteranceCompletedListener { 380 public void onUtteranceCompleted(String utteranceId) { 381 Log.d(TAG, "onUtteranceCompleted " + utteranceId); 382 // since the utterance has completed, we no longer need the fallback. 383 mHandler.removeCallbacks(mFallbackRunnable); 384 mFallbackRunnable = null; 385 mHandler.post(new OnTtsCompletionRunnable(false)); 386 } 387 } 388 389 private BluetoothHeadset.ServiceListener mBluetoothHeadsetServiceListener = 390 new BluetoothHeadset.ServiceListener() { 391 public void onServiceConnected() { 392 if (mBluetoothHeadset != null && 393 mBluetoothHeadset.getState() == BluetoothHeadset.STATE_CONNECTED) { 394 mBluetoothHeadset.startVoiceRecognition(); 395 } 396 397 if (Config.LOGD) Log.d(TAG, "onServiceConnected"); 398 } 399 public void onServiceDisconnected() {} 400 }; 401 402 private class VoiceDialerBroadcastReceiver extends BroadcastReceiver { 403 @Override 404 public void onReceive(Context context, Intent intent) { 405 String action = intent.getAction(); 406 if (action.equals(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED)) { 407 mBluetoothAudioState = intent.getIntExtra( 408 BluetoothHeadset.EXTRA_AUDIO_STATE, 409 BluetoothHeadset.STATE_ERROR); 410 if (Config.LOGD) Log.d(TAG, "HEADSET AUDIO_STATE_CHANGED -> " + 411 mBluetoothAudioState); 412 413 if (mBluetoothAudioState == BluetoothHeadset.AUDIO_STATE_CONNECTED && 414 mWaitingForScoConnection) { 415 // SCO channel has just become available. 416 mWaitingForScoConnection = false; 417 if (mWaitingForTts) { 418 // still waiting for the TTS to be set up. 419 } else { 420 // we now have SCO connection and TTS, so we can start. 421 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY); 422 } 423 } else { 424 if (!mWaitingForScoConnection) { 425 // apparently our connection to the headset has dropped. 
426 // we won't be able to continue voicedialing. 427 if (Config.LOGD) Log.d(TAG, "lost sco connection"); 428 429 mHandler.post(new ErrorRunnable( 430 R.string.headset_connection_lost)); 431 432 exitActivity(); 433 } 434 } 435 } 436 } 437 } 438 439 private void askToTryAgain() { 440 // get work off UAPI thread 441 mHandler.post(new Runnable() { 442 public void run() { 443 if (mAlertDialog != null) { 444 mAlertDialog.dismiss(); 445 } 446 447 mHandler.removeCallbacks(mMicFlasher); 448 ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again); 449 findViewById(R.id.state).setVisibility(View.VISIBLE); 450 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 451 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 452 453 if (mUsingBluetooth) { 454 mState = SPEAKING_TRY_AGAIN; 455 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 456 TRY_AGAIN_UTTERANCE); 457 mTts.speak(getString(R.string.no_results_tts), 458 TextToSpeech.QUEUE_FLUSH, 459 mTtsParams); 460 461 // Normally, the we will start listening after the 462 // utterance completes. As a fallback in case the utterance 463 // does not complete, post a delayed runnable to fire 464 // the intent. 465 mFallbackRunnable = new OnTtsCompletionRunnable(true); 466 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 467 } else { 468 try { 469 Thread.sleep(playSound(ToneGenerator.TONE_PROP_NACK)); 470 } catch (InterruptedException e) { 471 } 472 // we are not using tts, so we just start listening again. 
473 listenForCommand(); 474 } 475 } 476 }); 477 } 478 479 private void performChoice() { 480 if (mUsingBluetooth) { 481 String sentenceSpoken = spaceOutDigits( 482 mChosenAction.getStringExtra( 483 RecognizerEngine.SENTENCE_EXTRA)); 484 485 mState = SPEAKING_CHOSEN_ACTION; 486 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 487 CHOSEN_ACTION_UTTERANCE); 488 mTts.speak(sentenceSpoken, 489 TextToSpeech.QUEUE_FLUSH, 490 mTtsParams); 491 492 // Normally, the intent will be dispatched after the 493 // utterance completes. As a fallback in case the utterance 494 // does not complete, post a delayed runnable to fire 495 // the intent. 496 mFallbackRunnable = new OnTtsCompletionRunnable(true); 497 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 498 } else { 499 // just dispatch the intent 500 startActivityHelp(mChosenAction); 501 finish(); 502 } 503 } 504 505 private void waitForChoice() { 506 if (mUsingBluetooth) { 507 // We are running in bluetooth mode, and we have 508 // multiple matches. Speak the choices and let 509 // the user choose. 510 511 // We will not start listening until the utterance 512 // of the choice list completes. 513 speakChoices(); 514 515 // Normally, listening will begin after the 516 // utterance completes. As a fallback in case the utterance 517 // does not complete, post a delayed runnable to begin 518 // listening. 519 mFallbackRunnable = new OnTtsCompletionRunnable(true); 520 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 521 } else { 522 // We are not running in bluetooth mode, so all 523 // we need to do is wait for the user to select 524 // a choice from the alert dialog. We will wait 525 // indefinitely for this. 526 mState = WAITING_FOR_DIALOG_CHOICE; 527 } 528 } 529 530 private class CommandRecognizerClient implements RecognizerClient { 531 static final int MIN_VOLUME_TO_SKIP = 2; 532 /** 533 * Called by the {@link RecognizerEngine} when the microphone is started. 
534 */ 535 public void onMicrophoneStart(InputStream mic) { 536 if (Config.LOGD) Log.d(TAG, "onMicrophoneStart"); 537 538 if (!mUsingBluetooth) { 539 playSound(ToneGenerator.TONE_PROP_BEEP); 540 541 int ringVolume = mAudioManager.getStreamVolume( 542 AudioManager.STREAM_RING); 543 Log.d(TAG, "ringVolume " + ringVolume); 544 545 if (ringVolume >= MIN_VOLUME_TO_SKIP) { 546 // now we're playing a sound, and corrupting the input sample. 547 // So we need to pull that junk off of the input stream so that the 548 // recognizer won't see it. 549 try { 550 skipBeep(mic); 551 } catch (java.io.IOException e) { 552 Log.e(TAG, "IOException " + e); 553 } 554 } else { 555 if (Config.LOGD) Log.d(TAG, "no tone"); 556 } 557 } 558 559 mHandler.post(new Runnable() { 560 public void run() { 561 findViewById(R.id.retry_view).setVisibility(View.INVISIBLE); 562 findViewById(R.id.microphone_loading_view).setVisibility( 563 View.INVISIBLE); 564 ((TextView)findViewById(R.id.state)).setText(R.string.listening); 565 mHandler.post(mMicFlasher); 566 } 567 }); 568 } 569 570 /** 571 * Beep detection 572 */ 573 private static final int START_WINDOW_MS = 500; // Beep detection window duration in ms 574 private static final int SINE_FREQ = 400; // base sine frequency on beep 575 private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block 576 private static final int THRESHOLD = 8; // absolute pseudo energy threshold 577 private static final int START = 0; // beep detection start 578 private static final int RISING = 1; // beep rising edge start 579 private static final int TOP = 2; // beep constant energy detected 580 581 void skipBeep(InputStream is) throws IOException { 582 int sampleCount = ((mSampleRate / SINE_FREQ) * NUM_PERIODS_BLOCK); 583 int blockSize = 2 * sampleCount; // energy averaging block 584 585 if (is == null || blockSize == 0) { 586 return; 587 } 588 589 byte[] buf = new byte[blockSize]; 590 int maxBytes = 2 * ((START_WINDOW_MS * 
mSampleRate) / 1000); 591 maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize; 592 593 int count = 0; 594 int state = START; // detection state 595 long prevE = 0; // previous pseudo energy 596 long peak = 0; 597 int threshold = THRESHOLD*sampleCount; // absolute energy threshold 598 Log.d(TAG, "blockSize " + blockSize); 599 600 while (count < maxBytes) { 601 int cnt = 0; 602 while (cnt < blockSize) { 603 int n = is.read(buf, cnt, blockSize-cnt); 604 if (n < 0) { 605 throw new java.io.IOException(); 606 } 607 cnt += n; 608 } 609 610 // compute pseudo energy 611 cnt = blockSize; 612 long sumx = 0; 613 long sumxx = 0; 614 while (cnt >= 2) { 615 short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF)); 616 sumx += smp; 617 sumxx += smp*smp; 618 cnt -= 2; 619 } 620 long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount); 621 Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy); 622 623 switch (state) { 624 case START: 625 if (energy > threshold && energy > (prevE * 2) && prevE != 0) { 626 // rising edge if energy doubled and > abs threshold 627 state = RISING; 628 if (Config.LOGD) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 629 } 630 break; 631 case RISING: 632 if (energy < threshold || energy < (prevE / 2)){ 633 // energy fell back below half of previous, back to start 634 if (Config.LOGD) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 635 peak = 0; 636 state = START; 637 } else if (energy > (prevE / 2) && energy < (prevE * 2)) { 638 // Start of constant energy 639 if (Config.LOGD) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 640 if (peak < energy) { 641 peak = energy; 642 } 643 state = TOP; 644 } 645 break; 646 case TOP: 647 if (energy < threshold || energy < (peak / 2)) { 648 // e went to less than half of the peak 649 if (Config.LOGD) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 650 return; 651 
} 652 break; 653 } 654 prevE = energy; 655 count += blockSize; 656 } 657 if (Config.LOGD) Log.d(TAG, "no beep detected, timed out"); 658 } 659 660 /** 661 * Called by the {@link RecognizerEngine} if the recognizer fails. 662 */ 663 public void onRecognitionFailure(final String msg) { 664 if (Config.LOGD) Log.d(TAG, "onRecognitionFailure " + msg); 665 // we had zero results. Just try again. 666 askToTryAgain(); 667 } 668 669 /** 670 * Called by the {@link RecognizerEngine} on an internal error. 671 */ 672 public void onRecognitionError(final String msg) { 673 if (Config.LOGD) Log.d(TAG, "onRecognitionError " + msg); 674 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 675 exitActivity(); 676 } 677 678 /** 679 * Called by the {@link RecognizerEngine} when is succeeds. If there is 680 * only one item, then the Intent is dispatched immediately. 681 * If there are more, then an AlertDialog is displayed and the user is 682 * prompted to select. 683 * @param intents a list of Intents corresponding to the sentences. 684 */ 685 public void onRecognitionSuccess(final Intent[] intents) { 686 if (Config.LOGD) Log.d(TAG, "CommandRecognizerClient onRecognitionSuccess " + 687 intents.length); 688 if (mState != WAITING_FOR_COMMAND) { 689 if (Config.LOGD) Log.d(TAG, "not waiting for command, ignoring"); 690 return; 691 } 692 693 // store the intents in a member variable so that we can access it 694 // later when the user chooses which action to perform. 
695 mAvailableChoices = intents; 696 697 mHandler.post(new Runnable() { 698 public void run() { 699 if (!mUsingBluetooth) { 700 playSound(ToneGenerator.TONE_PROP_ACK); 701 } 702 mHandler.removeCallbacks(mMicFlasher); 703 704 String[] sentences = new String[intents.length]; 705 for (int i = 0; i < intents.length; i++) { 706 sentences[i] = intents[i].getStringExtra( 707 RecognizerEngine.SENTENCE_EXTRA); 708 } 709 710 if (intents.length == 0) { 711 onRecognitionFailure("zero intents"); 712 return; 713 } 714 715 if (intents.length > 0) { 716 // see if we the response was "exit" or "cancel". 717 String value = intents[0].getStringExtra( 718 RecognizerEngine.SEMANTIC_EXTRA); 719 if (Config.LOGD) Log.d(TAG, "value " + value); 720 if ("X".equals(value)) { 721 exitActivity(); 722 return; 723 } 724 } 725 726 if (mUsingBluetooth && 727 (intents.length == 1 || 728 !Intent.ACTION_CALL_PRIVILEGED.equals( 729 intents[0].getAction()))) { 730 // When we're running in bluetooth mode, we expect 731 // that the user is not looking at the screen and cannot 732 // interact with the device in any way besides voice 733 // commands. In this case we need to minimize how many 734 // interactions the user has to perform in order to call 735 // someone. 736 // So if there is only one match, instead of making the 737 // user confirm, we just assume it's correct, speak 738 // the choice over TTS, and then dispatch it. 739 // If there are multiple matches for some intent type 740 // besides "call", it's too difficult for the user to 741 // explain which one they meant, so we just take the highest 742 // confidence match and dispatch that. 743 744 // Speak the sentence for the action we are about 745 // to dispatch so that the user knows what is happening. 746 mChosenAction = intents[0]; 747 performChoice(); 748 749 return; 750 } else { 751 // Either we are not running in bluetooth mode, 752 // or we had multiple matches. Either way, we need 753 // the user to confirm the choice. 
754 // Put up a dialog from which the user can select 755 // his/her choice. 756 DialogInterface.OnCancelListener cancelListener = 757 new DialogInterface.OnCancelListener() { 758 759 public void onCancel(DialogInterface dialog) { 760 if (Config.LOGD) { 761 Log.d(TAG, "cancelListener.onCancel"); 762 } 763 dialog.dismiss(); 764 finish(); 765 } 766 }; 767 768 DialogInterface.OnClickListener clickListener = 769 new DialogInterface.OnClickListener() { 770 771 public void onClick(DialogInterface dialog, int which) { 772 if (Config.LOGD) { 773 Log.d(TAG, "clickListener.onClick " + which); 774 } 775 startActivityHelp(intents[which]); 776 dialog.dismiss(); 777 finish(); 778 } 779 }; 780 781 DialogInterface.OnClickListener negativeListener = 782 new DialogInterface.OnClickListener() { 783 784 public void onClick(DialogInterface dialog, int which) { 785 if (Config.LOGD) { 786 Log.d(TAG, "negativeListener.onClick " + 787 which); 788 } 789 dialog.dismiss(); 790 finish(); 791 } 792 }; 793 794 mAlertDialog = 795 new AlertDialog.Builder(VoiceDialerActivity.this) 796 .setTitle(R.string.title) 797 .setItems(sentences, clickListener) 798 .setOnCancelListener(cancelListener) 799 .setNegativeButton(android.R.string.cancel, 800 negativeListener) 801 .show(); 802 803 waitForChoice(); 804 } 805 } 806 }); 807 } 808 } 809 810 private class ChoiceRecognizerClient implements RecognizerClient { 811 public void onRecognitionSuccess(final Intent[] intents) { 812 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionSuccess"); 813 if (mState != WAITING_FOR_CHOICE) { 814 if (Config.LOGD) Log.d(TAG, "not waiting for choice, ignoring"); 815 return; 816 } 817 818 if (mAlertDialog != null) { 819 mAlertDialog.dismiss(); 820 } 821 822 // disregard all but the first intent. 
823 if (intents.length > 0) { 824 String value = intents[0].getStringExtra( 825 RecognizerEngine.SEMANTIC_EXTRA); 826 if (Config.LOGD) Log.d(TAG, "value " + value); 827 if ("R".equals(value)) { 828 if (mUsingBluetooth) { 829 mHandler.post(new GreetingRunnable()); 830 } else { 831 listenForCommand(); 832 } 833 } else if ("X".equals(value)) { 834 exitActivity(); 835 } else { 836 // it's a phone type response 837 mChosenAction = null; 838 for (int i = 0; i < mAvailableChoices.length; i++) { 839 if (value.equalsIgnoreCase( 840 mAvailableChoices[i].getStringExtra( 841 CommandRecognizerEngine.PHONE_TYPE_EXTRA))) { 842 mChosenAction = mAvailableChoices[i]; 843 } 844 } 845 846 if (mChosenAction != null) { 847 performChoice(); 848 } else { 849 // invalid choice 850 if (Config.LOGD) Log.d(TAG, "invalid choice" + value); 851 852 if (mUsingBluetooth) { 853 mTtsParams.remove(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID); 854 mTts.speak(getString(R.string.invalid_choice_tts), 855 TextToSpeech.QUEUE_FLUSH, 856 mTtsParams); 857 } 858 waitForChoice(); 859 } 860 } 861 } 862 } 863 864 public void onRecognitionFailure(String msg) { 865 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionFailure"); 866 exitActivity(); 867 } 868 869 public void onRecognitionError(String err) { 870 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionError"); 871 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 872 exitActivity(); 873 } 874 875 public void onMicrophoneStart(InputStream mic) { 876 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onMicrophoneStart"); 877 } 878 } 879 880 private void speakChoices() { 881 if (Config.LOGD) Log.d(TAG, "speakChoices"); 882 mState = SPEAKING_CHOICES; 883 884 String sentenceSpoken = spaceOutDigits( 885 mAvailableChoices[0].getStringExtra( 886 RecognizerEngine.SENTENCE_EXTRA)); 887 888 // When we have multiple choices, they will be of the form 889 // "call jack jones at home", "call jack jones on mobile". 
890 // Speak the entire first sentence, then the last word from each 891 // of the remaining sentences. This will come out to something 892 // like "call jack jones at home mobile or work". 893 StringBuilder builder = new StringBuilder(); 894 builder.append(sentenceSpoken); 895 896 int count = mAvailableChoices.length; 897 for (int i=1; i < count; i++) { 898 if (i == count-1) { 899 builder.append(" or "); 900 } else { 901 builder.append(" "); 902 } 903 String tmpSentence = mAvailableChoices[i].getStringExtra( 904 RecognizerEngine.SENTENCE_EXTRA); 905 String[] words = tmpSentence.trim().split(" "); 906 builder.append(words[words.length-1]); 907 } 908 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 909 CHOICES_UTTERANCE); 910 mTts.speak(builder.toString(), 911 TextToSpeech.QUEUE_ADD, 912 mTtsParams); 913 } 914 915 916 private static String spaceOutDigits(String sentenceDisplay) { 917 // if we have a sentence of the form "dial 123 456 7890", 918 // we need to insert a space between each digit, otherwise 919 // the TTS engine will say "dial one hundred twenty three...." 920 // When there already is a space, we also insert a comma, 921 // so that it pauses between sections. 
For the displayable 922 // sentence "dial 123 456 7890" it will speak 923 // "dial 1 2 3, 4 5 6, 7 8 9 0" 924 char buffer[] = sentenceDisplay.toCharArray(); 925 StringBuilder builder = new StringBuilder(); 926 boolean buildingNumber = false; 927 int l = sentenceDisplay.length(); 928 for (int index = 0; index < l; index++) { 929 char c = buffer[index]; 930 if (Character.isDigit(c)) { 931 if (buildingNumber) { 932 builder.append(" "); 933 } 934 buildingNumber = true; 935 builder.append(c); 936 } else if (c == ' ') { 937 if (buildingNumber) { 938 builder.append(","); 939 } else { 940 builder.append(" "); 941 } 942 } else { 943 buildingNumber = false; 944 builder.append(c); 945 } 946 } 947 return builder.toString(); 948 } 949 950 private void startActivityHelp(Intent intent) { 951 startActivity(intent); 952 } 953 954 private void listenForCommand() { 955 if (Config.LOGD) Log.d(TAG, "" 956 + "Command(): MICROPHONE_EXTRA: "+getArg(MICROPHONE_EXTRA)+ 957 ", CONTACTS_EXTRA: "+getArg(CONTACTS_EXTRA)); 958 959 mState = WAITING_FOR_COMMAND; 960 mRecognizerThread = new Thread() { 961 public void run() { 962 mCommandEngine.recognize(mCommandClient, 963 VoiceDialerActivity.this, 964 newFile(getArg(MICROPHONE_EXTRA)), 965 mSampleRate); 966 } 967 }; 968 mRecognizerThread.start(); 969 } 970 971 private void listenForChoice() { 972 if (Config.LOGD) Log.d(TAG, "listenForChoice(): MICROPHONE_EXTRA: " + 973 getArg(MICROPHONE_EXTRA)); 974 975 mState = WAITING_FOR_CHOICE; 976 mRecognizerThread = new Thread() { 977 public void run() { 978 mPhoneTypeChoiceEngine.recognize(mChoiceClient, 979 VoiceDialerActivity.this, 980 newFile(getArg(MICROPHONE_EXTRA)), mSampleRate); 981 } 982 }; 983 mRecognizerThread.start(); 984 } 985 986 private void exitActivity() { 987 synchronized(this) { 988 if (mState != EXITING) { 989 if (Config.LOGD) Log.d(TAG, "exitActivity"); 990 mState = SPEAKING_GOODBYE; 991 if (mUsingBluetooth) { 992 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 993 
GOODBYE_UTTERANCE); 994 mTts.speak(getString(R.string.goodbye_tts), 995 TextToSpeech.QUEUE_FLUSH, 996 mTtsParams); 997 // Normally, the activity will finish() after the 998 // utterance completes. As a fallback in case the utterance 999 // does not complete, post a delayed runnable finish the 1000 // activity. 1001 mFallbackRunnable = new OnTtsCompletionRunnable(true); 1002 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 1003 } else { 1004 mHandler.postDelayed(new Runnable() { 1005 public void run() { 1006 finish(); 1007 } 1008 }, EXIT_DELAY); 1009 } 1010 } 1011 } 1012 } 1013 1014 private String getArg(String name) { 1015 if (name == null) return null; 1016 String arg = getIntent().getStringExtra(name); 1017 if (arg != null) return arg; 1018 arg = SystemProperties.get("app.voicedialer." + name); 1019 return arg != null && arg.length() > 0 ? arg : null; 1020 } 1021 1022 private static File newFile(String name) { 1023 return name != null ? new File(name) : null; 1024 } 1025 1026 private int playSound(int toneType) { 1027 int msecDelay = 1; 1028 1029 // use the MediaPlayer to prompt the user 1030 if (mToneGenerator != null) { 1031 mToneGenerator.startTone(toneType); 1032 msecDelay = StrictMath.max(msecDelay, 300); 1033 } 1034 // use the Vibrator to prompt the user 1035 if (mAudioManager != null && 1036 mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER)) { 1037 final int VIBRATOR_TIME = 150; 1038 final int VIBRATOR_GUARD_TIME = 150; 1039 Vibrator vibrator = new Vibrator(); 1040 vibrator.vibrate(VIBRATOR_TIME); 1041 msecDelay = StrictMath.max(msecDelay, 1042 VIBRATOR_TIME + VIBRATOR_GUARD_TIME); 1043 } 1044 1045 1046 return msecDelay; 1047 } 1048 1049 protected void onStop() { 1050 if (Config.LOGD) Log.d(TAG, "onStop"); 1051 1052 synchronized(this) { 1053 mState = EXITING; 1054 } 1055 1056 if (mAlertDialog != null) { 1057 mAlertDialog.dismiss(); 1058 } 1059 1060 // set the volume back to the level it was before we started. 
1061 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, 1062 mBluetoothVoiceVolume, 0); 1063 mAudioManager.abandonAudioFocus(null); 1064 1065 // shut down bluetooth, if it exists 1066 if (mBluetoothHeadset != null) { 1067 mBluetoothHeadset.stopVoiceRecognition(); 1068 mBluetoothHeadset.close(); 1069 mBluetoothHeadset = null; 1070 } 1071 1072 // shut down recognizer and wait for the thread to complete 1073 if (mRecognizerThread != null) { 1074 mRecognizerThread.interrupt(); 1075 try { 1076 mRecognizerThread.join(); 1077 } catch (InterruptedException e) { 1078 if (Config.LOGD) Log.d(TAG, "onStop mRecognizerThread.join exception " + e); 1079 } 1080 mRecognizerThread = null; 1081 } 1082 1083 // clean up UI 1084 mHandler.removeCallbacks(mMicFlasher); 1085 mHandler.removeMessages(0); 1086 1087 if (mTts != null) { 1088 mTts.stop(); 1089 mTts.shutdown(); 1090 mTts = null; 1091 } 1092 unregisterReceiver(mReceiver); 1093 1094 super.onStop(); 1095 1096 // It makes no sense to have this activity maintain state when in 1097 // background. When it stops, it should just be destroyed. 1098 finish(); 1099 } 1100 1101 private Runnable mMicFlasher = new Runnable() { 1102 int visible = View.VISIBLE; 1103 1104 public void run() { 1105 findViewById(R.id.microphone_view).setVisibility(visible); 1106 findViewById(R.id.state).setVisibility(visible); 1107 visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE; 1108 mHandler.postDelayed(this, 750); 1109 } 1110 }; 1111 1112 @Override 1113 protected void onDestroy() { 1114 if (Config.LOGD) Log.d(TAG, "onDestroy"); 1115 super.onDestroy(); 1116 } 1117}