VoiceDialerActivity.java revision fbc76d73e034753bf284d52f0df766369ae3cb5b
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.voicedialer; 18 19import android.app.Activity; 20import android.app.AlertDialog; 21import android.bluetooth.BluetoothHeadset; 22import android.content.BroadcastReceiver; 23import android.content.Context; 24import android.content.DialogInterface; 25import android.content.Intent; 26import android.content.IntentFilter; 27import android.media.AudioManager; 28import android.media.ToneGenerator; 29import android.os.Bundle; 30import android.os.Handler; 31import android.os.SystemProperties; 32import android.os.Vibrator; 33import android.speech.tts.TextToSpeech; 34import android.util.Config; 35import android.util.Log; 36import android.view.View; 37import android.view.WindowManager; 38import android.widget.TextView; 39import java.io.File; 40import java.io.InputStream; 41import java.util.HashMap; 42import java.io.IOException; 43 44/** 45 * TODO: get rid of the anonymous classes 46 * 47 * This class is the user interface of the VoiceDialer application. 48 * It begins in the INITIALIZING state. 
 *
 * INITIALIZING :
 *  This transitions out on events from TTS and the BluetoothHeadset
 *   once TTS is initialized and the SCO channel is set up:
 *     * prompt the user "speak now"
 *     * transition to the SPEAKING_GREETING state
 *
 * SPEAKING_GREETING:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the greeting utterance completes:
 *     * begin listening for the command using the {@link CommandRecognizerEngine}
 *     * transition to the WAITING_FOR_COMMAND state
 *
 * WAITING_FOR_COMMAND :
 *  This transitions out only on events from the recognizer
 *   on RecognitionFailure or RecognitionError:
 *     * begin speaking "try again."
 *     * transition to state SPEAKING_TRY_AGAIN
 *   on RecognitionSuccess:
 *     single result:
 *       * begin speaking the sentence describing the intent
 *       * transition to the SPEAKING_CHOSEN_ACTION state
 *     multiple results:
 *       * begin speaking each of the choices in order
 *       * transition to the SPEAKING_CHOICES state
 *
 * SPEAKING_TRY_AGAIN:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the try again utterance completes:
 *     * begin listening for the command using the {@link CommandRecognizerEngine}
 *     * transition to the WAITING_FOR_COMMAND state
 *
 * SPEAKING_CHOSEN_ACTION:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the utterance completes:
 *     * dispatch the intent that was chosen
 *     * transition to the EXITING state
 *     * finish the activity
 *
 * SPEAKING_CHOICES:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the utterance completes:
 *     * begin listening for the user's choice using the
 *         {@link PhoneTypeChoiceRecognizerEngine}
 *     * transition to the WAITING_FOR_CHOICE state.
 *
 * WAITING_FOR_CHOICE:
 *  This transitions out only on events from the recognizer
 *   on RecognitionFailure or RecognitionError:
 *     * begin speaking the "invalid choice" message, along with the list
 *       of choices
 *     * transition to the SPEAKING_CHOICES state
 *   on RecognitionSuccess:
 *     if the result is "try again", prompt the user to say a command, begin
 *       listening for the command, and transition back to the WAITING_FOR_COMMAND
 *       state.
 *     if the result is "exit", then begin speaking the "goodbye" message and
 *       transition to the SPEAKING_GOODBYE state.
 *     if the result is a valid choice, begin speaking the action chosen, initiate
 *       the command the user has chosen and exit.
 *     if not a valid choice, speak the "invalid choice" message, begin
 *       speaking the choices in order again, and transition to the
 *       SPEAKING_CHOICES state.
 *
 * SPEAKING_GOODBYE:
 *  This transitions out only on events from TTS or the fallback runnable:
 *   once the goodbye utterance completes, or after a time out,
 *   finish the activity.
116 * 117 */ 118 119public class VoiceDialerActivity extends Activity { 120 121 private static final String TAG = "VoiceDialerActivity"; 122 123 private static final String MICROPHONE_EXTRA = "microphone"; 124 private static final String CONTACTS_EXTRA = "contacts"; 125 126 private static final String SPEAK_NOW_UTTERANCE = "speak_now"; 127 private static final String TRY_AGAIN_UTTERANCE = "try_again"; 128 private static final String CHOSEN_ACTION_UTTERANCE = "chose_action"; 129 private static final String GOODBYE_UTTERANCE = "goodbye"; 130 private static final String CHOICES_UTTERANCE = "choices"; 131 132 private static final int FIRST_UTTERANCE_DELAY = 300; 133 private static final int MAX_TTS_DELAY = 6000; 134 private static final int EXIT_DELAY = 2000; 135 136 private static final int BLUETOOTH_SAMPLE_RATE = 8000; 137 private static final int REGULAR_SAMPLE_RATE = 11025; 138 139 private static final int INITIALIZING = 0; 140 private static final int SPEAKING_GREETING = 1; 141 private static final int WAITING_FOR_COMMAND = 2; 142 private static final int SPEAKING_TRY_AGAIN = 3; 143 private static final int SPEAKING_CHOICES = 4; 144 private static final int WAITING_FOR_CHOICE = 5; 145 private static final int WAITING_FOR_DIALOG_CHOICE = 6; 146 private static final int SPEAKING_CHOSEN_ACTION = 7; 147 private static final int SPEAKING_GOODBYE = 8; 148 private static final int EXITING = 9; 149 150 private static final CommandRecognizerEngine mCommandEngine = 151 new CommandRecognizerEngine(); 152 private static final PhoneTypeChoiceRecognizerEngine mPhoneTypeChoiceEngine = 153 new PhoneTypeChoiceRecognizerEngine(); 154 private CommandRecognizerClient mCommandClient; 155 private ChoiceRecognizerClient mChoiceClient; 156 private ToneGenerator mToneGenerator; 157 private Handler mHandler; 158 private Thread mRecognizerThread = null; 159 private AudioManager mAudioManager; 160 private BluetoothHeadset mBluetoothHeadset; 161 private TextToSpeech mTts; 162 private 
HashMap<String, String> mTtsParams; 163 private VoiceDialerBroadcastReceiver mReceiver; 164 private int mBluetoothAudioState; 165 private boolean mWaitingForTts; 166 private boolean mWaitingForScoConnection; 167 private Intent[] mAvailableChoices; 168 private Intent mChosenAction; 169 private int mBluetoothVoiceVolume; 170 private int mState; 171 private AlertDialog mAlertDialog; 172 private Runnable mFallbackRunnable; 173 private boolean mUsingBluetooth = false; 174 private int mSampleRate; 175 176 @Override 177 protected void onCreate(Bundle icicle) { 178 if (Config.LOGD) Log.d(TAG, "onCreate"); 179 super.onCreate(icicle); 180 mHandler = new Handler(); 181 mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE); 182 mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING, 183 ToneGenerator.MAX_VOLUME); 184 } 185 186 protected void onStart() { 187 if (Config.LOGD) Log.d(TAG, "onStart " + getIntent()); 188 super.onStart(); 189 190 mState = INITIALIZING; 191 mChosenAction = null; 192 mAudioManager.requestAudioFocus( 193 null, AudioManager.STREAM_MUSIC, 194 AudioManager.AUDIOFOCUS_GAIN_TRANSIENT); 195 196 // set this flag so this activity will stay in front of the keyguard 197 int flags = WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED; 198 getWindow().addFlags(flags); 199 200 // open main window 201 setTheme(android.R.style.Theme_Dialog); 202 setTitle(R.string.title); 203 setContentView(R.layout.voice_dialing); 204 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 205 findViewById(R.id.retry_view).setVisibility(View.INVISIBLE); 206 findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE); 207 if (RecognizerLogger.isEnabled(this)) { 208 ((TextView) findViewById(R.id.substate)).setText(R.string.logging_enabled); 209 } 210 211 // Get handle to BluetoothHeadset object 212 IntentFilter audioStateFilter; 213 audioStateFilter = new IntentFilter(); 214 audioStateFilter.addAction(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED); 215 
mReceiver = new VoiceDialerBroadcastReceiver(); 216 registerReceiver(mReceiver, audioStateFilter); 217 218 mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA))); 219 mCommandEngine.setMinimizeResults(true); 220 mCommandEngine.setAllowOpenEntries(false); 221 mCommandClient = new CommandRecognizerClient(); 222 mChoiceClient = new ChoiceRecognizerClient(); 223 224 mBluetoothAudioState = BluetoothHeadset.STATE_ERROR; 225 226 if (BluetoothHeadset.isBluetoothVoiceDialingEnabled(this)) { 227 mBluetoothHeadset = new BluetoothHeadset(this, 228 mBluetoothHeadsetServiceListener); 229 } else { 230 mUsingBluetooth = false; 231 if (Config.LOGD) Log.d(TAG, "bluetooth unavailable"); 232 mSampleRate = REGULAR_SAMPLE_RATE; 233 mCommandEngine.setMinimizeResults(false); 234 mCommandEngine.setAllowOpenEntries(true); 235 236 // we're not using bluetooth apparently, just start listening. 237 listenForCommand(); 238 } 239 240 } 241 242 class ErrorRunnable implements Runnable { 243 private int mErrorMsg; 244 public ErrorRunnable(int errorMsg) { 245 mErrorMsg = errorMsg; 246 } 247 248 public void run() { 249 // put up an error and exit 250 mHandler.removeCallbacks(mMicFlasher); 251 ((TextView)findViewById(R.id.state)).setText(R.string.failure); 252 ((TextView)findViewById(R.id.substate)).setText(mErrorMsg); 253 ((TextView)findViewById(R.id.substate)).setText( 254 R.string.headset_connection_lost); 255 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 256 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 257 258 259 if (!mUsingBluetooth) { 260 playSound(ToneGenerator.TONE_PROP_NACK); 261 } 262 } 263 } 264 265 class OnTtsCompletionRunnable implements Runnable { 266 private boolean mFallback; 267 268 OnTtsCompletionRunnable(boolean fallback) { 269 mFallback = fallback; 270 } 271 272 public void run() { 273 if (mFallback) { 274 Log.e(TAG, "utterance completion not delivered, using fallback"); 275 } 276 Log.d(TAG, "onTtsCompletionRunnable"); 277 if (mState == 
SPEAKING_GREETING || mState == SPEAKING_TRY_AGAIN) { 278 listenForCommand(); 279 } else if (mState == SPEAKING_CHOICES) { 280 listenForChoice(); 281 } else if (mState == SPEAKING_GOODBYE) { 282 mState = EXITING; 283 finish(); 284 } else if (mState == SPEAKING_CHOSEN_ACTION) { 285 mState = EXITING; 286 startActivityHelp(mChosenAction); 287 finish(); 288 } 289 } 290 } 291 292 class GreetingRunnable implements Runnable { 293 public void run() { 294 mState = SPEAKING_GREETING; 295 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 296 SPEAK_NOW_UTTERANCE); 297 mTts.speak(getString(R.string.speak_now_tts), 298 TextToSpeech.QUEUE_FLUSH, 299 mTtsParams); 300 // Normally, we will begin listening for the command after the 301 // utterance completes. As a fallback in case the utterance 302 // does not complete, post a delayed runnable to fire 303 // the intent. 304 mFallbackRunnable = new OnTtsCompletionRunnable(true); 305 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 306 } 307 } 308 309 class TtsInitListener implements TextToSpeech.OnInitListener { 310 public void onInit(int status) { 311 // status can be either TextToSpeech.SUCCESS or TextToSpeech.ERROR. 312 if (Config.LOGD) Log.d(TAG, "onInit for tts"); 313 if (status != TextToSpeech.SUCCESS) { 314 // Initialization failed. 315 Log.e(TAG, "Could not initialize TextToSpeech."); 316 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 317 exitActivity(); 318 return; 319 } 320 321 if (mTts == null) { 322 Log.e(TAG, "null tts"); 323 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 324 exitActivity(); 325 return; 326 } 327 328 mTts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener()); 329 330 // The TTS engine has been successfully initialized. 331 mWaitingForTts = false; 332 333 // TTS over bluetooth is really loud, 334 // Limit volume to -18dB. 
Stream volume range represents approximately 50dB 335 // (See AudioSystem.cpp linearToLog()) so the number of steps corresponding 336 // to 18dB is 18 / (50 / maxSteps). 337 mBluetoothVoiceVolume = mAudioManager.getStreamVolume( 338 AudioManager.STREAM_BLUETOOTH_SCO); 339 int maxVolume = mAudioManager.getStreamMaxVolume(AudioManager.STREAM_BLUETOOTH_SCO); 340 int volume = maxVolume - ((18 / (50/maxVolume)) + 1); 341 if (mBluetoothVoiceVolume > volume) { 342 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, volume, 0); 343 } 344 345 if (mWaitingForScoConnection) { 346 // the bluetooth connection is not up yet, still waiting. 347 } else { 348 // we now have SCO connection and TTS, so we can start. 349 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY); 350 } 351 } 352 } 353 354 class OnUtteranceCompletedListener 355 implements TextToSpeech.OnUtteranceCompletedListener { 356 public void onUtteranceCompleted(String utteranceId) { 357 if (Config.LOGD) Log.d(TAG, "onUtteranceCompleted " + utteranceId); 358 // since the utterance has completed, we no longer need the fallback. 359 mHandler.removeCallbacks(mFallbackRunnable); 360 mFallbackRunnable = null; 361 mHandler.post(new OnTtsCompletionRunnable(false)); 362 } 363 } 364 365 private BluetoothHeadset.ServiceListener mBluetoothHeadsetServiceListener = 366 new BluetoothHeadset.ServiceListener() { 367 public void onServiceConnected() { 368 if (Config.LOGD) Log.d(TAG, "headset status " + mBluetoothHeadset.getState()); 369 370 if (mBluetoothHeadset.getState() == BluetoothHeadset.STATE_CONNECTED) { 371 if (Config.LOGD) Log.d(TAG, "using bluetooth"); 372 mUsingBluetooth = true; 373 374 mBluetoothHeadset.startVoiceRecognition(); 375 376 mSampleRate = BLUETOOTH_SAMPLE_RATE; 377 mCommandEngine.setMinimizeResults(true); 378 mCommandEngine.setAllowOpenEntries(false); 379 380 // we can't start recognizing until we get connected to the BluetoothHeadset 381 // and have a connected audio state. 
We will listen for these 382 // states to change. 383 mWaitingForScoConnection = true; 384 385 // initialize the text to speech system 386 mWaitingForTts = true; 387 mTts = new TextToSpeech(VoiceDialerActivity.this, new TtsInitListener()); 388 mTtsParams = new HashMap<String, String>(); 389 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM, 390 String.valueOf(AudioManager.STREAM_VOICE_CALL)); 391 // we need to wait for the TTS system and the SCO connection 392 // before we can start listening. 393 } else { 394 if (Config.LOGD) Log.d(TAG, "not using bluetooth"); 395 mUsingBluetooth = false; 396 mSampleRate = REGULAR_SAMPLE_RATE; 397 mCommandEngine.setMinimizeResults(false); 398 mCommandEngine.setAllowOpenEntries(true); 399 400 // we're not using bluetooth apparently, just start listening. 401 listenForCommand(); 402 } 403 404 if (Config.LOGD) Log.d(TAG, "onServiceConnected"); 405 } 406 public void onServiceDisconnected() {} 407 }; 408 409 private class VoiceDialerBroadcastReceiver extends BroadcastReceiver { 410 @Override 411 public void onReceive(Context context, Intent intent) { 412 String action = intent.getAction(); 413 if (action.equals(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED)) { 414 mBluetoothAudioState = intent.getIntExtra( 415 BluetoothHeadset.EXTRA_AUDIO_STATE, 416 BluetoothHeadset.STATE_ERROR); 417 if (Config.LOGD) Log.d(TAG, "HEADSET AUDIO_STATE_CHANGED -> " + 418 mBluetoothAudioState); 419 420 if (mBluetoothAudioState == BluetoothHeadset.AUDIO_STATE_CONNECTED && 421 mWaitingForScoConnection) { 422 // SCO channel has just become available. 423 mWaitingForScoConnection = false; 424 if (mWaitingForTts) { 425 // still waiting for the TTS to be set up. 426 } else { 427 // we now have SCO connection and TTS, so we can start. 428 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY); 429 } 430 } else { 431 if (!mWaitingForScoConnection) { 432 // apparently our connection to the headset has dropped. 
433 // we won't be able to continue voicedialing. 434 if (Config.LOGD) Log.d(TAG, "lost sco connection"); 435 436 mHandler.post(new ErrorRunnable( 437 R.string.headset_connection_lost)); 438 439 exitActivity(); 440 } 441 } 442 } 443 } 444 } 445 446 private void askToTryAgain() { 447 // get work off UAPI thread 448 mHandler.post(new Runnable() { 449 public void run() { 450 if (mAlertDialog != null) { 451 mAlertDialog.dismiss(); 452 } 453 454 mHandler.removeCallbacks(mMicFlasher); 455 ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again); 456 findViewById(R.id.state).setVisibility(View.VISIBLE); 457 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 458 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 459 460 if (mUsingBluetooth) { 461 mState = SPEAKING_TRY_AGAIN; 462 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 463 TRY_AGAIN_UTTERANCE); 464 mTts.speak(getString(R.string.no_results_tts), 465 TextToSpeech.QUEUE_FLUSH, 466 mTtsParams); 467 468 // Normally, the we will start listening after the 469 // utterance completes. As a fallback in case the utterance 470 // does not complete, post a delayed runnable to fire 471 // the intent. 472 mFallbackRunnable = new OnTtsCompletionRunnable(true); 473 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 474 } else { 475 try { 476 Thread.sleep(playSound(ToneGenerator.TONE_PROP_NACK)); 477 } catch (InterruptedException e) { 478 } 479 // we are not using tts, so we just start listening again. 
480 listenForCommand(); 481 } 482 } 483 }); 484 } 485 486 private void performChoice() { 487 if (mUsingBluetooth) { 488 String sentenceSpoken = spaceOutDigits( 489 mChosenAction.getStringExtra( 490 RecognizerEngine.SENTENCE_EXTRA)); 491 492 mState = SPEAKING_CHOSEN_ACTION; 493 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 494 CHOSEN_ACTION_UTTERANCE); 495 mTts.speak(sentenceSpoken, 496 TextToSpeech.QUEUE_FLUSH, 497 mTtsParams); 498 499 // Normally, the intent will be dispatched after the 500 // utterance completes. As a fallback in case the utterance 501 // does not complete, post a delayed runnable to fire 502 // the intent. 503 mFallbackRunnable = new OnTtsCompletionRunnable(true); 504 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 505 } else { 506 // just dispatch the intent 507 startActivityHelp(mChosenAction); 508 finish(); 509 } 510 } 511 512 private void waitForChoice() { 513 if (mUsingBluetooth) { 514 // We are running in bluetooth mode, and we have 515 // multiple matches. Speak the choices and let 516 // the user choose. 517 518 // We will not start listening until the utterance 519 // of the choice list completes. 520 speakChoices(); 521 522 // Normally, listening will begin after the 523 // utterance completes. As a fallback in case the utterance 524 // does not complete, post a delayed runnable to begin 525 // listening. 526 mFallbackRunnable = new OnTtsCompletionRunnable(true); 527 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 528 } else { 529 // We are not running in bluetooth mode, so all 530 // we need to do is wait for the user to select 531 // a choice from the alert dialog. We will wait 532 // indefinitely for this. 533 mState = WAITING_FOR_DIALOG_CHOICE; 534 } 535 } 536 537 private class CommandRecognizerClient implements RecognizerClient { 538 static final int MIN_VOLUME_TO_SKIP = 2; 539 /** 540 * Called by the {@link RecognizerEngine} when the microphone is started. 
/**
 * Receives callbacks from the command recognizer while the activity is in
 * the WAITING_FOR_COMMAND state: microphone start, failure, error and
 * success. Results are handed over to the UI through {@code mHandler}.
 */
private class CommandRecognizerClient implements RecognizerClient {
    // Ring volume at or above which the start beep is assumed to be picked
    // up by the microphone and therefore skipped in the input stream.
    static final int MIN_VOLUME_TO_SKIP = 2;
    /**
     * Called by the {@link RecognizerEngine} when the microphone is started.
     * Without bluetooth a beep is played and, if the ring volume is high
     * enough, the beep is removed from the head of the microphone stream.
     * The UI is then switched to the "listening" state.
     *
     * @param mic the microphone input stream handed to the recognizer
     */
    public void onMicrophoneStart(InputStream mic) {
        if (Config.LOGD) Log.d(TAG, "onMicrophoneStart");

        if (!mUsingBluetooth) {
            playSound(ToneGenerator.TONE_PROP_BEEP);

            int ringVolume = mAudioManager.getStreamVolume(
                    AudioManager.STREAM_RING);
            Log.d(TAG, "ringVolume " + ringVolume);

            if (ringVolume >= MIN_VOLUME_TO_SKIP) {
                // now we're playing a sound, and corrupting the input sample.
                // So we need to pull that junk off of the input stream so that the
                // recognizer won't see it.
                try {
                    skipBeep(mic);
                } catch (java.io.IOException e) {
                    // best effort: recognition continues with whatever is left
                    Log.e(TAG, "IOException " + e);
                }
            } else {
                if (Config.LOGD) Log.d(TAG, "no tone");
            }
        }

        // UI updates must happen on the handler's thread
        mHandler.post(new Runnable() {
            public void run() {
                findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
                findViewById(R.id.microphone_loading_view).setVisibility(
                        View.INVISIBLE);
                ((TextView)findViewById(R.id.state)).setText(R.string.listening);
                mHandler.post(mMicFlasher);
            }
        });
    }

    /**
     * Beep detection
     */
    private static final int START_WINDOW_MS = 500; // Beep detection window duration in ms
    private static final int SINE_FREQ = 400; // base sine frequency on beep
    private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block
    private static final int THRESHOLD = 8; // absolute pseudo energy threshold
    private static final int START = 0; // beep detection start
    private static final int RISING = 1; // beep rising edge start
    private static final int TOP = 2; // beep constant energy detected

    /**
     * Reads and discards audio from the head of the stream until the end of
     * the start beep is detected, or until a window of roughly
     * START_WINDOW_MS has been consumed without finding one.
     *
     * The stream is consumed in blocks of NUM_PERIODS_BLOCK periods of the
     * beep's base frequency. For each block a pseudo energy (the variance of
     * the samples) is computed and fed to a three-state detector:
     * START (waiting for a rising edge), RISING (energy went up sharply) and
     * TOP (energy roughly constant). The method returns as soon as the energy
     * drops again while in TOP.
     *
     * @param is the microphone stream; bytes are read as 2-byte samples with
     *           the low byte first. A null stream is ignored.
     * @throws IOException if the stream ends or fails while reading
     */
    void skipBeep(InputStream is) throws IOException {
        int sampleCount = ((mSampleRate / SINE_FREQ) * NUM_PERIODS_BLOCK);
        int blockSize = 2 * sampleCount; // energy averaging block

        if (is == null || blockSize == 0) {
            return;
        }

        byte[] buf = new byte[blockSize];
        // size of the detection window in bytes, rounded up to whole blocks
        int maxBytes = 2 * ((START_WINDOW_MS * mSampleRate) / 1000);
        maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize;

        int count = 0;
        int state = START; // detection state
        long prevE = 0; // previous pseudo energy
        long peak = 0;
        int threshold = THRESHOLD*sampleCount; // absolute energy threshold
        Log.d(TAG, "blockSize " + blockSize);

        while (count < maxBytes) {
            // fill one complete block; read() may return fewer bytes than asked
            int cnt = 0;
            while (cnt < blockSize) {
                int n = is.read(buf, cnt, blockSize-cnt);
                if (n < 0) {
                    throw new java.io.IOException();
                }
                cnt += n;
            }

            // compute pseudo energy
            cnt = blockSize;
            long sumx = 0;
            long sumxx = 0;
            while (cnt >= 2) {
                // assemble one sample: high byte at the odd index, low byte before it
                short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF));
                sumx += smp;
                sumxx += smp*smp;
                cnt -= 2;
            }
            // mean of squares minus square of mean, i.e. the block's variance
            long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount);
            Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy);

            switch (state) {
                case START:
                    if (energy > threshold && energy > (prevE * 2) && prevE != 0) {
                        // rising edge if energy doubled and > abs threshold
                        state = RISING;
                        if (Config.LOGD) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                    }
                    break;
                case RISING:
                    if (energy < threshold || energy < (prevE / 2)){
                        // energy fell back below half of previous, back to start
                        if (Config.LOGD) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                        peak = 0;
                        state = START;
                    } else if (energy > (prevE / 2) && energy < (prevE * 2)) {
                        // Start of constant energy
                        if (Config.LOGD) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                        if (peak < energy) {
                            peak = energy;
                        }
                        state = TOP;
                    }
                    break;
                case TOP:
                    if (energy < threshold || energy < (peak / 2)) {
                        // e went to less than half of the peak
                        if (Config.LOGD) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                        return;
                    }
                    break;
                }
            prevE = energy;
            count += blockSize;
        }
        if (Config.LOGD) Log.d(TAG, "no beep detected, timed out");
    }

    /**
     * Called by the {@link RecognizerEngine} if the recognizer fails.
     * The user is asked to try again.
     */
    public void onRecognitionFailure(final String msg) {
        if (Config.LOGD) Log.d(TAG, "onRecognitionFailure " + msg);
        // we had zero results. Just try again.
        askToTryAgain();
    }

    /**
     * Called by the {@link RecognizerEngine} on an internal error.
     * An error is shown and the activity starts exiting.
     */
    public void onRecognitionError(final String msg) {
        if (Config.LOGD) Log.d(TAG, "onRecognitionError " + msg);
        mHandler.post(new ErrorRunnable(R.string.recognition_error));
        exitActivity();
    }

    /**
     * Called by the {@link RecognizerEngine} when it succeeds. Results that
     * arrive while the activity is not in WAITING_FOR_COMMAND are ignored.
     *
     * In bluetooth mode, a single result (or several results whose first
     * intent is not a call) is spoken and dispatched without confirmation.
     * Otherwise an AlertDialog listing the sentences is displayed and the
     * user is prompted to select. A first result whose semantic value is
     * "X" exits the activity instead.
     *
     * @param intents a list of Intents corresponding to the sentences.
     */
    public void onRecognitionSuccess(final Intent[] intents) {
        if (Config.LOGD) Log.d(TAG, "CommandRecognizerClient onRecognitionSuccess " +
                intents.length);
        if (mState != WAITING_FOR_COMMAND) {
            if (Config.LOGD) Log.d(TAG, "not waiting for command, ignoring");
            return;
        }

        // store the intents in a member variable so that we can access it
        // later when the user chooses which action to perform.
        mAvailableChoices = intents;

        mHandler.post(new Runnable() {
            public void run() {
                if (!mUsingBluetooth) {
                    playSound(ToneGenerator.TONE_PROP_ACK);
                }
                mHandler.removeCallbacks(mMicFlasher);

                // labels for the selection dialog, one per recognized intent
                String[] sentences = new String[intents.length];
                for (int i = 0; i < intents.length; i++) {
                    sentences[i] = intents[i].getStringExtra(
                            RecognizerEngine.SENTENCE_EXTRA);
                }

                if (intents.length == 0) {
                    onRecognitionFailure("zero intents");
                    return;
                }

                if (intents.length > 0) {
                    // see if the response was "exit" or "cancel".
                    String value = intents[0].getStringExtra(
                            RecognizerEngine.SEMANTIC_EXTRA);
                    if (Config.LOGD) Log.d(TAG, "value " + value);
                    if ("X".equals(value)) {
                        exitActivity();
                        return;
                    }
                }

                if (mUsingBluetooth &&
                        (intents.length == 1 ||
                         !Intent.ACTION_CALL_PRIVILEGED.equals(
                                intents[0].getAction()))) {
                    // When we're running in bluetooth mode, we expect
                    // that the user is not looking at the screen and cannot
                    // interact with the device in any way besides voice
                    // commands. In this case we need to minimize how many
                    // interactions the user has to perform in order to call
                    // someone.
                    // So if there is only one match, instead of making the
                    // user confirm, we just assume it's correct, speak
                    // the choice over TTS, and then dispatch it.
                    // If there are multiple matches for some intent type
                    // besides "call", it's too difficult for the user to
                    // explain which one they meant, so we just take the highest
                    // confidence match and dispatch that.

                    // Speak the sentence for the action we are about
                    // to dispatch so that the user knows what is happening.
                    mChosenAction = intents[0];
                    performChoice();

                    return;
                } else {
                    // Either we are not running in bluetooth mode,
                    // or we had multiple matches. Either way, we need
                    // the user to confirm the choice.
                    // Put up a dialog from which the user can select
                    // his/her choice.
                    DialogInterface.OnCancelListener cancelListener =
                        new DialogInterface.OnCancelListener() {

                        public void onCancel(DialogInterface dialog) {
                            if (Config.LOGD) {
                                Log.d(TAG, "cancelListener.onCancel");
                            }
                            dialog.dismiss();
                            finish();
                        }
                    };

                    DialogInterface.OnClickListener clickListener =
                        new DialogInterface.OnClickListener() {

                        public void onClick(DialogInterface dialog, int which) {
                            if (Config.LOGD) {
                                Log.d(TAG, "clickListener.onClick " + which);
                            }
                            // "which" indexes the sentences array, which
                            // parallels the intents array
                            startActivityHelp(intents[which]);
                            dialog.dismiss();
                            finish();
                        }
                    };

                    DialogInterface.OnClickListener negativeListener =
                        new DialogInterface.OnClickListener() {

                        public void onClick(DialogInterface dialog, int which) {
                            if (Config.LOGD) {
                                Log.d(TAG, "negativeListener.onClick " +
                                        which);
                            }
                            dialog.dismiss();
                            finish();
                        }
                    };

                    mAlertDialog =
                            new AlertDialog.Builder(VoiceDialerActivity.this)
                            .setTitle(R.string.title)
                            .setItems(sentences, clickListener)
                            .setOnCancelListener(cancelListener)
                            .setNegativeButton(android.R.string.cancel,
                                    negativeListener)
                            .show();

                    waitForChoice();
                }
            }
        });
    }
}
761 // Put up a dialog from which the user can select 762 // his/her choice. 763 DialogInterface.OnCancelListener cancelListener = 764 new DialogInterface.OnCancelListener() { 765 766 public void onCancel(DialogInterface dialog) { 767 if (Config.LOGD) { 768 Log.d(TAG, "cancelListener.onCancel"); 769 } 770 dialog.dismiss(); 771 finish(); 772 } 773 }; 774 775 DialogInterface.OnClickListener clickListener = 776 new DialogInterface.OnClickListener() { 777 778 public void onClick(DialogInterface dialog, int which) { 779 if (Config.LOGD) { 780 Log.d(TAG, "clickListener.onClick " + which); 781 } 782 startActivityHelp(intents[which]); 783 dialog.dismiss(); 784 finish(); 785 } 786 }; 787 788 DialogInterface.OnClickListener negativeListener = 789 new DialogInterface.OnClickListener() { 790 791 public void onClick(DialogInterface dialog, int which) { 792 if (Config.LOGD) { 793 Log.d(TAG, "negativeListener.onClick " + 794 which); 795 } 796 dialog.dismiss(); 797 finish(); 798 } 799 }; 800 801 mAlertDialog = 802 new AlertDialog.Builder(VoiceDialerActivity.this) 803 .setTitle(R.string.title) 804 .setItems(sentences, clickListener) 805 .setOnCancelListener(cancelListener) 806 .setNegativeButton(android.R.string.cancel, 807 negativeListener) 808 .show(); 809 810 waitForChoice(); 811 } 812 } 813 }); 814 } 815 } 816 817 private class ChoiceRecognizerClient implements RecognizerClient { 818 public void onRecognitionSuccess(final Intent[] intents) { 819 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionSuccess"); 820 if (mState != WAITING_FOR_CHOICE) { 821 if (Config.LOGD) Log.d(TAG, "not waiting for choice, ignoring"); 822 return; 823 } 824 825 if (mAlertDialog != null) { 826 mAlertDialog.dismiss(); 827 } 828 829 // disregard all but the first intent. 
830 if (intents.length > 0) { 831 String value = intents[0].getStringExtra( 832 RecognizerEngine.SEMANTIC_EXTRA); 833 if (Config.LOGD) Log.d(TAG, "value " + value); 834 if ("R".equals(value)) { 835 if (mUsingBluetooth) { 836 mHandler.post(new GreetingRunnable()); 837 } else { 838 listenForCommand(); 839 } 840 } else if ("X".equals(value)) { 841 exitActivity(); 842 } else { 843 // it's a phone type response 844 mChosenAction = null; 845 for (int i = 0; i < mAvailableChoices.length; i++) { 846 if (value.equalsIgnoreCase( 847 mAvailableChoices[i].getStringExtra( 848 CommandRecognizerEngine.PHONE_TYPE_EXTRA))) { 849 mChosenAction = mAvailableChoices[i]; 850 } 851 } 852 853 if (mChosenAction != null) { 854 performChoice(); 855 } else { 856 // invalid choice 857 if (Config.LOGD) Log.d(TAG, "invalid choice" + value); 858 859 if (mUsingBluetooth) { 860 mTtsParams.remove(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID); 861 mTts.speak(getString(R.string.invalid_choice_tts), 862 TextToSpeech.QUEUE_FLUSH, 863 mTtsParams); 864 } 865 waitForChoice(); 866 } 867 } 868 } 869 } 870 871 public void onRecognitionFailure(String msg) { 872 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionFailure"); 873 exitActivity(); 874 } 875 876 public void onRecognitionError(String err) { 877 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionError"); 878 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 879 exitActivity(); 880 } 881 882 public void onMicrophoneStart(InputStream mic) { 883 if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onMicrophoneStart"); 884 } 885 } 886 887 private void speakChoices() { 888 if (Config.LOGD) Log.d(TAG, "speakChoices"); 889 mState = SPEAKING_CHOICES; 890 891 String sentenceSpoken = spaceOutDigits( 892 mAvailableChoices[0].getStringExtra( 893 RecognizerEngine.SENTENCE_EXTRA)); 894 895 // When we have multiple choices, they will be of the form 896 // "call jack jones at home", "call jack jones on mobile". 
897 // Speak the entire first sentence, then the last word from each 898 // of the remaining sentences. This will come out to something 899 // like "call jack jones at home mobile or work". 900 StringBuilder builder = new StringBuilder(); 901 builder.append(sentenceSpoken); 902 903 int count = mAvailableChoices.length; 904 for (int i=1; i < count; i++) { 905 if (i == count-1) { 906 builder.append(" or "); 907 } else { 908 builder.append(" "); 909 } 910 String tmpSentence = mAvailableChoices[i].getStringExtra( 911 RecognizerEngine.SENTENCE_EXTRA); 912 String[] words = tmpSentence.trim().split(" "); 913 builder.append(words[words.length-1]); 914 } 915 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 916 CHOICES_UTTERANCE); 917 mTts.speak(builder.toString(), 918 TextToSpeech.QUEUE_ADD, 919 mTtsParams); 920 } 921 922 923 private static String spaceOutDigits(String sentenceDisplay) { 924 // if we have a sentence of the form "dial 123 456 7890", 925 // we need to insert a space between each digit, otherwise 926 // the TTS engine will say "dial one hundred twenty three...." 927 // When there already is a space, we also insert a comma, 928 // so that it pauses between sections. 
For the displayable 929 // sentence "dial 123 456 7890" it will speak 930 // "dial 1 2 3, 4 5 6, 7 8 9 0" 931 char buffer[] = sentenceDisplay.toCharArray(); 932 StringBuilder builder = new StringBuilder(); 933 boolean buildingNumber = false; 934 int l = sentenceDisplay.length(); 935 for (int index = 0; index < l; index++) { 936 char c = buffer[index]; 937 if (Character.isDigit(c)) { 938 if (buildingNumber) { 939 builder.append(" "); 940 } 941 buildingNumber = true; 942 builder.append(c); 943 } else if (c == ' ') { 944 if (buildingNumber) { 945 builder.append(","); 946 } else { 947 builder.append(" "); 948 } 949 } else { 950 buildingNumber = false; 951 builder.append(c); 952 } 953 } 954 return builder.toString(); 955 } 956 957 private void startActivityHelp(Intent intent) { 958 startActivity(intent); 959 } 960 961 private void listenForCommand() { 962 if (Config.LOGD) Log.d(TAG, "" 963 + "Command(): MICROPHONE_EXTRA: "+getArg(MICROPHONE_EXTRA)+ 964 ", CONTACTS_EXTRA: "+getArg(CONTACTS_EXTRA)); 965 966 mState = WAITING_FOR_COMMAND; 967 mRecognizerThread = new Thread() { 968 public void run() { 969 mCommandEngine.recognize(mCommandClient, 970 VoiceDialerActivity.this, 971 newFile(getArg(MICROPHONE_EXTRA)), 972 mSampleRate); 973 } 974 }; 975 mRecognizerThread.start(); 976 } 977 978 private void listenForChoice() { 979 if (Config.LOGD) Log.d(TAG, "listenForChoice(): MICROPHONE_EXTRA: " + 980 getArg(MICROPHONE_EXTRA)); 981 982 mState = WAITING_FOR_CHOICE; 983 mRecognizerThread = new Thread() { 984 public void run() { 985 mPhoneTypeChoiceEngine.recognize(mChoiceClient, 986 VoiceDialerActivity.this, 987 newFile(getArg(MICROPHONE_EXTRA)), mSampleRate); 988 } 989 }; 990 mRecognizerThread.start(); 991 } 992 993 private void exitActivity() { 994 synchronized(this) { 995 if (mState != EXITING) { 996 if (Config.LOGD) Log.d(TAG, "exitActivity"); 997 mState = SPEAKING_GOODBYE; 998 if (mUsingBluetooth) { 999 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 1000 
GOODBYE_UTTERANCE); 1001 mTts.speak(getString(R.string.goodbye_tts), 1002 TextToSpeech.QUEUE_FLUSH, 1003 mTtsParams); 1004 // Normally, the activity will finish() after the 1005 // utterance completes. As a fallback in case the utterance 1006 // does not complete, post a delayed runnable finish the 1007 // activity. 1008 mFallbackRunnable = new OnTtsCompletionRunnable(true); 1009 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 1010 } else { 1011 mHandler.postDelayed(new Runnable() { 1012 public void run() { 1013 finish(); 1014 } 1015 }, EXIT_DELAY); 1016 } 1017 } 1018 } 1019 } 1020 1021 private String getArg(String name) { 1022 if (name == null) return null; 1023 String arg = getIntent().getStringExtra(name); 1024 if (arg != null) return arg; 1025 arg = SystemProperties.get("app.voicedialer." + name); 1026 return arg != null && arg.length() > 0 ? arg : null; 1027 } 1028 1029 private static File newFile(String name) { 1030 return name != null ? new File(name) : null; 1031 } 1032 1033 private int playSound(int toneType) { 1034 int msecDelay = 1; 1035 1036 // use the MediaPlayer to prompt the user 1037 if (mToneGenerator != null) { 1038 mToneGenerator.startTone(toneType); 1039 msecDelay = StrictMath.max(msecDelay, 300); 1040 } 1041 // use the Vibrator to prompt the user 1042 if (mAudioManager != null && 1043 mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER)) { 1044 final int VIBRATOR_TIME = 150; 1045 final int VIBRATOR_GUARD_TIME = 150; 1046 Vibrator vibrator = new Vibrator(); 1047 vibrator.vibrate(VIBRATOR_TIME); 1048 msecDelay = StrictMath.max(msecDelay, 1049 VIBRATOR_TIME + VIBRATOR_GUARD_TIME); 1050 } 1051 1052 1053 return msecDelay; 1054 } 1055 1056 protected void onStop() { 1057 if (Config.LOGD) Log.d(TAG, "onStop"); 1058 1059 synchronized(this) { 1060 mState = EXITING; 1061 } 1062 1063 if (mAlertDialog != null) { 1064 mAlertDialog.dismiss(); 1065 } 1066 1067 // set the volume back to the level it was before we started. 
1068 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, 1069 mBluetoothVoiceVolume, 0); 1070 mAudioManager.abandonAudioFocus(null); 1071 1072 // shut down bluetooth, if it exists 1073 if (mBluetoothHeadset != null) { 1074 mBluetoothHeadset.stopVoiceRecognition(); 1075 mBluetoothHeadset.close(); 1076 mBluetoothHeadset = null; 1077 } 1078 1079 // shut down recognizer and wait for the thread to complete 1080 if (mRecognizerThread != null) { 1081 mRecognizerThread.interrupt(); 1082 try { 1083 mRecognizerThread.join(); 1084 } catch (InterruptedException e) { 1085 if (Config.LOGD) Log.d(TAG, "onStop mRecognizerThread.join exception " + e); 1086 } 1087 mRecognizerThread = null; 1088 } 1089 1090 // clean up UI 1091 mHandler.removeCallbacks(mMicFlasher); 1092 mHandler.removeMessages(0); 1093 1094 if (mTts != null) { 1095 mTts.stop(); 1096 mTts.shutdown(); 1097 mTts = null; 1098 } 1099 unregisterReceiver(mReceiver); 1100 1101 super.onStop(); 1102 1103 // It makes no sense to have this activity maintain state when in 1104 // background. When it stops, it should just be destroyed. 1105 finish(); 1106 } 1107 1108 private Runnable mMicFlasher = new Runnable() { 1109 int visible = View.VISIBLE; 1110 1111 public void run() { 1112 findViewById(R.id.microphone_view).setVisibility(visible); 1113 findViewById(R.id.state).setVisibility(visible); 1114 visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE; 1115 mHandler.postDelayed(this, 750); 1116 } 1117 }; 1118 1119 @Override 1120 protected void onDestroy() { 1121 if (Config.LOGD) Log.d(TAG, "onDestroy"); 1122 super.onDestroy(); 1123 } 1124}