VoiceDialerActivity.java revision ed873c21cd8ad85df735ec841b147f5ac0f740a9
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.voicedialer;
18
19import android.app.Activity;
20import android.app.AlertDialog;
21import android.bluetooth.BluetoothHeadset;
22import android.content.BroadcastReceiver;
23import android.content.Context;
24import android.content.DialogInterface;
25import android.content.Intent;
26import android.content.IntentFilter;
27import android.media.AudioManager;
28import android.media.ToneGenerator;
29import android.os.Bundle;
30import android.os.Environment;
31import android.os.Handler;
32import android.os.SystemProperties;
33import android.os.Vibrator;
34import android.speech.tts.TextToSpeech;
35import android.util.Config;
36import android.util.Log;
37import android.view.View;
38import android.view.WindowManager;
39import android.widget.TextView;
40import java.io.File;
41import java.io.InputStream;
42import java.util.HashMap;
43import java.io.IOException;
44
45/**
46 * TODO: get rid of the anonymous classes
47 *
48 * This class is the user interface of the VoiceDialer application.
49 * It begins in the INITIALIZING state.
50 *
51 * INITIALIZING :
52 *  This transitions out on events from TTS and the BluetoothHeadset
53 *   once TTS initialized and SCO channel set up:
54 *     * prompt the user "speak now"
55 *     * transition to the SPEAKING_GREETING state
56 *
57 * SPEAKING_GREETING:
58 *  This transitions out only on events from TTS or the fallback runnable
59 *   once the greeting utterance completes:
60 *     * begin listening for the command using the {@link CommandRecognizerEngine}
61 *     * transition to the WAITING_FOR_COMMAND state
62 *
63 * WAITING_FOR_COMMAND :
64 * This transitions out only on events from the recognizer
65 *   on RecognitionFailure or RecognitionError:
66 *     * begin speaking "try again."
67 *     * transition to state SPEAKING_TRY_AGAIN
68 *   on RecognitionSuccess:
69 *     single result:
70 *       * begin speaking the sentence describing the intent
71 *       * transition to the SPEAKING_CHOSEN_ACTION
72 *     multiple results:
73 *       * begin speaking each of the choices in order
74 *       * transition to the SPEAKING_CHOICES state
75 *
76 * SPEAKING_TRY_AGAIN:
77 * This transitions out only on events from TTS or the fallback runnable
78 *   once the try again utterance completes:
79 *     * begin listening for the command using the {@link CommandRecognizerEngine}
80 *     * transition to the WAITING_FOR_COMMAND state
81 *
82 * SPEAKING_CHOSEN_ACTION:
83 *  This transitions out only on events from TTS or the fallback runnable
84 *   once the utterance completes:
85 *     * dispatch the intent that was chosen
86 *     * transition to the EXITING state
87 *     * finish the activity
88 *
89 * SPEAKING_CHOICES:
90 *  This transitions out only on events from TTS or the fallback runnable
91 *   once the utterance completes:
92 *     * begin listening for the user's choice using the
93 *         {@link PhoneTypeChoiceRecognizerEngine}
94 *     * transition to the WAITING_FOR_CHOICE state.
95 *
96 * WAITING_FOR_CHOICE:
97 *  This transitions out only on events from the recognizer
98 *   on RecognitionFailure or RecognitionError:
99 *     * begin speaking the "invalid choice" message, along with the list
100 *       of choices
101 *     * transition to the SPEAKING_CHOICES state
102 *   on RecognitionSuccess:
103 *     if the result is "try again", prompt the user to say a command, begin
104 *       listening for the command, and transition back to the WAITING_FOR_COMMAND
105 *       state.
106 *     if the result is "exit", then begin speaking the "goodbye" message and
107 *       transition to the SPEAKING_GOODBYE state.
108 *     if the result is a valid choice, begin speaking the action chosen, initiate
109 *       the command the user has chosen, and exit.
110 *     if not a valid choice, speak the "invalid choice" message, begin
111 *       speaking the choices in order again, transition to the
112 *       SPEAKING_CHOICES
113 *
114 * SPEAKING_GOODBYE:
115 *  This transitions out only on events from TTS or the fallback runnable
116 *   after a time out, finish the activity.
117 *
118 */
119
120public class VoiceDialerActivity extends Activity {
121
122    private static final String TAG = "VoiceDialerActivity";
123
124    private static final String MICROPHONE_EXTRA = "microphone";
125    private static final String CONTACTS_EXTRA = "contacts";
126
127    private static final String SPEAK_NOW_UTTERANCE = "speak_now";
128    private static final String TRY_AGAIN_UTTERANCE = "try_again";
129    private static final String CHOSEN_ACTION_UTTERANCE = "chose_action";
130    private static final String GOODBYE_UTTERANCE = "goodbye";
131    private static final String CHOICES_UTTERANCE = "choices";
132
133    private static final int FIRST_UTTERANCE_DELAY = 300;
134    private static final int MAX_TTS_DELAY = 6000;
135    private static final int EXIT_DELAY = 2000;
136
137    private static final int BLUETOOTH_SAMPLE_RATE = 8000;
138    private static final int REGULAR_SAMPLE_RATE = 11025;
139
140    private static final int INITIALIZING = 0;
141    private static final int SPEAKING_GREETING = 1;
142    private static final int WAITING_FOR_COMMAND = 2;
143    private static final int SPEAKING_TRY_AGAIN = 3;
144    private static final int SPEAKING_CHOICES = 4;
145    private static final int WAITING_FOR_CHOICE = 5;
146    private static final int WAITING_FOR_DIALOG_CHOICE = 6;
147    private static final int SPEAKING_CHOSEN_ACTION = 7;
148    private static final int SPEAKING_GOODBYE = 8;
149    private static final int EXITING = 9;
150
151    private static final CommandRecognizerEngine mCommandEngine =
152            new CommandRecognizerEngine();
153    private static final PhoneTypeChoiceRecognizerEngine mPhoneTypeChoiceEngine =
154            new PhoneTypeChoiceRecognizerEngine();
155    private CommandRecognizerClient mCommandClient;
156    private ChoiceRecognizerClient mChoiceClient;
157    private ToneGenerator mToneGenerator;
158    private Handler mHandler;
159    private Thread mRecognizerThread = null;
160    private AudioManager mAudioManager;
161    private BluetoothHeadset mBluetoothHeadset;
162    private TextToSpeech mTts;
163    private HashMap<String, String> mTtsParams;
164    private VoiceDialerBroadcastReceiver mReceiver;
165    private int mBluetoothAudioState;
166    private boolean mWaitingForTts;
167    private boolean mWaitingForScoConnection;
168    private Intent[] mAvailableChoices;
169    private Intent mChosenAction;
170    private int mBluetoothVoiceVolume;
171    private int mState;
172    private AlertDialog mAlertDialog;
173    private Runnable mFallbackRunnable;
174    private boolean mUsingBluetooth = false;
175    private int mSampleRate;
176
177    @Override
178    protected void onCreate(Bundle icicle) {
179        if (Config.LOGD) Log.d(TAG, "onCreate");
180        super.onCreate(icicle);
181        mHandler = new Handler();
182        mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE);
183        mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING,
184                ToneGenerator.MAX_VOLUME);
185    }
186
187    protected void onStart() {
188        if (Config.LOGD) Log.d(TAG, "onStart " + getIntent());
189        super.onStart();
190
191        mState = INITIALIZING;
192        mChosenAction = null;
193        mAudioManager.requestAudioFocus(
194                null, AudioManager.STREAM_MUSIC,
195                AudioManager.AUDIOFOCUS_GAIN_TRANSIENT);
196
197        // set this flag so this activity will stay in front of the keyguard
198        int flags = WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED;
199        getWindow().addFlags(flags);
200
201        // open main window
202        setTheme(android.R.style.Theme_Dialog);
203        setTitle(R.string.title);
204        setContentView(R.layout.voice_dialing);
205        findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
206        findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
207        findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE);
208        if (RecognizerLogger.isEnabled(this)) {
209            ((TextView) findViewById(R.id.substate)).setText(R.string.logging_enabled);
210        }
211
212        // Get handle to BluetoothHeadset object
213        IntentFilter audioStateFilter;
214        audioStateFilter = new IntentFilter();
215        audioStateFilter.addAction(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED);
216        mReceiver = new VoiceDialerBroadcastReceiver();
217        registerReceiver(mReceiver, audioStateFilter);
218
219        mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA)));
220        mCommandEngine.setMinimizeResults(true);
221        mCommandEngine.setAllowOpenEntries(false);
222        mCommandClient = new CommandRecognizerClient();
223        mChoiceClient = new ChoiceRecognizerClient();
224
225        mBluetoothAudioState = BluetoothHeadset.STATE_ERROR;
226
227        if (BluetoothHeadset.isBluetoothVoiceDialingEnabled(this) &&
228                Intent.ACTION_VOICE_COMMAND.equals(getIntent().getAction())) {
229            mUsingBluetooth = true;
230        } else {
231            mUsingBluetooth = false;
232        }
233
234        if (mUsingBluetooth) {
235            if (Config.LOGD) Log.d(TAG, "using bluetooth");
236            mSampleRate = BLUETOOTH_SAMPLE_RATE;
237            mCommandEngine.setMinimizeResults(true);
238            mCommandEngine.setAllowOpenEntries(false);
239
240            // we can't start recognizing until we get connected to the BluetoothHeadset
241            // and have a connected audio state.  We will listen for these
242            // states to change.
243            mWaitingForScoConnection = true;
244            mBluetoothHeadset = new BluetoothHeadset(this,
245                    mBluetoothHeadsetServiceListener);
246
247            // initialize the text to speech system
248            mWaitingForTts = true;
249            mTts = new TextToSpeech(this, new TtsInitListener());
250            mTtsParams = new HashMap<String, String>();
251            mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM,
252                    String.valueOf(AudioManager.STREAM_VOICE_CALL));
253            // we need to wait for the TTS system and the SCO connection
254            // before we can start listening.
255        } else {
256            if (Config.LOGD) Log.d(TAG, "not using bluetooth");
257            mSampleRate = REGULAR_SAMPLE_RATE;
258            mCommandEngine.setMinimizeResults(false);
259            mCommandEngine.setAllowOpenEntries(true);
260
261            // we're not using bluetooth apparently, just start listening.
262            listenForCommand();
263        }
264    }
265
266    class ErrorRunnable implements Runnable {
267        private int mErrorMsg;
268        public ErrorRunnable(int errorMsg) {
269            mErrorMsg = errorMsg;
270        }
271
272        public void run() {
273            // put up an error and exit
274            mHandler.removeCallbacks(mMicFlasher);
275            ((TextView)findViewById(R.id.state)).setText(R.string.failure);
276            ((TextView)findViewById(R.id.substate)).setText(mErrorMsg);
277            ((TextView)findViewById(R.id.substate)).setText(
278                    R.string.headset_connection_lost);
279            findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
280            findViewById(R.id.retry_view).setVisibility(View.VISIBLE);
281
282
283            if (!mUsingBluetooth) {
284                playSound(ToneGenerator.TONE_PROP_NACK);
285            }
286        }
287    }
288
289    class OnTtsCompletionRunnable implements Runnable {
290        private boolean mFallback;
291
292        OnTtsCompletionRunnable(boolean fallback) {
293            mFallback = fallback;
294        }
295
296        public void run() {
297            if (mFallback) {
298                Log.e(TAG, "utterance completion not delivered, using fallback");
299            }
300            Log.d(TAG, "onTtsCompletionRunnable");
301            if (mState == SPEAKING_GREETING || mState == SPEAKING_TRY_AGAIN) {
302                listenForCommand();
303            } else if (mState == SPEAKING_CHOICES) {
304                listenForChoice();
305            } else if (mState == SPEAKING_GOODBYE) {
306                mState = EXITING;
307                finish();
308            } else if (mState == SPEAKING_CHOSEN_ACTION) {
309                mState = EXITING;
310                startActivityHelp(mChosenAction);
311                finish();
312            }
313        }
314    }
315
316    class GreetingRunnable implements Runnable {
317        public void run() {
318            mState = SPEAKING_GREETING;
319            mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
320                    SPEAK_NOW_UTTERANCE);
321            mTts.speak(getString(R.string.speak_now_tts),
322                TextToSpeech.QUEUE_FLUSH,
323                mTtsParams);
324            // Normally, we will begin listening for the command after the
325            // utterance completes.  As a fallback in case the utterance
326            // does not complete, post a delayed runnable to fire
327            // the intent.
328            mFallbackRunnable = new OnTtsCompletionRunnable(true);
329            mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
330        }
331    }
332
333    class TtsInitListener implements TextToSpeech.OnInitListener {
334        public void onInit(int status) {
335            // status can be either TextToSpeech.SUCCESS or TextToSpeech.ERROR.
336            if (Config.LOGD) Log.d(TAG, "onInit for tts");
337            if (status != TextToSpeech.SUCCESS) {
338                // Initialization failed.
339                Log.e(TAG, "Could not initialize TextToSpeech.");
340                mHandler.post(new ErrorRunnable(R.string.recognition_error));
341                exitActivity();
342                return;
343            }
344
345            if (mTts == null) {
346                Log.e(TAG, "null tts");
347                mHandler.post(new ErrorRunnable(R.string.recognition_error));
348                exitActivity();
349                return;
350            }
351
352            mTts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener());
353
354            // The TTS engine has been successfully initialized.
355            mWaitingForTts = false;
356
357            // TTS over bluetooth is really loud,
358            // Limit volume to -18dB. Stream volume range represents approximately 50dB
359            // (See AudioSystem.cpp linearToLog()) so the number of steps corresponding
360            // to 18dB is 18 / (50 / maxSteps).
361            mBluetoothVoiceVolume = mAudioManager.getStreamVolume(
362                    AudioManager.STREAM_BLUETOOTH_SCO);
363            int maxVolume = mAudioManager.getStreamMaxVolume(AudioManager.STREAM_BLUETOOTH_SCO);
364            int volume = maxVolume - ((18 / (50/maxVolume)) + 1);
365            if (mBluetoothVoiceVolume > volume) {
366                mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, volume, 0);
367            }
368
369            if (mWaitingForScoConnection) {
370                // the bluetooth connection is not up yet, still waiting.
371            } else {
372                // we now have SCO connection and TTS, so we can start.
373                mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY);
374            }
375        }
376    }
377
378    class OnUtteranceCompletedListener
379            implements TextToSpeech.OnUtteranceCompletedListener {
380        public void onUtteranceCompleted(String utteranceId) {
381            Log.d(TAG, "onUtteranceCompleted " + utteranceId);
382            // since the utterance has completed, we no longer need the fallback.
383            mHandler.removeCallbacks(mFallbackRunnable);
384            mFallbackRunnable = null;
385            mHandler.post(new OnTtsCompletionRunnable(false));
386        }
387    }
388
389    private BluetoothHeadset.ServiceListener mBluetoothHeadsetServiceListener =
390            new BluetoothHeadset.ServiceListener() {
391        public void onServiceConnected() {
392            if (mBluetoothHeadset != null &&
393                    mBluetoothHeadset.getState() == BluetoothHeadset.STATE_CONNECTED) {
394                mBluetoothHeadset.startVoiceRecognition();
395            }
396
397            if (Config.LOGD) Log.d(TAG, "onServiceConnected");
398        }
399        public void onServiceDisconnected() {}
400    };
401
402    private class VoiceDialerBroadcastReceiver extends BroadcastReceiver {
403        @Override
404        public void onReceive(Context context, Intent intent) {
405            String action = intent.getAction();
406            if (action.equals(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED)) {
407                mBluetoothAudioState = intent.getIntExtra(
408                        BluetoothHeadset.EXTRA_AUDIO_STATE,
409                        BluetoothHeadset.STATE_ERROR);
410                if (Config.LOGD) Log.d(TAG, "HEADSET AUDIO_STATE_CHANGED -> " +
411                        mBluetoothAudioState);
412
413                if (mBluetoothAudioState == BluetoothHeadset.AUDIO_STATE_CONNECTED &&
414                    mWaitingForScoConnection) {
415                    // SCO channel has just become available.
416                    mWaitingForScoConnection = false;
417                    if (mWaitingForTts) {
418                        // still waiting for the TTS to be set up.
419                    } else {
420                        // we now have SCO connection and TTS, so we can start.
421                        mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY);
422                    }
423                } else {
424                    if (!mWaitingForScoConnection) {
425                        // apparently our connection to the headset has dropped.
426                        // we won't be able to continue voicedialing.
427                        if (Config.LOGD) Log.d(TAG, "lost sco connection");
428
429                        mHandler.post(new ErrorRunnable(
430                                R.string.headset_connection_lost));
431
432                        exitActivity();
433                    }
434                }
435            }
436        }
437    }
438
439    private void askToTryAgain() {
440        // get work off UAPI thread
441        mHandler.post(new Runnable() {
442            public void run() {
443                if (mAlertDialog != null) {
444                    mAlertDialog.dismiss();
445                }
446
447                mHandler.removeCallbacks(mMicFlasher);
448                ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again);
449                findViewById(R.id.state).setVisibility(View.VISIBLE);
450                findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
451                findViewById(R.id.retry_view).setVisibility(View.VISIBLE);
452
453                if (mUsingBluetooth) {
454                    mState = SPEAKING_TRY_AGAIN;
455                    mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
456                            TRY_AGAIN_UTTERANCE);
457                    mTts.speak(getString(R.string.no_results_tts),
458                        TextToSpeech.QUEUE_FLUSH,
459                        mTtsParams);
460
461                    // Normally, the we will start listening after the
462                    // utterance completes.  As a fallback in case the utterance
463                    // does not complete, post a delayed runnable to fire
464                    // the intent.
465                    mFallbackRunnable = new OnTtsCompletionRunnable(true);
466                    mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
467                } else {
468                    try {
469                        Thread.sleep(playSound(ToneGenerator.TONE_PROP_NACK));
470                    } catch (InterruptedException e) {
471                    }
472                    // we are not using tts, so we just start listening again.
473                    listenForCommand();
474                }
475            }
476        });
477    }
478
479    private void performChoice() {
480        if (mUsingBluetooth) {
481            String sentenceSpoken = spaceOutDigits(
482                    mChosenAction.getStringExtra(
483                        RecognizerEngine.SENTENCE_EXTRA));
484
485            mState = SPEAKING_CHOSEN_ACTION;
486            mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
487                    CHOSEN_ACTION_UTTERANCE);
488            mTts.speak(sentenceSpoken,
489                TextToSpeech.QUEUE_FLUSH,
490                mTtsParams);
491
492            // Normally, the intent will be dispatched after the
493            // utterance completes.  As a fallback in case the utterance
494            // does not complete, post a delayed runnable to fire
495            // the intent.
496            mFallbackRunnable = new OnTtsCompletionRunnable(true);
497            mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
498        } else {
499            // just dispatch the intent
500            startActivityHelp(mChosenAction);
501            finish();
502        }
503    }
504
505    private void waitForChoice() {
506        if (mUsingBluetooth) {
507            // We are running in bluetooth mode, and we have
508            // multiple matches.  Speak the choices and let
509            // the user choose.
510
511            // We will not start listening until the utterance
512            // of the choice list completes.
513            speakChoices();
514
515            // Normally, listening will begin after the
516            // utterance completes.  As a fallback in case the utterance
517            // does not complete, post a delayed runnable to begin
518            // listening.
519            mFallbackRunnable = new OnTtsCompletionRunnable(true);
520            mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
521        } else {
522            // We are not running in bluetooth mode, so all
523            // we need to do is wait for the user to select
524            // a choice from the alert dialog.  We will wait
525            // indefinitely for this.
526            mState = WAITING_FOR_DIALOG_CHOICE;
527        }
528    }
529
530    private class CommandRecognizerClient implements RecognizerClient {
531         static final int MIN_VOLUME_TO_SKIP = 2;
532       /**
533         * Called by the {@link RecognizerEngine} when the microphone is started.
534         */
535        public void onMicrophoneStart(InputStream mic) {
536            if (Config.LOGD) Log.d(TAG, "onMicrophoneStart");
537
538           if (!mUsingBluetooth) {
539               playSound(ToneGenerator.TONE_PROP_BEEP);
540
541                int ringVolume = mAudioManager.getStreamVolume(
542                        AudioManager.STREAM_RING);
543                Log.d(TAG, "ringVolume " + ringVolume);
544
545                if (ringVolume >= MIN_VOLUME_TO_SKIP) {
546                    // now we're playing a sound, and corrupting the input sample.
547                    // So we need to pull that junk off of the input stream so that the
548                    // recognizer won't see it.
549                    try {
550                        skipBeep(mic);
551                    } catch (java.io.IOException e) {
552                        Log.e(TAG, "IOException " + e);
553                    }
554                } else {
555                    if (Config.LOGD) Log.d(TAG, "no tone");
556                }
557            }
558
559            mHandler.post(new Runnable() {
560                public void run() {
561                    findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
562                    findViewById(R.id.microphone_loading_view).setVisibility(
563                            View.INVISIBLE);
564                    ((TextView)findViewById(R.id.state)).setText(R.string.listening);
565                    mHandler.post(mMicFlasher);
566                }
567            });
568        }
569
570        /**
571         *  Beep detection
572         */
573        private static final int START_WINDOW_MS = 500;  // Beep detection window duration in ms
574        private static final int SINE_FREQ = 400;        // base sine frequency on beep
575        private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block
576        private static final int THRESHOLD = 8;          // absolute pseudo energy threshold
577        private static final int START = 0;              // beep detection start
578        private static final int RISING = 1;             // beep rising edge start
579        private static final int TOP = 2;                // beep constant energy detected
580
581        void skipBeep(InputStream is) throws IOException {
582            int sampleCount = ((mSampleRate / SINE_FREQ) * NUM_PERIODS_BLOCK);
583            int blockSize = 2 * sampleCount; // energy averaging block
584
585            if (is == null || blockSize == 0) {
586                return;
587            }
588
589            byte[] buf = new byte[blockSize];
590            int maxBytes = 2 * ((START_WINDOW_MS * mSampleRate) / 1000);
591            maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize;
592
593            int count = 0;
594            int state = START;  // detection state
595            long prevE = 0; // previous pseudo energy
596            long peak = 0;
597            int threshold =  THRESHOLD*sampleCount;  // absolute energy threshold
598            Log.d(TAG, "blockSize " + blockSize);
599
600            while (count < maxBytes) {
601                int cnt = 0;
602                while (cnt < blockSize) {
603                    int n = is.read(buf, cnt, blockSize-cnt);
604                    if (n < 0) {
605                        throw new java.io.IOException();
606                    }
607                    cnt += n;
608                }
609
610                // compute pseudo energy
611                cnt = blockSize;
612                long sumx = 0;
613                long sumxx = 0;
614                while (cnt >= 2) {
615                    short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF));
616                    sumx += smp;
617                    sumxx += smp*smp;
618                    cnt -= 2;
619                }
620                long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount);
621                Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy);
622
623                switch (state) {
624                    case START:
625                        if (energy > threshold && energy > (prevE * 2) && prevE != 0) {
626                            // rising edge if energy doubled and > abs threshold
627                            state = RISING;
628                            if (Config.LOGD) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
629                        }
630                        break;
631                    case RISING:
632                        if (energy < threshold || energy < (prevE / 2)){
633                            // energy fell back below half of previous, back to start
634                            if (Config.LOGD) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
635                            peak = 0;
636                            state = START;
637                        } else if (energy > (prevE / 2) && energy < (prevE * 2)) {
638                            // Start of constant energy
639                            if (Config.LOGD) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
640                            if (peak < energy) {
641                                peak = energy;
642                            }
643                            state = TOP;
644                        }
645                        break;
646                    case TOP:
647                        if (energy < threshold || energy < (peak / 2)) {
648                            // e went to less than half of the peak
649                            if (Config.LOGD) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
650                            return;
651                        }
652                        break;
653                    }
654                prevE = energy;
655                count += blockSize;
656            }
657            if (Config.LOGD) Log.d(TAG, "no beep detected, timed out");
658        }
659
660        /**
661         * Called by the {@link RecognizerEngine} if the recognizer fails.
662         */
663        public void onRecognitionFailure(final String msg) {
664            if (Config.LOGD) Log.d(TAG, "onRecognitionFailure " + msg);
665            // we had zero results.  Just try again.
666            askToTryAgain();
667        }
668
669        /**
670         * Called by the {@link RecognizerEngine} on an internal error.
671         */
672        public void onRecognitionError(final String msg) {
673            if (Config.LOGD) Log.d(TAG, "onRecognitionError " + msg);
674            mHandler.post(new ErrorRunnable(R.string.recognition_error));
675            exitActivity();
676        }
677
678        /**
679         * Called by the {@link RecognizerEngine} when is succeeds.  If there is
680         * only one item, then the Intent is dispatched immediately.
681         * If there are more, then an AlertDialog is displayed and the user is
682         * prompted to select.
683         * @param intents a list of Intents corresponding to the sentences.
684         */
685        public void onRecognitionSuccess(final Intent[] intents) {
686            if (Config.LOGD) Log.d(TAG, "CommandRecognizerClient onRecognitionSuccess " +
687                    intents.length);
688            if (mState != WAITING_FOR_COMMAND) {
689                if (Config.LOGD) Log.d(TAG, "not waiting for command, ignoring");
690                return;
691            }
692
693            // store the intents in a member variable so that we can access it
694            // later when the user chooses which action to perform.
695            mAvailableChoices = intents;
696
697            mHandler.post(new Runnable() {
698                public void run() {
699                    if (!mUsingBluetooth) {
700                        playSound(ToneGenerator.TONE_PROP_ACK);
701                    }
702                    mHandler.removeCallbacks(mMicFlasher);
703
704                    String[] sentences = new String[intents.length];
705                    for (int i = 0; i < intents.length; i++) {
706                        sentences[i] = intents[i].getStringExtra(
707                                RecognizerEngine.SENTENCE_EXTRA);
708                    }
709
710                    if (intents.length == 0) {
711                        onRecognitionFailure("zero intents");
712                        return;
713                    }
714
715                    if (intents.length > 0) {
716                        // see if we the response was "exit" or "cancel".
717                        String value = intents[0].getStringExtra(
718                            RecognizerEngine.SEMANTIC_EXTRA);
719                        if (Config.LOGD) Log.d(TAG, "value " + value);
720                        if ("X".equals(value)) {
721                            exitActivity();
722                            return;
723                        }
724                    }
725
726                    if (mUsingBluetooth &&
727                            (intents.length == 1 ||
728                             !Intent.ACTION_CALL_PRIVILEGED.equals(
729                                    intents[0].getAction()))) {
730                        // When we're running in bluetooth mode, we expect
731                        // that the user is not looking at the screen and cannot
732                        // interact with the device in any way besides voice
733                        // commands.  In this case we need to minimize how many
734                        // interactions the user has to perform in order to call
735                        // someone.
736                        // So if there is only one match, instead of making the
737                        // user confirm, we just assume it's correct, speak
738                        // the choice over TTS, and then dispatch it.
739                        // If there are multiple matches for some intent type
740                        // besides "call", it's too difficult for the user to
741                        // explain which one they meant, so we just take the highest
742                        // confidence match and dispatch that.
743
744                        // Speak the sentence for the action we are about
745                        // to dispatch so that the user knows what is happening.
746                        mChosenAction = intents[0];
747                        performChoice();
748
749                        return;
750                    } else {
751                        // Either we are not running in bluetooth mode,
752                        // or we had multiple matches.  Either way, we need
753                        // the user to confirm the choice.
754                        // Put up a dialog from which the user can select
755                        // his/her choice.
756                        DialogInterface.OnCancelListener cancelListener =
757                            new DialogInterface.OnCancelListener() {
758
759                            public void onCancel(DialogInterface dialog) {
760                                if (Config.LOGD) {
761                                    Log.d(TAG, "cancelListener.onCancel");
762                                }
763                                dialog.dismiss();
764                                finish();
765                            }
766                       };
767
768                        DialogInterface.OnClickListener clickListener =
769                            new DialogInterface.OnClickListener() {
770
771                            public void onClick(DialogInterface dialog, int which) {
772                                if (Config.LOGD) {
773                                    Log.d(TAG, "clickListener.onClick " + which);
774                                }
775                                startActivityHelp(intents[which]);
776                                dialog.dismiss();
777                                finish();
778                            }
779                        };
780
781                        DialogInterface.OnClickListener negativeListener =
782                            new DialogInterface.OnClickListener() {
783
784                            public void onClick(DialogInterface dialog, int which) {
785                                if (Config.LOGD) {
786                                    Log.d(TAG, "negativeListener.onClick " +
787                                        which);
788                                }
789                                dialog.dismiss();
790                                finish();
791                            }
792                        };
793
794                        mAlertDialog =
795                                new AlertDialog.Builder(VoiceDialerActivity.this)
796                                .setTitle(R.string.title)
797                                .setItems(sentences, clickListener)
798                                .setOnCancelListener(cancelListener)
799                                .setNegativeButton(android.R.string.cancel,
800                                        negativeListener)
801                                .show();
802
803                        waitForChoice();
804                    }
805                }
806            });
807        }
808    }
809
810    private class ChoiceRecognizerClient implements RecognizerClient {
811        public void onRecognitionSuccess(final Intent[] intents) {
812            if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionSuccess");
813            if (mState != WAITING_FOR_CHOICE) {
814                if (Config.LOGD) Log.d(TAG, "not waiting for choice, ignoring");
815                return;
816            }
817
818            if (mAlertDialog != null) {
819                mAlertDialog.dismiss();
820            }
821
822            // disregard all but the first intent.
823            if (intents.length > 0) {
824                String value = intents[0].getStringExtra(
825                    RecognizerEngine.SEMANTIC_EXTRA);
826                if (Config.LOGD) Log.d(TAG, "value " + value);
827                if ("R".equals(value)) {
828                    if (mUsingBluetooth) {
829                        mHandler.post(new GreetingRunnable());
830                    } else {
831                        listenForCommand();
832                    }
833                } else if ("X".equals(value)) {
834                    exitActivity();
835                } else {
836                    // it's a phone type response
837                    mChosenAction = null;
838                    for (int i = 0; i < mAvailableChoices.length; i++) {
839                        if (value.equalsIgnoreCase(
840                                mAvailableChoices[i].getStringExtra(
841                                        CommandRecognizerEngine.PHONE_TYPE_EXTRA))) {
842                            mChosenAction = mAvailableChoices[i];
843                        }
844                    }
845
846                    if (mChosenAction != null) {
847                        performChoice();
848                    } else {
849                        // invalid choice
850                        if (Config.LOGD) Log.d(TAG, "invalid choice" + value);
851
852                        if (mUsingBluetooth) {
853                            mTtsParams.remove(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID);
854                            mTts.speak(getString(R.string.invalid_choice_tts),
855                                TextToSpeech.QUEUE_FLUSH,
856                                mTtsParams);
857                        }
858                        waitForChoice();
859                    }
860                }
861            }
862        }
863
864        public void onRecognitionFailure(String msg) {
865            if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionFailure");
866            exitActivity();
867        }
868
869        public void onRecognitionError(String err) {
870            if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onRecognitionError");
871            mHandler.post(new ErrorRunnable(R.string.recognition_error));
872            exitActivity();
873        }
874
875        public void onMicrophoneStart(InputStream mic) {
876            if (Config.LOGD) Log.d(TAG, "ChoiceRecognizerClient onMicrophoneStart");
877        }
878    }
879
880    private void speakChoices() {
881        if (Config.LOGD) Log.d(TAG, "speakChoices");
882        mState = SPEAKING_CHOICES;
883
884        String sentenceSpoken = spaceOutDigits(
885                mAvailableChoices[0].getStringExtra(
886                    RecognizerEngine.SENTENCE_EXTRA));
887
888        // When we have multiple choices, they will be of the form
889        // "call jack jones at home", "call jack jones on mobile".
890        // Speak the entire first sentence, then the last word from each
891        // of the remaining sentences.  This will come out to something
892        // like "call jack jones at home mobile or work".
893        StringBuilder builder = new StringBuilder();
894        builder.append(sentenceSpoken);
895
896        int count = mAvailableChoices.length;
897        for (int i=1; i < count; i++) {
898            if (i == count-1) {
899                builder.append(" or ");
900            } else {
901                builder.append(" ");
902            }
903            String tmpSentence = mAvailableChoices[i].getStringExtra(
904                    RecognizerEngine.SENTENCE_EXTRA);
905            String[] words = tmpSentence.trim().split(" ");
906            builder.append(words[words.length-1]);
907        }
908        mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
909                CHOICES_UTTERANCE);
910        mTts.speak(builder.toString(),
911            TextToSpeech.QUEUE_ADD,
912            mTtsParams);
913    }
914
915
916    private static String spaceOutDigits(String sentenceDisplay) {
917        // if we have a sentence of the form "dial 123 456 7890",
918        // we need to insert a space between each digit, otherwise
919        // the TTS engine will say "dial one hundred twenty three...."
920        // When there already is a space, we also insert a comma,
921        // so that it pauses between sections.  For the displayable
922        // sentence "dial 123 456 7890" it will speak
923        // "dial 1 2 3, 4 5 6, 7 8 9 0"
924        char buffer[] = sentenceDisplay.toCharArray();
925        StringBuilder builder = new StringBuilder();
926        boolean buildingNumber = false;
927        int l = sentenceDisplay.length();
928        for (int index = 0; index < l; index++) {
929            char c = buffer[index];
930            if (Character.isDigit(c)) {
931                if (buildingNumber) {
932                    builder.append(" ");
933                }
934                buildingNumber = true;
935                builder.append(c);
936            } else if (c == ' ') {
937                if (buildingNumber) {
938                    builder.append(",");
939                } else {
940                    builder.append(" ");
941                }
942            } else {
943                buildingNumber = false;
944                builder.append(c);
945            }
946        }
947        return builder.toString();
948    }
949
950    private void startActivityHelp(Intent intent) {
951        startActivity(intent);
952    }
953
954    private void listenForCommand() {
955        if (Config.LOGD) Log.d(TAG, ""
956                + "Command(): MICROPHONE_EXTRA: "+getArg(MICROPHONE_EXTRA)+
957                ", CONTACTS_EXTRA: "+getArg(CONTACTS_EXTRA));
958
959        mState = WAITING_FOR_COMMAND;
960        mRecognizerThread = new Thread() {
961            public void run() {
962                mCommandEngine.recognize(mCommandClient,
963                        VoiceDialerActivity.this,
964                        newFile(getArg(MICROPHONE_EXTRA)),
965                        mSampleRate);
966            }
967        };
968        mRecognizerThread.start();
969    }
970
971    private void listenForChoice() {
972        if (Config.LOGD) Log.d(TAG, "listenForChoice(): MICROPHONE_EXTRA: " +
973                getArg(MICROPHONE_EXTRA));
974
975        mState = WAITING_FOR_CHOICE;
976        mRecognizerThread = new Thread() {
977            public void run() {
978                mPhoneTypeChoiceEngine.recognize(mChoiceClient,
979                        VoiceDialerActivity.this,
980                        newFile(getArg(MICROPHONE_EXTRA)), mSampleRate);
981            }
982        };
983        mRecognizerThread.start();
984    }
985
986    private void exitActivity() {
987        synchronized(this) {
988            if (mState != EXITING) {
989                if (Config.LOGD) Log.d(TAG, "exitActivity");
990                mState = SPEAKING_GOODBYE;
991                if (mUsingBluetooth) {
992                    mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
993                            GOODBYE_UTTERANCE);
994                    mTts.speak(getString(R.string.goodbye_tts),
995                        TextToSpeech.QUEUE_FLUSH,
996                        mTtsParams);
997                    // Normally, the activity will finish() after the
998                    // utterance completes.  As a fallback in case the utterance
999                    // does not complete, post a delayed runnable finish the
1000                    // activity.
1001                    mFallbackRunnable = new OnTtsCompletionRunnable(true);
1002                    mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
1003                } else {
1004                    mHandler.postDelayed(new Runnable() {
1005                        public void run() {
1006                            finish();
1007                        }
1008                    }, EXIT_DELAY);
1009                }
1010            }
1011        }
1012    }
1013
1014    private String getArg(String name) {
1015        if (name == null) return null;
1016        String arg = getIntent().getStringExtra(name);
1017        if (arg != null) return arg;
1018        arg = SystemProperties.get("app.voicedialer." + name);
1019        return arg != null && arg.length() > 0 ? arg : null;
1020    }
1021
1022    private static File newFile(String name) {
1023        return name != null ? new File(name) : null;
1024    }
1025
1026    private int playSound(int toneType) {
1027        int msecDelay = 1;
1028
1029        // use the MediaPlayer to prompt the user
1030        if (mToneGenerator != null) {
1031            mToneGenerator.startTone(toneType);
1032            msecDelay = StrictMath.max(msecDelay, 300);
1033        }
1034        // use the Vibrator to prompt the user
1035        if (mAudioManager != null &&
1036                mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER)) {
1037            final int VIBRATOR_TIME = 150;
1038            final int VIBRATOR_GUARD_TIME = 150;
1039            Vibrator vibrator = new Vibrator();
1040            vibrator.vibrate(VIBRATOR_TIME);
1041            msecDelay = StrictMath.max(msecDelay,
1042                    VIBRATOR_TIME + VIBRATOR_GUARD_TIME);
1043        }
1044
1045
1046        return msecDelay;
1047    }
1048
1049    protected void onStop() {
1050        if (Config.LOGD) Log.d(TAG, "onStop");
1051
1052        synchronized(this) {
1053            mState = EXITING;
1054        }
1055
1056        if (mAlertDialog != null) {
1057            mAlertDialog.dismiss();
1058        }
1059
1060        // set the volume back to the level it was before we started.
1061        mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO,
1062                                      mBluetoothVoiceVolume, 0);
1063        mAudioManager.abandonAudioFocus(null);
1064
1065        // shut down bluetooth, if it exists
1066        if (mBluetoothHeadset != null) {
1067            mBluetoothHeadset.stopVoiceRecognition();
1068            mBluetoothHeadset.close();
1069            mBluetoothHeadset = null;
1070        }
1071
1072        // shut down recognizer and wait for the thread to complete
1073        if (mRecognizerThread !=  null) {
1074            mRecognizerThread.interrupt();
1075            try {
1076                mRecognizerThread.join();
1077            } catch (InterruptedException e) {
1078                if (Config.LOGD) Log.d(TAG, "onStop mRecognizerThread.join exception " + e);
1079            }
1080            mRecognizerThread = null;
1081        }
1082
1083        // clean up UI
1084        mHandler.removeCallbacks(mMicFlasher);
1085        mHandler.removeMessages(0);
1086
1087        if (mTts != null) {
1088            mTts.stop();
1089            mTts.shutdown();
1090            mTts = null;
1091        }
1092        unregisterReceiver(mReceiver);
1093
1094        super.onStop();
1095
1096        // It makes no sense to have this activity maintain state when in
1097        // background.  When it stops, it should just be destroyed.
1098        finish();
1099    }
1100
1101    private Runnable mMicFlasher = new Runnable() {
1102        int visible = View.VISIBLE;
1103
1104        public void run() {
1105            findViewById(R.id.microphone_view).setVisibility(visible);
1106            findViewById(R.id.state).setVisibility(visible);
1107            visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE;
1108            mHandler.postDelayed(this, 750);
1109        }
1110    };
1111
1112    @Override
1113    protected void onDestroy() {
1114        if (Config.LOGD) Log.d(TAG, "onDestroy");
1115        super.onDestroy();
1116    }
1117}