VoiceDialerActivity.java revision ef5d3e8cd31873c7af4902986ae61b408d0343bb
1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.voicedialer;
18
19import android.app.Activity;
20import android.app.AlertDialog;
21import android.app.Dialog;
22import android.content.Intent;
23import android.content.DialogInterface;
24import android.media.ToneGenerator;
25import android.media.AudioManager;
26import android.os.Bundle;
27import android.os.Handler;
28import android.os.SystemProperties;
29import android.os.Vibrator;
30import android.util.Config;
31import android.util.Log;
32import android.view.View;
33import android.widget.TextView;
34import android.widget.Toast;
35import java.io.File;
36import java.io.InputStream;
37import java.io.IOException;
38
39/**
40 * TODO: get rid of the anonymous classes
41 * TODO: merge with BluetoothVoiceDialerActivity
42 *
43 * This class is the user interface of the VoiceDialer application.
44 * Its life cycle is as follows:
45 * <ul>
46 * <li>The user presses the recognize key, and the VoiceDialerActivity starts.
47 * <li>A {@link RecognizerEngine} instance is created.
48 * <li>The RecognizerEngine signals the user to speak with the Vibrator.
49 * <li>The RecognizerEngine captures, processes, and recognizes speech
50 * against the names in the contact list.
 * <li>The RecognizerEngine calls onRecognitionSuccess with a list of
 * sentences and corresponding Intents.
53 * <li>If the list is one element long, the corresponding Intent is dispatched.
54 * <li>Else an {@link AlertDialog} containing the list of sentences is
55 * displayed.
56 * <li>The user selects the desired sentence from the list,
57 * and the corresponding Intent is dispatched.
 * </ul>
59 * Notes:
60 * <ul>
61 * <li>The RecognizerEngine is kept and reused for the next recognition cycle.
62 * </ul>
63 */
64public class VoiceDialerActivity extends Activity {
65
    // Tag attached to every log line written by this activity.
    private static final String TAG = "VoiceDialerActivity";

    // Argument names resolved by getArg(): first as a String extra on the
    // launching Intent, then as the system property "app.voicedialer.<name>".
    private static final String MICROPHONE_EXTRA = "microphone";
    private static final String CONTACTS_EXTRA = "contacts";
    private static final String SAMPLE_RATE_EXTRA = "samplerate";
    // Bundle key under which the recognized Intents are handed to the dialog.
    private static final String INTENTS_KEY = "intents";

    // Delay in milliseconds before the activity finishes after a recognition
    // failure or error.
    private static final int FAIL_PAUSE_MSEC = 5000;
    // Sample rate handed to the recognizer when no "samplerate" argument is
    // supplied; the beep detector also computes its block sizes from it.
    private static final int SAMPLE_RATE = 11025;

    // Id of the managed dialog that lists the recognized sentences.
    private static final int DIALOG_ID = 1;

    // Static, so a single engine instance is shared by every instance of
    // this activity.
    private final static CommandRecognizerEngine mCommandEngine =
            new CommandRecognizerEngine();
    // Callback object passed to the engine; recreated in onStart().
    private CommandRecognizerClient mCommandClient;
    // Non-null only while running in test mode, i.e. when the "microphone"
    // argument names a directory; cleared in onStop().
    private VoiceDialerTester mVoiceDialerTester;
    // Created in onCreate(); used to post UI work and delayed actions.
    private Handler mHandler;
    // Thread that runs the recognizer; started by startWork() and joined in
    // onStop().
    private Thread mRecognizerThread = null;
    private AudioManager mAudioManager;
    // Plays the prompt tones; released and set to null in onStop().
    private ToneGenerator mToneGenerator;
    // Dialog built by onCreateDialog(); reset to null when it is dismissed.
    private AlertDialog mAlertDialog;
87
88    @Override
89    protected void onCreate(Bundle icicle) {
90        if (Config.LOGD) Log.d(TAG, "onCreate");
91        super.onCreate(icicle);
92        mHandler = new Handler();
93        mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE);
94        mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING,
95                ToneGenerator.MAX_VOLUME);
96    }
97
98    protected void onStart() {
99        if (Config.LOGD) Log.d(TAG, "onStart "  + getIntent());
100        super.onStart();
101        mAudioManager.requestAudioFocus(
102                null, AudioManager.STREAM_MUSIC,
103                AudioManager.AUDIOFOCUS_GAIN_TRANSIENT);
104
105        mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA)));
106        mCommandClient = new CommandRecognizerClient();
107        mCommandEngine.setMinimizeResults(false);
108        mCommandEngine.setAllowOpenEntries(true);
109
110        // open main window
111        setTheme(android.R.style.Theme_Dialog);
112        setTitle(R.string.title);
113        setContentView(R.layout.voice_dialing);
114        findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
115        findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
116        findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE);
117        if (RecognizerLogger.isEnabled(this)) {
118            ((TextView)findViewById(R.id.substate)).setText(R.string.logging_enabled);
119        }
120
121        // start the tester, if present
122        mVoiceDialerTester = null;
123        File micDir = newFile(getArg(MICROPHONE_EXTRA));
124        if (micDir != null && micDir.isDirectory()) {
125            mVoiceDialerTester = new VoiceDialerTester(micDir);
126            startNextTest();
127            return;
128        }
129
130        startWork();
131    }
132
133    private void startWork() {
134        // start the engine
135        mRecognizerThread = new Thread() {
136            public void run() {
137                if (Config.LOGD) Log.d(TAG, "onCreate.Runnable.run");
138                String sampleRateStr = getArg(SAMPLE_RATE_EXTRA);
139                int sampleRate = SAMPLE_RATE;
140                if (sampleRateStr != null) {
141                    sampleRate = Integer.parseInt(sampleRateStr);
142                }
143                mCommandEngine.recognize(mCommandClient, VoiceDialerActivity.this,
144                        newFile(getArg(MICROPHONE_EXTRA)),
145                        sampleRate);
146            }
147        };
148        mRecognizerThread.start();
149    }
150
151    private String getArg(String name) {
152        if (name == null) return null;
153        String arg = getIntent().getStringExtra(name);
154        if (arg != null) return arg;
155        arg = SystemProperties.get("app.voicedialer." + name);
156        return arg != null && arg.length() > 0 ? arg : null;
157    }
158
159    private static File newFile(String name) {
160        return name != null ? new File(name) : null;
161    }
162
163    private void startNextTest() {
164        mHandler.postDelayed(new Runnable() {
165            public void run() {
166                if (mVoiceDialerTester == null) {
167                    return;
168                }
169                if (!mVoiceDialerTester.stepToNextTest()) {
170                    mVoiceDialerTester.report();
171                    notifyText("Test completed!");
172                    finish();
173                    return;
174                }
175                File microphone = mVoiceDialerTester.getWavFile();
176                File contacts = newFile(getArg(CONTACTS_EXTRA));
177
178                notifyText("Testing\n" + microphone + "\n" + contacts);
179                mCommandEngine.recognize(mCommandClient, VoiceDialerActivity.this,
180                        microphone, SAMPLE_RATE);
181            }
182        }, 2000);
183    }
184
185    private int playSound(int toneType) {
186        int msecDelay = 1;
187
188        // use the MediaPlayer to prompt the user
189        if (mToneGenerator != null) {
190            mToneGenerator.startTone(toneType);
191            msecDelay = StrictMath.max(msecDelay, 300);
192        }
193
194        // use the Vibrator to prompt the user
195        if ((mAudioManager != null) &&
196                (mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER))) {
197            final int VIBRATOR_TIME = 150;
198            final int VIBRATOR_GUARD_TIME = 150;
199            Vibrator vibrator = new Vibrator();
200            vibrator.vibrate(VIBRATOR_TIME);
201            msecDelay = StrictMath.max(msecDelay,
202                    VIBRATOR_TIME + VIBRATOR_GUARD_TIME);
203        }
204
205        return msecDelay;
206    }
207
208    @Override
209    protected void onStop() {
210        if (Config.LOGD) Log.d(TAG, "onStop");
211
212        mAudioManager.abandonAudioFocus(null);
213
214        // no more tester
215        mVoiceDialerTester = null;
216
217        // shut down recognizer and wait for the thread to complete
218        if (mRecognizerThread !=  null) {
219            mRecognizerThread.interrupt();
220            try {
221                mRecognizerThread.join();
222            } catch (InterruptedException e) {
223                if (Config.LOGD) Log.d(TAG, "onStop mRecognizerThread.join exception " + e);
224            }
225            mRecognizerThread = null;
226        }
227
228        // clean up UI
229        mHandler.removeCallbacks(mMicFlasher);
230        mHandler.removeMessages(0);
231
232        // clean up ToneGenerator
233        if (mToneGenerator != null) {
234            mToneGenerator.release();
235            mToneGenerator = null;
236        }
237
238        super.onStop();
239
240        // It makes no sense to have this activity maintain state when in
241        // background.  When it stops, it should just be destroyed.
242        finish();
243    }
244
245    private void notifyText(final CharSequence msg) {
246        Toast.makeText(VoiceDialerActivity.this, msg, Toast.LENGTH_SHORT).show();
247    }
248
249    private Runnable mMicFlasher = new Runnable() {
250        int visible = View.VISIBLE;
251
252        public void run() {
253            findViewById(R.id.microphone_view).setVisibility(visible);
254            findViewById(R.id.state).setVisibility(visible);
255            visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE;
256            mHandler.postDelayed(this, 750);
257        }
258    };
259
260
261    protected Dialog onCreateDialog(int id, Bundle args) {
262        final Intent intents[] = (Intent[])args.getParcelableArray(INTENTS_KEY);
263
264        DialogInterface.OnClickListener clickListener =
265            new DialogInterface.OnClickListener() {
266
267            public void onClick(DialogInterface dialog, int which) {
268                if (Config.LOGD) Log.d(TAG, "clickListener.onClick " + which);
269                startActivityHelp(intents[which]);
270                dismissDialog(DIALOG_ID);
271                mAlertDialog = null;
272                finish();
273            }
274
275        };
276
277        DialogInterface.OnCancelListener cancelListener =
278            new DialogInterface.OnCancelListener() {
279
280            public void onCancel(DialogInterface dialog) {
281                if (Config.LOGD) Log.d(TAG, "cancelListener.onCancel");
282                dismissDialog(DIALOG_ID);
283                mAlertDialog = null;
284                finish();
285            }
286
287        };
288
289        DialogInterface.OnClickListener positiveListener =
290            new DialogInterface.OnClickListener() {
291
292            public void onClick(DialogInterface dialog, int which) {
293                if (Config.LOGD) Log.d(TAG, "positiveListener.onClick " + which);
294                if (intents.length == 1 && which == -1) which = 0;
295                startActivityHelp(intents[which]);
296                dismissDialog(DIALOG_ID);
297                mAlertDialog = null;
298                finish();
299            }
300
301        };
302
303        DialogInterface.OnClickListener negativeListener =
304            new DialogInterface.OnClickListener() {
305
306            public void onClick(DialogInterface dialog, int which) {
307                if (Config.LOGD) Log.d(TAG, "negativeListener.onClick " + which);
308                dismissDialog(DIALOG_ID);
309                mAlertDialog = null;
310                finish();
311            }
312
313        };
314
315        String[] sentences = new String[intents.length];
316        for (int i = 0; i < intents.length; i++) {
317            sentences[i] = intents[i].getStringExtra(
318                    RecognizerEngine.SENTENCE_EXTRA);
319        }
320
321        mAlertDialog = intents.length > 1 ?
322                new AlertDialog.Builder(VoiceDialerActivity.this)
323                .setTitle(R.string.title)
324                .setItems(sentences, clickListener)
325                .setOnCancelListener(cancelListener)
326                .setNegativeButton(android.R.string.cancel, negativeListener)
327                .show()
328                :
329                new AlertDialog.Builder(VoiceDialerActivity.this)
330                .setTitle(R.string.title)
331                .setItems(sentences, clickListener)
332                .setOnCancelListener(cancelListener)
333                .setPositiveButton(android.R.string.ok, positiveListener)
334                .setNegativeButton(android.R.string.cancel, negativeListener)
335                .show();
336
337        return mAlertDialog;
338    }
339
340    private class CommandRecognizerClient implements RecognizerClient {
341        static final int MIN_VOLUME_TO_SKIP = 2;
342        /**
343         * Called by the {@link RecognizerEngine} when the microphone is started.
344         */
345        public void onMicrophoneStart(InputStream mic) {
346            if (Config.LOGD) Log.d(TAG, "onMicrophoneStart");
347            playSound(ToneGenerator.TONE_PROP_BEEP);
348
349            int ringVolume = mAudioManager.getStreamVolume(
350                    AudioManager.STREAM_RING);
351            Log.d(TAG, "ringVolume " + ringVolume);
352
353            if (ringVolume >= MIN_VOLUME_TO_SKIP) {
354                // now we're playing a sound, and corrupting the input sample.
355                // So we need to pull that junk off of the input stream so that the
356                // recognizer won't see it.
357                try {
358                    skipBeep(mic);
359                } catch (java.io.IOException e) {
360                    Log.e(TAG, "IOException " + e);
361                }
362            } else {
363                Log.d(TAG, "no tone");
364            }
365
366            if (mVoiceDialerTester != null) return;
367
368            mHandler.post(new Runnable() {
369                public void run() {
370                    findViewById(R.id.microphone_loading_view).setVisibility(View.INVISIBLE);
371                    ((TextView)findViewById(R.id.state)).setText(R.string.listening);
372                    mHandler.post(mMicFlasher);
373                }
374            });
375        }
376
        /**
         * Beep detection: tuning values and state identifiers used by
         * skipBeep(). START, RISING and TOP are the three states of its
         * detection state machine.
         */
        private static final int START_WINDOW_MS = 500;  // Beep detection window duration in ms
        private static final int SINE_FREQ = 400;        // base sine frequency on beep
        private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block
        private static final int THRESHOLD = 8;          // absolute pseudo energy threshold
        private static final int START = 0;              // beep detection start
        private static final int RISING = 1;             // beep rising edge start
        private static final int TOP = 2;                // beep constant energy detected
387
        /**
         * Reads audio off the front of the stream until the end of the prompt
         * beep is detected or the detection window has been consumed.
         *
         * The stream is read in fixed-size blocks. For each block a pseudo
         * energy (the variance of its samples) is computed and fed to a
         * three-state machine: START waits for the energy to cross the
         * absolute threshold while more than doubling; RISING waits for it to
         * level off; TOP waits for it to fall below the threshold or below
         * half of the recorded peak, at which point the method returns.
         * Everything read up to that point is discarded.
         *
         * NOTE(review): block and window sizes are derived from the
         * SAMPLE_RATE constant, not from the rate actually passed to the
         * recognizer - confirm this is acceptable when a "samplerate"
         * argument is supplied.
         *
         * @param is stream to read from; a null stream is ignored
         * @throws IOException if the stream ends before a block is filled, or
         *         if the underlying read fails
         */
        void skipBeep(InputStream is) throws IOException {
            // samples per block: NUM_PERIODS_BLOCK periods of the beep's sine
            int sampleCount = ((SAMPLE_RATE / SINE_FREQ) * NUM_PERIODS_BLOCK);
            // two bytes per sample
            int blockSize = 2 * sampleCount; // energy averaging block

            if (is == null || blockSize == 0) {
                return;
            }

            byte[] buf = new byte[blockSize];
            // byte length of the detection window, rounded up to whole blocks
            int maxBytes = 2 * ((START_WINDOW_MS * SAMPLE_RATE) / 1000);
            maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize;

            int count = 0;      // bytes consumed so far
            int state = START;  // detection state
            long prevE = 0; // previous pseudo energy
            long peak = 0;      // energy recorded when the TOP state was entered
            int threshold =  THRESHOLD*sampleCount;  // absolute energy threshold
            Log.d(TAG, "blockSize " + blockSize);

            while (count < maxBytes) {
                // fill one complete block; read() may return fewer bytes
                // than requested
                int cnt = 0;
                while (cnt < blockSize) {
                    int n = is.read(buf, cnt, blockSize-cnt);
                    if (n < 0) {
                        // end of stream before the block was complete
                        throw new java.io.IOException();
                    }
                    cnt += n;
                }

                // compute pseudo energy
                cnt = blockSize;
                long sumx = 0;
                long sumxx = 0;
                while (cnt >= 2) {
                    // assemble a signed 16-bit sample, low byte first
                    short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF));
                    sumx += smp;
                    sumxx += smp*smp;
                    cnt -= 2;
                }
                // variance of the block: E[x^2] - E[x]^2, in integer arithmetic
                long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount);
                Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy);

                switch (state) {
                    case START:
                        // prevE != 0 keeps the very first block (no previous
                        // energy yet) from counting as a rising edge
                        if (energy > threshold && energy > (prevE * 2) && prevE != 0) {
                            // rising edge if energy doubled and > abs threshold
                            state = RISING;
                            if (Config.LOGD) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE));
                        }
                        break;
                    case RISING:
                        if (energy < threshold || energy < (prevE / 2)){
                            // energy fell back below half of previous, back to start
                            if (Config.LOGD) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE));
                            peak = 0;
                            state = START;
                        } else if (energy > (prevE / 2) && energy < (prevE * 2)) {
                            // Start of constant energy
                            if (Config.LOGD) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE));
                            if (peak < energy) {
                                peak = energy;
                            }
                            state = TOP;
                        }
                        // otherwise the energy is still at least doubling:
                        // stay in RISING
                        break;
                    case TOP:
                        if (energy < threshold || energy < (peak / 2)) {
                            // e went to less than half of the peak
                            if (Config.LOGD) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE));
                            // end of beep: everything read so far is dropped
                            return;
                        }
                        break;
                    }
                prevE = energy;
                count += blockSize;
            }
            if (Config.LOGD) Log.d(TAG, "no beep detected, timed out");
        }
466
467        /**
468         * Called by the {@link RecognizerEngine} if the recognizer fails.
469         */
470        public void onRecognitionFailure(final String msg) {
471            if (Config.LOGD) Log.d(TAG, "onRecognitionFailure " + msg);
472
473            // get work off UAPI thread
474            mHandler.post(new Runnable() {
475                public void run() {
476                    // failure, so beep about it
477                    playSound(ToneGenerator.TONE_PROP_NACK);
478
479                    mHandler.removeCallbacks(mMicFlasher);
480                    ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again);
481                    findViewById(R.id.state).setVisibility(View.VISIBLE);
482                    findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
483                    findViewById(R.id.retry_view).setVisibility(View.VISIBLE);
484
485                    if (mVoiceDialerTester != null) {
486                        mVoiceDialerTester.onRecognitionFailure(msg);
487                        startNextTest();
488                        return;
489                    }
490
491                    mHandler.postDelayed(new Runnable() {
492                        public void run() {
493                            finish();
494                        }
495                    }, FAIL_PAUSE_MSEC);
496                }
497            });
498        }
499
500        /**
501         * Called by the {@link RecognizerEngine} on an internal error.
502         */
503        public void onRecognitionError(final String msg) {
504            if (Config.LOGD) Log.d(TAG, "onRecognitionError " + msg);
505
506            // get work off UAPI thread
507            mHandler.post(new Runnable() {
508                public void run() {
509                    // error, so beep about it
510                    playSound(ToneGenerator.TONE_PROP_NACK);
511
512                    mHandler.removeCallbacks(mMicFlasher);
513                    ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again);
514                    ((TextView)findViewById(R.id.substate)).setText(R.string.recognition_error);
515                    findViewById(R.id.state).setVisibility(View.VISIBLE);
516                    findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
517                    findViewById(R.id.retry_view).setVisibility(View.VISIBLE);
518
519                    if (mVoiceDialerTester != null) {
520                        mVoiceDialerTester.onRecognitionError(msg);
521                        startNextTest();
522                        return;
523                    }
524
525                    mHandler.postDelayed(new Runnable() {
526                        public void run() {
527                            finish();
528                        }
529                    }, FAIL_PAUSE_MSEC);
530                }
531            });
532        }
533
534        /**
535         * Called by the {@link RecognizerEngine} when is succeeds.  If there is
536         * only one item, then the Intent is dispatched immediately.
537         * If there are more, then an AlertDialog is displayed and the user is
538         * prompted to select.
539         * @param intents a list of Intents corresponding to the sentences.
540         */
541        public void onRecognitionSuccess(final Intent[] intents) {
542            if (Config.LOGD) Log.d(TAG, "onRecognitionSuccess " + intents.length);
543            // repackage our intents as a bundle so that we can pass it into
544            // showDialog.  This in required so that we can handle it when
545            // orientation changes and the activity is destroyed and recreated.
546            final Bundle args = new Bundle();
547            args.putParcelableArray(INTENTS_KEY, intents);
548
549            mHandler.post(new Runnable() {
550
551                public void run() {
552                    // success, so beep about it
553                    playSound(ToneGenerator.TONE_PROP_ACK);
554
555                    mHandler.removeCallbacks(mMicFlasher);
556
557                    showDialog(DIALOG_ID, args);
558
559                    // start the next test
560                    if (mVoiceDialerTester != null) {
561                        mVoiceDialerTester.onRecognitionSuccess(intents);
562                        startNextTest();
563                        mHandler.postDelayed(new Runnable() {
564                            public void run() {
565                                dismissDialog(DIALOG_ID);
566                                mAlertDialog = null;
567                            }
568                        }, 2000);
569                    }
570                }
571            });
572        }
573    }
574
575    // post a Toast if not real contacts or microphone
576    private void startActivityHelp(Intent intent) {
577        if (getArg(MICROPHONE_EXTRA) == null &&
578                getArg(CONTACTS_EXTRA) == null) {
579            startActivity(intent);
580        } else {
581            notifyText(intent.
582                    getStringExtra(RecognizerEngine.SENTENCE_EXTRA) +
583                    "\n" + intent.toString());
584        }
585
586    }
587    @Override
588    protected void onDestroy() {
589        if (Config.LOGD) Log.d(TAG, "onDestroy");
590        super.onDestroy();
591    }
592}
593