SpeechRecognition.java revision a36e5920737c6adbddd3e43b760e5de8431db6e0
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5package org.chromium.content.browser;
6
7import android.content.ComponentName;
8import android.content.Context;
9import android.content.Intent;
10import android.content.pm.PackageManager;
11import android.content.pm.PackageManager.NameNotFoundException;
12import android.content.pm.ResolveInfo;
13import android.content.pm.ServiceInfo;
14import android.os.Bundle;
15import android.speech.RecognitionListener;
16import android.speech.RecognitionService;
17import android.speech.RecognizerIntent;
18import android.speech.SpeechRecognizer;
19
20import org.chromium.base.CalledByNative;
21import org.chromium.base.JNINamespace;
22import org.chromium.content.browser.SpeechRecognitionError;
23
24import java.util.ArrayList;
25import java.util.List;
26
27/**
28 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
29 * on Android. Using Android's platform recognizer offers several benefits, like good quality and
30 * good local fallback when no data connection is available.
31 */
32@JNINamespace("content")
33public class SpeechRecognition {
34
35    // Constants describing the speech recognition provider we depend on.
36    private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
37    private static final int PROVIDER_MIN_VERSION = 300207030;
38
39    // We track the recognition state to remember what events we need to send when recognition is
40    // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
41    // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
42    // called before.
43    private static final int STATE_IDLE = 0;
44    private static final int STATE_AWAITING_SPEECH = 1;
45    private static final int STATE_CAPTURING_SPEECH = 2;
46    private int mState;
47
48    // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
49    // PROVIDER_MIN_VERSION as selected by initialize().
50    private static ComponentName mRecognitionProvider;
51
52    private final Context mContext;
53    private final Intent mIntent;
54    private final RecognitionListener mListener;
55    private SpeechRecognizer mRecognizer;
56
57    // Native pointer to C++ SpeechRecognizerImplAndroid.
58    private int mNativeSpeechRecognizerImplAndroid;
59
60    // Remember if we are using continuous recognition.
61    private boolean mContinuous;
62
63    // Internal class to handle events from Android's SpeechRecognizer and route them to native.
64    class Listener implements RecognitionListener {
65
66        @Override
67        public void onBeginningOfSpeech() {
68            mState = STATE_CAPTURING_SPEECH;
69            nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
70        }
71
72        @Override
73        public void onBufferReceived(byte[] buffer) { }
74
75        @Override
76        public void onEndOfSpeech() {
77            // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
78            // events. The Android API documentation is vague as to when onEndOfSpeech is called in
79            // continuous mode, whereas the Web Speech API defines a stronger semantic on the
80            // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
81            // event is to trigger it when the last result is received or the session is aborted.
82            if (!mContinuous) {
83                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
84                // Since Android doesn't have a dedicated event for when audio capture is finished,
85                // we fire it after speech has ended.
86                nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
87                mState = STATE_IDLE;
88            }
89        }
90
91        @Override
92        public void onError(int error) {
93            int code = SpeechRecognitionError.NONE;
94
95            // Translate Android SpeechRecognizer errors to Web Speech API errors.
96            switch(error) {
97                case SpeechRecognizer.ERROR_AUDIO:
98                    code = SpeechRecognitionError.AUDIO;
99                    break;
100                case SpeechRecognizer.ERROR_CLIENT:
101                    code = SpeechRecognitionError.ABORTED;
102                    break;
103                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
104                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
105                    code = SpeechRecognitionError.NOT_ALLOWED;
106                    break;
107                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
108                case SpeechRecognizer.ERROR_NETWORK:
109                case SpeechRecognizer.ERROR_SERVER:
110                    code = SpeechRecognitionError.NETWORK;
111                    break;
112                case SpeechRecognizer.ERROR_NO_MATCH:
113                    code = SpeechRecognitionError.NO_MATCH;
114                    break;
115                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
116                    code = SpeechRecognitionError.NO_SPEECH;
117                    break;
118                default:
119                    assert false;
120                    return;
121            }
122
123            terminate(code);
124        }
125
126        @Override
127        public void onEvent(int event, Bundle bundle) { }
128
129        @Override
130        public void onPartialResults(Bundle bundle) {
131            handleResults(bundle, true);
132        }
133
134        @Override
135        public void onReadyForSpeech(Bundle bundle) {
136            mState = STATE_AWAITING_SPEECH;
137            nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
138        }
139
140        @Override
141        public void onResults(Bundle bundle) {
142            handleResults(bundle, false);
143            // We assume that onResults is called only once, at the end of a session, thus we
144            // terminate. If one day the recognition provider changes dictation mode behavior to
145            // call onResults several times, we should terminate only if (!mContinuous).
146            terminate(SpeechRecognitionError.NONE);
147        }
148
149        @Override
150        public void onRmsChanged(float rms) { }
151
152        private void handleResults(Bundle bundle, boolean provisional) {
153            if (mContinuous && provisional) {
154                // In continuous mode, Android's recognizer sends final results as provisional.
155                provisional = false;
156            }
157
158            ArrayList<String> list = bundle.getStringArrayList(
159                    SpeechRecognizer.RESULTS_RECOGNITION);
160            String[] results = list.toArray(new String[list.size()]);
161
162            float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
163
164            nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
165                                       results,
166                                       scores,
167                                       provisional);
168        }
169    }
170
171    // This method must be called before any instance of SpeechRecognition can be created. It will
172    // query Android's package manager to find a suitable speech recognition provider that supports
173    // continuous recognition.
174    public static boolean initialize(Context context) {
175        if (!SpeechRecognizer.isRecognitionAvailable(context))
176            return false;
177
178        PackageManager pm = context.getPackageManager();
179        Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
180        final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);
181
182        for (ResolveInfo resolve : list) {
183            ServiceInfo service = resolve.serviceInfo;
184
185            if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
186                continue;
187
188            int versionCode;
189            try {
190                versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
191            } catch (NameNotFoundException e) {
192                continue;
193            }
194
195            if (versionCode < PROVIDER_MIN_VERSION)
196                continue;
197
198            mRecognitionProvider = new ComponentName(service.packageName, service.name);
199
200            return true;
201        }
202
203        // If we reach this point, we failed to find a suitable recognition provider.
204        return false;
205    }
206
207    private SpeechRecognition(final Context context, int nativeSpeechRecognizerImplAndroid) {
208        mContext = context;
209        mContinuous = false;
210        mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
211        mListener = new Listener();
212        mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
213
214        if (mRecognitionProvider != null) {
215            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, mRecognitionProvider);
216        } else {
217            // It is possible to force-enable the speech recognition web platform feature (using a
218            // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
219            // provider, in which case the first available speech recognition provider is used.
220            // Caveat: Continuous mode may not work as expected with a different provider.
221            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
222        }
223
224        mRecognizer.setRecognitionListener(mListener);
225    }
226
227    // This function destroys everything when recognition is done, taking care to properly tear
228    // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
229    private void terminate(int error) {
230
231        if (mState != STATE_IDLE) {
232            if (mState == STATE_CAPTURING_SPEECH) {
233                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
234            }
235            nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
236            mState = STATE_IDLE;
237        }
238
239        if (error != SpeechRecognitionError.NONE)
240            nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);
241
242        mRecognizer.destroy();
243        mRecognizer = null;
244        nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
245        mNativeSpeechRecognizerImplAndroid = 0;
246    }
247
248    @CalledByNative
249    private static SpeechRecognition createSpeechRecognition(
250            Context context, int nativeSpeechRecognizerImplAndroid) {
251        return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
252    }
253
254    @CalledByNative
255    private void startRecognition(String language, boolean continuous, boolean interim_results) {
256        if (mRecognizer == null)
257            return;
258
259        mContinuous = continuous;
260        mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
261        mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
262        mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interim_results);
263        mRecognizer.startListening(mIntent);
264    }
265
266    @CalledByNative
267    private void abortRecognition() {
268        if (mRecognizer == null)
269            return;
270
271        mRecognizer.cancel();
272        terminate(SpeechRecognitionError.ABORTED);
273    }
274
275    @CalledByNative
276    private void stopRecognition() {
277        if (mRecognizer == null)
278            return;
279
280        mContinuous = false;
281        mRecognizer.stopListening();
282    }
283
284    // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
285    private native void nativeOnAudioStart(int nativeSpeechRecognizerImplAndroid);
286    private native void nativeOnSoundStart(int nativeSpeechRecognizerImplAndroid);
287    private native void nativeOnSoundEnd(int nativeSpeechRecognizerImplAndroid);
288    private native void nativeOnAudioEnd(int nativeSpeechRecognizerImplAndroid);
289    private native void nativeOnRecognitionResults(int nativeSpeechRecognizerImplAndroid,
290                                                   String[] results,
291                                                   float[] scores,
292                                                   boolean provisional);
293    private native void nativeOnRecognitionError(int nativeSpeechRecognizerImplAndroid, int error);
294    private native void nativeOnRecognitionEnd(int nativeSpeechRecognizerImplAndroid);
295}
296