SpeechRecognition.java revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5package org.chromium.content.browser;
6
7import android.content.ComponentName;
8import android.content.Context;
9import android.content.Intent;
10import android.content.pm.PackageManager;
11import android.content.pm.PackageManager.NameNotFoundException;
12import android.content.pm.ResolveInfo;
13import android.content.pm.ServiceInfo;
14import android.os.Bundle;
15import android.speech.RecognitionListener;
16import android.speech.RecognitionService;
17import android.speech.RecognizerIntent;
18import android.speech.SpeechRecognizer;
19
20import org.chromium.base.CalledByNative;
21import org.chromium.base.JNINamespace;
22
23import java.util.ArrayList;
24import java.util.List;
25
26/**
27 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
28 * on Android. Using Android's platform recognizer offers several benefits, like good quality and
29 * good local fallback when no data connection is available.
30 */
31@JNINamespace("content")
32public class SpeechRecognition {
33
34    // Constants describing the speech recognition provider we depend on.
35    private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
36    private static final int PROVIDER_MIN_VERSION = 300207030;
37
38    // We track the recognition state to remember what events we need to send when recognition is
39    // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
40    // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
41    // called before.
42    private static final int STATE_IDLE = 0;
43    private static final int STATE_AWAITING_SPEECH = 1;
44    private static final int STATE_CAPTURING_SPEECH = 2;
45    private int mState;
46
47    // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
48    // PROVIDER_MIN_VERSION as selected by initialize().
49    private static ComponentName sRecognitionProvider;
50
51    private final Context mContext;
52    private final Intent mIntent;
53    private final RecognitionListener mListener;
54    private SpeechRecognizer mRecognizer;
55
56    // Native pointer to C++ SpeechRecognizerImplAndroid.
57    private long mNativeSpeechRecognizerImplAndroid;
58
59    // Remember if we are using continuous recognition.
60    private boolean mContinuous;
61
62    // Internal class to handle events from Android's SpeechRecognizer and route them to native.
63    class Listener implements RecognitionListener {
64
65        @Override
66        public void onBeginningOfSpeech() {
67            mState = STATE_CAPTURING_SPEECH;
68            nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
69        }
70
71        @Override
72        public void onBufferReceived(byte[] buffer) { }
73
74        @Override
75        public void onEndOfSpeech() {
76            // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
77            // events. The Android API documentation is vague as to when onEndOfSpeech is called in
78            // continuous mode, whereas the Web Speech API defines a stronger semantic on the
79            // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
80            // event is to trigger it when the last result is received or the session is aborted.
81            if (!mContinuous) {
82                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
83                // Since Android doesn't have a dedicated event for when audio capture is finished,
84                // we fire it after speech has ended.
85                nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
86                mState = STATE_IDLE;
87            }
88        }
89
90        @Override
91        public void onError(int error) {
92            int code = SpeechRecognitionError.NONE;
93
94            // Translate Android SpeechRecognizer errors to Web Speech API errors.
95            switch(error) {
96                case SpeechRecognizer.ERROR_AUDIO:
97                    code = SpeechRecognitionError.AUDIO;
98                    break;
99                case SpeechRecognizer.ERROR_CLIENT:
100                    code = SpeechRecognitionError.ABORTED;
101                    break;
102                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
103                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
104                    code = SpeechRecognitionError.NOT_ALLOWED;
105                    break;
106                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
107                case SpeechRecognizer.ERROR_NETWORK:
108                case SpeechRecognizer.ERROR_SERVER:
109                    code = SpeechRecognitionError.NETWORK;
110                    break;
111                case SpeechRecognizer.ERROR_NO_MATCH:
112                    code = SpeechRecognitionError.NO_MATCH;
113                    break;
114                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
115                    code = SpeechRecognitionError.NO_SPEECH;
116                    break;
117                default:
118                    assert false;
119                    return;
120            }
121
122            terminate(code);
123        }
124
125        @Override
126        public void onEvent(int event, Bundle bundle) { }
127
128        @Override
129        public void onPartialResults(Bundle bundle) {
130            handleResults(bundle, true);
131        }
132
133        @Override
134        public void onReadyForSpeech(Bundle bundle) {
135            mState = STATE_AWAITING_SPEECH;
136            nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
137        }
138
139        @Override
140        public void onResults(Bundle bundle) {
141            handleResults(bundle, false);
142            // We assume that onResults is called only once, at the end of a session, thus we
143            // terminate. If one day the recognition provider changes dictation mode behavior to
144            // call onResults several times, we should terminate only if (!mContinuous).
145            terminate(SpeechRecognitionError.NONE);
146        }
147
148        @Override
149        public void onRmsChanged(float rms) { }
150
151        private void handleResults(Bundle bundle, boolean provisional) {
152            if (mContinuous && provisional) {
153                // In continuous mode, Android's recognizer sends final results as provisional.
154                provisional = false;
155            }
156
157            ArrayList<String> list = bundle.getStringArrayList(
158                    SpeechRecognizer.RESULTS_RECOGNITION);
159            String[] results = list.toArray(new String[list.size()]);
160
161            float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
162
163            nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
164                                       results,
165                                       scores,
166                                       provisional);
167        }
168    }
169
170    /**
171     * This method must be called before any instance of SpeechRecognition can be created. It will
172     * query Android's package manager to find a suitable speech recognition provider that supports
173     * continuous recognition.
174     */
175    public static boolean initialize(Context context) {
176        if (!SpeechRecognizer.isRecognitionAvailable(context))
177            return false;
178
179        PackageManager pm = context.getPackageManager();
180        Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
181        final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);
182
183        for (ResolveInfo resolve : list) {
184            ServiceInfo service = resolve.serviceInfo;
185
186            if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
187                continue;
188
189            int versionCode;
190            try {
191                versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
192            } catch (NameNotFoundException e) {
193                continue;
194            }
195
196            if (versionCode < PROVIDER_MIN_VERSION)
197                continue;
198
199            sRecognitionProvider = new ComponentName(service.packageName, service.name);
200
201            return true;
202        }
203
204        // If we reach this point, we failed to find a suitable recognition provider.
205        return false;
206    }
207
208    private SpeechRecognition(final Context context, long nativeSpeechRecognizerImplAndroid) {
209        mContext = context;
210        mContinuous = false;
211        mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
212        mListener = new Listener();
213        mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
214
215        if (sRecognitionProvider != null) {
216            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, sRecognitionProvider);
217        } else {
218            // It is possible to force-enable the speech recognition web platform feature (using a
219            // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
220            // provider, in which case the first available speech recognition provider is used.
221            // Caveat: Continuous mode may not work as expected with a different provider.
222            mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
223        }
224
225        mRecognizer.setRecognitionListener(mListener);
226    }
227
228    // This function destroys everything when recognition is done, taking care to properly tear
229    // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
230    private void terminate(int error) {
231
232        if (mState != STATE_IDLE) {
233            if (mState == STATE_CAPTURING_SPEECH) {
234                nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
235            }
236            nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
237            mState = STATE_IDLE;
238        }
239
240        if (error != SpeechRecognitionError.NONE)
241            nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);
242
243        mRecognizer.destroy();
244        mRecognizer = null;
245        nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
246        mNativeSpeechRecognizerImplAndroid = 0;
247    }
248
249    @CalledByNative
250    private static SpeechRecognition createSpeechRecognition(
251            Context context, long nativeSpeechRecognizerImplAndroid) {
252        return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
253    }
254
255    @CalledByNative
256    private void startRecognition(String language, boolean continuous, boolean interimResults) {
257        if (mRecognizer == null)
258            return;
259
260        mContinuous = continuous;
261        mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
262        mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
263        mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interimResults);
264        mRecognizer.startListening(mIntent);
265    }
266
267    @CalledByNative
268    private void abortRecognition() {
269        if (mRecognizer == null)
270            return;
271
272        mRecognizer.cancel();
273        terminate(SpeechRecognitionError.ABORTED);
274    }
275
276    @CalledByNative
277    private void stopRecognition() {
278        if (mRecognizer == null)
279            return;
280
281        mContinuous = false;
282        mRecognizer.stopListening();
283    }
284
285    // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
286    private native void nativeOnAudioStart(long nativeSpeechRecognizerImplAndroid);
287    private native void nativeOnSoundStart(long nativeSpeechRecognizerImplAndroid);
288    private native void nativeOnSoundEnd(long nativeSpeechRecognizerImplAndroid);
289    private native void nativeOnAudioEnd(long nativeSpeechRecognizerImplAndroid);
290    private native void nativeOnRecognitionResults(long nativeSpeechRecognizerImplAndroid,
291                                                   String[] results,
292                                                   float[] scores,
293                                                   boolean provisional);
294    private native void nativeOnRecognitionError(long nativeSpeechRecognizerImplAndroid, int error);
295    private native void nativeOnRecognitionEnd(long nativeSpeechRecognizerImplAndroid);
296}
297