// SpeechRecognition.java revision a36e5920737c6adbddd3e43b760e5de8431db6e0
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5package org.chromium.content.browser; 6 7import android.content.ComponentName; 8import android.content.Context; 9import android.content.Intent; 10import android.content.pm.PackageManager; 11import android.content.pm.PackageManager.NameNotFoundException; 12import android.content.pm.ResolveInfo; 13import android.content.pm.ServiceInfo; 14import android.os.Bundle; 15import android.speech.RecognitionListener; 16import android.speech.RecognitionService; 17import android.speech.RecognizerIntent; 18import android.speech.SpeechRecognizer; 19 20import org.chromium.base.CalledByNative; 21import org.chromium.base.JNINamespace; 22import org.chromium.content.browser.SpeechRecognitionError; 23 24import java.util.ArrayList; 25import java.util.List; 26 27/** 28 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API 29 * on Android. Using Android's platform recognizer offers several benefits, like good quality and 30 * good local fallback when no data connection is available. 31 */ 32@JNINamespace("content") 33public class SpeechRecognition { 34 35 // Constants describing the speech recognition provider we depend on. 36 private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox"; 37 private static final int PROVIDER_MIN_VERSION = 300207030; 38 39 // We track the recognition state to remember what events we need to send when recognition is 40 // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more 41 // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were 42 // called before. 
43 private static final int STATE_IDLE = 0; 44 private static final int STATE_AWAITING_SPEECH = 1; 45 private static final int STATE_CAPTURING_SPEECH = 2; 46 private int mState; 47 48 // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and 49 // PROVIDER_MIN_VERSION as selected by initialize(). 50 private static ComponentName mRecognitionProvider; 51 52 private final Context mContext; 53 private final Intent mIntent; 54 private final RecognitionListener mListener; 55 private SpeechRecognizer mRecognizer; 56 57 // Native pointer to C++ SpeechRecognizerImplAndroid. 58 private int mNativeSpeechRecognizerImplAndroid; 59 60 // Remember if we are using continuous recognition. 61 private boolean mContinuous; 62 63 // Internal class to handle events from Android's SpeechRecognizer and route them to native. 64 class Listener implements RecognitionListener { 65 66 @Override 67 public void onBeginningOfSpeech() { 68 mState = STATE_CAPTURING_SPEECH; 69 nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid); 70 } 71 72 @Override 73 public void onBufferReceived(byte[] buffer) { } 74 75 @Override 76 public void onEndOfSpeech() { 77 // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending 78 // events. The Android API documentation is vague as to when onEndOfSpeech is called in 79 // continuous mode, whereas the Web Speech API defines a stronger semantic on the 80 // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend 81 // event is to trigger it when the last result is received or the session is aborted. 82 if (!mContinuous) { 83 nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid); 84 // Since Android doesn't have a dedicated event for when audio capture is finished, 85 // we fire it after speech has ended. 
86 nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid); 87 mState = STATE_IDLE; 88 } 89 } 90 91 @Override 92 public void onError(int error) { 93 int code = SpeechRecognitionError.NONE; 94 95 // Translate Android SpeechRecognizer errors to Web Speech API errors. 96 switch(error) { 97 case SpeechRecognizer.ERROR_AUDIO: 98 code = SpeechRecognitionError.AUDIO; 99 break; 100 case SpeechRecognizer.ERROR_CLIENT: 101 code = SpeechRecognitionError.ABORTED; 102 break; 103 case SpeechRecognizer.ERROR_RECOGNIZER_BUSY: 104 case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS: 105 code = SpeechRecognitionError.NOT_ALLOWED; 106 break; 107 case SpeechRecognizer.ERROR_NETWORK_TIMEOUT: 108 case SpeechRecognizer.ERROR_NETWORK: 109 case SpeechRecognizer.ERROR_SERVER: 110 code = SpeechRecognitionError.NETWORK; 111 break; 112 case SpeechRecognizer.ERROR_NO_MATCH: 113 code = SpeechRecognitionError.NO_MATCH; 114 break; 115 case SpeechRecognizer.ERROR_SPEECH_TIMEOUT: 116 code = SpeechRecognitionError.NO_SPEECH; 117 break; 118 default: 119 assert false; 120 return; 121 } 122 123 terminate(code); 124 } 125 126 @Override 127 public void onEvent(int event, Bundle bundle) { } 128 129 @Override 130 public void onPartialResults(Bundle bundle) { 131 handleResults(bundle, true); 132 } 133 134 @Override 135 public void onReadyForSpeech(Bundle bundle) { 136 mState = STATE_AWAITING_SPEECH; 137 nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid); 138 } 139 140 @Override 141 public void onResults(Bundle bundle) { 142 handleResults(bundle, false); 143 // We assume that onResults is called only once, at the end of a session, thus we 144 // terminate. If one day the recognition provider changes dictation mode behavior to 145 // call onResults several times, we should terminate only if (!mContinuous). 
146 terminate(SpeechRecognitionError.NONE); 147 } 148 149 @Override 150 public void onRmsChanged(float rms) { } 151 152 private void handleResults(Bundle bundle, boolean provisional) { 153 if (mContinuous && provisional) { 154 // In continuous mode, Android's recognizer sends final results as provisional. 155 provisional = false; 156 } 157 158 ArrayList<String> list = bundle.getStringArrayList( 159 SpeechRecognizer.RESULTS_RECOGNITION); 160 String[] results = list.toArray(new String[list.size()]); 161 162 float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES); 163 164 nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid, 165 results, 166 scores, 167 provisional); 168 } 169 } 170 171 // This method must be called before any instance of SpeechRecognition can be created. It will 172 // query Android's package manager to find a suitable speech recognition provider that supports 173 // continuous recognition. 174 public static boolean initialize(Context context) { 175 if (!SpeechRecognizer.isRecognitionAvailable(context)) 176 return false; 177 178 PackageManager pm = context.getPackageManager(); 179 Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE); 180 final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES); 181 182 for (ResolveInfo resolve : list) { 183 ServiceInfo service = resolve.serviceInfo; 184 185 if (!service.packageName.equals(PROVIDER_PACKAGE_NAME)) 186 continue; 187 188 int versionCode; 189 try { 190 versionCode = pm.getPackageInfo(service.packageName, 0).versionCode; 191 } catch (NameNotFoundException e) { 192 continue; 193 } 194 195 if (versionCode < PROVIDER_MIN_VERSION) 196 continue; 197 198 mRecognitionProvider = new ComponentName(service.packageName, service.name); 199 200 return true; 201 } 202 203 // If we reach this point, we failed to find a suitable recognition provider. 
204 return false; 205 } 206 207 private SpeechRecognition(final Context context, int nativeSpeechRecognizerImplAndroid) { 208 mContext = context; 209 mContinuous = false; 210 mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid; 211 mListener = new Listener(); 212 mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH); 213 214 if (mRecognitionProvider != null) { 215 mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, mRecognitionProvider); 216 } else { 217 // It is possible to force-enable the speech recognition web platform feature (using a 218 // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME 219 // provider, in which case the first available speech recognition provider is used. 220 // Caveat: Continuous mode may not work as expected with a different provider. 221 mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext); 222 } 223 224 mRecognizer.setRecognitionListener(mListener); 225 } 226 227 // This function destroys everything when recognition is done, taking care to properly tear 228 // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called. 
229 private void terminate(int error) { 230 231 if (mState != STATE_IDLE) { 232 if (mState == STATE_CAPTURING_SPEECH) { 233 nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid); 234 } 235 nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid); 236 mState = STATE_IDLE; 237 } 238 239 if (error != SpeechRecognitionError.NONE) 240 nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error); 241 242 mRecognizer.destroy(); 243 mRecognizer = null; 244 nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid); 245 mNativeSpeechRecognizerImplAndroid = 0; 246 } 247 248 @CalledByNative 249 private static SpeechRecognition createSpeechRecognition( 250 Context context, int nativeSpeechRecognizerImplAndroid) { 251 return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid); 252 } 253 254 @CalledByNative 255 private void startRecognition(String language, boolean continuous, boolean interim_results) { 256 if (mRecognizer == null) 257 return; 258 259 mContinuous = continuous; 260 mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous); 261 mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language); 262 mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interim_results); 263 mRecognizer.startListening(mIntent); 264 } 265 266 @CalledByNative 267 private void abortRecognition() { 268 if (mRecognizer == null) 269 return; 270 271 mRecognizer.cancel(); 272 terminate(SpeechRecognitionError.ABORTED); 273 } 274 275 @CalledByNative 276 private void stopRecognition() { 277 if (mRecognizer == null) 278 return; 279 280 mContinuous = false; 281 mRecognizer.stopListening(); 282 } 283 284 // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc 285 private native void nativeOnAudioStart(int nativeSpeechRecognizerImplAndroid); 286 private native void nativeOnSoundStart(int nativeSpeechRecognizerImplAndroid); 287 private native void nativeOnSoundEnd(int nativeSpeechRecognizerImplAndroid); 288 private native void nativeOnAudioEnd(int 
nativeSpeechRecognizerImplAndroid); 289 private native void nativeOnRecognitionResults(int nativeSpeechRecognizerImplAndroid, 290 String[] results, 291 float[] scores, 292 boolean provisional); 293 private native void nativeOnRecognitionError(int nativeSpeechRecognizerImplAndroid, int error); 294 private native void nativeOnRecognitionEnd(int nativeSpeechRecognizerImplAndroid); 295} 296