AlwaysOnHotwordDetector.java revision f63bc523eadbe01ce0a5ad52868a5dccb3d5f6dd
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.content.Intent; 20import android.hardware.soundtrigger.IRecognitionStatusCallback; 21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 22import android.hardware.soundtrigger.KeyphraseMetadata; 23import android.hardware.soundtrigger.SoundTrigger; 24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 25import android.hardware.soundtrigger.SoundTrigger.Keyphrase; 26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel; 28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 30import android.os.AsyncTask; 31import android.os.Handler; 32import android.os.Message; 33import android.os.RemoteException; 34import android.util.Slog; 35 36import com.android.internal.app.IVoiceInteractionManagerService; 37 38import java.util.List; 39 40/** 41 * A class that lets a VoiceInteractionService implementation interact with 42 * always-on keyphrase detection APIs. 43 */ 44public class AlwaysOnHotwordDetector { 45 //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----// 46 /** 47 * Indicates that this hotword detector is no longer valid for any recognition 48 * and should not be used anymore. 49 */ 50 public static final int STATE_INVALID = -3; 51 /** 52 * Indicates that recognition for the given keyphrase is not available on the system 53 * because of the hardware configuration. 54 */ 55 public static final int STATE_HARDWARE_UNAVAILABLE = -2; 56 /** 57 * Indicates that recognition for the given keyphrase is not supported. 58 */ 59 public static final int STATE_KEYPHRASE_UNSUPPORTED = -1; 60 /** 61 * Indicates that the given keyphrase is not enrolled. 62 */ 63 public static final int STATE_KEYPHRASE_UNENROLLED = 1; 64 /** 65 * Indicates that the given keyphrase is currently enrolled and it's possible to start 66 * recognition for it. 67 */ 68 public static final int STATE_KEYPHRASE_ENROLLED = 2; 69 70 /** 71 * Indicates that the detector isn't ready currently. 72 */ 73 private static final int STATE_NOT_READY = 0; 74 75 // Keyphrase management actions. Used in getManageIntent() ----// 76 /** Indicates that we need to enroll. */ 77 public static final int MANAGE_ACTION_ENROLL = 0; 78 /** Indicates that we need to re-enroll. */ 79 public static final int MANAGE_ACTION_RE_ENROLL = 1; 80 /** Indicates that we need to un-enroll. */ 81 public static final int MANAGE_ACTION_UN_ENROLL = 2; 82 83 /** 84 * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()} 85 */ 86 public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR; 87 public static final int STATUS_OK = SoundTrigger.STATUS_OK; 88 89 //-- Flags for startRecogntion ----// 90 /** Empty flag for {@link #startRecognition(int)}. */ 91 public static final int RECOGNITION_FLAG_NONE = 0; 92 /** 93 * Recognition flag for {@link #startRecognition(int)} that indicates 94 * whether the trigger audio for hotword needs to be captured. 95 */ 96 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 97 98 //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----// 99 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 100 101 /** 102 * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()} 103 */ 104 public static final int RECOGNITION_MODE_VOICE_TRIGGER 105 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 106 /** 107 * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()} 108 */ 109 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 110 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 111 112 static final String TAG = "AlwaysOnHotwordDetector"; 113 // TODO: Set to false. 114 static final boolean DBG = true; 115 116 private static final int MSG_STATE_CHANGED = 1; 117 private static final int MSG_HOTWORD_DETECTED = 2; 118 private static final int MSG_DETECTION_STOPPED = 3; 119 120 private final String mText; 121 private final String mLocale; 122 /** 123 * The metadata of the Keyphrase, derived from the enrollment application. 124 * This may be null if this keyphrase isn't supported by the enrollment application. 125 */ 126 private final KeyphraseMetadata mKeyphraseMetadata; 127 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 128 private final IVoiceInteractionService mVoiceInteractionService; 129 private final IVoiceInteractionManagerService mModelManagementService; 130 private final SoundTriggerListener mInternalCallback; 131 private final Callback mExternalCallback; 132 private final Object mLock = new Object(); 133 private final Handler mHandler; 134 135 /** 136 * The sound model for the keyphrase, derived from the model management service 137 * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet. 138 */ 139 private KeyphraseSoundModel mEnrolledSoundModel; 140 private int mAvailability = STATE_NOT_READY; 141 142 /** 143 * Callbacks for always-on hotword detection. 144 */ 145 public interface Callback { 146 /** 147 * Called when the hotword availability changes. 148 * This indicates a change in the availability of recognition for the given keyphrase. 149 * It's called at least once with the initial availability.<p/> 150 * 151 * Availability implies whether the hardware on this system is capable of listening for 152 * the given keyphrase or not. <p/> 153 * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or 154 * {@link #STATE_KEYPHRASE_UNSUPPORTED}, 155 * detection is not possible and no further interaction should be 156 * performed with this detector. <br/> 157 * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin 158 * an enrollment flow for the keyphrase. <br/> 159 * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/> 160 * 161 * If the return code is {@link #STATE_INVALID}, this detector is stale. 162 * A new detector should be obtained for use in the future. 163 */ 164 void onAvailabilityChanged(int status); 165 /** 166 * Called when the keyphrase is spoken. 167 * 168 * @param data Optional trigger audio data, if it was requested during 169 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 170 */ 171 void onDetected(byte[] data); 172 /** 173 * Called when the detection for the associated keyphrase stops. 174 */ 175 void onDetectionStopped(); 176 } 177 178 /** 179 * @param text The keyphrase text to get the detector for. 180 * @param locale The java locale for the detector. 181 * @param callback A non-null Callback for receiving the recognition events. 182 * @param voiceInteractionService The current voice interaction service. 183 * @param modelManagementService A service that allows management of sound models. 184 * 185 * @hide 186 */ 187 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 188 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 189 IVoiceInteractionService voiceInteractionService, 190 IVoiceInteractionManagerService modelManagementService) { 191 mText = text; 192 mLocale = locale; 193 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 194 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 195 mExternalCallback = callback; 196 mHandler = new MyHandler(); 197 mInternalCallback = new SoundTriggerListener(mHandler); 198 mVoiceInteractionService = voiceInteractionService; 199 mModelManagementService = modelManagementService; 200 new RefreshAvailabiltyTask().execute(); 201 } 202 203 /** 204 * Gets the recognition modes supported by the associated keyphrase. 205 * 206 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 207 * Callers should only call this method after a supported state callback on 208 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 209 */ 210 public int getSupportedRecognitionModes() { 211 synchronized (mLock) { 212 return getSupportedRecognitionModesLocked(); 213 } 214 } 215 216 private int getSupportedRecognitionModesLocked() { 217 // This method only makes sense if we can actually support a recognition. 218 if (mAvailability != STATE_KEYPHRASE_ENROLLED 219 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 220 throw new UnsupportedOperationException( 221 "Getting supported recognition modes for the keyphrase is not supported"); 222 } 223 224 return mKeyphraseMetadata.recognitionModeFlags; 225 } 226 227 /** 228 * Starts recognition for the associated keyphrase. 229 * 230 * @param recognitionFlags The flags to control the recognition properties. 231 * The allowed flags are {@link #RECOGNITION_FLAG_NONE} and 232 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}. 233 * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise. 234 * @throws UnsupportedOperationException if the recognition isn't supported. 235 * Callers should only call this method after a supported state callback on 236 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 237 */ 238 public int startRecognition(int recognitionFlags) { 239 synchronized (mLock) { 240 return startRecognitionLocked(recognitionFlags); 241 } 242 } 243 244 private int startRecognitionLocked(int recognitionFlags) { 245 // This method only makes sense if we can start a recognition. 246 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 247 throw new UnsupportedOperationException( 248 "Recognition for the given keyphrase is not supported"); 249 } 250 251 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 252 // TODO: Do we need to do something about the confidence level here? 253 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 254 mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]); 255 boolean captureTriggerAudio = 256 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 257 int code = STATUS_ERROR; 258 try { 259 code = mModelManagementService.startRecognition(mVoiceInteractionService, 260 mKeyphraseMetadata.id, mEnrolledSoundModel, mInternalCallback, 261 new RecognitionConfig( 262 captureTriggerAudio, recognitionExtra, null /* additional data */)); 263 } catch (RemoteException e) { 264 Slog.w(TAG, "RemoteException in startRecognition!"); 265 } 266 if (code != STATUS_OK) { 267 Slog.w(TAG, "startRecognition() failed with error code " + code); 268 } 269 return code; 270 } 271 272 /** 273 * Stops recognition for the associated keyphrase. 274 * 275 * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise. 276 * @throws UnsupportedOperationException if the recognition isn't supported. 277 * Callers should only call this method after a supported state callback on 278 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 279 */ 280 public int stopRecognition() { 281 synchronized (mLock) { 282 return stopRecognitionLocked(); 283 } 284 } 285 286 private int stopRecognitionLocked() { 287 // This method only makes sense if we can start a recognition. 288 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 289 throw new UnsupportedOperationException( 290 "Recognition for the given keyphrase is not supported"); 291 } 292 293 int code = STATUS_ERROR; 294 try { 295 code = mModelManagementService.stopRecognition( 296 mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback); 297 } catch (RemoteException e) { 298 Slog.w(TAG, "RemoteException in stopRecognition!"); 299 } 300 301 if (code != STATUS_OK) { 302 Slog.w(TAG, "stopRecognition() failed with error code " + code); 303 } 304 return code; 305 } 306 307 /** 308 * Gets an intent to manage the associated keyphrase. 309 * 310 * @param action The manage action that needs to be performed. 311 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 312 * {@link #MANAGE_ACTION_UN_ENROLL}. 313 * @return An {@link Intent} to manage the given keyphrase. 314 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 315 * Callers should only call this method after a supported state callback on 316 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 317 */ 318 public Intent getManageIntent(int action) { 319 // This method only makes sense if we can actually support a recognition. 320 if (mAvailability != STATE_KEYPHRASE_ENROLLED 321 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 322 throw new UnsupportedOperationException( 323 "Managing the given keyphrase is not supported"); 324 } 325 if (action != MANAGE_ACTION_ENROLL 326 && action != MANAGE_ACTION_RE_ENROLL 327 && action != MANAGE_ACTION_UN_ENROLL) { 328 throw new IllegalArgumentException("Invalid action specified " + action); 329 } 330 331 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 332 } 333 334 /** 335 * Invalidates this hotword detector so that any future calls to this result 336 * in an IllegalStateException. 337 * 338 * @hide 339 */ 340 void invalidate() { 341 synchronized (mLock) { 342 mAvailability = STATE_INVALID; 343 notifyStateChangedLocked(); 344 } 345 } 346 347 /** 348 * Reloads the sound models from the service. 349 * 350 * @hide 351 */ 352 void onSoundModelsChanged() { 353 synchronized (mLock) { 354 // TODO: This should stop the recognition if it was using an enrolled sound model 355 // that's no longer available. 356 if (mAvailability == STATE_INVALID 357 || mAvailability == STATE_HARDWARE_UNAVAILABLE 358 || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) { 359 Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config"); 360 return; 361 } 362 363 // Execute a refresh availability task - which should then notify of a change. 364 new RefreshAvailabiltyTask().execute(); 365 } 366 } 367 368 private void notifyStateChangedLocked() { 369 Message message = Message.obtain(mHandler, MSG_STATE_CHANGED); 370 message.arg1 = mAvailability; 371 message.sendToTarget(); 372 } 373 374 /** @hide */ 375 static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub { 376 private final Handler mHandler; 377 378 public SoundTriggerListener(Handler handler) { 379 mHandler = handler; 380 } 381 382 @Override 383 public void onDetected(byte[] data) { 384 Slog.i(TAG, "onDetected"); 385 Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED); 386 message.obj = data; 387 message.sendToTarget(); 388 } 389 390 @Override 391 public void onDetectionStopped() { 392 Slog.i(TAG, "onDetectionStopped"); 393 mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED); 394 } 395 } 396 397 class MyHandler extends Handler { 398 @Override 399 public void handleMessage(Message msg) { 400 switch (msg.what) { 401 case MSG_STATE_CHANGED: 402 mExternalCallback.onAvailabilityChanged(msg.arg1); 403 break; 404 case MSG_HOTWORD_DETECTED: 405 mExternalCallback.onDetected((byte[]) msg.obj); 406 break; 407 case MSG_DETECTION_STOPPED: 408 mExternalCallback.onDetectionStopped(); 409 default: 410 super.handleMessage(msg); 411 } 412 } 413 } 414 415 class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> { 416 417 @Override 418 public Void doInBackground(Void... params) { 419 int availability = internalGetInitialAvailability(); 420 KeyphraseSoundModel soundModel = null; 421 // Fetch the sound model if the availability is one of the supported ones. 422 if (availability == STATE_NOT_READY 423 || availability == STATE_KEYPHRASE_UNENROLLED 424 || availability == STATE_KEYPHRASE_ENROLLED) { 425 soundModel = 426 internalGetKeyphraseSoundModel(mKeyphraseMetadata.id); 427 if (soundModel == null) { 428 availability = STATE_KEYPHRASE_UNENROLLED; 429 } else { 430 availability = STATE_KEYPHRASE_ENROLLED; 431 } 432 } 433 434 synchronized (mLock) { 435 if (DBG) { 436 Slog.d(TAG, "Hotword availability changed from " + mAvailability 437 + " -> " + availability); 438 } 439 mAvailability = availability; 440 mEnrolledSoundModel = soundModel; 441 notifyStateChangedLocked(); 442 } 443 return null; 444 } 445 446 /** 447 * @return The initial availability without checking the enrollment status. 448 */ 449 private int internalGetInitialAvailability() { 450 synchronized (mLock) { 451 // This detector has already been invalidated. 452 if (mAvailability == STATE_INVALID) { 453 return STATE_INVALID; 454 } 455 } 456 457 ModuleProperties dspModuleProperties = null; 458 try { 459 dspModuleProperties = 460 mModelManagementService.getDspModuleProperties(mVoiceInteractionService); 461 } catch (RemoteException e) { 462 Slog.w(TAG, "RemoteException in getDspProperties!"); 463 } 464 // No DSP available 465 if (dspModuleProperties == null) { 466 return STATE_HARDWARE_UNAVAILABLE; 467 } 468 // No enrollment application supports this keyphrase/locale 469 if (mKeyphraseMetadata == null) { 470 return STATE_KEYPHRASE_UNSUPPORTED; 471 } 472 return STATE_NOT_READY; 473 } 474 475 /** 476 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 477 */ 478 private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) { 479 List<KeyphraseSoundModel> soundModels; 480 try { 481 soundModels = mModelManagementService 482 .listRegisteredKeyphraseSoundModels(mVoiceInteractionService); 483 if (soundModels == null || soundModels.isEmpty()) { 484 Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId); 485 return null; 486 } 487 for (int i = 0; i < soundModels.size(); i++) { 488 KeyphraseSoundModel soundModel = soundModels.get(i); 489 if (soundModel.keyphrases == null || soundModel.keyphrases.length == 0) { 490 continue; 491 } 492 for (int j = 0; i < soundModel.keyphrases.length; j++) { 493 Keyphrase keyphrase = soundModel.keyphrases[j]; 494 if (keyphrase.id == keyphraseId) { 495 return soundModel; 496 } 497 } 498 } 499 } catch (RemoteException e) { 500 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 501 } 502 return null; 503 } 504 } 505} 506