AlwaysOnHotwordDetector.java revision 1ed12ddb8c46193cc4d790b9c7d6a5d61afb3311
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.content.Intent; 20import android.hardware.soundtrigger.IRecognitionStatusCallback; 21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 22import android.hardware.soundtrigger.KeyphraseMetadata; 23import android.hardware.soundtrigger.SoundTrigger; 24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 25import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent; 26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel; 28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 30import android.os.AsyncTask; 31import android.os.Handler; 32import android.os.Message; 33import android.os.RemoteException; 34import android.util.Slog; 35 36import com.android.internal.app.IVoiceInteractionManagerService; 37 38/** 39 * A class that lets a VoiceInteractionService implementation interact with 40 * always-on keyphrase detection APIs. 41 */ 42public class AlwaysOnHotwordDetector { 43 //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----// 44 /** 45 * Indicates that this hotword detector is no longer valid for any recognition 46 * and should not be used anymore. 47 */ 48 public static final int STATE_INVALID = -3; 49 /** 50 * Indicates that recognition for the given keyphrase is not available on the system 51 * because of the hardware configuration. 52 */ 53 public static final int STATE_HARDWARE_UNAVAILABLE = -2; 54 /** 55 * Indicates that recognition for the given keyphrase is not supported. 56 */ 57 public static final int STATE_KEYPHRASE_UNSUPPORTED = -1; 58 /** 59 * Indicates that the given keyphrase is not enrolled. 60 */ 61 public static final int STATE_KEYPHRASE_UNENROLLED = 1; 62 /** 63 * Indicates that the given keyphrase is currently enrolled and it's possible to start 64 * recognition for it. 65 */ 66 public static final int STATE_KEYPHRASE_ENROLLED = 2; 67 68 /** 69 * Indicates that the detector isn't ready currently. 70 */ 71 private static final int STATE_NOT_READY = 0; 72 73 // Keyphrase management actions. Used in getManageIntent() ----// 74 /** Indicates that we need to enroll. */ 75 public static final int MANAGE_ACTION_ENROLL = 0; 76 /** Indicates that we need to re-enroll. */ 77 public static final int MANAGE_ACTION_RE_ENROLL = 1; 78 /** Indicates that we need to un-enroll. */ 79 public static final int MANAGE_ACTION_UN_ENROLL = 2; 80 81 //-- Flags for startRecogntion ----// 82 /** Empty flag for {@link #startRecognition(int)}. */ 83 public static final int RECOGNITION_FLAG_NONE = 0; 84 /** 85 * Recognition flag for {@link #startRecognition(int)} that indicates 86 * whether the trigger audio for hotword needs to be captured. 87 */ 88 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 89 90 //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----// 91 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 92 93 /** 94 * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()} 95 */ 96 public static final int RECOGNITION_MODE_VOICE_TRIGGER 97 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 98 /** 99 * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()} 100 */ 101 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 102 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 103 104 static final String TAG = "AlwaysOnHotwordDetector"; 105 // TODO: Set to false. 106 static final boolean DBG = true; 107 108 private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR; 109 private static final int STATUS_OK = SoundTrigger.STATUS_OK; 110 111 private static final int MSG_STATE_CHANGED = 1; 112 private static final int MSG_HOTWORD_DETECTED = 2; 113 private static final int MSG_DETECTION_STARTED = 3; 114 private static final int MSG_DETECTION_STOPPED = 4; 115 private static final int MSG_DETECTION_ERROR = 5; 116 117 private static final int FLAG_REQUESTED = 0x1; 118 private static final int FLAG_STARTED = 0x2; 119 private static final int FLAG_CALL_ACTIVE = 0x4; 120 private static final int FLAG_MICROPHONE_OPEN = 0x8; 121 122 private final String mText; 123 private final String mLocale; 124 /** 125 * The metadata of the Keyphrase, derived from the enrollment application. 126 * This may be null if this keyphrase isn't supported by the enrollment application. 127 */ 128 private final KeyphraseMetadata mKeyphraseMetadata; 129 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 130 private final IVoiceInteractionService mVoiceInteractionService; 131 private final IVoiceInteractionManagerService mModelManagementService; 132 private final SoundTriggerListener mInternalCallback; 133 private final Callback mExternalCallback; 134 private final Object mLock = new Object(); 135 private final Handler mHandler; 136 137 private int mAvailability = STATE_NOT_READY; 138 private int mInternalState = 0; 139 private int mRecognitionFlags = RECOGNITION_FLAG_NONE; 140 141 /** 142 * Callbacks for always-on hotword detection. 143 */ 144 public interface Callback { 145 /** 146 * Called when the hotword availability changes. 147 * This indicates a change in the availability of recognition for the given keyphrase. 148 * It's called at least once with the initial availability.<p/> 149 * 150 * Availability implies whether the hardware on this system is capable of listening for 151 * the given keyphrase or not. <p/> 152 * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or 153 * {@link #STATE_KEYPHRASE_UNSUPPORTED}, 154 * detection is not possible and no further interaction should be 155 * performed with this detector. <br/> 156 * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin 157 * an enrollment flow for the keyphrase. <br/> 158 * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/> 159 * 160 * If the return code is {@link #STATE_INVALID}, this detector is stale. 161 * A new detector should be obtained for use in the future. 162 */ 163 void onAvailabilityChanged(int status); 164 /** 165 * Called when the keyphrase is spoken. 166 * This implicitly stops listening for the keyphrase once it's detected. 167 * Clients should start a recognition again once they are done handling this 168 * detection. 169 * 170 * @param data Optional trigger audio data, if it was requested during 171 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 172 */ 173 void onDetected(byte[] data); 174 /** 175 * Called when the detection for the associated keyphrase starts. 176 * This is called as a result of a successful call to 177 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 178 */ 179 void onDetectionStarted(); 180 /** 181 * Called when the detection for the associated keyphrase stops. 182 * This is called as a result of a successful call to 183 * {@link AlwaysOnHotwordDetector#stopRecognition()}. 184 */ 185 void onDetectionStopped(); 186 /** 187 * Called when the detection fails due to an error. 188 */ 189 void onError(); 190 } 191 192 /** 193 * @param text The keyphrase text to get the detector for. 194 * @param locale The java locale for the detector. 195 * @param callback A non-null Callback for receiving the recognition events. 196 * @param voiceInteractionService The current voice interaction service. 197 * @param modelManagementService A service that allows management of sound models. 198 * 199 * @hide 200 */ 201 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 202 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 203 IVoiceInteractionService voiceInteractionService, 204 IVoiceInteractionManagerService modelManagementService) { 205 mText = text; 206 mLocale = locale; 207 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 208 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 209 mExternalCallback = callback; 210 mHandler = new MyHandler(); 211 mInternalCallback = new SoundTriggerListener(mHandler); 212 mVoiceInteractionService = voiceInteractionService; 213 mModelManagementService = modelManagementService; 214 new RefreshAvailabiltyTask().execute(); 215 } 216 217 /** 218 * Gets the recognition modes supported by the associated keyphrase. 219 * 220 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 221 * Callers should only call this method after a supported state callback on 222 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 223 */ 224 public int getSupportedRecognitionModes() { 225 synchronized (mLock) { 226 return getSupportedRecognitionModesLocked(); 227 } 228 } 229 230 private int getSupportedRecognitionModesLocked() { 231 // This method only makes sense if we can actually support a recognition. 232 if (mAvailability != STATE_KEYPHRASE_ENROLLED 233 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 234 throw new UnsupportedOperationException( 235 "Getting supported recognition modes for the keyphrase is not supported"); 236 } 237 238 return mKeyphraseMetadata.recognitionModeFlags; 239 } 240 241 /** 242 * Starts recognition for the associated keyphrase. 243 * 244 * @param recognitionFlags The flags to control the recognition properties. 245 * The allowed flags are {@link #RECOGNITION_FLAG_NONE} and 246 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}. 247 * @throws UnsupportedOperationException if the recognition isn't supported. 248 * Callers should only call this method after a supported state callback on 249 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 250 */ 251 public void startRecognition(int recognitionFlags) { 252 synchronized (mLock) { 253 // Check if we can start/stop a recognition. 254 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 255 throw new UnsupportedOperationException( 256 "Recognition for the given keyphrase is not supported"); 257 } 258 259 mInternalState |= FLAG_REQUESTED; 260 mRecognitionFlags = recognitionFlags; 261 updateRecognitionLocked(); 262 } 263 } 264 265 /** 266 * Stops recognition for the associated keyphrase. 267 * 268 * @throws UnsupportedOperationException if the recognition isn't supported. 269 * Callers should only call this method after a supported state callback on 270 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 271 */ 272 public void stopRecognition() { 273 synchronized (mLock) { 274 // Check if we can start/stop a recognition. 275 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 276 throw new UnsupportedOperationException( 277 "Recognition for the given keyphrase is not supported"); 278 } 279 280 mInternalState &= ~FLAG_REQUESTED; 281 mRecognitionFlags = RECOGNITION_FLAG_NONE; 282 updateRecognitionLocked(); 283 } 284 } 285 286 /** 287 * Gets an intent to manage the associated keyphrase. 288 * 289 * @param action The manage action that needs to be performed. 290 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 291 * {@link #MANAGE_ACTION_UN_ENROLL}. 292 * @return An {@link Intent} to manage the given keyphrase. 293 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 294 * Callers should only call this method after a supported state callback on 295 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 296 */ 297 public Intent getManageIntent(int action) { 298 // This method only makes sense if we can actually support a recognition. 299 if (mAvailability != STATE_KEYPHRASE_ENROLLED 300 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 301 throw new UnsupportedOperationException( 302 "Managing the given keyphrase is not supported"); 303 } 304 if (action != MANAGE_ACTION_ENROLL 305 && action != MANAGE_ACTION_RE_ENROLL 306 && action != MANAGE_ACTION_UN_ENROLL) { 307 throw new IllegalArgumentException("Invalid action specified " + action); 308 } 309 310 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 311 } 312 313 /** 314 * Invalidates this hotword detector so that any future calls to this result 315 * in an IllegalStateException. 316 * 317 * @hide 318 */ 319 void invalidate() { 320 synchronized (mLock) { 321 mAvailability = STATE_INVALID; 322 notifyStateChangedLocked(); 323 } 324 } 325 326 /** 327 * Reloads the sound models from the service. 328 * 329 * @hide 330 */ 331 void onSoundModelsChanged() { 332 synchronized (mLock) { 333 // TODO: This should stop the recognition if it was using an enrolled sound model 334 // that's no longer available. 335 if (mAvailability == STATE_INVALID 336 || mAvailability == STATE_HARDWARE_UNAVAILABLE 337 || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) { 338 Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config"); 339 return; 340 } 341 342 // Execute a refresh availability task - which should then notify of a change. 343 new RefreshAvailabiltyTask().execute(); 344 } 345 } 346 347 @SuppressWarnings("unused") 348 private void onCallStateChanged(boolean active) { 349 synchronized (mLock) { 350 if (active) { 351 mInternalState |= FLAG_CALL_ACTIVE; 352 } else { 353 mInternalState &= ~FLAG_CALL_ACTIVE; 354 } 355 356 updateRecognitionLocked(); 357 } 358 } 359 360 @SuppressWarnings("unused") 361 private void onMicrophoneStateChanged(boolean open) { 362 synchronized (mLock) { 363 if (open) { 364 mInternalState |= FLAG_MICROPHONE_OPEN; 365 } else { 366 mInternalState &= ~FLAG_MICROPHONE_OPEN; 367 } 368 369 updateRecognitionLocked(); 370 } 371 } 372 373 private void updateRecognitionLocked() { 374 // Don't attempt to update the recognition state if keyphrase isn't enrolled. 375 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 376 return; 377 } 378 379 // Start recognition if requested and not in a call/reading from the microphone 380 boolean start = (mInternalState&FLAG_REQUESTED) != 0 381 && (mInternalState&FLAG_CALL_ACTIVE) == 0 382 && (mInternalState&FLAG_MICROPHONE_OPEN) == 0; 383 boolean requested = (mInternalState&FLAG_REQUESTED) != 0; 384 385 if (start && (mInternalState&FLAG_STARTED) == 0) { 386 // Start recognition. 387 if (DBG) Slog.d(TAG, "starting recognition..."); 388 int status = startRecognitionLocked(); 389 if (status == STATUS_OK) { 390 mInternalState |= FLAG_STARTED; 391 mHandler.sendEmptyMessage(MSG_DETECTION_STARTED); 392 } else { 393 if (DBG) Slog.d(TAG, "failed to start recognition: " + status); 394 mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 395 } 396 // Post the callback 397 return; 398 } 399 400 if (!start && (mInternalState&FLAG_STARTED) != 0) { 401 // Stop recognition 402 // Only notify the callback if a recognition was *not* requested. 403 // For internal stoppages, don't notify the callback. 404 if (DBG) Slog.d(TAG, "stopping recognition..."); 405 int status = stopRecognitionLocked(); 406 if (status == STATUS_OK) { 407 mInternalState &= ~FLAG_STARTED; 408 if (!requested) mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED); 409 } else { 410 if (!requested) mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 411 if (DBG) Slog.d(TAG, "failed to stop recognition: " + status); 412 } 413 return; 414 } 415 } 416 417 private int startRecognitionLocked() { 418 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 419 // TODO: Do we need to do something about the confidence level here? 420 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 421 mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]); 422 boolean captureTriggerAudio = 423 (mRecognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 424 int code = STATUS_ERROR; 425 try { 426 code = mModelManagementService.startRecognition(mVoiceInteractionService, 427 mKeyphraseMetadata.id, mInternalCallback, 428 new RecognitionConfig( 429 captureTriggerAudio, recognitionExtra, null /* additional data */)); 430 } catch (RemoteException e) { 431 Slog.w(TAG, "RemoteException in startRecognition!"); 432 } 433 if (code != STATUS_OK) { 434 Slog.w(TAG, "startRecognition() failed with error code " + code); 435 } 436 return code; 437 } 438 439 private int stopRecognitionLocked() { 440 int code = STATUS_ERROR; 441 try { 442 code = mModelManagementService.stopRecognition( 443 mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback); 444 } catch (RemoteException e) { 445 Slog.w(TAG, "RemoteException in stopRecognition!"); 446 } 447 448 if (code != STATUS_OK) { 449 Slog.w(TAG, "stopRecognition() failed with error code " + code); 450 } 451 return code; 452 } 453 454 private void notifyStateChangedLocked() { 455 Message message = Message.obtain(mHandler, MSG_STATE_CHANGED); 456 message.arg1 = mAvailability; 457 message.sendToTarget(); 458 } 459 460 /** @hide */ 461 static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub { 462 private final Handler mHandler; 463 464 public SoundTriggerListener(Handler handler) { 465 mHandler = handler; 466 } 467 468 @Override 469 public void onDetected(KeyphraseRecognitionEvent event) { 470 Slog.i(TAG, "onDetected"); 471 Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED); 472 message.obj = event.data; 473 message.sendToTarget(); 474 } 475 476 @Override 477 public void onError(int status) { 478 Slog.i(TAG, "onError: " + status); 479 mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 480 } 481 } 482 483 class MyHandler extends Handler { 484 @Override 485 public void handleMessage(Message msg) { 486 switch (msg.what) { 487 case MSG_STATE_CHANGED: 488 mExternalCallback.onAvailabilityChanged(msg.arg1); 489 break; 490 case MSG_HOTWORD_DETECTED: 491 mExternalCallback.onDetected((byte[]) msg.obj); 492 break; 493 case MSG_DETECTION_STARTED: 494 mExternalCallback.onDetectionStarted(); 495 break; 496 case MSG_DETECTION_STOPPED: 497 mExternalCallback.onDetectionStopped(); 498 break; 499 case MSG_DETECTION_ERROR: 500 mExternalCallback.onError(); 501 break; 502 default: 503 super.handleMessage(msg); 504 } 505 } 506 } 507 508 class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> { 509 510 @Override 511 public Void doInBackground(Void... params) { 512 int availability = internalGetInitialAvailability(); 513 boolean enrolled = false; 514 // Fetch the sound model if the availability is one of the supported ones. 515 if (availability == STATE_NOT_READY 516 || availability == STATE_KEYPHRASE_UNENROLLED 517 || availability == STATE_KEYPHRASE_ENROLLED) { 518 enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id); 519 if (!enrolled) { 520 availability = STATE_KEYPHRASE_UNENROLLED; 521 } else { 522 availability = STATE_KEYPHRASE_ENROLLED; 523 } 524 } 525 526 synchronized (mLock) { 527 if (DBG) { 528 Slog.d(TAG, "Hotword availability changed from " + mAvailability 529 + " -> " + availability); 530 } 531 mAvailability = availability; 532 notifyStateChangedLocked(); 533 } 534 return null; 535 } 536 537 /** 538 * @return The initial availability without checking the enrollment status. 539 */ 540 private int internalGetInitialAvailability() { 541 synchronized (mLock) { 542 // This detector has already been invalidated. 543 if (mAvailability == STATE_INVALID) { 544 return STATE_INVALID; 545 } 546 } 547 548 ModuleProperties dspModuleProperties = null; 549 try { 550 dspModuleProperties = 551 mModelManagementService.getDspModuleProperties(mVoiceInteractionService); 552 } catch (RemoteException e) { 553 Slog.w(TAG, "RemoteException in getDspProperties!"); 554 } 555 // No DSP available 556 if (dspModuleProperties == null) { 557 return STATE_HARDWARE_UNAVAILABLE; 558 } 559 // No enrollment application supports this keyphrase/locale 560 if (mKeyphraseMetadata == null) { 561 return STATE_KEYPHRASE_UNSUPPORTED; 562 } 563 return STATE_NOT_READY; 564 } 565 566 /** 567 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 568 */ 569 private boolean internalGetIsEnrolled(int keyphraseId) { 570 try { 571 return mModelManagementService.isEnrolledForKeyphrase( 572 mVoiceInteractionService, keyphraseId); 573 } catch (RemoteException e) { 574 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 575 } 576 return false; 577 } 578 } 579} 580