// AlwaysOnHotwordDetector.java revision 2178e2e085056186141ac44563103c6f455de89c
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.annotation.IntDef; 20import android.annotation.NonNull; 21import android.annotation.Nullable; 22import android.content.Intent; 23import android.hardware.soundtrigger.IRecognitionStatusCallback; 24import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 25import android.hardware.soundtrigger.KeyphraseMetadata; 26import android.hardware.soundtrigger.SoundTrigger; 27import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 28import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent; 29import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 30import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel; 31import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 32import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 33import android.media.AudioFormat; 34import android.os.AsyncTask; 35import android.os.Handler; 36import android.os.Message; 37import android.os.RemoteException; 38import android.util.Slog; 39 40import com.android.internal.app.IVoiceInteractionManagerService; 41 42import java.lang.annotation.Retention; 43import java.lang.annotation.RetentionPolicy; 44 45/** 46 * A class that lets a VoiceInteractionService implementation interact with 47 * always-on keyphrase detection APIs. 
48 */ 49public class AlwaysOnHotwordDetector { 50 //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----// 51 /** 52 * Indicates that this hotword detector is no longer valid for any recognition 53 * and should not be used anymore. 54 */ 55 private static final int STATE_INVALID = -3; 56 57 /** 58 * Indicates that recognition for the given keyphrase is not available on the system 59 * because of the hardware configuration. 60 * No further interaction should be performed with the detector that returns this availability. 61 */ 62 public static final int STATE_HARDWARE_UNAVAILABLE = -2; 63 /** 64 * Indicates that recognition for the given keyphrase is not supported. 65 * No further interaction should be performed with the detector that returns this availability. 66 */ 67 public static final int STATE_KEYPHRASE_UNSUPPORTED = -1; 68 /** 69 * Indicates that the given keyphrase is not enrolled. 70 * The caller may choose to begin an enrollment flow for the keyphrase. 71 */ 72 public static final int STATE_KEYPHRASE_UNENROLLED = 1; 73 /** 74 * Indicates that the given keyphrase is currently enrolled and it's possible to start 75 * recognition for it. 76 */ 77 public static final int STATE_KEYPHRASE_ENROLLED = 2; 78 79 /** 80 * Indicates that the detector isn't ready currently. 81 */ 82 private static final int STATE_NOT_READY = 0; 83 84 // Keyphrase management actions. Used in getManageIntent() ----// 85 /** @hide */ 86 @Retention(RetentionPolicy.SOURCE) 87 @IntDef(value = { 88 MANAGE_ACTION_ENROLL, 89 MANAGE_ACTION_RE_ENROLL, 90 MANAGE_ACTION_UN_ENROLL 91 }) 92 public @interface ManageActions {} 93 94 /** Indicates that we need to enroll. */ 95 public static final int MANAGE_ACTION_ENROLL = 0; 96 /** Indicates that we need to re-enroll. */ 97 public static final int MANAGE_ACTION_RE_ENROLL = 1; 98 /** Indicates that we need to un-enroll. 
*/ 99 public static final int MANAGE_ACTION_UN_ENROLL = 2; 100 101 //-- Flags for startRecognition ----// 102 /** @hide */ 103 @Retention(RetentionPolicy.SOURCE) 104 @IntDef(flag = true, 105 value = { 106 RECOGNITION_FLAG_NONE, 107 RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO, 108 RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS 109 }) 110 public @interface RecognitionFlags {} 111 112 /** Empty flag for {@link #startRecognition(int)}. */ 113 public static final int RECOGNITION_FLAG_NONE = 0; 114 /** 115 * Recognition flag for {@link #startRecognition(int)} that indicates 116 * whether the trigger audio for hotword needs to be captured. 117 */ 118 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 119 /** 120 * Recognition flag for {@link #startRecognition(int)} that indicates 121 * whether the recognition should keep going on even after the keyphrase triggers. 122 * If this flag is specified, it's possible to get multiple triggers after a 123 * call to {@link #startRecognition(int)} if the user speaks the keyphrase multiple times. 124 * When this isn't specified, the default behavior is to stop recognition once the 125 * keyphrase is spoken, till the caller starts recognition again. 126 */ 127 public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2; 128 129 //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----// 130 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 131 132 /** @hide */ 133 @Retention(RetentionPolicy.SOURCE) 134 @IntDef(flag = true, 135 value = { 136 RECOGNITION_MODE_VOICE_TRIGGER, 137 RECOGNITION_MODE_USER_IDENTIFICATION, 138 }) 139 public @interface RecognitionModes {} 140 141 /** 142 * Simple recognition of the key phrase. 
143 * Returned by {@link #getSupportedRecognitionModes()} 144 */ 145 public static final int RECOGNITION_MODE_VOICE_TRIGGER 146 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 147 /** 148 * User identification performed with the keyphrase recognition. 149 * Returned by {@link #getSupportedRecognitionModes()} 150 */ 151 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 152 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 153 154 static final String TAG = "AlwaysOnHotwordDetector"; 155 // TODO: Set to false. 156 static final boolean DBG = true; 157 158 private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR; 159 private static final int STATUS_OK = SoundTrigger.STATUS_OK; 160 161 private static final int MSG_AVAILABILITY_CHANGED = 1; 162 private static final int MSG_HOTWORD_DETECTED = 2; 163 private static final int MSG_DETECTION_ERROR = 3; 164 165 private final String mText; 166 private final String mLocale; 167 /** 168 * The metadata of the Keyphrase, derived from the enrollment application. 169 * This may be null if this keyphrase isn't supported by the enrollment application. 170 */ 171 private final KeyphraseMetadata mKeyphraseMetadata; 172 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 173 private final IVoiceInteractionService mVoiceInteractionService; 174 private final IVoiceInteractionManagerService mModelManagementService; 175 private final SoundTriggerListener mInternalCallback; 176 private final Callback mExternalCallback; 177 private final Object mLock = new Object(); 178 private final Handler mHandler; 179 180 private int mAvailability = STATE_NOT_READY; 181 182 /** 183 * Additional payload for {@link Callback#onDetected}. 184 */ 185 public static class EventPayload { 186 /** 187 * Indicates if {@code data} is the audio that triggered the keyphrase. 188 */ 189 public final boolean isTriggerAudio; 190 /** 191 * Format of {@code data}. May be null if {@code isTriggerAudio} is false. 
192 */ 193 @Nullable 194 public final AudioFormat audioFormat; 195 /** 196 * Raw data associated with the event. 197 * This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true. 198 */ 199 @Nullable 200 public final byte[] data; 201 202 private EventPayload(boolean _isTriggerAudio, AudioFormat _audioFormat, byte[] _data) { 203 isTriggerAudio = _isTriggerAudio; 204 audioFormat = _audioFormat; 205 data = _data; 206 } 207 } 208 209 /** 210 * Callbacks for always-on hotword detection. 211 */ 212 public interface Callback { 213 /** 214 * Called when the hotword availability changes. 215 * This indicates a change in the availability of recognition for the given keyphrase. 216 * It's called at least once with the initial availability.<p/> 217 * 218 * Availability implies whether the hardware on this system is capable of listening for 219 * the given keyphrase or not. <p/> 220 * 221 * @see AlwaysOnHotwordDetector#STATE_HARDWARE_UNAVAILABLE 222 * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNSUPPORTED 223 * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNENROLLED 224 * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_ENROLLED 225 */ 226 void onAvailabilityChanged(int status); 227 /** 228 * Called when the keyphrase is spoken. 229 * This implicitly stops listening for the keyphrase once it's detected. 230 * Clients should start a recognition again once they are done handling this 231 * detection. 232 * 233 * @param eventPayload Payload data for the detection event. 234 * This may contain the trigger audio, if requested when calling 235 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 236 */ 237 void onDetected(@NonNull EventPayload eventPayload); 238 /** 239 * Called when the detection fails due to an error. 240 */ 241 void onError(); 242 } 243 244 /** 245 * @param text The keyphrase text to get the detector for. 246 * @param locale The java locale for the detector. 247 * @param callback A non-null Callback for receiving the recognition events. 
248 * @param voiceInteractionService The current voice interaction service. 249 * @param modelManagementService A service that allows management of sound models. 250 * 251 * @hide 252 */ 253 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 254 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 255 IVoiceInteractionService voiceInteractionService, 256 IVoiceInteractionManagerService modelManagementService) { 257 mText = text; 258 mLocale = locale; 259 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 260 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 261 mExternalCallback = callback; 262 mHandler = new MyHandler(); 263 mInternalCallback = new SoundTriggerListener(mHandler); 264 mVoiceInteractionService = voiceInteractionService; 265 mModelManagementService = modelManagementService; 266 new RefreshAvailabiltyTask().execute(); 267 } 268 269 /** 270 * Gets the recognition modes supported by the associated keyphrase. 271 * 272 * @see #RECOGNITION_MODE_USER_IDENTIFICATION 273 * @see #RECOGNITION_MODE_VOICE_TRIGGER 274 * 275 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 276 * Callers should only call this method after a supported state callback on 277 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 278 * @throws IllegalStateException if the detector is in an invalid state. 279 * This may happen if another detector has been instantiated or the 280 * {@link VoiceInteractionService} hosting this detector has been shut down. 
281 */ 282 public @RecognitionModes int getSupportedRecognitionModes() { 283 if (DBG) Slog.d(TAG, "getSupportedRecognitionModes()"); 284 synchronized (mLock) { 285 return getSupportedRecognitionModesLocked(); 286 } 287 } 288 289 private int getSupportedRecognitionModesLocked() { 290 if (mAvailability == STATE_INVALID) { 291 throw new IllegalStateException( 292 "getSupportedRecognitionModes called on an invalid detector"); 293 } 294 295 // This method only makes sense if we can actually support a recognition. 296 if (mAvailability != STATE_KEYPHRASE_ENROLLED 297 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 298 throw new UnsupportedOperationException( 299 "Getting supported recognition modes for the keyphrase is not supported"); 300 } 301 302 return mKeyphraseMetadata.recognitionModeFlags; 303 } 304 305 /** 306 * Starts recognition for the associated keyphrase. 307 * 308 * @param recognitionFlags The flags to control the recognition properties. 309 * The allowed flags are {@link #RECOGNITION_FLAG_NONE}, 310 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO} and 311 * {@link #RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS}. 312 * @return Indicates whether the call succeeded or not. 313 * @throws UnsupportedOperationException if the recognition isn't supported. 314 * Callers should only call this method after a supported state callback on 315 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 316 * @throws IllegalStateException if the detector is in an invalid state. 317 * This may happen if another detector has been instantiated or the 318 * {@link VoiceInteractionService} hosting this detector has been shut down. 
319 */ 320 public boolean startRecognition(@RecognitionFlags int recognitionFlags) { 321 if (DBG) Slog.d(TAG, "startRecognition(" + recognitionFlags + ")"); 322 synchronized (mLock) { 323 if (mAvailability == STATE_INVALID) { 324 throw new IllegalStateException("startRecognition called on an invalid detector"); 325 } 326 327 // Check if we can start/stop a recognition. 328 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 329 throw new UnsupportedOperationException( 330 "Recognition for the given keyphrase is not supported"); 331 } 332 333 return startRecognitionLocked(recognitionFlags) == STATUS_OK; 334 } 335 } 336 337 /** 338 * Stops recognition for the associated keyphrase. 339 * 340 * @return Indicates whether the call succeeded or not. 341 * @throws UnsupportedOperationException if the recognition isn't supported. 342 * Callers should only call this method after a supported state callback on 343 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 344 * @throws IllegalStateException if the detector is in an invalid state. 345 * This may happen if another detector has been instantiated or the 346 * {@link VoiceInteractionService} hosting this detector has been shut down. 347 */ 348 public boolean stopRecognition() { 349 if (DBG) Slog.d(TAG, "stopRecognition()"); 350 synchronized (mLock) { 351 if (mAvailability == STATE_INVALID) { 352 throw new IllegalStateException("stopRecognition called on an invalid detector"); 353 } 354 355 // Check if we can start/stop a recognition. 356 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 357 throw new UnsupportedOperationException( 358 "Recognition for the given keyphrase is not supported"); 359 } 360 361 return stopRecognitionLocked() == STATUS_OK; 362 } 363 } 364 365 /** 366 * Gets an intent to manage the associated keyphrase. 367 * 368 * @param action The manage action that needs to be performed. 
369 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 370 * {@link #MANAGE_ACTION_UN_ENROLL}. 371 * @return An {@link Intent} to manage the given keyphrase. 372 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 373 * Callers should only call this method after a supported state callback on 374 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 375 * @throws IllegalStateException if the detector is in an invalid state. 376 * This may happen if another detector has been instantiated or the 377 * {@link VoiceInteractionService} hosting this detector has been shut down. 378 */ 379 public Intent getManageIntent(@ManageActions int action) { 380 if (DBG) Slog.d(TAG, "getManageIntent(" + action + ")"); 381 synchronized (mLock) { 382 return getManageIntentLocked(action); 383 } 384 } 385 386 private Intent getManageIntentLocked(int action) { 387 if (mAvailability == STATE_INVALID) { 388 throw new IllegalStateException("getManageIntent called on an invalid detector"); 389 } 390 391 // This method only makes sense if we can actually support a recognition. 392 if (mAvailability != STATE_KEYPHRASE_ENROLLED 393 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 394 throw new UnsupportedOperationException( 395 "Managing the given keyphrase is not supported"); 396 } 397 398 if (action != MANAGE_ACTION_ENROLL 399 && action != MANAGE_ACTION_RE_ENROLL 400 && action != MANAGE_ACTION_UN_ENROLL) { 401 throw new IllegalArgumentException("Invalid action specified " + action); 402 } 403 404 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 405 } 406 407 /** 408 * Invalidates this hotword detector so that any future calls to this result 409 * in an IllegalStateException. 410 * 411 * @hide 412 */ 413 void invalidate() { 414 synchronized (mLock) { 415 mAvailability = STATE_INVALID; 416 notifyStateChangedLocked(); 417 } 418 } 419 420 /** 421 * Reloads the sound models from the service. 
422 * 423 * @hide 424 */ 425 void onSoundModelsChanged() { 426 synchronized (mLock) { 427 // FIXME: This should stop the recognition if it was using an enrolled sound model 428 // that's no longer available. 429 if (mAvailability == STATE_INVALID 430 || mAvailability == STATE_HARDWARE_UNAVAILABLE 431 || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) { 432 Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config"); 433 return; 434 } 435 436 // Execute a refresh availability task - which should then notify of a change. 437 new RefreshAvailabiltyTask().execute(); 438 } 439 } 440 441 private int startRecognitionLocked(int recognitionFlags) { 442 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 443 // TODO: Do we need to do something about the confidence level here? 444 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 445 mKeyphraseMetadata.recognitionModeFlags, 0, new ConfidenceLevel[0]); 446 boolean captureTriggerAudio = 447 (recognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 448 boolean allowMultipleTriggers = 449 (recognitionFlags&RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0; 450 int code = STATUS_ERROR; 451 try { 452 code = mModelManagementService.startRecognition(mVoiceInteractionService, 453 mKeyphraseMetadata.id, mInternalCallback, 454 new RecognitionConfig(captureTriggerAudio, allowMultipleTriggers, 455 recognitionExtra, null /* additional data */)); 456 } catch (RemoteException e) { 457 Slog.w(TAG, "RemoteException in startRecognition!"); 458 } 459 if (code != STATUS_OK) { 460 Slog.w(TAG, "startRecognition() failed with error code " + code); 461 } 462 return code; 463 } 464 465 private int stopRecognitionLocked() { 466 int code = STATUS_ERROR; 467 try { 468 code = mModelManagementService.stopRecognition( 469 mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback); 470 } catch (RemoteException e) { 471 Slog.w(TAG, "RemoteException in stopRecognition!"); 472 
} 473 474 if (code != STATUS_OK) { 475 Slog.w(TAG, "stopRecognition() failed with error code " + code); 476 } 477 return code; 478 } 479 480 private void notifyStateChangedLocked() { 481 Message message = Message.obtain(mHandler, MSG_AVAILABILITY_CHANGED); 482 message.arg1 = mAvailability; 483 message.sendToTarget(); 484 } 485 486 /** @hide */ 487 static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub { 488 private final Handler mHandler; 489 490 public SoundTriggerListener(Handler handler) { 491 mHandler = handler; 492 } 493 494 @Override 495 public void onDetected(KeyphraseRecognitionEvent event) { 496 if (DBG) { 497 Slog.d(TAG, "onDetected(" + event + ")"); 498 } else { 499 Slog.i(TAG, "onDetected"); 500 } 501 Message.obtain(mHandler, MSG_HOTWORD_DETECTED, 502 new EventPayload(event.triggerInData, event.captureFormat, event.data)) 503 .sendToTarget(); 504 } 505 506 @Override 507 public void onError(int status) { 508 Slog.i(TAG, "onError: " + status); 509 mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 510 } 511 } 512 513 class MyHandler extends Handler { 514 @Override 515 public void handleMessage(Message msg) { 516 synchronized (mLock) { 517 if (mAvailability == STATE_INVALID) { 518 Slog.w(TAG, "Received message: " + msg.what + " for an invalid detector"); 519 return; 520 } 521 } 522 523 switch (msg.what) { 524 case MSG_AVAILABILITY_CHANGED: 525 mExternalCallback.onAvailabilityChanged(msg.arg1); 526 break; 527 case MSG_HOTWORD_DETECTED: 528 mExternalCallback.onDetected((EventPayload) msg.obj); 529 break; 530 case MSG_DETECTION_ERROR: 531 mExternalCallback.onError(); 532 break; 533 default: 534 super.handleMessage(msg); 535 } 536 } 537 } 538 539 class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> { 540 541 @Override 542 public Void doInBackground(Void... 
params) { 543 int availability = internalGetInitialAvailability(); 544 boolean enrolled = false; 545 // Fetch the sound model if the availability is one of the supported ones. 546 if (availability == STATE_NOT_READY 547 || availability == STATE_KEYPHRASE_UNENROLLED 548 || availability == STATE_KEYPHRASE_ENROLLED) { 549 enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id); 550 if (!enrolled) { 551 availability = STATE_KEYPHRASE_UNENROLLED; 552 } else { 553 availability = STATE_KEYPHRASE_ENROLLED; 554 } 555 } 556 557 synchronized (mLock) { 558 if (DBG) { 559 Slog.d(TAG, "Hotword availability changed from " + mAvailability 560 + " -> " + availability); 561 } 562 mAvailability = availability; 563 notifyStateChangedLocked(); 564 } 565 return null; 566 } 567 568 /** 569 * @return The initial availability without checking the enrollment status. 570 */ 571 private int internalGetInitialAvailability() { 572 synchronized (mLock) { 573 // This detector has already been invalidated. 574 if (mAvailability == STATE_INVALID) { 575 return STATE_INVALID; 576 } 577 } 578 579 ModuleProperties dspModuleProperties = null; 580 try { 581 dspModuleProperties = 582 mModelManagementService.getDspModuleProperties(mVoiceInteractionService); 583 } catch (RemoteException e) { 584 Slog.w(TAG, "RemoteException in getDspProperties!"); 585 } 586 // No DSP available 587 if (dspModuleProperties == null) { 588 return STATE_HARDWARE_UNAVAILABLE; 589 } 590 // No enrollment application supports this keyphrase/locale 591 if (mKeyphraseMetadata == null) { 592 return STATE_KEYPHRASE_UNSUPPORTED; 593 } 594 return STATE_NOT_READY; 595 } 596 597 /** 598 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 
599 */ 600 private boolean internalGetIsEnrolled(int keyphraseId) { 601 try { 602 return mModelManagementService.isEnrolledForKeyphrase( 603 mVoiceInteractionService, keyphraseId); 604 } catch (RemoteException e) { 605 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 606 } 607 return false; 608 } 609 } 610} 611