AlwaysOnHotwordDetector.java revision 0db30899f0d44e4fbaddffb79cc3415db6efb657
/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.service.voice;

import android.annotation.IntDef;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.content.Intent;
import android.hardware.soundtrigger.IRecognitionStatusCallback;
import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
import android.hardware.soundtrigger.KeyphraseMetadata;
import android.hardware.soundtrigger.SoundTrigger;
import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent;
import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
import android.media.AudioFormat;
import android.os.AsyncTask;
import android.os.Handler;
import android.os.Message;
import android.os.RemoteException;
import android.util.Slog;

import com.android.internal.app.IVoiceInteractionManagerService;

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;

/**
 * A class that lets a VoiceInteractionService implementation interact with
 * always-on keyphrase detection APIs.
 *
 * <p>All reads and writes of the availability state are guarded by an internal lock.
 * Client callbacks are dispatched through a {@link Handler} and are invoked without that
 * lock held.
 */
public class AlwaysOnHotwordDetector {
    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
    /**
     * Indicates that this hotword detector is no longer valid for any recognition
     * and should not be used anymore.
     */
    private static final int STATE_INVALID = -3;

    /**
     * Indicates that recognition for the given keyphrase is not available on the system
     * because of the hardware configuration.
     * No further interaction should be performed with the detector that returns this availability.
     */
    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
    /**
     * Indicates that recognition for the given keyphrase is not supported.
     * No further interaction should be performed with the detector that returns this availability.
     */
    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
    /**
     * Indicates that the given keyphrase is not enrolled.
     * The caller may choose to begin an enrollment flow for the keyphrase.
     */
    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
    /**
     * Indicates that the given keyphrase is currently enrolled and it's possible to start
     * recognition for it.
     */
    public static final int STATE_KEYPHRASE_ENROLLED = 2;

    /**
     * Indicates that the detector isn't ready currently.
     */
    private static final int STATE_NOT_READY = 0;

    //---- Keyphrase management actions. Used in getManageIntent() ----//
    /** @hide */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(value = {
                MANAGE_ACTION_ENROLL,
                MANAGE_ACTION_RE_ENROLL,
                MANAGE_ACTION_UN_ENROLL
            })
    public @interface ManageActions {}

    /** Indicates that we need to enroll. */
    public static final int MANAGE_ACTION_ENROLL = 0;
    /** Indicates that we need to re-enroll. */
    public static final int MANAGE_ACTION_RE_ENROLL = 1;
    /** Indicates that we need to un-enroll. */
    public static final int MANAGE_ACTION_UN_ENROLL = 2;

    //---- Flags for startRecognition ----//
    /** @hide */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
            })
    public @interface RecognitionFlags {}

    /** Empty flag for {@link #startRecognition(int)}. */
    public static final int RECOGNITION_FLAG_NONE = 0;
    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the keyphrase triggers.
     * If this flag is specified, it's possible to get multiple triggers after a
     * call to {@link #startRecognition(int)} if the user speaks the keyphrase multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * keyphrase is spoken, till the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;

    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.

    /** @hide */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_MODE_VOICE_TRIGGER,
                RECOGNITION_MODE_USER_IDENTIFICATION,
            })
    public @interface RecognitionModes {}

    /**
     * Simple recognition of the key phrase.
     * Returned by {@link #getSupportedRecognitionModes()}
     */
    public static final int RECOGNITION_MODE_VOICE_TRIGGER
            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
    /**
     * User identification performed with the keyphrase recognition.
     * Returned by {@link #getSupportedRecognitionModes()}
     */
    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;

    static final String TAG = "AlwaysOnHotwordDetector";
    // TODO: Set to false.
    static final boolean DBG = true;

    private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
    private static final int STATUS_OK = SoundTrigger.STATUS_OK;

    // Message codes understood by MyHandler.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_HOTWORD_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;

    private final String mText;
    private final String mLocale;
    /**
     * The metadata of the Keyphrase, derived from the enrollment application.
     * This may be null if this keyphrase isn't supported by the enrollment application.
     */
    private final KeyphraseMetadata mKeyphraseMetadata;
    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
    private final IVoiceInteractionService mVoiceInteractionService;
    private final IVoiceInteractionManagerService mModelManagementService;
    private final SoundTriggerListener mInternalCallback;
    private final Callback mExternalCallback;
    /** Guards {@link #mAvailability}. */
    private final Object mLock = new Object();
    private final Handler mHandler;

    private int mAvailability = STATE_NOT_READY;

    /**
     * Details of the audio that triggered the keyphrase.
     */
    public static class TriggerAudio {
        /**
         * Format of {@code data}.
         */
        @NonNull
        public final AudioFormat audioFormat;
        /**
         * Raw audio data that triggered the keyphrase.
         */
        @NonNull
        public final byte[] data;

        private TriggerAudio(AudioFormat _audioFormat, byte[] _data) {
            audioFormat = _audioFormat;
            data = _data;
        }
    }

    /**
     * Callbacks for always-on hotword detection.
     */
    public interface Callback {
        /**
         * Called when the hotword availability changes.
         * This indicates a change in the availability of recognition for the given keyphrase.
         * It's called at least once with the initial availability.<p/>
         *
         * Availability implies whether the hardware on this system is capable of listening for
         * the given keyphrase or not. <p/>
         *
         * @see AlwaysOnHotwordDetector#STATE_HARDWARE_UNAVAILABLE
         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNSUPPORTED
         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNENROLLED
         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_ENROLLED
         */
        void onAvailabilityChanged(int status);
        /**
         * Called when the keyphrase is spoken.
         * This implicitly stops listening for the keyphrase once it's detected.
         * Clients should start a recognition again once they are done handling this
         * detection.
         *
         * @param triggerAudio Optional trigger audio data, if it was requested during
         *     {@link AlwaysOnHotwordDetector#startRecognition(int)}.
         */
        void onDetected(@Nullable TriggerAudio triggerAudio);
        /**
         * Called when the detection fails due to an error.
         */
        void onError();
    }

    /**
     * Creates the detector and immediately schedules a {@link RefreshAvailabiltyTask} to
     * compute the initial availability.
     *
     * @param text The keyphrase text to get the detector for.
     * @param locale The java locale for the detector.
     * @param callback A non-null Callback for receiving the recognition events.
     * @param keyphraseEnrollmentInfo The enrollment information used to look up the
     *        {@link KeyphraseMetadata} for {@code text}/{@code locale} and to build
     *        manage intents.
     * @param voiceInteractionService The current voice interaction service.
     * @param modelManagementService A service that allows management of sound models.
     *
     * @hide
     */
    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
            IVoiceInteractionService voiceInteractionService,
            IVoiceInteractionManagerService modelManagementService) {
        mText = text;
        mLocale = locale;
        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
        mExternalCallback = callback;
        mHandler = new MyHandler();
        mInternalCallback = new SoundTriggerListener(mHandler);
        mVoiceInteractionService = voiceInteractionService;
        mModelManagementService = modelManagementService;
        new RefreshAvailabiltyTask().execute();
    }

    /**
     * Gets the recognition modes supported by the associated keyphrase.
     *
     * @see #RECOGNITION_MODE_USER_IDENTIFICATION
     * @see #RECOGNITION_MODE_VOICE_TRIGGER
     *
     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
     *         Callers should only call this method after a supported state callback on
     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
     * @throws IllegalStateException if the detector is in an invalid state.
     *         This may happen if another detector has been instantiated or the
     *         {@link VoiceInteractionService} hosting this detector has been shut down.
     */
    public @RecognitionModes int getSupportedRecognitionModes() {
        if (DBG) Slog.d(TAG, "getSupportedRecognitionModes()");
        synchronized (mLock) {
            return getSupportedRecognitionModesLocked();
        }
    }

    /**
     * Implementation of {@link #getSupportedRecognitionModes()}; must be called with
     * {@code mLock} held.
     */
    private int getSupportedRecognitionModesLocked() {
        if (mAvailability == STATE_INVALID) {
            throw new IllegalStateException(
                    "getSupportedRecognitionModes called on an invalid detector");
        }

        // This method only makes sense if we can actually support a recognition.
        if (mAvailability != STATE_KEYPHRASE_ENROLLED
                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
            throw new UnsupportedOperationException(
                    "Getting supported recognition modes for the keyphrase is not supported");
        }

        return mKeyphraseMetadata.recognitionModeFlags;
    }

    /**
     * Starts recognition for the associated keyphrase.
     *
     * @param recognitionFlags The flags to control the recognition properties.
     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE},
     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO} and
     *        {@link #RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS}.
     * @return Indicates whether the call succeeded or not.
     * @throws UnsupportedOperationException if the recognition isn't supported.
     *         Callers should only call this method after a supported state callback on
     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
     * @throws IllegalStateException if the detector is in an invalid state.
     *         This may happen if another detector has been instantiated or the
     *         {@link VoiceInteractionService} hosting this detector has been shut down.
     */
    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
        if (DBG) Slog.d(TAG, "startRecognition(" + recognitionFlags + ")");
        synchronized (mLock) {
            if (mAvailability == STATE_INVALID) {
                throw new IllegalStateException("startRecognition called on an invalid detector");
            }

            // Check if we can start/stop a recognition.
            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
                throw new UnsupportedOperationException(
                        "Recognition for the given keyphrase is not supported");
            }

            return startRecognitionLocked(recognitionFlags) == STATUS_OK;
        }
    }

    /**
     * Stops recognition for the associated keyphrase.
     *
     * @return Indicates whether the call succeeded or not.
     * @throws UnsupportedOperationException if the recognition isn't supported.
     *         Callers should only call this method after a supported state callback on
     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
     * @throws IllegalStateException if the detector is in an invalid state.
     *         This may happen if another detector has been instantiated or the
     *         {@link VoiceInteractionService} hosting this detector has been shut down.
     */
    public boolean stopRecognition() {
        if (DBG) Slog.d(TAG, "stopRecognition()");
        synchronized (mLock) {
            if (mAvailability == STATE_INVALID) {
                throw new IllegalStateException("stopRecognition called on an invalid detector");
            }

            // Check if we can start/stop a recognition.
            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
                throw new UnsupportedOperationException(
                        "Recognition for the given keyphrase is not supported");
            }

            return stopRecognitionLocked() == STATUS_OK;
        }
    }

    /**
     * Gets an intent to manage the associated keyphrase.
     *
     * @param action The manage action that needs to be performed.
     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
     *        {@link #MANAGE_ACTION_UN_ENROLL}.
     * @return An {@link Intent} to manage the given keyphrase.
     * @throws UnsupportedOperationException if managing the keyphrase isn't supported.
     *         Callers should only call this method after a supported state callback on
     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
     * @throws IllegalStateException if the detector is in an invalid state.
     *         This may happen if another detector has been instantiated or the
     *         {@link VoiceInteractionService} hosting this detector has been shut down.
     * @throws IllegalArgumentException if {@code action} is not one of the manage actions
     *         listed above.
     */
    public Intent getManageIntent(@ManageActions int action) {
        if (DBG) Slog.d(TAG, "getManageIntent(" + action + ")");
        synchronized (mLock) {
            return getManageIntentLocked(action);
        }
    }

    /**
     * Implementation of {@link #getManageIntent(int)}; must be called with {@code mLock} held.
     */
    private Intent getManageIntentLocked(int action) {
        if (mAvailability == STATE_INVALID) {
            throw new IllegalStateException("getManageIntent called on an invalid detector");
        }

        // This method only makes sense if we can actually support a recognition.
        if (mAvailability != STATE_KEYPHRASE_ENROLLED
                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
            throw new UnsupportedOperationException(
                    "Managing the given keyphrase is not supported");
        }

        if (action != MANAGE_ACTION_ENROLL
                && action != MANAGE_ACTION_RE_ENROLL
                && action != MANAGE_ACTION_UN_ENROLL) {
            throw new IllegalArgumentException("Invalid action specified " + action);
        }

        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
    }

    /**
     * Invalidates this hotword detector so that any future calls to this result
     * in an IllegalStateException.
     *
     * @hide
     */
    void invalidate() {
        synchronized (mLock) {
            mAvailability = STATE_INVALID;
            // NOTE(review): MyHandler drops every message while the detector is invalid, so
            // this notification does not appear to reach the external callback.
            notifyStateChangedLocked();
        }
    }

    /**
     * Reloads the sound models from the service.
     *
     * @hide
     */
    void onSoundModelsChanged() {
        synchronized (mLock) {
            // FIXME: This should stop the recognition if it was using an enrolled sound model
            // that's no longer available.
            if (mAvailability == STATE_INVALID
                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
                return;
            }

            // Execute a refresh availability task - which should then notify of a change.
            new RefreshAvailabiltyTask().execute();
        }
    }

    /**
     * Asks the model management service to start recognition for this keyphrase.
     * Called with {@code mLock} held, after the availability checks have passed.
     *
     * @param recognitionFlags bitmask of the {@code RECOGNITION_FLAG_*} constants.
     * @return the status code returned by the service, or {@code STATUS_ERROR} if the
     *         remote call threw.
     */
    private int startRecognitionLocked(int recognitionFlags) {
        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
        // TODO: Do we need to do something about the confidence level here?
        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
                mKeyphraseMetadata.recognitionModeFlags, 0, new ConfidenceLevel[0]);
        boolean captureTriggerAudio =
                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
        boolean allowMultipleTriggers =
                (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
        int code = STATUS_ERROR;
        try {
            code = mModelManagementService.startRecognition(mVoiceInteractionService,
                    mKeyphraseMetadata.id, mInternalCallback,
                    new RecognitionConfig(captureTriggerAudio, allowMultipleTriggers,
                            recognitionExtra, null /* additional data */));
        } catch (RemoteException e) {
            // Keep the cause in the log; the failure is reported to the caller via the code.
            Slog.w(TAG, "RemoteException in startRecognition!", e);
        }
        if (code != STATUS_OK) {
            Slog.w(TAG, "startRecognition() failed with error code " + code);
        }
        return code;
    }

    /**
     * Asks the model management service to stop recognition for this keyphrase.
     * Called with {@code mLock} held, after the availability checks have passed.
     *
     * @return the status code returned by the service, or {@code STATUS_ERROR} if the
     *         remote call threw.
     */
    private int stopRecognitionLocked() {
        int code = STATUS_ERROR;
        try {
            code = mModelManagementService.stopRecognition(
                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
        } catch (RemoteException e) {
            // Keep the cause in the log; the failure is reported to the caller via the code.
            Slog.w(TAG, "RemoteException in stopRecognition!", e);
        }

        if (code != STATUS_OK) {
            Slog.w(TAG, "stopRecognition() failed with error code " + code);
        }
        return code;
    }

    /**
     * Posts the current value of {@code mAvailability} to the handler as a
     * {@code MSG_AVAILABILITY_CHANGED} message. Must be called with {@code mLock} held.
     */
    private void notifyStateChangedLocked() {
        Message message = Message.obtain(mHandler, MSG_AVAILABILITY_CHANGED);
        message.arg1 = mAvailability;
        message.sendToTarget();
    }

    /**
     * Receives recognition callbacks and forwards them to the given handler as messages.
     *
     * @hide
     */
    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
        private final Handler mHandler;

        public SoundTriggerListener(Handler handler) {
            mHandler = handler;
        }

        /**
         * Posts a {@code MSG_HOTWORD_DETECTED} message; when the event carries data it is
         * wrapped in a {@link TriggerAudio}, otherwise the message has no payload.
         */
        @Override
        public void onDetected(KeyphraseRecognitionEvent event) {
            if (DBG) {
                Slog.d(TAG, "OnDetected(" + event + ")");
            } else {
                Slog.i(TAG, "onDetected");
            }
            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
            // FIXME: Check whether the event contains trigger data or not.
            // FIXME: Read the audio format from the event.
            if (event.data != null) {
                // The format below is hard-coded rather than read from the event (see FIXME).
                AudioFormat audioFormat = new AudioFormat.Builder()
                        .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
                        .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                        .setSampleRate(16000)
                        .build();
                message.obj = new TriggerAudio(audioFormat, event.data);
            }
            message.sendToTarget();
        }

        /** Logs the status and posts an empty {@code MSG_DETECTION_ERROR} message. */
        @Override
        public void onError(int status) {
            Slog.i(TAG, "onError: " + status);
            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
        }
    }

    /**
     * Dispatches posted messages to the external {@link Callback}. Messages that arrive
     * once the detector has been invalidated are logged and dropped.
     */
    class MyHandler extends Handler {
        @Override
        public void handleMessage(Message msg) {
            synchronized (mLock) {
                if (mAvailability == STATE_INVALID) {
                    Slog.w(TAG, "Received message: " + msg.what + " for an invalid detector");
                    return;
                }
            }

            // The client callback is invoked outside the lock.
            switch (msg.what) {
                case MSG_AVAILABILITY_CHANGED:
                    mExternalCallback.onAvailabilityChanged(msg.arg1);
                    break;
                case MSG_HOTWORD_DETECTED:
                    mExternalCallback.onDetected((TriggerAudio) msg.obj);
                    break;
                case MSG_DETECTION_ERROR:
                    mExternalCallback.onError();
                    break;
                default:
                    super.handleMessage(msg);
            }
        }
    }

    /**
     * Background task that recomputes the availability of the keyphrase, stores it in
     * {@code mAvailability} and posts an availability-changed notification.
     */
    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {

        @Override
        public Void doInBackground(Void... params) {
            int availability = internalGetInitialAvailability();
            // Check the enrollment status if the availability is one of the supported ones.
            if (availability == STATE_NOT_READY
                    || availability == STATE_KEYPHRASE_UNENROLLED
                    || availability == STATE_KEYPHRASE_ENROLLED) {
                availability = internalGetIsEnrolled(mKeyphraseMetadata.id)
                        ? STATE_KEYPHRASE_ENROLLED
                        : STATE_KEYPHRASE_UNENROLLED;
            }

            synchronized (mLock) {
                // The remote calls above run without the lock, so invalidate() may have run
                // in the meantime. Never overwrite STATE_INVALID, otherwise an invalidated
                // detector would become usable again.
                if (mAvailability == STATE_INVALID) {
                    if (DBG) {
                        Slog.d(TAG, "Detector invalidated; dropping availability " + availability);
                    }
                    return null;
                }
                if (DBG) {
                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
                            + " -> " + availability);
                }
                mAvailability = availability;
                notifyStateChangedLocked();
            }
            return null;
        }

        /**
         * @return The initial availability without checking the enrollment status.
         */
        private int internalGetInitialAvailability() {
            synchronized (mLock) {
                // This detector has already been invalidated.
                if (mAvailability == STATE_INVALID) {
                    return STATE_INVALID;
                }
            }

            ModuleProperties dspModuleProperties = null;
            try {
                dspModuleProperties =
                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
            } catch (RemoteException e) {
                // Treated the same as "no DSP": dspModuleProperties stays null.
                Slog.w(TAG, "RemoteException in getDspModuleProperties!", e);
            }
            // No DSP available
            if (dspModuleProperties == null) {
                return STATE_HARDWARE_UNAVAILABLE;
            }
            // No enrollment application supports this keyphrase/locale
            if (mKeyphraseMetadata == null) {
                return STATE_KEYPHRASE_UNSUPPORTED;
            }
            return STATE_NOT_READY;
        }

        /**
         * @param keyphraseId The ID of the keyphrase to query.
         * @return Whether the model management service reports an enrollment for the given
         *         keyphrase; {@code false} if the remote call fails.
         */
        private boolean internalGetIsEnrolled(int keyphraseId) {
            try {
                return mModelManagementService.isEnrolledForKeyphrase(
                        mVoiceInteractionService, keyphraseId);
            } catch (RemoteException e) {
                Slog.w(TAG, "RemoteException in isEnrolledForKeyphrase!", e);
            }
            return false;
        }
    }
}