// AlwaysOnHotwordDetector.java revision d3b8223377b8046280e4c09e728edc600171f941
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.annotation.IntDef; 20import android.annotation.NonNull; 21import android.annotation.Nullable; 22import android.content.Intent; 23import android.hardware.soundtrigger.IRecognitionStatusCallback; 24import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 25import android.hardware.soundtrigger.KeyphraseMetadata; 26import android.hardware.soundtrigger.SoundTrigger; 27import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 28import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent; 29import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 30import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel; 31import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 32import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 33import android.media.AudioFormat; 34import android.os.AsyncTask; 35import android.os.Handler; 36import android.os.Message; 37import android.os.RemoteException; 38import android.util.Slog; 39 40import com.android.internal.app.IVoiceInteractionManagerService; 41 42import java.lang.annotation.Retention; 43import java.lang.annotation.RetentionPolicy; 44 45/** 46 * A class that lets a VoiceInteractionService implementation interact with 47 * always-on keyphrase detection APIs. 
48 */ 49public class AlwaysOnHotwordDetector { 50 //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----// 51 /** 52 * Indicates that this hotword detector is no longer valid for any recognition 53 * and should not be used anymore. 54 */ 55 private static final int STATE_INVALID = -3; 56 57 /** 58 * Indicates that recognition for the given keyphrase is not available on the system 59 * because of the hardware configuration. 60 * No further interaction should be performed with the detector that returns this availability. 61 */ 62 public static final int STATE_HARDWARE_UNAVAILABLE = -2; 63 /** 64 * Indicates that recognition for the given keyphrase is not supported. 65 * No further interaction should be performed with the detector that returns this availability. 66 */ 67 public static final int STATE_KEYPHRASE_UNSUPPORTED = -1; 68 /** 69 * Indicates that the given keyphrase is not enrolled. 70 * The caller may choose to begin an enrollment flow for the keyphrase. 71 */ 72 public static final int STATE_KEYPHRASE_UNENROLLED = 1; 73 /** 74 * Indicates that the given keyphrase is currently enrolled and it's possible to start 75 * recognition for it. 76 */ 77 public static final int STATE_KEYPHRASE_ENROLLED = 2; 78 79 /** 80 * Indicates that the detector isn't ready currently. 81 */ 82 private static final int STATE_NOT_READY = 0; 83 84 // Keyphrase management actions. Used in getManageIntent() ----// 85 /** Indicates that we need to enroll. */ 86 public static final int MANAGE_ACTION_ENROLL = 0; 87 /** Indicates that we need to re-enroll. */ 88 public static final int MANAGE_ACTION_RE_ENROLL = 1; 89 /** Indicates that we need to un-enroll. 
*/ 90 public static final int MANAGE_ACTION_UN_ENROLL = 2; 91 92 //-- Flags for startRecognition ----// 93 /** @hide */ 94 @Retention(RetentionPolicy.SOURCE) 95 @IntDef(flag = true, 96 value = { 97 RECOGNITION_FLAG_NONE, 98 RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO, 99 RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS 100 }) 101 public @interface RecognitionFlags {} 102 103 /** Empty flag for {@link #startRecognition(int)}. */ 104 public static final int RECOGNITION_FLAG_NONE = 0; 105 /** 106 * Recognition flag for {@link #startRecognition(int)} that indicates 107 * whether the trigger audio for hotword needs to be captured. 108 */ 109 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 110 /** 111 * Recognition flag for {@link #startRecognition(int)} that indicates 112 * whether the recognition should keep going on even after the keyphrase triggers. 113 * If this flag is specified, it's possible to get multiple triggers after a 114 * call to {@link #startRecognition(int)} if the user speaks the keyphrase multiple times. 115 * When this isn't specified, the default behavior is to stop recognition once the 116 * keyphrase is spoken, till the caller starts recognition again. 117 */ 118 public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2; 119 120 //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----// 121 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 122 123 /** @hide */ 124 @Retention(RetentionPolicy.SOURCE) 125 @IntDef(flag = true, 126 value = { 127 RECOGNITION_MODE_VOICE_TRIGGER, 128 RECOGNITION_MODE_USER_IDENTIFICATION, 129 }) 130 public @interface RecognitionModes {} 131 132 /** 133 * Simple recognition of the key phrase. 
134 * Returned by {@link #getSupportedRecognitionModes()} 135 */ 136 public static final int RECOGNITION_MODE_VOICE_TRIGGER 137 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 138 /** 139 * User identification performed with the keyphrase recognition. 140 * Returned by {@link #getSupportedRecognitionModes()} 141 */ 142 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 143 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 144 145 static final String TAG = "AlwaysOnHotwordDetector"; 146 // TODO: Set to false. 147 static final boolean DBG = true; 148 149 private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR; 150 private static final int STATUS_OK = SoundTrigger.STATUS_OK; 151 152 private static final int MSG_AVAILABILITY_CHANGED = 1; 153 private static final int MSG_HOTWORD_DETECTED = 2; 154 private static final int MSG_DETECTION_ERROR = 3; 155 156 private final String mText; 157 private final String mLocale; 158 /** 159 * The metadata of the Keyphrase, derived from the enrollment application. 160 * This may be null if this keyphrase isn't supported by the enrollment application. 161 */ 162 private final KeyphraseMetadata mKeyphraseMetadata; 163 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 164 private final IVoiceInteractionService mVoiceInteractionService; 165 private final IVoiceInteractionManagerService mModelManagementService; 166 private final SoundTriggerListener mInternalCallback; 167 private final Callback mExternalCallback; 168 private final Object mLock = new Object(); 169 private final Handler mHandler; 170 171 private int mAvailability = STATE_NOT_READY; 172 173 /** 174 * Details of the audio that triggered the keyphrase. 175 */ 176 public static class TriggerAudio { 177 /** 178 * Format of {@code data}. 179 */ 180 @NonNull 181 public final AudioFormat audioFormat; 182 /** 183 * Raw audio data that triggered they keyphrase. 
184 */ 185 @NonNull 186 public final byte[] data; 187 188 private TriggerAudio(AudioFormat _audioFormat, byte[] _data) { 189 audioFormat = _audioFormat; 190 data = _data; 191 } 192 } 193 194 /** 195 * Callbacks for always-on hotword detection. 196 */ 197 public interface Callback { 198 /** 199 * Called when the hotword availability changes. 200 * This indicates a change in the availability of recognition for the given keyphrase. 201 * It's called at least once with the initial availability.<p/> 202 * 203 * Availability implies whether the hardware on this system is capable of listening for 204 * the given keyphrase or not. <p/> 205 * 206 * @see AlwaysOnHotwordDetector#STATE_HARDWARE_UNAVAILABLE 207 * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNSUPPORTED 208 * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNENROLLED 209 * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_ENROLLED 210 */ 211 void onAvailabilityChanged(int status); 212 /** 213 * Called when the keyphrase is spoken. 214 * This implicitly stops listening for the keyphrase once it's detected. 215 * Clients should start a recognition again once they are done handling this 216 * detection. 217 * 218 * @param triggerAudio Optional trigger audio data, if it was requested during 219 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 220 */ 221 void onDetected(@Nullable TriggerAudio triggerAudio); 222 /** 223 * Called when the detection fails due to an error. 224 */ 225 void onError(); 226 } 227 228 /** 229 * @param text The keyphrase text to get the detector for. 230 * @param locale The java locale for the detector. 231 * @param callback A non-null Callback for receiving the recognition events. 232 * @param voiceInteractionService The current voice interaction service. 233 * @param modelManagementService A service that allows management of sound models. 
234 * 235 * @hide 236 */ 237 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 238 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 239 IVoiceInteractionService voiceInteractionService, 240 IVoiceInteractionManagerService modelManagementService) { 241 mText = text; 242 mLocale = locale; 243 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 244 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 245 mExternalCallback = callback; 246 mHandler = new MyHandler(); 247 mInternalCallback = new SoundTriggerListener(mHandler); 248 mVoiceInteractionService = voiceInteractionService; 249 mModelManagementService = modelManagementService; 250 new RefreshAvailabiltyTask().execute(); 251 } 252 253 /** 254 * Gets the recognition modes supported by the associated keyphrase. 255 * 256 * @see #RECOGNITION_MODE_USER_IDENTIFICATION 257 * @see #RECOGNITION_MODE_VOICE_TRIGGER 258 * 259 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 260 * Callers should only call this method after a supported state callback on 261 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 262 * @throws IllegalStateException if the detector is in an invalid state. 263 * This may happen if another detector has been instantiated or the 264 * {@link VoiceInteractionService} hosting this detector has been shut down. 265 */ 266 public @RecognitionModes int getSupportedRecognitionModes() { 267 if (DBG) Slog.d(TAG, "getSupportedRecognitionModes()"); 268 synchronized (mLock) { 269 return getSupportedRecognitionModesLocked(); 270 } 271 } 272 273 private int getSupportedRecognitionModesLocked() { 274 if (mAvailability == STATE_INVALID) { 275 throw new IllegalStateException( 276 "getSupportedRecognitionModes called on an invalid detector"); 277 } 278 279 // This method only makes sense if we can actually support a recognition. 
280 if (mAvailability != STATE_KEYPHRASE_ENROLLED 281 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 282 throw new UnsupportedOperationException( 283 "Getting supported recognition modes for the keyphrase is not supported"); 284 } 285 286 return mKeyphraseMetadata.recognitionModeFlags; 287 } 288 289 /** 290 * Starts recognition for the associated keyphrase. 291 * 292 * @param recognitionFlags The flags to control the recognition properties. 293 * The allowed flags are {@link #RECOGNITION_FLAG_NONE}, 294 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO} and 295 * {@link #RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS}. 296 * @return Indicates whether the call succeeded or not. 297 * @throws UnsupportedOperationException if the recognition isn't supported. 298 * Callers should only call this method after a supported state callback on 299 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 300 * @throws IllegalStateException if the detector is in an invalid state. 301 * This may happen if another detector has been instantiated or the 302 * {@link VoiceInteractionService} hosting this detector has been shut down. 303 */ 304 public boolean startRecognition(@RecognitionFlags int recognitionFlags) { 305 if (DBG) Slog.d(TAG, "startRecognition(" + recognitionFlags + ")"); 306 synchronized (mLock) { 307 if (mAvailability == STATE_INVALID) { 308 throw new IllegalStateException("startRecognition called on an invalid detector"); 309 } 310 311 // Check if we can start/stop a recognition. 312 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 313 throw new UnsupportedOperationException( 314 "Recognition for the given keyphrase is not supported"); 315 } 316 317 return startRecognitionLocked(recognitionFlags) == STATUS_OK; 318 } 319 } 320 321 /** 322 * Stops recognition for the associated keyphrase. 323 * 324 * @return Indicates whether the call succeeded or not. 325 * @throws UnsupportedOperationException if the recognition isn't supported. 
326 * Callers should only call this method after a supported state callback on 327 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 328 * @throws IllegalStateException if the detector is in an invalid state. 329 * This may happen if another detector has been instantiated or the 330 * {@link VoiceInteractionService} hosting this detector has been shut down. 331 */ 332 public boolean stopRecognition() { 333 if (DBG) Slog.d(TAG, "stopRecognition()"); 334 synchronized (mLock) { 335 if (mAvailability == STATE_INVALID) { 336 throw new IllegalStateException("stopRecognition called on an invalid detector"); 337 } 338 339 // Check if we can start/stop a recognition. 340 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 341 throw new UnsupportedOperationException( 342 "Recognition for the given keyphrase is not supported"); 343 } 344 345 return stopRecognitionLocked() == STATUS_OK; 346 } 347 } 348 349 /** 350 * Gets an intent to manage the associated keyphrase. 351 * 352 * @param action The manage action that needs to be performed. 353 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 354 * {@link #MANAGE_ACTION_UN_ENROLL}. 355 * @return An {@link Intent} to manage the given keyphrase. 356 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 357 * Callers should only call this method after a supported state callback on 358 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 359 * @throws IllegalStateException if the detector is in an invalid state. 360 * This may happen if another detector has been instantiated or the 361 * {@link VoiceInteractionService} hosting this detector has been shut down. 
362 */ 363 public Intent getManageIntent(int action) { 364 if (DBG) Slog.d(TAG, "getManageIntent(" + action + ")"); 365 synchronized (mLock) { 366 return getManageIntentLocked(action); 367 } 368 } 369 370 private Intent getManageIntentLocked(int action) { 371 if (mAvailability == STATE_INVALID) { 372 throw new IllegalStateException("getManageIntent called on an invalid detector"); 373 } 374 375 // This method only makes sense if we can actually support a recognition. 376 if (mAvailability != STATE_KEYPHRASE_ENROLLED 377 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 378 throw new UnsupportedOperationException( 379 "Managing the given keyphrase is not supported"); 380 } 381 382 if (action != MANAGE_ACTION_ENROLL 383 && action != MANAGE_ACTION_RE_ENROLL 384 && action != MANAGE_ACTION_UN_ENROLL) { 385 throw new IllegalArgumentException("Invalid action specified " + action); 386 } 387 388 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 389 } 390 391 /** 392 * Invalidates this hotword detector so that any future calls to this result 393 * in an IllegalStateException. 394 * 395 * @hide 396 */ 397 void invalidate() { 398 synchronized (mLock) { 399 mAvailability = STATE_INVALID; 400 notifyStateChangedLocked(); 401 } 402 } 403 404 /** 405 * Reloads the sound models from the service. 406 * 407 * @hide 408 */ 409 void onSoundModelsChanged() { 410 synchronized (mLock) { 411 // FIXME: This should stop the recognition if it was using an enrolled sound model 412 // that's no longer available. 413 if (mAvailability == STATE_INVALID 414 || mAvailability == STATE_HARDWARE_UNAVAILABLE 415 || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) { 416 Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config"); 417 return; 418 } 419 420 // Execute a refresh availability task - which should then notify of a change. 
421 new RefreshAvailabiltyTask().execute(); 422 } 423 } 424 425 private int startRecognitionLocked(int recognitionFlags) { 426 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 427 // TODO: Do we need to do something about the confidence level here? 428 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 429 mKeyphraseMetadata.recognitionModeFlags, 0, new ConfidenceLevel[0]); 430 boolean captureTriggerAudio = 431 (recognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 432 boolean allowMultipleTriggers = 433 (recognitionFlags&RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0; 434 int code = STATUS_ERROR; 435 try { 436 code = mModelManagementService.startRecognition(mVoiceInteractionService, 437 mKeyphraseMetadata.id, mInternalCallback, 438 new RecognitionConfig(captureTriggerAudio, allowMultipleTriggers, 439 recognitionExtra, null /* additional data */)); 440 } catch (RemoteException e) { 441 Slog.w(TAG, "RemoteException in startRecognition!"); 442 } 443 if (code != STATUS_OK) { 444 Slog.w(TAG, "startRecognition() failed with error code " + code); 445 } 446 return code; 447 } 448 449 private int stopRecognitionLocked() { 450 int code = STATUS_ERROR; 451 try { 452 code = mModelManagementService.stopRecognition( 453 mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback); 454 } catch (RemoteException e) { 455 Slog.w(TAG, "RemoteException in stopRecognition!"); 456 } 457 458 if (code != STATUS_OK) { 459 Slog.w(TAG, "stopRecognition() failed with error code " + code); 460 } 461 return code; 462 } 463 464 private void notifyStateChangedLocked() { 465 Message message = Message.obtain(mHandler, MSG_AVAILABILITY_CHANGED); 466 message.arg1 = mAvailability; 467 message.sendToTarget(); 468 } 469 470 /** @hide */ 471 static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub { 472 private final Handler mHandler; 473 474 public SoundTriggerListener(Handler handler) { 475 mHandler = 
handler; 476 } 477 478 @Override 479 public void onDetected(KeyphraseRecognitionEvent event) { 480 if (DBG) { 481 Slog.d(TAG, "OnDetected(" + event + ")"); 482 } else { 483 Slog.i(TAG, "onDetected"); 484 } 485 Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED); 486 // FIXME: Check whether the event contains trigger data or not. 487 // FIXME: Read the audio format from the event. 488 if (event.data != null) { 489 AudioFormat audioFormat = new AudioFormat.Builder() 490 .setChannelMask(AudioFormat.CHANNEL_IN_MONO) 491 .setEncoding(AudioFormat.ENCODING_PCM_16BIT) 492 .setSampleRate(16000) 493 .build(); 494 message.obj = new TriggerAudio(audioFormat, event.data); 495 } 496 message.sendToTarget(); 497 } 498 499 @Override 500 public void onError(int status) { 501 Slog.i(TAG, "onError: " + status); 502 mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); 503 } 504 } 505 506 class MyHandler extends Handler { 507 @Override 508 public void handleMessage(Message msg) { 509 synchronized (mLock) { 510 if (mAvailability == STATE_INVALID) { 511 Slog.w(TAG, "Received message: " + msg.what + " for an invalid detector"); 512 return; 513 } 514 } 515 516 switch (msg.what) { 517 case MSG_AVAILABILITY_CHANGED: 518 mExternalCallback.onAvailabilityChanged(msg.arg1); 519 break; 520 case MSG_HOTWORD_DETECTED: 521 mExternalCallback.onDetected((TriggerAudio) msg.obj); 522 break; 523 case MSG_DETECTION_ERROR: 524 mExternalCallback.onError(); 525 break; 526 default: 527 super.handleMessage(msg); 528 } 529 } 530 } 531 532 class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> { 533 534 @Override 535 public Void doInBackground(Void... params) { 536 int availability = internalGetInitialAvailability(); 537 boolean enrolled = false; 538 // Fetch the sound model if the availability is one of the supported ones. 
539 if (availability == STATE_NOT_READY 540 || availability == STATE_KEYPHRASE_UNENROLLED 541 || availability == STATE_KEYPHRASE_ENROLLED) { 542 enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id); 543 if (!enrolled) { 544 availability = STATE_KEYPHRASE_UNENROLLED; 545 } else { 546 availability = STATE_KEYPHRASE_ENROLLED; 547 } 548 } 549 550 synchronized (mLock) { 551 if (DBG) { 552 Slog.d(TAG, "Hotword availability changed from " + mAvailability 553 + " -> " + availability); 554 } 555 mAvailability = availability; 556 notifyStateChangedLocked(); 557 } 558 return null; 559 } 560 561 /** 562 * @return The initial availability without checking the enrollment status. 563 */ 564 private int internalGetInitialAvailability() { 565 synchronized (mLock) { 566 // This detector has already been invalidated. 567 if (mAvailability == STATE_INVALID) { 568 return STATE_INVALID; 569 } 570 } 571 572 ModuleProperties dspModuleProperties = null; 573 try { 574 dspModuleProperties = 575 mModelManagementService.getDspModuleProperties(mVoiceInteractionService); 576 } catch (RemoteException e) { 577 Slog.w(TAG, "RemoteException in getDspProperties!"); 578 } 579 // No DSP available 580 if (dspModuleProperties == null) { 581 return STATE_HARDWARE_UNAVAILABLE; 582 } 583 // No enrollment application supports this keyphrase/locale 584 if (mKeyphraseMetadata == null) { 585 return STATE_KEYPHRASE_UNSUPPORTED; 586 } 587 return STATE_NOT_READY; 588 } 589 590 /** 591 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 592 */ 593 private boolean internalGetIsEnrolled(int keyphraseId) { 594 try { 595 return mModelManagementService.isEnrolledForKeyphrase( 596 mVoiceInteractionService, keyphraseId); 597 } catch (RemoteException e) { 598 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 599 } 600 return false; 601 } 602 } 603} 604