AlwaysOnHotwordDetector.java revision 6817337118655d5792e36e954b123e6daa4174a6
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.content.Intent; 20import android.hardware.soundtrigger.IRecognitionStatusCallback; 21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 22import android.hardware.soundtrigger.KeyphraseMetadata; 23import android.hardware.soundtrigger.SoundTrigger; 24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 25import android.hardware.soundtrigger.SoundTrigger.Keyphrase; 26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent; 27import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 28import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel; 29import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 30import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 31import android.hardware.soundtrigger.SoundTrigger.RecognitionEvent; 32import android.os.AsyncTask; 33import android.os.Handler; 34import android.os.Message; 35import android.os.RemoteException; 36import android.util.Slog; 37 38import com.android.internal.app.IVoiceInteractionManagerService; 39 40import java.util.List; 41 42/** 43 * A class that lets a VoiceInteractionService implementation interact with 44 * always-on keyphrase detection APIs. 45 */ 46public class AlwaysOnHotwordDetector { 47 //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----// 48 /** 49 * Indicates that this hotword detector is no longer valid for any recognition 50 * and should not be used anymore. 51 */ 52 public static final int STATE_INVALID = -3; 53 /** 54 * Indicates that recognition for the given keyphrase is not available on the system 55 * because of the hardware configuration. 56 */ 57 public static final int STATE_HARDWARE_UNAVAILABLE = -2; 58 /** 59 * Indicates that recognition for the given keyphrase is not supported. 60 */ 61 public static final int STATE_KEYPHRASE_UNSUPPORTED = -1; 62 /** 63 * Indicates that the given keyphrase is not enrolled. 64 */ 65 public static final int STATE_KEYPHRASE_UNENROLLED = 1; 66 /** 67 * Indicates that the given keyphrase is currently enrolled and it's possible to start 68 * recognition for it. 69 */ 70 public static final int STATE_KEYPHRASE_ENROLLED = 2; 71 72 /** 73 * Indicates that the detector isn't ready currently. 74 */ 75 private static final int STATE_NOT_READY = 0; 76 77 // Keyphrase management actions. Used in getManageIntent() ----// 78 /** Indicates that we need to enroll. */ 79 public static final int MANAGE_ACTION_ENROLL = 0; 80 /** Indicates that we need to re-enroll. */ 81 public static final int MANAGE_ACTION_RE_ENROLL = 1; 82 /** Indicates that we need to un-enroll. */ 83 public static final int MANAGE_ACTION_UN_ENROLL = 2; 84 85 /** 86 * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()} 87 */ 88 public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR; 89 public static final int STATUS_OK = SoundTrigger.STATUS_OK; 90 91 //-- Flags for startRecogntion ----// 92 /** Empty flag for {@link #startRecognition(int)}. */ 93 public static final int RECOGNITION_FLAG_NONE = 0; 94 /** 95 * Recognition flag for {@link #startRecognition(int)} that indicates 96 * whether the trigger audio for hotword needs to be captured. 97 */ 98 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 99 100 //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----// 101 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 102 103 /** 104 * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()} 105 */ 106 public static final int RECOGNITION_MODE_VOICE_TRIGGER 107 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 108 /** 109 * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()} 110 */ 111 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 112 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 113 114 static final String TAG = "AlwaysOnHotwordDetector"; 115 // TODO: Set to false. 116 static final boolean DBG = true; 117 118 private static final int MSG_STATE_CHANGED = 1; 119 private static final int MSG_HOTWORD_DETECTED = 2; 120 private static final int MSG_DETECTION_STOPPED = 3; 121 122 private final String mText; 123 private final String mLocale; 124 /** 125 * The metadata of the Keyphrase, derived from the enrollment application. 126 * This may be null if this keyphrase isn't supported by the enrollment application. 127 */ 128 private final KeyphraseMetadata mKeyphraseMetadata; 129 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 130 private final IVoiceInteractionService mVoiceInteractionService; 131 private final IVoiceInteractionManagerService mModelManagementService; 132 private final SoundTriggerListener mInternalCallback; 133 private final Callback mExternalCallback; 134 private final Object mLock = new Object(); 135 private final Handler mHandler; 136 137 private int mAvailability = STATE_NOT_READY; 138 139 /** 140 * Callbacks for always-on hotword detection. 141 */ 142 public interface Callback { 143 /** 144 * Called when the hotword availability changes. 145 * This indicates a change in the availability of recognition for the given keyphrase. 146 * It's called at least once with the initial availability.<p/> 147 * 148 * Availability implies whether the hardware on this system is capable of listening for 149 * the given keyphrase or not. <p/> 150 * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or 151 * {@link #STATE_KEYPHRASE_UNSUPPORTED}, 152 * detection is not possible and no further interaction should be 153 * performed with this detector. <br/> 154 * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin 155 * an enrollment flow for the keyphrase. <br/> 156 * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/> 157 * 158 * If the return code is {@link #STATE_INVALID}, this detector is stale. 159 * A new detector should be obtained for use in the future. 160 */ 161 void onAvailabilityChanged(int status); 162 /** 163 * Called when the keyphrase is spoken. 164 * 165 * @param data Optional trigger audio data, if it was requested during 166 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 167 */ 168 void onDetected(byte[] data); 169 /** 170 * Called when the detection for the associated keyphrase stops. 171 */ 172 void onDetectionStopped(); 173 } 174 175 /** 176 * @param text The keyphrase text to get the detector for. 177 * @param locale The java locale for the detector. 178 * @param callback A non-null Callback for receiving the recognition events. 179 * @param voiceInteractionService The current voice interaction service. 180 * @param modelManagementService A service that allows management of sound models. 181 * 182 * @hide 183 */ 184 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 185 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 186 IVoiceInteractionService voiceInteractionService, 187 IVoiceInteractionManagerService modelManagementService) { 188 mText = text; 189 mLocale = locale; 190 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 191 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 192 mExternalCallback = callback; 193 mHandler = new MyHandler(); 194 mInternalCallback = new SoundTriggerListener(mHandler); 195 mVoiceInteractionService = voiceInteractionService; 196 mModelManagementService = modelManagementService; 197 new RefreshAvailabiltyTask().execute(); 198 } 199 200 /** 201 * Gets the recognition modes supported by the associated keyphrase. 202 * 203 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 204 * Callers should only call this method after a supported state callback on 205 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 206 */ 207 public int getSupportedRecognitionModes() { 208 synchronized (mLock) { 209 return getSupportedRecognitionModesLocked(); 210 } 211 } 212 213 private int getSupportedRecognitionModesLocked() { 214 // This method only makes sense if we can actually support a recognition. 215 if (mAvailability != STATE_KEYPHRASE_ENROLLED 216 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 217 throw new UnsupportedOperationException( 218 "Getting supported recognition modes for the keyphrase is not supported"); 219 } 220 221 return mKeyphraseMetadata.recognitionModeFlags; 222 } 223 224 /** 225 * Starts recognition for the associated keyphrase. 226 * 227 * @param recognitionFlags The flags to control the recognition properties. 228 * The allowed flags are {@link #RECOGNITION_FLAG_NONE} and 229 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}. 230 * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise. 231 * @throws UnsupportedOperationException if the recognition isn't supported. 232 * Callers should only call this method after a supported state callback on 233 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 234 */ 235 public int startRecognition(int recognitionFlags) { 236 synchronized (mLock) { 237 return startRecognitionLocked(recognitionFlags); 238 } 239 } 240 241 private int startRecognitionLocked(int recognitionFlags) { 242 // This method only makes sense if we can start a recognition. 243 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 244 throw new UnsupportedOperationException( 245 "Recognition for the given keyphrase is not supported"); 246 } 247 248 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 249 // TODO: Do we need to do something about the confidence level here? 250 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 251 mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]); 252 boolean captureTriggerAudio = 253 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 254 int code = STATUS_ERROR; 255 try { 256 code = mModelManagementService.startRecognition(mVoiceInteractionService, 257 mKeyphraseMetadata.id, mInternalCallback, 258 new RecognitionConfig( 259 captureTriggerAudio, recognitionExtra, null /* additional data */)); 260 } catch (RemoteException e) { 261 Slog.w(TAG, "RemoteException in startRecognition!"); 262 } 263 if (code != STATUS_OK) { 264 Slog.w(TAG, "startRecognition() failed with error code " + code); 265 } 266 return code; 267 } 268 269 /** 270 * Stops recognition for the associated keyphrase. 271 * 272 * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise. 273 * @throws UnsupportedOperationException if the recognition isn't supported. 274 * Callers should only call this method after a supported state callback on 275 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 276 */ 277 public int stopRecognition() { 278 synchronized (mLock) { 279 return stopRecognitionLocked(); 280 } 281 } 282 283 private int stopRecognitionLocked() { 284 // This method only makes sense if we can start a recognition. 285 if (mAvailability != STATE_KEYPHRASE_ENROLLED) { 286 throw new UnsupportedOperationException( 287 "Recognition for the given keyphrase is not supported"); 288 } 289 290 int code = STATUS_ERROR; 291 try { 292 code = mModelManagementService.stopRecognition( 293 mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback); 294 } catch (RemoteException e) { 295 Slog.w(TAG, "RemoteException in stopRecognition!"); 296 } 297 298 if (code != STATUS_OK) { 299 Slog.w(TAG, "stopRecognition() failed with error code " + code); 300 } 301 return code; 302 } 303 304 /** 305 * Gets an intent to manage the associated keyphrase. 306 * 307 * @param action The manage action that needs to be performed. 308 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 309 * {@link #MANAGE_ACTION_UN_ENROLL}. 310 * @return An {@link Intent} to manage the given keyphrase. 311 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 312 * Callers should only call this method after a supported state callback on 313 * {@link Callback#onAvailabilityChanged(int)} to avoid this exception. 314 */ 315 public Intent getManageIntent(int action) { 316 // This method only makes sense if we can actually support a recognition. 317 if (mAvailability != STATE_KEYPHRASE_ENROLLED 318 && mAvailability != STATE_KEYPHRASE_UNENROLLED) { 319 throw new UnsupportedOperationException( 320 "Managing the given keyphrase is not supported"); 321 } 322 if (action != MANAGE_ACTION_ENROLL 323 && action != MANAGE_ACTION_RE_ENROLL 324 && action != MANAGE_ACTION_UN_ENROLL) { 325 throw new IllegalArgumentException("Invalid action specified " + action); 326 } 327 328 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 329 } 330 331 /** 332 * Invalidates this hotword detector so that any future calls to this result 333 * in an IllegalStateException. 334 * 335 * @hide 336 */ 337 void invalidate() { 338 synchronized (mLock) { 339 mAvailability = STATE_INVALID; 340 notifyStateChangedLocked(); 341 } 342 } 343 344 /** 345 * Reloads the sound models from the service. 346 * 347 * @hide 348 */ 349 void onSoundModelsChanged() { 350 synchronized (mLock) { 351 // TODO: This should stop the recognition if it was using an enrolled sound model 352 // that's no longer available. 353 if (mAvailability == STATE_INVALID 354 || mAvailability == STATE_HARDWARE_UNAVAILABLE 355 || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) { 356 Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config"); 357 return; 358 } 359 360 // Execute a refresh availability task - which should then notify of a change. 361 new RefreshAvailabiltyTask().execute(); 362 } 363 } 364 365 private void notifyStateChangedLocked() { 366 Message message = Message.obtain(mHandler, MSG_STATE_CHANGED); 367 message.arg1 = mAvailability; 368 message.sendToTarget(); 369 } 370 371 /** @hide */ 372 static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub { 373 private final Handler mHandler; 374 375 public SoundTriggerListener(Handler handler) { 376 mHandler = handler; 377 } 378 379 @Override 380 public void onDetected(KeyphraseRecognitionEvent event) { 381 Slog.i(TAG, "onDetected"); 382 Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED); 383 message.obj = event.data; 384 message.sendToTarget(); 385 } 386 387 @Override 388 public void onDetectionStopped() { 389 Slog.i(TAG, "onDetectionStopped"); 390 mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED); 391 } 392 } 393 394 class MyHandler extends Handler { 395 @Override 396 public void handleMessage(Message msg) { 397 switch (msg.what) { 398 case MSG_STATE_CHANGED: 399 mExternalCallback.onAvailabilityChanged(msg.arg1); 400 break; 401 case MSG_HOTWORD_DETECTED: 402 mExternalCallback.onDetected((byte[]) msg.obj); 403 break; 404 case MSG_DETECTION_STOPPED: 405 mExternalCallback.onDetectionStopped(); 406 default: 407 super.handleMessage(msg); 408 } 409 } 410 } 411 412 class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> { 413 414 @Override 415 public Void doInBackground(Void... params) { 416 int availability = internalGetInitialAvailability(); 417 boolean enrolled = false; 418 // Fetch the sound model if the availability is one of the supported ones. 419 if (availability == STATE_NOT_READY 420 || availability == STATE_KEYPHRASE_UNENROLLED 421 || availability == STATE_KEYPHRASE_ENROLLED) { 422 enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id); 423 if (!enrolled) { 424 availability = STATE_KEYPHRASE_UNENROLLED; 425 } else { 426 availability = STATE_KEYPHRASE_ENROLLED; 427 } 428 } 429 430 synchronized (mLock) { 431 if (DBG) { 432 Slog.d(TAG, "Hotword availability changed from " + mAvailability 433 + " -> " + availability); 434 } 435 mAvailability = availability; 436 notifyStateChangedLocked(); 437 } 438 return null; 439 } 440 441 /** 442 * @return The initial availability without checking the enrollment status. 443 */ 444 private int internalGetInitialAvailability() { 445 synchronized (mLock) { 446 // This detector has already been invalidated. 447 if (mAvailability == STATE_INVALID) { 448 return STATE_INVALID; 449 } 450 } 451 452 ModuleProperties dspModuleProperties = null; 453 try { 454 dspModuleProperties = 455 mModelManagementService.getDspModuleProperties(mVoiceInteractionService); 456 } catch (RemoteException e) { 457 Slog.w(TAG, "RemoteException in getDspProperties!"); 458 } 459 // No DSP available 460 if (dspModuleProperties == null) { 461 return STATE_HARDWARE_UNAVAILABLE; 462 } 463 // No enrollment application supports this keyphrase/locale 464 if (mKeyphraseMetadata == null) { 465 return STATE_KEYPHRASE_UNSUPPORTED; 466 } 467 return STATE_NOT_READY; 468 } 469 470 /** 471 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 472 */ 473 private boolean internalGetIsEnrolled(int keyphraseId) { 474 try { 475 return mModelManagementService.isEnrolledForKeyphrase( 476 mVoiceInteractionService, keyphraseId); 477 } catch (RemoteException e) { 478 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 479 } 480 return false; 481 } 482 } 483} 484