AlwaysOnHotwordDetector.java revision 055897208d659e9734a82def88be4a806ff55448
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.content.Intent; 20import android.hardware.soundtrigger.IRecognitionStatusCallback; 21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 22import android.hardware.soundtrigger.KeyphraseMetadata; 23import android.hardware.soundtrigger.SoundTrigger; 24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 25import android.hardware.soundtrigger.SoundTrigger.Keyphrase; 26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel; 28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties; 29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 30import android.os.RemoteException; 31import android.util.Slog; 32 33import com.android.internal.app.IVoiceInteractionManagerService; 34 35import java.util.List; 36 37/** 38 * A class that lets a VoiceInteractionService implementation interact with 39 * always-on keyphrase detection APIs. 40 */ 41public class AlwaysOnHotwordDetector { 42 //---- States of Keyphrase availability ----// 43 /** 44 * Indicates that the given keyphrase is not available on the system because of the 45 * hardware configuration. 46 */ 47 public static final int KEYPHRASE_HARDWARE_UNAVAILABLE = -2; 48 /** 49 * Indicates that the given keyphrase is not supported. 50 */ 51 public static final int KEYPHRASE_UNSUPPORTED = -1; 52 /** 53 * Indicates that the given keyphrase is not enrolled. 54 */ 55 public static final int KEYPHRASE_UNENROLLED = 1; 56 /** 57 * Indicates that the given keyphrase is currently enrolled but not being actively listened for. 58 */ 59 public static final int KEYPHRASE_ENROLLED = 2; 60 61 // Keyphrase management actions ----// 62 /** Indicates that we need to enroll. */ 63 public static final int MANAGE_ACTION_ENROLL = 0; 64 /** Indicates that we need to re-enroll. */ 65 public static final int MANAGE_ACTION_RE_ENROLL = 1; 66 /** Indicates that we need to un-enroll. */ 67 public static final int MANAGE_ACTION_UN_ENROLL = 2; 68 69 /** 70 * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()} 71 */ 72 public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR; 73 public static final int STATUS_OK = SoundTrigger.STATUS_OK; 74 75 //---- Keyphrase recognition status ----// 76 /** Indicates that recognition is not available. */ 77 public static final int RECOGNITION_STATUS_NOT_AVAILABLE = 0x01; 78 /** Indicates that recognition has not been requested. */ 79 public static final int RECOGNITION_STATUS_NOT_REQUESTED = 0x02; 80 /** Indicates that recognition has been requested. */ 81 public static final int RECOGNITION_STATUS_REQUESTED = 0x04; 82 /** Indicates that recognition has been temporarily disabled. */ 83 public static final int RECOGNITION_STATUS_DISABLED_TEMPORARILY = 0x08; 84 /** Indicates that recognition is currently active . */ 85 public static final int RECOGNITION_STATUS_ACTIVE = 0x10; 86 87 //-- Flags for startRecogntion ----// 88 /** Empty flag for {@link #startRecognition(int)}. */ 89 public static final int RECOGNITION_FLAG_NONE = 0; 90 /** 91 * Recognition flag for {@link #startRecognition(int)} that indicates 92 * whether the trigger audio for hotword needs to be captured. 93 */ 94 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 95 96 //---- Recognition mode flags ----// 97 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 98 99 /** Simple recognition of the key phrase. Returned by {@link #getRecognitionStatus()} */ 100 public static final int RECOGNITION_MODE_VOICE_TRIGGER 101 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 102 /** Trigger only if one user is identified. Returned by {@link #getRecognitionStatus()} */ 103 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 104 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 105 106 static final String TAG = "AlwaysOnHotwordDetector"; 107 108 private final String mText; 109 private final String mLocale; 110 /** 111 * The metadata of the Keyphrase, derived from the enrollment application. 112 * This may be null if this keyphrase isn't supported by the enrollment application. 113 */ 114 private final KeyphraseMetadata mKeyphraseMetadata; 115 /** 116 * The sound model for the keyphrase, derived from the model management service 117 * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet. 118 */ 119 private final KeyphraseSoundModel mEnrolledSoundModel; 120 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 121 private final int mAvailability; 122 private final IVoiceInteractionService mVoiceInteractionService; 123 private final IVoiceInteractionManagerService mModelManagementService; 124 private final SoundTriggerListener mInternalCallback; 125 126 private int mRecognitionState; 127 128 /** 129 * Callbacks for always-on hotword detection. 130 */ 131 public interface Callback { 132 /** 133 * Called when the keyphrase is spoken. 134 * 135 * @param data Optional trigger audio data, if it was requested during 136 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 137 */ 138 void onDetected(byte[] data); 139 /** 140 * Called when the detection for the associated keyphrase starts. 141 */ 142 void onDetectionStarted(); 143 /** 144 * Called when the detection for the associated keyphrase stops. 145 */ 146 void onDetectionStopped(); 147 } 148 149 /** 150 * @param text The keyphrase text to get the detector for. 151 * @param locale The java locale for the detector. 152 * @param callback A non-null Callback for receiving the recognition events. 153 * @param voiceInteractionService The current voice interaction service. 154 * @param modelManagementService A service that allows management of sound models. 155 * 156 * @hide 157 */ 158 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 159 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 160 IVoiceInteractionService voiceInteractionService, 161 IVoiceInteractionManagerService modelManagementService) { 162 mText = text; 163 mLocale = locale; 164 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 165 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 166 mInternalCallback = new SoundTriggerListener(callback); 167 mVoiceInteractionService = voiceInteractionService; 168 mModelManagementService = modelManagementService; 169 if (mKeyphraseMetadata != null) { 170 mEnrolledSoundModel = internalGetKeyphraseSoundModel(mKeyphraseMetadata.id); 171 } else { 172 mEnrolledSoundModel = null; 173 } 174 mAvailability = internalGetAvailability(); 175 } 176 177 /** 178 * Gets the state of always-on hotword detection for the given keyphrase and locale 179 * on this system. 180 * Availability implies that the hardware on this system is capable of listening for 181 * the given keyphrase or not. 182 * 183 * @return Indicates if always-on hotword detection is available for the given keyphrase. 184 * The return code is one of {@link #KEYPHRASE_HARDWARE_UNAVAILABLE}, 185 * {@link #KEYPHRASE_UNSUPPORTED}, {@link #KEYPHRASE_UNENROLLED} or 186 * {@link #KEYPHRASE_ENROLLED}. 187 */ 188 public int getAvailability() { 189 return mAvailability; 190 } 191 192 /** 193 * Gets the recognition modes supported by the associated keyphrase. 194 * 195 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 196 * Callers should check the availability by calling {@link #getAvailability()} 197 * before calling this method to avoid this exception. 198 */ 199 public int getSupportedRecognitionModes() { 200 if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE 201 || mAvailability == KEYPHRASE_UNSUPPORTED) { 202 throw new UnsupportedOperationException( 203 "Getting supported recognition modes for the keyphrase is not supported"); 204 } 205 206 return mKeyphraseMetadata.recognitionModeFlags; 207 } 208 209 /** 210 * Gets the status of the recognition. 211 * @return A flag comprised of {@link #RECOGNITION_STATUS_NOT_AVAILABLE}, 212 * {@link #RECOGNITION_STATUS_NOT_REQUESTED}, {@link #RECOGNITION_STATUS_REQUESTED}, 213 * {@link #RECOGNITION_STATUS_DISABLED_TEMPORARILY} and 214 * {@link #RECOGNITION_STATUS_ACTIVE}. 215 */ 216 public int getRecognitionStatus() { 217 return mRecognitionState; 218 } 219 220 /** 221 * Starts recognition for the associated keyphrase. 222 * 223 * @param recognitionFlags The flags to control the recognition properties. 224 * The allowed flags are {@link #RECOGNITION_FLAG_NONE} and 225 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}. 226 * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise. 227 * @throws UnsupportedOperationException if the recognition isn't supported. 228 * Callers should check the availability by calling {@link #getAvailability()} 229 * before calling this method to avoid this exception. 230 */ 231 public int startRecognition(int recognitionFlags) { 232 if (mAvailability != KEYPHRASE_ENROLLED 233 || (mRecognitionState&RECOGNITION_STATUS_NOT_AVAILABLE) != 0) { 234 throw new UnsupportedOperationException( 235 "Recognition for the given keyphrase is not supported"); 236 } 237 238 mRecognitionState &= RECOGNITION_STATUS_REQUESTED; 239 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 240 // TODO: Do we need to do something about the confidence level here? 241 // TODO: Take in captureTriggerAudio as a method param here. 242 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 243 mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]); 244 boolean captureTriggerAudio = 245 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 246 int code = STATUS_ERROR; 247 try { 248 code = mModelManagementService.startRecognition(mVoiceInteractionService, 249 mKeyphraseMetadata.id, mEnrolledSoundModel, mInternalCallback, 250 new RecognitionConfig( 251 captureTriggerAudio, recognitionExtra, null /* additional data */)); 252 } catch (RemoteException e) { 253 Slog.w(TAG, "RemoteException in startRecognition!"); 254 } 255 if (code != STATUS_OK) { 256 Slog.w(TAG, "startRecognition() failed with error code " + code); 257 } 258 return code; 259 } 260 261 /** 262 * Stops recognition for the associated keyphrase. 263 * 264 * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise. 265 * @throws UnsupportedOperationException if the recognition isn't supported. 266 * Callers should check the availability by calling {@link #getAvailability()} 267 * before calling this method to avoid this exception. 268 */ 269 public int stopRecognition() { 270 if (mAvailability != KEYPHRASE_ENROLLED) { 271 throw new UnsupportedOperationException( 272 "Recognition for the given keyphrase is not supported"); 273 } 274 275 mRecognitionState &= ~RECOGNITION_STATUS_NOT_REQUESTED; 276 int code = STATUS_ERROR; 277 try { 278 code = mModelManagementService.stopRecognition( 279 mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback); 280 } catch (RemoteException e) { 281 Slog.w(TAG, "RemoteException in stopRecognition!"); 282 } 283 284 if (code != STATUS_OK) { 285 Slog.w(TAG, "stopRecognition() failed with error code " + code); 286 } 287 return code; 288 } 289 290 /** 291 * Gets an intent to manage the associated keyphrase. 292 * 293 * @param action The manage action that needs to be performed. 294 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 295 * {@link #MANAGE_ACTION_UN_ENROLL}. 296 * @return An {@link Intent} to manage the given keyphrase. 297 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 298 * Callers should check the availability by calling {@link #getAvailability()} 299 * before calling this method to avoid this exception. 300 */ 301 public Intent getManageIntent(int action) { 302 if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE 303 || mAvailability == KEYPHRASE_UNSUPPORTED) { 304 throw new UnsupportedOperationException( 305 "Managing the given keyphrase is not supported"); 306 } 307 if (action != MANAGE_ACTION_ENROLL 308 && action != MANAGE_ACTION_RE_ENROLL 309 && action != MANAGE_ACTION_UN_ENROLL) { 310 throw new IllegalArgumentException("Invalid action specified " + action); 311 } 312 313 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 314 } 315 316 private int internalGetAvailability() { 317 ModuleProperties dspModuleProperties = null; 318 try { 319 dspModuleProperties = 320 mModelManagementService.getDspModuleProperties(mVoiceInteractionService); 321 } catch (RemoteException e) { 322 Slog.w(TAG, "RemoteException in getDspProperties!"); 323 } 324 // No DSP available 325 if (dspModuleProperties == null) { 326 mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE; 327 return KEYPHRASE_HARDWARE_UNAVAILABLE; 328 } 329 // No enrollment application supports this keyphrase/locale 330 if (mKeyphraseMetadata == null) { 331 mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE; 332 return KEYPHRASE_UNSUPPORTED; 333 } 334 // This keyphrase hasn't been enrolled. 335 if (mEnrolledSoundModel == null) { 336 mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE; 337 return KEYPHRASE_UNENROLLED; 338 } 339 // Mark recognition as available 340 mRecognitionState &= ~RECOGNITION_STATUS_NOT_AVAILABLE; 341 return KEYPHRASE_ENROLLED; 342 } 343 344 /** 345 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 346 */ 347 private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) { 348 List<KeyphraseSoundModel> soundModels; 349 try { 350 soundModels = mModelManagementService 351 .listRegisteredKeyphraseSoundModels(mVoiceInteractionService); 352 if (soundModels == null || soundModels.isEmpty()) { 353 Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId); 354 return null; 355 } 356 for (KeyphraseSoundModel soundModel : soundModels) { 357 if (soundModel.keyphrases == null) { 358 continue; 359 } 360 for (Keyphrase keyphrase : soundModel.keyphrases) { 361 // TODO: Check the user handle here to only load a model for the current user. 362 if (keyphrase.id == keyphraseId) { 363 return soundModel; 364 } 365 } 366 } 367 } catch (RemoteException e) { 368 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 369 } 370 return null; 371 } 372 373 /** @hide */ 374 static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub { 375 private final Callback mCallback; 376 377 public SoundTriggerListener(Callback callback) { 378 this.mCallback = callback; 379 } 380 381 @Override 382 public void onDetected(byte[] data) { 383 Slog.i(TAG, "onKeyphraseSpoken"); 384 mCallback.onDetected(data); 385 } 386 387 @Override 388 public void onDetectionStarted() { 389 // TODO: Set the RECOGNITION_STATUS_ACTIVE flag here. 390 mCallback.onDetectionStarted(); 391 } 392 393 @Override 394 public void onDetectionStopped() { 395 // TODO: Unset the RECOGNITION_STATUS_ACTIVE flag here. 396 mCallback.onDetectionStopped(); 397 } 398 } 399} 400