// AlwaysOnHotwordDetector.java revision e6cd2476aa9d07df0de0a0081ab66d8401a7e228
1/** 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.service.voice; 18 19import android.content.Intent; 20import android.hardware.soundtrigger.Keyphrase; 21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo; 22import android.hardware.soundtrigger.KeyphraseMetadata; 23import android.hardware.soundtrigger.KeyphraseSoundModel; 24import android.hardware.soundtrigger.SoundTrigger; 25import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel; 26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra; 27import android.hardware.soundtrigger.SoundTriggerHelper; 28import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; 29import android.os.RemoteException; 30import android.util.Slog; 31 32import com.android.internal.app.IVoiceInteractionManagerService; 33 34import java.util.List; 35 36/** 37 * A class that lets a VoiceInteractionService implementation interact with 38 * always-on keyphrase detection APIs. 39 */ 40public class AlwaysOnHotwordDetector { 41 //---- States of Keyphrase availability ----// 42 /** 43 * Indicates that the given keyphrase is not available on the system because of the 44 * hardware configuration. 45 */ 46 public static final int KEYPHRASE_HARDWARE_UNAVAILABLE = -2; 47 /** 48 * Indicates that the given keyphrase is not supported. 
49 */ 50 public static final int KEYPHRASE_UNSUPPORTED = -1; 51 /** 52 * Indicates that the given keyphrase is not enrolled. 53 */ 54 public static final int KEYPHRASE_UNENROLLED = 1; 55 /** 56 * Indicates that the given keyphrase is currently enrolled but not being actively listened for. 57 */ 58 public static final int KEYPHRASE_ENROLLED = 2; 59 60 // Keyphrase management actions ----// 61 /** Indicates that we need to enroll. */ 62 public static final int MANAGE_ACTION_ENROLL = 0; 63 /** Indicates that we need to re-enroll. */ 64 public static final int MANAGE_ACTION_RE_ENROLL = 1; 65 /** Indicates that we need to un-enroll. */ 66 public static final int MANAGE_ACTION_UN_ENROLL = 2; 67 68 /** 69 * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()} 70 */ 71 public static final int STATUS_ERROR = Integer.MIN_VALUE; 72 public static final int STATUS_OK = 1; 73 74 //---- Keyphrase recognition status ----// 75 /** Indicates that recognition is not available. */ 76 public static final int RECOGNITION_STATUS_NOT_AVAILABLE = 0x01; 77 /** Indicates that recognition has not been requested. */ 78 public static final int RECOGNITION_STATUS_NOT_REQUESTED = 0x02; 79 /** Indicates that recognition has been requested. */ 80 public static final int RECOGNITION_STATUS_REQUESTED = 0x04; 81 /** Indicates that recognition has been temporarily disabled. */ 82 public static final int RECOGNITION_STATUS_DISABLED_TEMPORARILY = 0x08; 83 /** Indicates that recognition is currently active . */ 84 public static final int RECOGNITION_STATUS_ACTIVE = 0x10; 85 86 //-- Flags for startRecogntion ----// 87 /** Empty flag for {@link #startRecognition(int)}. */ 88 public static final int RECOGNITION_FLAG_NONE = 0; 89 /** 90 * Recognition flag for {@link #startRecognition(int)} that indicates 91 * whether the trigger audio for hotword needs to be captured. 
92 */ 93 public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; 94 95 //---- Recognition mode flags ----// 96 // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags. 97 98 /** Simple recognition of the key phrase. Returned by {@link #getRecognitionStatus()} */ 99 public static final int RECOGNITION_MODE_VOICE_TRIGGER 100 = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER; 101 /** Trigger only if one user is identified. Returned by {@link #getRecognitionStatus()} */ 102 public static final int RECOGNITION_MODE_USER_IDENTIFICATION 103 = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION; 104 105 static final String TAG = "AlwaysOnHotwordDetector"; 106 107 private final String mText; 108 private final String mLocale; 109 /** 110 * The metadata of the Keyphrase, derived from the enrollment application. 111 * This may be null if this keyphrase isn't supported by the enrollment application. 112 */ 113 private final KeyphraseMetadata mKeyphraseMetadata; 114 /** 115 * The sound model for the keyphrase, derived from the model management service 116 * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet. 117 */ 118 private final KeyphraseSoundModel mEnrolledSoundModel; 119 private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo; 120 private final SoundTriggerHelper mSoundTriggerHelper; 121 private final SoundTriggerHelper.Listener mListener; 122 private final int mAvailability; 123 private final IVoiceInteractionService mVoiceInteractionService; 124 private final IVoiceInteractionManagerService mModelManagementService; 125 126 private int mRecognitionState; 127 128 /** 129 * Callbacks for always-on hotword detection. 130 */ 131 public interface Callback { 132 /** 133 * Called when the keyphrase is spoken. 134 * 135 * @param data Optional trigger audio data, if it was requested during 136 * {@link AlwaysOnHotwordDetector#startRecognition(int)}. 
137 */ 138 void onDetected(byte[] data); 139 /** 140 * Called when the detection for the associated keyphrase starts. 141 */ 142 void onDetectionStarted(); 143 /** 144 * Called when the detection for the associated keyphrase stops. 145 */ 146 void onDetectionStopped(); 147 } 148 149 /** 150 * @param text The keyphrase text to get the detector for. 151 * @param locale The java locale for the detector. 152 * @param callback A non-null Callback for receiving the recognition events. 153 * @param voiceInteractionService The current voice interaction service. 154 * @param modelManagementService A service that allows management of sound models. 155 * 156 * @hide 157 */ 158 public AlwaysOnHotwordDetector(String text, String locale, Callback callback, 159 KeyphraseEnrollmentInfo keyphraseEnrollmentInfo, 160 SoundTriggerHelper soundTriggerHelper, 161 IVoiceInteractionService voiceInteractionService, 162 IVoiceInteractionManagerService modelManagementService) { 163 mText = text; 164 mLocale = locale; 165 mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo; 166 mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale); 167 mListener = new SoundTriggerListener(callback); 168 mSoundTriggerHelper = soundTriggerHelper; 169 mVoiceInteractionService = voiceInteractionService; 170 mModelManagementService = modelManagementService; 171 if (mKeyphraseMetadata != null) { 172 mEnrolledSoundModel = internalGetKeyphraseSoundModel(mKeyphraseMetadata.id); 173 } else { 174 mEnrolledSoundModel = null; 175 } 176 mAvailability = internalGetAvailability(); 177 } 178 179 /** 180 * Gets the state of always-on hotword detection for the given keyphrase and locale 181 * on this system. 182 * Availability implies that the hardware on this system is capable of listening for 183 * the given keyphrase or not. 184 * 185 * @return Indicates if always-on hotword detection is available for the given keyphrase. 
186 * The return code is one of {@link #KEYPHRASE_HARDWARE_UNAVAILABLE}, 187 * {@link #KEYPHRASE_UNSUPPORTED}, {@link #KEYPHRASE_UNENROLLED} or 188 * {@link #KEYPHRASE_ENROLLED}. 189 */ 190 public int getAvailability() { 191 return mAvailability; 192 } 193 194 /** 195 * Gets the recognition modes supported by the associated keyphrase. 196 * 197 * @throws UnsupportedOperationException if the keyphrase itself isn't supported. 198 * Callers should check the availability by calling {@link #getAvailability()} 199 * before calling this method to avoid this exception. 200 */ 201 public int getSupportedRecognitionModes() { 202 if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE 203 || mAvailability == KEYPHRASE_UNSUPPORTED) { 204 throw new UnsupportedOperationException( 205 "Getting supported recognition modes for the keyphrase is not supported"); 206 } 207 208 return mKeyphraseMetadata.recognitionModeFlags; 209 } 210 211 /** 212 * Gets the status of the recognition. 213 * @return A flag comprised of {@link #RECOGNITION_STATUS_NOT_AVAILABLE}, 214 * {@link #RECOGNITION_STATUS_NOT_REQUESTED}, {@link #RECOGNITION_STATUS_REQUESTED}, 215 * {@link #RECOGNITION_STATUS_DISABLED_TEMPORARILY} and 216 * {@link #RECOGNITION_STATUS_ACTIVE}. 217 */ 218 public int getRecognitionStatus() { 219 return mRecognitionState; 220 } 221 222 /** 223 * Starts recognition for the associated keyphrase. 224 * 225 * @param recognitionFlags The flags to control the recognition properties. 226 * The allowed flags are {@link #RECOGNITION_FLAG_NONE} and 227 * {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}. 228 * @return One of {@link #STATUS_ERROR} or {@link #STATUS_OK}. 229 * @throws UnsupportedOperationException if the recognition isn't supported. 230 * Callers should check the availability by calling {@link #getAvailability()} 231 * before calling this method to avoid this exception. 
232 */ 233 public int startRecognition(int recognitionFlags) { 234 if (mAvailability != KEYPHRASE_ENROLLED 235 || (mRecognitionState&RECOGNITION_STATUS_NOT_AVAILABLE) != 0) { 236 throw new UnsupportedOperationException( 237 "Recognition for the given keyphrase is not supported"); 238 } 239 240 mRecognitionState &= RECOGNITION_STATUS_REQUESTED; 241 KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1]; 242 // TODO: Do we need to do something about the confidence level here? 243 // TODO: Take in captureTriggerAudio as a method param here. 244 recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id, 245 mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]); 246 boolean captureTriggerAudio = 247 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; 248 int code = mSoundTriggerHelper.startRecognition(mKeyphraseMetadata.id, 249 mEnrolledSoundModel.convertToSoundTriggerKeyphraseSoundModel(), mListener, 250 new RecognitionConfig( 251 captureTriggerAudio, recognitionExtra,null /* additional data */)); 252 if (code != SoundTriggerHelper.STATUS_OK) { 253 Slog.w(TAG, "startRecognition() failed with error code " + code); 254 return STATUS_ERROR; 255 } else { 256 return STATUS_OK; 257 } 258 } 259 260 /** 261 * Stops recognition for the associated keyphrase. 262 * 263 * @return One of {@link #STATUS_ERROR} or {@link #STATUS_OK}. 264 * @throws UnsupportedOperationException if the recognition isn't supported. 265 * Callers should check the availability by calling {@link #getAvailability()} 266 * before calling this method to avoid this exception. 
267 */ 268 public int stopRecognition() { 269 if (mAvailability != KEYPHRASE_ENROLLED) { 270 throw new UnsupportedOperationException( 271 "Recognition for the given keyphrase is not supported"); 272 } 273 274 mRecognitionState &= ~RECOGNITION_STATUS_NOT_REQUESTED; 275 int code = mSoundTriggerHelper.stopRecognition(mKeyphraseMetadata.id, mListener); 276 277 if (code != SoundTriggerHelper.STATUS_OK) { 278 Slog.w(TAG, "stopRecognition() failed with error code " + code); 279 return STATUS_ERROR; 280 } else { 281 return STATUS_OK; 282 } 283 } 284 285 /** 286 * Gets an intent to manage the associated keyphrase. 287 * 288 * @param action The manage action that needs to be performed. 289 * One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or 290 * {@link #MANAGE_ACTION_UN_ENROLL}. 291 * @return An {@link Intent} to manage the given keyphrase. 292 * @throws UnsupportedOperationException if managing they keyphrase isn't supported. 293 * Callers should check the availability by calling {@link #getAvailability()} 294 * before calling this method to avoid this exception. 
295 */ 296 public Intent getManageIntent(int action) { 297 if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE 298 || mAvailability == KEYPHRASE_UNSUPPORTED) { 299 throw new UnsupportedOperationException( 300 "Managing the given keyphrase is not supported"); 301 } 302 if (action != MANAGE_ACTION_ENROLL 303 && action != MANAGE_ACTION_RE_ENROLL 304 && action != MANAGE_ACTION_UN_ENROLL) { 305 throw new IllegalArgumentException("Invalid action specified " + action); 306 } 307 308 return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale); 309 } 310 311 private int internalGetAvailability() { 312 // No DSP available 313 if (mSoundTriggerHelper.dspInfo == null) { 314 mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE; 315 return KEYPHRASE_HARDWARE_UNAVAILABLE; 316 } 317 // No enrollment application supports this keyphrase/locale 318 if (mKeyphraseMetadata == null) { 319 mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE; 320 return KEYPHRASE_UNSUPPORTED; 321 } 322 // This keyphrase hasn't been enrolled. 323 if (mEnrolledSoundModel == null) { 324 mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE; 325 return KEYPHRASE_UNENROLLED; 326 } 327 // Mark recognition as available 328 mRecognitionState &= ~RECOGNITION_STATUS_NOT_AVAILABLE; 329 return KEYPHRASE_ENROLLED; 330 } 331 332 /** 333 * @return The corresponding {@link KeyphraseSoundModel} or null if none is found. 
334 */ 335 private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) { 336 List<KeyphraseSoundModel> soundModels; 337 try { 338 soundModels = mModelManagementService 339 .listRegisteredKeyphraseSoundModels(mVoiceInteractionService); 340 if (soundModels == null || soundModels.isEmpty()) { 341 Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId); 342 return null; 343 } 344 for (KeyphraseSoundModel soundModel : soundModels) { 345 if (soundModel.keyphrases == null) { 346 continue; 347 } 348 for (Keyphrase keyphrase : soundModel.keyphrases) { 349 // TODO: Check the user handle here to only load a model for the current user. 350 if (keyphrase.id == keyphraseId) { 351 return soundModel; 352 } 353 } 354 } 355 } catch (RemoteException e) { 356 Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!"); 357 } 358 return null; 359 } 360 361 /** @hide */ 362 static final class SoundTriggerListener implements SoundTriggerHelper.Listener { 363 private final Callback mCallback; 364 365 public SoundTriggerListener(Callback callback) { 366 this.mCallback = callback; 367 } 368 369 @Override 370 public void onKeyphraseSpoken(byte[] data) { 371 Slog.i(TAG, "onKeyphraseSpoken"); 372 mCallback.onDetected(data); 373 } 374 375 @Override 376 public void onListeningStateChanged(int state) { 377 Slog.i(TAG, "onListeningStateChanged: state=" + state); 378 // TODO: Set/unset the RECOGNITION_STATUS_ACTIVE flag here. 379 if (state == SoundTriggerHelper.STATE_STARTED) { 380 mCallback.onDetectionStarted(); 381 } else if (state == SoundTriggerHelper.STATE_STOPPED) { 382 mCallback.onDetectionStopped(); 383 } 384 } 385 } 386} 387